feat(server): 重构费用报销服务,优化报销单创建和审批流程逻辑

This commit is contained in:
caoxiaozhu
2026-05-14 09:32:36 +00:00
parent 8b39f48dec
commit b0fef46fc6
4 changed files with 244 additions and 14 deletions

View File

@@ -54,6 +54,32 @@ class ExpenseClaimAttachmentAnalysisRead(BaseModel):
suggestion: str = ""
class ExpenseClaimAttachmentDocumentFieldRead(BaseModel):
    """A single key/label/value field attached to a recognized document.

    All three members are required strings; the model defines no defaults.
    """

    key: str
    label: str
    value: str
class ExpenseClaimAttachmentDocumentInfoRead(BaseModel):
    """Recognized document classification plus its extracted fields.

    Every member has a default, so the model can be built from a partial
    payload; the code defaults are ``"other"`` and the list is empty.
    """

    # Document type code and the label shown for it.
    document_type: str = "other"
    document_type_label: str = "其他单据"

    # Scene code and the label shown for it.
    scene_code: str = "other"
    scene_label: str = "其他票据"

    # default_factory gives each instance its own list.
    fields: list[ExpenseClaimAttachmentDocumentFieldRead] = Field(default_factory=list)
class ExpenseClaimAttachmentRequirementRead(BaseModel):
    """Outcome of checking an attachment against an expense type's scenes.

    Every member has a default; ``matches`` defaults to ``False``.
    """

    # Whether the recognized scene satisfies the expense type's requirement.
    matches: bool = False

    # Expense type being checked and its display label.
    current_expense_type: str = "other"
    current_expense_type_label: str = "其他"

    # Display labels of the scenes accepted for the expense type.
    allowed_scene_labels: list[str] = Field(default_factory=list)

    # What was recognized on the attachment: scene and document type.
    recognized_scene_code: str = "other"
    recognized_scene_label: str = "其他票据"
    recognized_document_type: str = "other"
    recognized_document_type_label: str = "其他单据"

    # Human-readable explanation of the result.
    message: str = ""
class ExpenseClaimAttachmentRead(BaseModel):
file_name: str
storage_key: str
@@ -62,6 +88,8 @@ class ExpenseClaimAttachmentRead(BaseModel):
uploaded_at: datetime | None = None
previewable: bool = True
analysis: ExpenseClaimAttachmentAnalysisRead | None = None
document_info: ExpenseClaimAttachmentDocumentInfoRead | None = None
requirement_check: ExpenseClaimAttachmentRequirementRead | None = None
class ExpenseClaimItemUpdate(BaseModel):

View File

@@ -21,6 +21,7 @@ from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import ExpenseClaimItemCreate, ExpenseClaimItemUpdate
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.ocr import OcrService
EXPENSE_TYPE_LABELS = {
@@ -89,6 +90,18 @@ EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = {
"training": {"training"},
}
# Display label for each document scene code; "other" is the catch-all entry.
DOCUMENT_SCENE_LABELS: dict[str, str] = {
    "travel": "差旅",
    "hotel": "住宿",
    "transport": "交通",
    "meal": "餐饮",
    "entertainment": "业务招待",
    "office": "办公用品",
    "meeting": "会务",
    "training": "培训",
    "other": "其他票据",
}
class ExpenseClaimService:
def __init__(self, db: Session) -> None:
@@ -307,19 +320,28 @@ class ExpenseClaimService:
item=item,
)
ocr_document = None
document_info = None
requirement_check = None
ocr_status = "empty"
ocr_error = ""
try:
ocr_result = OcrService().recognize_files(
ocr_result = OcrService(self.db).recognize_files(
[(normalized_name, content, media_type or "application/octet-stream")]
)
documents = list(ocr_result.documents or [])
if documents:
ocr_document = documents[0]
ocr_status = "recognized"
document_info = self._build_attachment_document_info(ocr_document)
requirement_check = self._build_attachment_requirement_check(
item=item,
document_info=document_info,
)
attachment_analysis = self._build_attachment_analysis(
document=ocr_document,
item=item,
document_info=document_info,
requirement_check=requirement_check,
)
except Exception as exc: # pragma: no cover - fallback path depends on OCR runtime
ocr_status = "failed"
@@ -342,12 +364,21 @@ class ExpenseClaimService:
"uploaded_at": datetime.now(UTC).isoformat(),
"previewable": self._is_previewable_media_type(media_type, normalized_name),
"analysis": attachment_analysis,
"document_info": document_info,
"requirement_check": requirement_check,
"ocr_status": ocr_status,
"ocr_error": ocr_error,
"ocr_text": str(getattr(ocr_document, "text", "") or ""),
"ocr_summary": str(getattr(ocr_document, "summary", "") or ""),
"ocr_avg_score": float(getattr(ocr_document, "avg_score", 0.0) or 0.0),
"ocr_line_count": int(getattr(ocr_document, "line_count", 0) or 0),
"ocr_classification_source": str(getattr(ocr_document, "classification_source", "") or ""),
"ocr_classification_confidence": float(getattr(ocr_document, "classification_confidence", 0.0) or 0.0),
"ocr_classification_evidence": [
str(item)
for item in getattr(ocr_document, "classification_evidence", []) or []
if str(item).strip()
],
"ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []],
}
self._write_attachment_meta(file_path, meta)
@@ -1129,6 +1160,14 @@ class ExpenseClaimService:
if not isinstance(analysis, dict):
analysis = None
document_info = metadata.get("document_info")
if not isinstance(document_info, dict):
document_info = None
requirement_check = metadata.get("requirement_check")
if not isinstance(requirement_check, dict):
requirement_check = None
return {
"file_name": str(metadata.get("file_name") or filename),
"storage_key": str(item.invoice_id or ""),
@@ -1137,6 +1176,8 @@ class ExpenseClaimService:
"uploaded_at": uploaded_at,
"previewable": bool(metadata.get("previewable", self._is_previewable_media_type(media_type, filename))),
"analysis": analysis,
"document_info": document_info,
"requirement_check": requirement_check,
}
@staticmethod
@@ -1153,6 +1194,120 @@ class ExpenseClaimService:
def _resolve_attachment_display_name(storage_key: str | None) -> str:
return Path(str(storage_key or "").strip()).name
def _build_attachment_document_info(self, document: Any) -> dict[str, Any]:
    """Assemble the document-info payload for a recognized attachment.

    Values carried by ``document`` take precedence. Whenever the document
    has no usable fields, or its type/scene is blank or the generic
    "other" value, the corresponding value from ``build_document_insight``
    is used instead.

    Args:
        document: Object exposing (optionally) ``filename``, ``summary``,
            ``text``, ``document_fields``, ``document_type``,
            ``document_type_label``, ``scene_code`` and ``scene_label``.

    Returns:
        A dict with ``document_type``, ``document_type_label``,
        ``scene_code``, ``scene_label`` and ``fields`` (a list of
        ``{"key", "label", "value"}`` dicts).
    """

    def raw_attr(name: str) -> str:
        # Missing or falsy attributes collapse to an empty string.
        return str(getattr(document, name, "") or "")

    def entry_text(entry: Any, name: str) -> str:
        # Field entries may be plain dicts or attribute-style objects.
        if isinstance(entry, dict):
            found = entry.get(name)
        else:
            found = getattr(entry, name, "")
        return str(found or "").strip()

    insight = build_document_insight(
        filename=raw_attr("filename"),
        summary=raw_attr("summary"),
        text=raw_attr("text"),
    )

    # Keep only entries where key, label and value are all non-empty.
    collected: list[dict[str, str]] = []
    for entry in list(getattr(document, "document_fields", []) or []):
        candidate = {name: entry_text(entry, name) for name in ("key", "label", "value")}
        if all(candidate.values()):
            collected.append(candidate)

    if not collected:
        # Nothing usable on the document: fall back to the insight fields.
        collected = [
            {"key": found.key, "label": found.label, "value": found.value}
            for found in insight.fields
            if found.value
        ]

    type_code = raw_attr("document_type").strip()
    if not type_code or type_code == "other":
        type_code = insight.document_type

    type_label = raw_attr("document_type_label").strip()
    if type_label in {"", "其他单据"}:
        type_label = insight.document_type_label

    scene = raw_attr("scene_code").strip()
    if not scene or scene == "other":
        scene = insight.scene_code

    scene_text = raw_attr("scene_label").strip()
    if scene_text in {"", "其他票据"}:
        scene_text = insight.scene_label

    return {
        "document_type": type_code,
        "document_type_label": type_label,
        "scene_code": scene,
        "scene_label": scene_text,
        "fields": collected,
    }
def _build_attachment_requirement_check(
    self,
    *,
    item: ExpenseClaimItem,
    document_info: dict[str, Any],
) -> dict[str, Any]:
    """Check a recognized document against the item's expense type.

    The item's expense type selects a set of allowed document scenes from
    ``EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES``. The attachment matches when
    that set is empty (no restriction configured) or when the recognized
    scene code is a member of it.

    Args:
        item: Claim item whose ``item_type`` names the expense type.
        document_info: Dict read for ``scene_code``, ``scene_label``,
            ``document_type`` and ``document_type_label``.

    Returns:
        A dict with ``matches``, the current expense type and label, the
        allowed scene labels, the recognized scene/document type and
        labels, and a human-readable ``message``.
    """
    expense_type = str(item.item_type or "").strip().lower() or "other"
    expense_label = self._resolve_expense_type_label(expense_type)
    allowed_scenes = EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES.get(expense_type, set())
    # Sort the codes so the label order is stable across runs.
    allowed_scene_labels = [self._resolve_document_scene_label(code) for code in sorted(allowed_scenes)]

    # Lower-case the code before the membership test, the same way
    # _resolve_document_scene_label normalizes it for the label lookup.
    recognized_scene_code = str(document_info.get("scene_code") or "").strip().lower() or "other"
    # Fall back to the resolved label when the stored one is blank or
    # whitespace-only, so the label is never empty.
    recognized_scene_label = (
        str(document_info.get("scene_label") or "").strip()
        or self._resolve_document_scene_label(recognized_scene_code)
    )
    recognized_document_type = str(document_info.get("document_type") or "other").strip() or "other"
    recognized_document_type_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"

    matches = not allowed_scenes or recognized_scene_code in allowed_scenes
    if matches:
        recognized_text = f"当前费用项目为{expense_label},已识别为{recognized_document_type_label}"
        if allowed_scene_labels:
            message = f"{recognized_text},符合当前{expense_label}场景的附件要求。"
        else:
            message = f"{recognized_text}。"
    else:
        # Separate the labels so several allowed scenes stay readable.
        expected_text = "、".join(f"{label}相关票据" for label in allowed_scene_labels) or "对应场景票据"
        message = (
            f"当前费用项目为{expense_label},要求上传{expected_text},"
            f"当前识别为{recognized_document_type_label},不符合当前场景,建议过滤或更换附件。"
        )

    return {
        "matches": matches,
        "current_expense_type": expense_type,
        "current_expense_type_label": expense_label,
        "allowed_scene_labels": allowed_scene_labels,
        "recognized_scene_code": recognized_scene_code,
        "recognized_scene_label": recognized_scene_label,
        "recognized_document_type": recognized_document_type,
        "recognized_document_type_label": recognized_document_type_label,
        "message": message,
    }
@staticmethod
def _resolve_document_scene_label(scene_code: str) -> str:
    """Return the display label for a scene code.

    The code is stripped and lower-cased before the lookup; codes absent
    from ``DOCUMENT_SCENE_LABELS`` (including empty ones) give "其他票据".
    """
    lookup_key = str(scene_code or "").strip().lower()
    if lookup_key in DOCUMENT_SCENE_LABELS:
        return DOCUMENT_SCENE_LABELS[lookup_key]
    return "其他票据"
@staticmethod
def _extract_amount_candidates(text: str) -> list[Decimal]:
values: list[Decimal] = []
@@ -1285,7 +1440,14 @@ class ExpenseClaimService:
"suggestion": "建议重新上传更清晰的票据图片,或稍后重试识别后再提交。",
}
def _build_attachment_analysis(self, *, document: Any, item: ExpenseClaimItem) -> dict[str, Any]:
def _build_attachment_analysis(
self,
*,
document: Any,
item: ExpenseClaimItem,
document_info: dict[str, Any] | None = None,
requirement_check: dict[str, Any] | None = None,
) -> dict[str, Any]:
warnings = [str(value).strip() for value in list(getattr(document, "warnings", []) or []) if str(value).strip()]
text = " ".join(
[
@@ -1296,11 +1458,19 @@ class ExpenseClaimService:
compact_text = text.replace(" ", "")
avg_score = float(getattr(document, "avg_score", 0.0) or 0.0)
line_count = int(getattr(document, "line_count", 0) or 0)
document_info = document_info or self._build_attachment_document_info(document)
requirement_check = requirement_check or self._build_attachment_requirement_check(
item=item,
document_info=document_info,
)
document_scene_matches = self._detect_expense_scenes(text)
purpose_mismatch_point = self._build_purpose_mismatch_point(
item=item,
document_scenes=set(document_scene_matches.keys()),
)
recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other"
recognized_document_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"
requirement_matches = bool(requirement_check.get("matches"))
has_ticket_keyword = any(
keyword in compact_text
@@ -1329,8 +1499,8 @@ class ExpenseClaimService:
points.append(f"识别提示:{warnings[0]}")
if line_count == 0 or not compact_text:
points.append("附件内容:未识别到有效文字,当前附件更像普通图片或内容过于模糊。")
if not has_ticket_keyword:
points.append("票据类型:未识别到发票、票据、电子行程单等关键字。")
if recognized_document_type == "other" and not has_ticket_keyword:
points.append("票据类型:未识别到发票、票据、电子行程单等关键字,暂无法判断票据类型")
if not amount_candidates:
points.append("金额字段:未识别到可用于核对的金额。")
elif amount_mismatch:
@@ -1338,6 +1508,8 @@ class ExpenseClaimService:
points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。")
if not has_date_text:
points.append("日期字段:未识别到开票日期或业务发生日期。")
if not requirement_matches:
points.append(f"附件类型要求:{requirement_check.get('message')}")
if purpose_mismatch_point:
points.append(purpose_mismatch_point)
if avg_score and avg_score < 0.72:
@@ -1349,9 +1521,10 @@ class ExpenseClaimService:
"severity": "pass",
"label": "AI提示符合条件",
"headline": "AI提示附件符合基础校验条件",
"summary": "已识别到票据关键字段,附件可继续进入人工复核与报销流程",
"summary": "已识别到票据类型和关键字段,且符合当前费用场景的附件要求",
"points": [
"票据类型:已识别到可用于报销核验的票据关键字",
f"票据类型:已识别{recognized_document_label}",
f"附件类型要求:{requirement_check.get('message')}",
f"金额字段:已识别到与当前明细接近的金额 {item_amount} 元。",
],
"suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。",
@@ -1365,21 +1538,22 @@ class ExpenseClaimService:
if (
line_count == 0
or not compact_text
or (not has_ticket_keyword and issue_count >= 2)
or (recognized_document_type == "other" and not has_ticket_keyword and issue_count >= 2)
or not requirement_matches
or (purpose_mismatch_point and amount_mismatch)
):
severity = "high"
label = "高风险"
headline = "AI提示附件不符合票据校验条件"
summary = "当前附件存在明显异常,票据内容与填写信息不一致,或无法作为有效报销材料。"
summary = "当前附件存在明显异常,票据类型与当前费用场景不匹配,或无法作为有效报销材料。"
elif purpose_mismatch_point or amount_mismatch or issue_count >= 2 or warnings or (avg_score and avg_score < 0.72):
severity = "medium"
label = "中风险"
headline = "AI提示附件存在明显待整改项"
summary = "当前附件可见部分内容,但金额、用途、日期或票据类型仍有缺失或不一致。"
summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。"
suggestion = {
"high": "建议重新上传清晰的票据原件,确保包含发票抬头、金额、日期等核心字段",
"high": "建议过滤当前不匹配的票据,重新上传符合当前费用场景的清晰原件",
"medium": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。",
"low": "建议人工再次核对金额和业务说明,确认后可继续流转。",
}[severity]
@@ -1503,14 +1677,35 @@ class ExpenseClaimService:
list(metadata.get("ocr_warnings") or []),
)
):
stored_document_info = metadata.get("document_info")
if not isinstance(stored_document_info, dict):
stored_document_info = {}
document = SimpleNamespace(
filename=str(metadata.get("file_name") or file_path.name),
text=str(metadata.get("ocr_text") or ""),
summary=str(metadata.get("ocr_summary") or ""),
avg_score=float(metadata.get("ocr_avg_score") or 0.0),
line_count=int(metadata.get("ocr_line_count") or 0),
document_type=str(stored_document_info.get("document_type") or ""),
document_type_label=str(stored_document_info.get("document_type_label") or ""),
scene_code=str(stored_document_info.get("scene_code") or ""),
scene_label=str(stored_document_info.get("scene_label") or ""),
document_fields=list(stored_document_info.get("fields") or []),
warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()],
)
analysis = self._build_attachment_analysis(document=document, item=item)
document_info = self._build_attachment_document_info(document)
requirement_check = self._build_attachment_requirement_check(
item=item,
document_info=document_info,
)
analysis = self._build_attachment_analysis(
document=document,
item=item,
document_info=document_info,
requirement_check=requirement_check,
)
metadata["document_info"] = document_info
metadata["requirement_check"] = requirement_check
else:
analysis = self._build_fallback_attachment_analysis(media_type=media_type, item=item)

View File

@@ -187,6 +187,8 @@ def test_update_claim_item_reanalyzes_existing_attachment(monkeypatch, tmp_path)
)
assert uploaded_meta is not None
assert uploaded_meta["analysis"]["severity"] == "pass"
assert uploaded_meta["document_info"]["document_type"] == "office_invoice"
assert uploaded_meta["requirement_check"]["matches"] is True
updated = service.update_claim_item(
claim_id=claim.id,
@@ -207,8 +209,9 @@ def test_update_claim_item_reanalyzes_existing_attachment(monkeypatch, tmp_path)
current_user=current_user,
)
assert refreshed_meta is not None
assert refreshed_meta["analysis"]["severity"] == "medium"
assert any("用途字段" in point for point in refreshed_meta["analysis"]["points"])
assert refreshed_meta["analysis"]["severity"] == "high"
assert refreshed_meta["requirement_check"]["matches"] is False
assert any("附件类型要求" in point for point in refreshed_meta["analysis"]["points"])
def test_delete_claim_item_removes_row_and_attachment_files(monkeypatch, tmp_path) -> None:

View File

@@ -154,6 +154,8 @@ def test_claim_item_attachment_upload_preview_and_delete(monkeypatch, tmp_path)
upload_payload = upload_response.json()
assert upload_payload["attachment"]["file_name"] == "office-note.png"
assert upload_payload["attachment"]["analysis"]["label"] == "AI提示符合条件"
assert upload_payload["attachment"]["document_info"]["document_type"] == "office_invoice"
assert upload_payload["attachment"]["requirement_check"]["matches"] is True
assert upload_payload["invoice_id"]
meta_response = client.get(
@@ -164,6 +166,7 @@ def test_claim_item_attachment_upload_preview_and_delete(monkeypatch, tmp_path)
meta_payload = meta_response.json()
assert meta_payload["media_type"] == "image/png"
assert meta_payload["analysis"]["headline"]
assert meta_payload["document_info"]["fields"][0]["label"] == "金额"
content_response = client.get(
f"/api/v1/reimbursements/claims/{claim_id}/items/{item_id}/attachment",
@@ -228,7 +231,8 @@ def test_claim_item_attachment_upload_flags_purpose_and_amount_mismatch(monkeypa
analysis = upload_response.json()["attachment"]["analysis"]
assert analysis["severity"] == "high"
assert any("金额字段" in point for point in analysis["points"])
assert any("用途字段" in point for point in analysis["points"])
assert any("附件类型要求" in point for point in analysis["points"])
assert upload_response.json()["attachment"]["requirement_check"]["matches"] is False
def test_claim_item_attachment_upload_flags_non_invoice_image_as_high_risk(monkeypatch, tmp_path) -> None: