From 6317fc0ccde8bc67eea71af9820e09ee36a54937 Mon Sep 17 00:00:00 2001 From: caoxiaozhu Date: Wed, 13 May 2026 06:45:04 +0000 Subject: [PATCH] refactor(backend): update reimbursement and related services - endpoints/reimbursements.py: update reimbursement API endpoint - schemas/reimbursement.py: update reimbursement data schemas - services/expense_claims.py: update expense claims service - services/ontology.py: update ontology service - services/user_agent.py: update user agent service --- .../app/api/v1/endpoints/reimbursements.py | 239 ++++- server/src/app/schemas/reimbursement.py | 42 + server/src/app/services/expense_claims.py | 827 +++++++++++++++++- server/src/app/services/ontology.py | 28 + server/src/app/services/user_agent.py | 25 +- 5 files changed, 1154 insertions(+), 7 deletions(-) diff --git a/server/src/app/api/v1/endpoints/reimbursements.py b/server/src/app/api/v1/endpoints/reimbursements.py index 59f0047..226399a 100644 --- a/server/src/app/api/v1/endpoints/reimbursements.py +++ b/server/src/app/api/v1/endpoints/reimbursements.py @@ -2,13 +2,18 @@ from __future__ import annotations from typing import Annotated -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status +from fastapi.responses import FileResponse from sqlalchemy.orm import Session from app.api.deps import CurrentUserContext, get_current_user, get_db from app.schemas.common import ErrorResponse from app.schemas.reimbursement import ( + ExpenseClaimAttachmentActionResponse, ExpenseClaimActionResponse, + ExpenseClaimAttachmentRead, + ExpenseClaimItemCreate, + ExpenseClaimItemActionResponse, ExpenseClaimItemUpdate, ExpenseClaimRead, ReimbursementCreate, @@ -113,6 +118,238 @@ def update_expense_claim_item( return claim +@router.post( + "/claims/{claim_id}/items", + response_model=ExpenseClaimRead, + summary="新增草稿费用明细", + description="在草稿报销单中新增一条费用明细,供用户继续补充附件与字段。", + responses={ + status.HTTP_404_NOT_FOUND: { + "model": ErrorResponse, + "description": "报销单不存在。", + }, + status.HTTP_400_BAD_REQUEST: { + "model": ErrorResponse, + "description": "草稿状态校验失败或字段校验失败。", + }, + }, +) +def create_expense_claim_item( + claim_id: str, + payload: ExpenseClaimItemCreate | None, + db: DbSession, + current_user: CurrentUser, +) -> ExpenseClaimRead: + service = ExpenseClaimService(db) + try: + claim = service.create_claim_item( + claim_id=claim_id, + payload=payload, + current_user=current_user, + ) + except ValueError as error: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(error)) from error + + if claim is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found") + return claim + + +@router.delete( + "/claims/{claim_id}/items/{item_id}", + response_model=ExpenseClaimItemActionResponse, + summary="删除草稿费用明细", + description="删除草稿报销单中的一条费用明细,并同步清理该行已上传的附件与 OCR 元数据。", + responses={ + status.HTTP_404_NOT_FOUND: { + "model": ErrorResponse, + "description": "报销单或费用明细不存在。", + }, + status.HTTP_400_BAD_REQUEST: { + "model": ErrorResponse, + "description": "草稿状态校验失败。", + }, + }, +) +def delete_expense_claim_item( + claim_id: str, + item_id: str, + db: DbSession, + current_user: CurrentUser, +) -> ExpenseClaimItemActionResponse: + service = ExpenseClaimService(db) + try: + payload = service.delete_claim_item( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + except LookupError as error: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error + except ValueError as error: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(error)) from error + + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found") + return ExpenseClaimItemActionResponse(**payload) + + +@router.post( + "/claims/{claim_id}/items/{item_id}/attachment", + response_model=ExpenseClaimAttachmentActionResponse, + summary="上传费用明细附件", + description="为草稿费用明细上传真实附件文件,并返回附件元信息与 AI 校验结果。", + responses={ + status.HTTP_404_NOT_FOUND: { + "model": ErrorResponse, + "description": "报销单或费用明细不存在。", + }, + status.HTTP_400_BAD_REQUEST: { + "model": ErrorResponse, + "description": "草稿状态校验失败或文件不合法。", + }, + }, +) +async def upload_expense_claim_item_attachment( + claim_id: str, + item_id: str, + file: Annotated[UploadFile, File(description="待上传的附件文件。")], + db: DbSession, + current_user: CurrentUser, +) -> ExpenseClaimAttachmentActionResponse: + service = ExpenseClaimService(db) + try: + payload = service.upload_claim_item_attachment( + claim_id=claim_id, + item_id=item_id, + filename=str(file.filename or "attachment.bin"), + content=await file.read(), + media_type=file.content_type, + current_user=current_user, + ) + except LookupError as error: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error + except ValueError as error: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(error)) from error + + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found") + return ExpenseClaimAttachmentActionResponse(**payload) + + +@router.get( + "/claims/{claim_id}/items/{item_id}/attachment/meta", + response_model=ExpenseClaimAttachmentRead, + summary="读取费用明细附件元信息", + description="返回当前费用明细已上传附件的文件信息与 AI 校验结果。", + responses={ + status.HTTP_404_NOT_FOUND: { + "model": ErrorResponse, + "description": "报销单、费用明细或附件不存在。", + }, + }, +) +def get_expense_claim_item_attachment_meta( + claim_id: str, + item_id: str, + db: DbSession, + current_user: CurrentUser, +) -> ExpenseClaimAttachmentRead: + service = ExpenseClaimService(db) + try: + payload = service.get_claim_item_attachment_meta( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + except LookupError as error: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error + except FileNotFoundError as error: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error + + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found") + return ExpenseClaimAttachmentRead(**payload) + + +@router.get( + "/claims/{claim_id}/items/{item_id}/attachment", + response_class=FileResponse, + summary="读取费用明细附件内容", + description="用于详情页预览当前费用明细已上传的附件文件。", + responses={ + status.HTTP_404_NOT_FOUND: { + "model": ErrorResponse, + "description": "报销单、费用明细或附件不存在。", + }, + }, +) +def get_expense_claim_item_attachment( + claim_id: str, + item_id: str, + db: DbSession, + current_user: CurrentUser, +) -> FileResponse: + service = ExpenseClaimService(db) + try: + payload = service.get_claim_item_attachment_content( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + except LookupError as error: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error + except FileNotFoundError as error: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error + + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found") + + file_path, media_type, filename = payload + return FileResponse(file_path, media_type=media_type, filename=filename) + + +@router.delete( + "/claims/{claim_id}/items/{item_id}/attachment", + response_model=ExpenseClaimAttachmentActionResponse, + summary="删除费用明细附件", + description="删除草稿费用明细当前已上传的附件文件,并清空票据关联。", + responses={ + status.HTTP_404_NOT_FOUND: { + "model": ErrorResponse, + "description": "报销单、费用明细或附件不存在。", + }, + status.HTTP_400_BAD_REQUEST: { + "model": ErrorResponse, + "description": "当前状态不允许删除附件。", + }, + }, +) +def delete_expense_claim_item_attachment( + claim_id: str, + item_id: str, + db: DbSession, + current_user: CurrentUser, +) -> ExpenseClaimAttachmentActionResponse: + service = ExpenseClaimService(db) + try: + payload = service.delete_claim_item_attachment( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + except LookupError as error: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error + except FileNotFoundError as error: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error + except ValueError as error: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(error)) from error + + if payload is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found") + return ExpenseClaimAttachmentActionResponse(**payload) + + @router.post( "/claims/{claim_id}/submit", response_model=ExpenseClaimRead, diff --git a/server/src/app/schemas/reimbursement.py b/server/src/app/schemas/reimbursement.py index 780bd91..6174b0e 100644 --- a/server/src/app/schemas/reimbursement.py +++ b/server/src/app/schemas/reimbursement.py @@ -45,6 +45,25 @@ class ExpenseClaimItemRead(BaseModel): updated_at: datetime +class ExpenseClaimAttachmentAnalysisRead(BaseModel): + severity: str + label: str + headline: str + summary: str + points: list[str] = Field(default_factory=list) + suggestion: str = "" + + +class ExpenseClaimAttachmentRead(BaseModel): + file_name: str + storage_key: str + media_type: str + size_bytes: int + uploaded_at: datetime | None = None + previewable: bool = True + analysis: ExpenseClaimAttachmentAnalysisRead | None = None + + class ExpenseClaimItemUpdate(BaseModel): item_date: date | None = None item_type: str | None = None @@ -54,6 +73,15 @@ class ExpenseClaimItemUpdate(BaseModel): invoice_id: str | None = None +class ExpenseClaimItemCreate(BaseModel): + item_date: date | None = None + item_type: str | None = None + item_reason: str | None = None + item_location: str | None = None + item_amount: Decimal | None = None + invoice_id: str | None = None + + class ExpenseClaimRead(BaseModel): model_config = ConfigDict(from_attributes=True) @@ -84,3 +112,17 @@ class ExpenseClaimActionResponse(BaseModel): message: str claim_id: str status: str | None = None + + +class ExpenseClaimAttachmentActionResponse(BaseModel): + message: str + claim_id: str + item_id: str + invoice_id: str | None = None + attachment: ExpenseClaimAttachmentRead | None = None + + +class ExpenseClaimItemActionResponse(BaseModel): + message: str + claim_id: str + item_id: str diff --git a/server/src/app/services/expense_claims.py b/server/src/app/services/expense_claims.py index 0a383d4..cf83e44 100644 --- a/server/src/app/services/expense_claims.py +++ b/server/src/app/services/expense_claims.py @@ -1,19 +1,27 @@ from __future__ import annotations +import json +import mimetypes +import re +import shutil from datetime import UTC, date, datetime from decimal import Decimal, InvalidOperation +from pathlib import Path +from types import SimpleNamespace from typing import Any from sqlalchemy import func, or_, select from sqlalchemy.orm import Session, selectinload from app.api.deps import CurrentUserContext +from app.core.config import get_settings from app.models.employee import Employee from app.models.financial_record import ExpenseClaim, ExpenseClaimItem from app.schemas.ontology import OntologyEntity, OntologyParseResult -from app.schemas.reimbursement import ExpenseClaimItemUpdate -from app.services.audit import AuditLogService +from app.schemas.reimbursement import ExpenseClaimItemCreate, ExpenseClaimItemUpdate from app.services.agent_foundation import AgentFoundationService +from app.services.audit import AuditLogService +from app.services.ocr import OcrService EXPENSE_TYPE_LABELS = { "travel": "差旅", @@ -22,10 +30,64 @@ EXPENSE_TYPE_LABELS = { "meal": "餐费", "meeting": "会务", "entertainment": "招待", + "office": "办公", + "training": "培训", + "communication": "通讯", + "welfare": "福利", } PRIVILEGED_CLAIM_ROLE_CODES = {"manager", "finance", "approver", "auditor", "executive"} MAX_DRAFT_CLAIMS_PER_USER = 3 +LOCATION_REQUIRED_EXPENSE_TYPES = { + "travel", + "hotel", + "transport", + "meal", + "meeting", + "entertainment", +} + +EXPENSE_SCENE_KEYWORDS = { + "travel": ("差旅", "出差", "行程"), + "hotel": ("酒店", "住宿", "房费", "客房", "入住", "离店"), + "transport": ( + "交通", + "打车", + "出租车", + "网约车", + "滴滴", + "出行", + "高铁", + "动车", + "火车", + "机票", + "航班", + "行程单", + "登机", + "客票", + "公交", + "地铁", + "过路费", + "通行费", + "停车", + ), + "meal": ("餐饮", "餐费", "用餐", "外卖", "快餐", "酒楼", "饭店", "饭馆", "食品", "咖啡"), + "entertainment": ("招待", "宴请", "接待", "客户餐", "商务餐", "业务招待"), + "office": ("办公", "办公用品", "文具", "耗材", "打印", "纸张", "硒鼓", "墨盒", "鼠标", "键盘", "电脑"), + "meeting": ("会议", "会务", "会展", "会议室", "会场", "场地费", "论坛"), + "training": ("培训", "课程", "讲师", "教材", "学费", "认证"), +} + +EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = { + "travel": {"travel", "hotel", "transport", "meal"}, + "hotel": {"hotel"}, + "transport": {"transport", "travel"}, + "meal": {"meal", "entertainment"}, + "entertainment": {"entertainment", "meal"}, + "office": {"office"}, + "meeting": {"meeting"}, + "training": {"training"}, +} class ExpenseClaimService: @@ -90,6 +152,7 @@ class ExpenseClaimService: if payload.invoice_id is not None: item.invoice_id = self._normalize_optional_text(payload.invoice_id, allow_empty=True) + self._refresh_item_attachment_analysis(item) self._sync_claim_from_items(claim) self.db.commit() self.db.refresh(claim) @@ -105,6 +168,279 @@ class ExpenseClaimService: return claim + def create_claim_item( + self, + *, + claim_id: str, + payload: ExpenseClaimItemCreate | None, + current_user: CurrentUserContext, + ) -> ExpenseClaim | None: + claim = self.get_claim(claim_id, current_user) + if claim is None: + return None + + self._ensure_draft_claim(claim) + before_json = self._serialize_claim(claim) + payload = payload or ExpenseClaimItemCreate() + + occurred_at = claim.occurred_at if claim.occurred_at is not None else datetime.now(UTC) + item_amount = Decimal("0.00") + if payload.item_amount is not None: + item_amount = payload.item_amount.quantize(Decimal("0.01")) + if item_amount < Decimal("0.00"): + raise ValueError("费用金额不能小于 0。") + + item = ExpenseClaimItem( + claim_id=claim.id, + item_date=payload.item_date or occurred_at.date(), + item_type=self._normalize_optional_text( + payload.item_type, + fallback=str(claim.expense_type or "").strip() or "other", + ) + or "other", + item_reason=self._normalize_optional_text(payload.item_reason, fallback="") or "", + item_location=self._normalize_optional_text(payload.item_location, fallback="") or "", + item_amount=item_amount, + invoice_id=self._normalize_optional_text(payload.invoice_id, allow_empty=True), + ) + claim.items.append(item) + self.db.add(item) + + self._sync_claim_from_items(claim) + self.db.commit() + self.db.refresh(claim) + + self.audit_service.log_action( + actor=current_user.name or current_user.username, + action="expense_claim.item_create", + resource_type="expense_claim", + resource_id=claim.id, + before_json=before_json, + after_json=self._serialize_claim(claim), + ) + + return claim + + def delete_claim_item( + self, + *, + claim_id: str, + item_id: str, + current_user: CurrentUserContext, + ) -> dict[str, Any] | None: + claim, item = self._get_claim_item_or_raise( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + if claim is None: + return None + + self._ensure_draft_claim(claim) + before_json = self._serialize_claim(claim) + item_label = str(item.item_reason or "").strip() or self._resolve_expense_type_label(item.item_type) + + self._delete_item_attachment_files(item) + claim.items = [entry for entry in claim.items if entry.id != item.id] + self.db.delete(item) + + self._sync_claim_from_items(claim) + self.db.commit() + self.db.refresh(claim) + + self.audit_service.log_action( + actor=current_user.name or current_user.username, + action="expense_claim.item_delete", + resource_type="expense_claim", + resource_id=claim.id, + before_json=before_json, + after_json=self._serialize_claim(claim), + ) + + return { + "message": f"费用明细“{item_label}”已删除。", + "claim_id": claim.id, + "item_id": item.id, + } + + def upload_claim_item_attachment( + self, + *, + claim_id: str, + item_id: str, + filename: str, + content: bytes, + media_type: str | None, + current_user: CurrentUserContext, + ) -> dict[str, Any] | None: + claim, item = self._get_claim_item_or_raise( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + if claim is None: + return None + + self._ensure_draft_claim(claim) + normalized_name = self._normalize_attachment_filename(filename) + if not content: + raise ValueError("上传文件不能为空。") + + before_json = self._serialize_claim(claim) + attachment_dir = self._build_item_attachment_dir(claim.id, item.id) + shutil.rmtree(attachment_dir, ignore_errors=True) + attachment_dir.mkdir(parents=True, exist_ok=True) + + file_path = attachment_dir / normalized_name + file_path.write_bytes(content) + + attachment_analysis = self._build_fallback_attachment_analysis( + media_type=media_type, + item=item, + ) + ocr_document = None + ocr_status = "empty" + ocr_error = "" + try: + ocr_result = OcrService().recognize_files( + [(normalized_name, content, media_type or "application/octet-stream")] + ) + documents = list(ocr_result.documents or []) + if documents: + ocr_document = documents[0] + ocr_status = "recognized" + attachment_analysis = self._build_attachment_analysis( + document=ocr_document, + item=item, + ) + except Exception as exc: # pragma: no cover - fallback path depends on OCR runtime + ocr_status = "failed" + ocr_error = str(exc) + attachment_analysis = self._build_failed_ocr_attachment_analysis( + media_type=media_type, + error_message=ocr_error, + item=item, + ) + + item.invoice_id = self._to_attachment_storage_key(file_path) + meta = { + "file_name": normalized_name, + "storage_key": item.invoice_id, + "media_type": self._resolve_attachment_media_type( + normalized_name, + fallback=media_type, + ), + "size_bytes": len(content), + "uploaded_at": datetime.now(UTC).isoformat(), + "previewable": self._is_previewable_media_type(media_type, normalized_name), + "analysis": attachment_analysis, + "ocr_status": ocr_status, + "ocr_error": ocr_error, + "ocr_text": str(getattr(ocr_document, "text", "") or ""), + "ocr_summary": str(getattr(ocr_document, "summary", "") or ""), + "ocr_avg_score": float(getattr(ocr_document, "avg_score", 0.0) or 0.0), + "ocr_line_count": int(getattr(ocr_document, "line_count", 0) or 0), + "ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []], + } + self._write_attachment_meta(file_path, meta) + + self._sync_claim_from_items(claim) + self.db.commit() + self.db.refresh(claim) + + self.audit_service.log_action( + actor=current_user.name or current_user.username, + action="expense_claim.attachment_upload", + resource_type="expense_claim", + resource_id=claim.id, + before_json=before_json, + after_json=self._serialize_claim(claim), + ) + + return { + "message": f"{normalized_name} 已上传并关联到当前费用明细。", + "claim_id": claim.id, + "item_id": item.id, + "invoice_id": item.invoice_id, + "attachment": self._build_attachment_payload(item), + } + + def get_claim_item_attachment_meta( + self, + *, + claim_id: str, + item_id: str, + current_user: CurrentUserContext, + ) -> dict[str, Any] | None: + claim, item = self._get_claim_item_or_raise( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + if claim is None: + return None + + return self._build_attachment_payload(item) + + def get_claim_item_attachment_content( + self, + *, + claim_id: str, + item_id: str, + current_user: CurrentUserContext, + ) -> tuple[Path, str, str] | None: + claim, item = self._get_claim_item_or_raise( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + if claim is None: + return None + + return self._resolve_item_attachment_content(item) + + def delete_claim_item_attachment( + self, + *, + claim_id: str, + item_id: str, + current_user: CurrentUserContext, + ) -> dict[str, Any] | None: + claim, item = self._get_claim_item_or_raise( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + if claim is None: + return None + + self._ensure_draft_claim(claim) + before_json = self._serialize_claim(claim) + previous_name = self._resolve_attachment_display_name(item.invoice_id) + self._delete_item_attachment_files(item) + item.invoice_id = None + + self._sync_claim_from_items(claim) + self.db.commit() + self.db.refresh(claim) + + self.audit_service.log_action( + actor=current_user.name or current_user.username, + action="expense_claim.attachment_delete", + resource_type="expense_claim", + resource_id=claim.id, + before_json=before_json, + after_json=self._serialize_claim(claim), + ) + + return { + "message": f"{previous_name or '附件'} 已删除。", + "claim_id": claim.id, + "item_id": item.id, + "invoice_id": item.invoice_id, + "attachment": None, + } + def submit_claim(self, claim_id: str, current_user: CurrentUserContext) -> ExpenseClaim | None: claim = self.get_claim(claim_id, current_user) if claim is None: @@ -144,6 +480,7 @@ class ExpenseClaimService: before_json = self._serialize_claim(claim) resource_id = claim.id + self._delete_claim_attachment_root(claim.id) self.db.delete(claim) self.db.commit() @@ -545,6 +882,14 @@ class ExpenseClaimService: return "meal" if "会务" in compact: return "meeting" + if any(word in compact for word in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")): + return "office" + if any(word in compact for word in ("培训费", "培训", "讲师费", "课时费", "课程费")): + return "training" + if any(word in compact for word in ("通讯费", "话费", "流量费", "宽带费")): + return "communication" + if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")): + return "welfare" for item in entities: if item.type == "expense_type": normalized = item.normalized_value.strip() @@ -669,6 +1014,381 @@ class ExpenseClaimService: except (TypeError, ValueError): return 0 + def _get_claim_item_or_raise( + self, + *, + claim_id: str, + item_id: str, + current_user: CurrentUserContext, + ) -> tuple[ExpenseClaim | None, ExpenseClaimItem]: + claim = self.get_claim(claim_id, current_user) + if claim is None: + return None, None # type: ignore[return-value] + + item = next((entry for entry in claim.items if entry.id == item_id), None) + if item is None: + raise LookupError("Item not found") + return claim, item + + def _get_attachment_storage_root(self) -> Path: + return (get_settings().resolved_storage_root_dir / "expense_claims").resolve() + + def _build_item_attachment_dir(self, claim_id: str, item_id: str) -> Path: + return (self._get_attachment_storage_root() / claim_id / item_id).resolve() + + def _delete_claim_attachment_root(self, claim_id: str) -> None: + shutil.rmtree((self._get_attachment_storage_root() / claim_id).resolve(), ignore_errors=True) + + @staticmethod + def _normalize_attachment_filename(filename: str | None) -> str: + normalized = Path(str(filename or "").strip()).name + normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._") + suffix = Path(normalized).suffix + if normalized: + return normalized + return f"attachment{suffix or '.bin'}" + + def _resolve_attachment_path(self, storage_key: str | None) -> Path | None: + normalized = str(storage_key or "").strip() + if not normalized: + return None + + root = self._get_attachment_storage_root() + path = (root / normalized).resolve() + try: + path.relative_to(root) + except ValueError as exc: + raise FileNotFoundError("Attachment path is invalid") from exc + return path + + def _to_attachment_storage_key(self, file_path: Path) -> str: + root = self._get_attachment_storage_root() + return file_path.resolve().relative_to(root).as_posix() + + def _resolve_item_attachment_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]: + file_path = self._resolve_attachment_path(item.invoice_id) + if file_path is None or not file_path.exists(): + raise FileNotFoundError("Attachment not found") + + metadata = self._read_attachment_meta(file_path) + filename = str(metadata.get("file_name") or file_path.name) + media_type = self._resolve_attachment_media_type( + filename, + fallback=str(metadata.get("media_type") or ""), + ) + return file_path, media_type, filename + + def _delete_item_attachment_files(self, item: ExpenseClaimItem) -> None: + file_path = self._resolve_attachment_path(item.invoice_id) + if file_path is None: + return + + root = self._get_attachment_storage_root() + if file_path.parent == root: + file_path.unlink(missing_ok=True) + self._attachment_meta_path(file_path).unlink(missing_ok=True) + return + + shutil.rmtree(file_path.parent, ignore_errors=True) + + @staticmethod + def _attachment_meta_path(file_path: Path) -> Path: + return file_path.with_name(f"{file_path.name}.meta.json") + + def _write_attachment_meta(self, file_path: Path, payload: dict[str, Any]) -> None: + meta_path = self._attachment_meta_path(file_path) + meta_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") + + def _read_attachment_meta(self, file_path: Path) -> dict[str, Any]: + meta_path = self._attachment_meta_path(file_path) + if not meta_path.exists(): + return {} + + try: + payload = json.loads(meta_path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return {} + return payload if isinstance(payload, dict) else {} + + def _build_attachment_payload(self, item: ExpenseClaimItem) -> dict[str, Any]: + file_path, media_type, filename = self._resolve_item_attachment_content(item) + metadata = self._read_attachment_meta(file_path) + uploaded_at_value = metadata.get("uploaded_at") + uploaded_at = None + if isinstance(uploaded_at_value, str) and uploaded_at_value.strip(): + try: + uploaded_at = datetime.fromisoformat(uploaded_at_value) + except ValueError: + uploaded_at = None + + analysis = metadata.get("analysis") + if not isinstance(analysis, dict): + analysis = None + + return { + "file_name": str(metadata.get("file_name") or filename), + "storage_key": str(item.invoice_id or ""), + "media_type": str(metadata.get("media_type") or media_type), + "size_bytes": int(metadata.get("size_bytes") or file_path.stat().st_size), + "uploaded_at": uploaded_at, + "previewable": bool(metadata.get("previewable", self._is_previewable_media_type(media_type, filename))), + "analysis": analysis, + } + + @staticmethod + def _resolve_attachment_media_type(filename: str, *, fallback: str | None = None) -> str: + guessed = mimetypes.guess_type(filename)[0] + return str(guessed or fallback or "application/octet-stream") + + @staticmethod + def _is_previewable_media_type(media_type: str | None, filename: str) -> bool: + resolved = str(media_type or "").strip() or (mimetypes.guess_type(filename)[0] or "") + return resolved.startswith("image/") or resolved == "application/pdf" + + @staticmethod + def _resolve_attachment_display_name(storage_key: str | None) -> str: + return Path(str(storage_key or "").strip()).name + + @staticmethod + def _extract_amount_candidates(text: str) -> list[Decimal]: + values: list[Decimal] = [] + seen: set[Decimal] = set() + + def append_candidate(raw: str) -> None: + compact = str(raw or "").replace(",", ".").strip() + if not compact: + return + try: + candidate = Decimal(compact).quantize(Decimal("0.01")) + except (InvalidOperation, ValueError): + return + if candidate in seen: + return + seen.add(candidate) + values.append(candidate) + + for pattern in ( + r"(?:金额|价税合计|合计|小写|实收金额|支付金额|订单金额|总额|票价|房费|餐费)[::\s¥¥]*([0-9]{1,6}(?:[.,][0-9]{1,2})?)", + r"[¥¥]\s*([0-9]{1,6}(?:[.,][0-9]{1,2})?)", + r"([0-9]{1,6}(?:[.,][0-9]{1,2})?)\s*元", + ): + for raw in re.findall(pattern, text, flags=re.IGNORECASE): + append_candidate(raw) + + if values: + return values + + for raw in re.findall(r"(? bool: + return bool(re.search(r"(20\d{2}[年/\-.]\d{1,2}[月/\-.]\d{1,2}日?)", text)) + + @staticmethod + def _normalize_match_text(text: str) -> str: + return re.sub(r"\s+", "", str(text or "")).lower() + + @staticmethod + def _resolve_expense_type_label(expense_type: str | None) -> str: + normalized = str(expense_type or "").strip().lower() + return EXPENSE_TYPE_LABELS.get(normalized, "其他") + + @staticmethod + def _resolve_allowed_document_scenes(expense_type: str | None) -> set[str]: + normalized = str(expense_type or "").strip().lower() + return set(EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES.get(normalized, set())) + + def _detect_expense_scenes(self, text: str) -> dict[str, list[str]]: + normalized = self._normalize_match_text(text) + if not normalized: + return {} + + matches: dict[str, list[str]] = {} + for scene, keywords in EXPENSE_SCENE_KEYWORDS.items(): + matched = [keyword for keyword in keywords if keyword in normalized] + if matched: + matches[scene] = matched[:3] + return matches + + def _format_scene_labels(self, scene_codes: set[str]) -> str: + labels = [self._resolve_expense_type_label(code) for code in scene_codes] + unique_labels = list(dict.fromkeys(label for label in labels if label)) + return "、".join(unique_labels) if unique_labels else "其他" + + def _build_purpose_mismatch_point( + self, + *, + item: ExpenseClaimItem, + document_scenes: set[str], + ) -> str | None: + if not document_scenes: + return None + + allowed_scenes = self._resolve_allowed_document_scenes(item.item_type) + reason_text = str(item.item_reason or "").strip() + reason_scenes = set(self._detect_expense_scenes(reason_text).keys()) + document_scene_labels = self._format_scene_labels(document_scenes) + + if reason_scenes and document_scenes.isdisjoint(reason_scenes): + return ( + f"用途字段:用户填写用途“{reason_text[:24]}”与票据内容不一致," + f"当前附件更像{document_scene_labels}相关材料。" + ) + + if allowed_scenes and document_scenes.isdisjoint(allowed_scenes): + expense_label = self._resolve_expense_type_label(item.item_type) + return f"用途字段:当前费用项目为{expense_label},但附件内容更像{document_scene_labels}相关票据。" + + return None + + def _build_fallback_attachment_analysis( + self, + *, + media_type: str | None, + item: ExpenseClaimItem, + ) -> dict[str, Any]: + return { + "severity": "medium", + "label": "中风险", + "headline": "AI提示:附件已上传,待识别结果", + "summary": "附件已成功保存,但当前尚未拿到有效识别结果,建议人工先核对票据内容。", + "points": [ + f"附件格式:{self._resolve_attachment_media_type('attachment', fallback=media_type)}", + f"费用金额:当前明细金额为 {item.item_amount} 元", + ], + "suggestion": "建议打开附件确认金额、日期和票据类型是否完整,再继续提交审批。", + } + + def _build_failed_ocr_attachment_analysis( + self, + *, + media_type: str | None, + error_message: str, + item: ExpenseClaimItem, + ) -> dict[str, Any]: + return { + "severity": "medium", + "label": "中风险", + "headline": "AI提示:附件已上传,但识别失败", + "summary": "文件已经保存成功,但本次 AI 识别未完成,因此无法给出完整票据核验结论。", + "points": [ + f"识别异常:{error_message or 'OCR 服务暂不可用'}", + f"费用金额:当前明细金额为 {item.item_amount} 元", + f"附件格式:{self._resolve_attachment_media_type('attachment', fallback=media_type)}", + ], + "suggestion": "建议重新上传更清晰的票据图片,或稍后重试识别后再提交。", + } + + def _build_attachment_analysis(self, *, document: Any, item: ExpenseClaimItem) -> dict[str, Any]: + warnings = [str(value).strip() for value in list(getattr(document, "warnings", []) or []) if str(value).strip()] + text = " ".join( + [ + str(getattr(document, "summary", "") or "").strip(), + str(getattr(document, "text", "") or "").strip(), + ] + ).strip() + compact_text = text.replace(" ", "") + avg_score = float(getattr(document, "avg_score", 0.0) or 0.0) + line_count = int(getattr(document, "line_count", 0) or 0) + document_scene_matches = self._detect_expense_scenes(text) + purpose_mismatch_point = self._build_purpose_mismatch_point( + item=item, + document_scenes=set(document_scene_matches.keys()), + ) + + has_ticket_keyword = any( + keyword in compact_text + for keyword in ( + "发票", + "票据", + "增值税", + "电子行程单", + "购买方", + "销售方", + "税额", + "价税", + "票号", + "发票代码", + "凭证", + ) + ) + amount_candidates = self._extract_amount_candidates(text) + item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) + has_matching_amount = any(abs(candidate - item_amount) <= Decimal("1.00") for candidate in amount_candidates) + has_date_text = self._has_date_like_text(text) + amount_mismatch = bool(amount_candidates) and item_amount > Decimal("0.00") and not has_matching_amount + + points: list[str] = [] + if warnings: + points.append(f"识别提示:{warnings[0]}") + if line_count == 0 or not compact_text: + points.append("附件内容:未识别到有效文字,当前附件更像普通图片或内容过于模糊。") + if not has_ticket_keyword: + points.append("票据类型:未识别到发票、票据、电子行程单等关键字。") + if not amount_candidates: + points.append("金额字段:未识别到可用于核对的金额。") + elif amount_mismatch: + candidate_text = "、".join(str(candidate) for candidate in amount_candidates[:3]) + points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。") + if not has_date_text: + points.append("日期字段:未识别到开票日期或业务发生日期。") + if purpose_mismatch_point: + points.append(purpose_mismatch_point) + if avg_score and avg_score < 0.72: + points.append(f"识别质量:OCR 置信度偏低({avg_score:.0%}),可能影响票据核验准确性。") + + issue_count = len(points) + if issue_count == 0: + return { + "severity": "pass", + "label": "AI提示符合条件", + "headline": "AI提示:附件符合基础校验条件", + "summary": "已识别到票据关键字段,附件可继续进入人工复核与报销流程。", + "points": [ + "票据类型:已识别到可用于报销核验的票据关键字。", + f"金额字段:已识别到与当前明细接近的金额 {item_amount} 元。", + ], + "suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。", + } + + severity = "low" + label = "低风险" + headline = "AI提示:附件存在轻微待核对项" + summary = "当前附件已识别出部分票据要素,但仍建议人工继续复核。" + + if ( + line_count == 0 + or not compact_text + or (not has_ticket_keyword and issue_count >= 2) + or (purpose_mismatch_point and amount_mismatch) + ): + severity = "high" + label = "高风险" + headline = "AI提示:附件不符合票据校验条件" + summary = "当前附件存在明显异常,票据内容与填写信息不一致,或无法作为有效报销材料。" + elif purpose_mismatch_point or amount_mismatch or issue_count >= 2 or warnings or (avg_score and avg_score < 0.72): + severity = "medium" + label = "中风险" + headline = "AI提示:附件存在明显待整改项" + summary = "当前附件可见部分内容,但金额、用途、日期或票据类型仍有缺失或不一致。" + + suggestion = { + "high": "建议重新上传清晰的票据原件,确保包含发票抬头、金额、日期等核心字段。", + "medium": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。", + "low": "建议人工再次核对金额和业务说明,确认后可继续流转。", + }[severity] + + return { + "severity": severity, + "label": label, + "headline": headline, + "summary": summary, + "points": points, + "suggestion": suggestion, + } + @staticmethod def _serialize_claim(claim: ExpenseClaim) -> dict[str, Any]: return { @@ -696,6 +1416,14 @@ class ExpenseClaimService: return None return fallback + @staticmethod + def _normalize_sort_datetime(value: datetime | None) -> datetime: + if value is None: + return datetime.max.replace(tzinfo=UTC) + if value.tzinfo is None: + return value.replace(tzinfo=UTC) + return value + @staticmethod def _is_missing_value(value: Any) -> bool: text = str(value or "").strip() @@ -712,13 +1440,14 @@ class ExpenseClaimService: if not claim.items: claim.amount = Decimal("0.00") claim.invoice_count = 0 + claim.risk_flags_json = self._merge_claim_attachment_risk_flags(claim, []) return ordered_items = sorted( claim.items, key=lambda item: ( item.item_date or date.max, - item.created_at or datetime.max.replace(tzinfo=UTC), + self._normalize_sort_datetime(item.created_at), ), ) primary_item = ordered_items[0] @@ -740,11 +1469,94 @@ class ExpenseClaimService: self._normalize_optional_text(primary_item.item_location, fallback=claim.location or "待补充") or "待补充" ) + claim.risk_flags_json = self._merge_claim_attachment_risk_flags( + claim, + self._build_claim_attachment_risk_flags(ordered_items), + ) if str(claim.status or "").strip().lower() == "draft": claim.approval_stage = "待提交" + def _refresh_item_attachment_analysis(self, item: ExpenseClaimItem) -> None: + file_path = self._resolve_attachment_path(item.invoice_id) + if file_path is None or not file_path.exists(): + return + + metadata = self._read_attachment_meta(file_path) + media_type = str(metadata.get("media_type") or self._resolve_attachment_media_type(file_path.name)).strip() + ocr_status = str(metadata.get("ocr_status") or "").strip().lower() + + if ocr_status == "failed": + analysis = self._build_failed_ocr_attachment_analysis( + media_type=media_type, + error_message=str(metadata.get("ocr_error") or ""), + item=item, + ) + elif ocr_status == "recognized" or any( + ( + str(metadata.get("ocr_text") or "").strip(), + str(metadata.get("ocr_summary") or "").strip(), + int(metadata.get("ocr_line_count") or 0), + list(metadata.get("ocr_warnings") or []), + ) + ): + document = SimpleNamespace( + text=str(metadata.get("ocr_text") or ""), + summary=str(metadata.get("ocr_summary") or ""), + avg_score=float(metadata.get("ocr_avg_score") or 0.0), + line_count=int(metadata.get("ocr_line_count") or 0), + warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()], + ) + analysis = self._build_attachment_analysis(document=document, item=item) + else: + analysis = self._build_fallback_attachment_analysis(media_type=media_type, item=item) + + metadata["analysis"] = analysis + self._write_attachment_meta(file_path, metadata) + + def _build_claim_attachment_risk_flags(self, ordered_items: list[ExpenseClaimItem]) -> list[dict[str, Any]]: + derived_flags: list[dict[str, Any]] = [] + for index, item in enumerate(ordered_items, start=1): + file_path = self._resolve_attachment_path(item.invoice_id) + if file_path is None or not file_path.exists(): + continue + + metadata = self._read_attachment_meta(file_path) + analysis = metadata.get("analysis") + if not isinstance(analysis, dict): + continue + + severity = str(analysis.get("severity") or "").strip().lower() + if severity in {"", "pass", "low"}: + continue + + summary = str(analysis.get("summary") or analysis.get("headline") or "").strip() or "附件存在待核对风险。" + label = str(analysis.get("label") or ("高风险" if severity == "high" else "中风险")).strip() + derived_flags.append( + { + "source": "attachment_analysis", + "item_id": item.id, + "severity": severity, + "label": label, + "message": f"费用明细第 {index} 条:{summary}", + } + ) + return derived_flags + + @staticmethod + def _merge_claim_attachment_risk_flags( + claim: ExpenseClaim, + attachment_risk_flags: list[dict[str, Any]], + ) -> list[Any]: + preserved_flags = [ + flag + for flag in list(claim.risk_flags_json or []) + if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "attachment_analysis") + ] + return preserved_flags + attachment_risk_flags + def _validate_claim_for_submission(self, claim: ExpenseClaim) -> list[str]: issues: list[str] = [] + claim_location_required = self._is_location_required_expense_type(claim.expense_type) if self._is_missing_value(claim.employee_name): issues.append("申请人未完善") @@ -754,7 +1566,7 @@ class ExpenseClaimService: issues.append("报销类型未完善") if self._is_missing_value(claim.reason): issues.append("报销事由未完善") - if self._is_missing_value(claim.location): + if claim_location_required and self._is_missing_value(claim.location): issues.append("业务地点未完善") if claim.amount is None or claim.amount <= Decimal("0.00"): issues.append("报销金额未完善") @@ -765,13 +1577,14 @@ class ExpenseClaimService: for index, item in enumerate(claim.items, start=1): prefix = f"费用明细第 {index} 条" + item_location_required = self._is_location_required_expense_type(item.item_type or claim.expense_type) if item.item_date is None: issues.append(f"{prefix}缺少日期") if self._is_missing_value(item.item_type): issues.append(f"{prefix}缺少费用项目") if self._is_missing_value(item.item_reason): issues.append(f"{prefix}缺少说明") - if self._is_missing_value(item.item_location): + if item_location_required and self._is_missing_value(item.item_location): issues.append(f"{prefix}缺少地点") if item.item_amount is None or item.item_amount <= Decimal("0.00"): issues.append(f"{prefix}缺少金额") @@ -780,6 +1593,10 @@ class ExpenseClaimService: return issues + @staticmethod + def _is_location_required_expense_type(expense_type: str | None) -> bool: + return str(expense_type or "").strip().lower() in LOCATION_REQUIRED_EXPENSE_TYPES + @staticmethod def _has_privileged_claim_access(current_user: CurrentUserContext) -> bool: if current_user.is_admin: diff --git a/server/src/app/services/ontology.py b/server/src/app/services/ontology.py index 97e68b7..f12ae2f 100644 --- a/server/src/app/services/ontology.py +++ b/server/src/app/services/ontology.py @@ -169,6 +169,19 @@ EXPENSE_TYPE_KEYWORDS = { "招待费": "entertainment", "招待": "entertainment", "宴请": "entertainment", + "办公费": "office", + "办公用品": "office", + "文具": "office", + "耗材": "office", + "办公耗材": "office", + "打印纸": "office", + "办公设备": "office", + "培训费": "training", + "培训": "training", + "通讯费": "communication", + "话费": "communication", + "福利费": "welfare", + "团建": "welfare", } EXPENSE_NARRATIVE_KEYWORDS = ( @@ -1117,6 +1130,21 @@ class SemanticOntologyService: ): upsert(self._make_entity("expense_type", "餐费", "meal", role="filter", confidence=0.84)) + if any( + keyword in query + for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板") + ): + upsert(self._make_entity("expense_type", "办公费", "office", role="filter", confidence=0.87)) + + if any(keyword in query for keyword in ("培训", "讲师费", "课时费", "课程费")): + upsert(self._make_entity("expense_type", "培训费", "training", role="filter", confidence=0.84)) + + if any(keyword in query for keyword in ("通讯费", "话费", "流量费", "宽带费")): + upsert(self._make_entity("expense_type", "通讯费", "communication", role="filter", confidence=0.84)) + + if any(keyword in query for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")): + upsert(self._make_entity("expense_type", "福利费", "welfare", role="filter", confidence=0.84)) + for amount in self._extract_amount_entities(query): upsert(amount) diff --git a/server/src/app/services/user_agent.py b/server/src/app/services/user_agent.py index ae4f303..616dbdb 100644 --- a/server/src/app/services/user_agent.py +++ b/server/src/app/services/user_agent.py @@ -65,6 +65,10 @@ EXPENSE_TYPE_LABELS = { "meal": "餐费", "meeting": "会务费", "entertainment": "业务招待费", + "office": "办公费", + "training": "培训费", + "communication": "通讯费", + "welfare": "福利费", "other": "其他费用", } @@ -74,6 +78,10 @@ GROUP_SCENE_LABELS = { "meal": "伙食费", "transport": "交通费", "hotel": "住宿费", + "office": "办公费", + "training": "培训费", + "communication": "通讯费", + "welfare": "福利费", "other": "其他费用", } @@ -1825,6 +1833,14 @@ class UserAgentService: return "meal", "餐费" if "会务" in compact: return "meeting", "会务费" + if any(keyword in compact for keyword in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")): + return "office", "办公费" + if any(keyword in compact for keyword in ("培训费", "培训", "讲师费", "课时费", "课程费")): + return "training", "培训费" + if any(keyword in compact for keyword in ("通讯费", "话费", "流量费", "宽带费")): + return "communication", "通讯费" + if any(keyword in compact for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")): + return "welfare", "福利费" return "other", str(value or "").strip() or "其他费用" def _resolve_required_review_keys( @@ -1951,6 +1967,13 @@ class UserAgentService: "group_code": group_code, "scene_label": "餐饮票据", } + if any(keyword in compact for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "键盘", "鼠标", "白板", "墨盒", "硒鼓")): + return { + "document_type": "other", + "expense_type": "office", + "group_code": "office", + "scene_label": "办公用品票据", + } return { "document_type": "other", "expense_type": expense_type_code or "other", @@ -1962,7 +1985,7 @@ class UserAgentService: def _normalize_group_code(expense_type_code: str) -> str: if expense_type_code in {"travel", "hotel", "transport"}: return "travel" - if expense_type_code in {"entertainment", "meal"}: + if expense_type_code in {"entertainment", "meal", "office", "training", "communication", "welfare"}: return expense_type_code return "other"