refactor(backend): update reimbursement and related services

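- endpoints/reimbursements.py: add draft claim item create/delete endpoints and item attachment upload/meta/download/delete endpoints
- schemas/reimbursement.py: add ExpenseClaimItemCreate, attachment read/analysis schemas, and item/attachment action response schemas
- services/expense_claims.py: add claim item create/delete, per-item attachment storage with OCR-based analysis, and additional expense types and scene keywords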
- services/ontology.py: update ontology service
- services/user_agent.py: update user agent service
This commit is contained in:
caoxiaozhu
2026-05-13 06:45:04 +00:00
parent 4db5e8ec16
commit 6317fc0ccd
5 changed files with 1154 additions and 7 deletions

View File

@@ -2,13 +2,18 @@ from __future__ import annotations
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
from app.api.deps import CurrentUserContext, get_current_user, get_db
from app.schemas.common import ErrorResponse
from app.schemas.reimbursement import (
ExpenseClaimAttachmentActionResponse,
ExpenseClaimActionResponse,
ExpenseClaimAttachmentRead,
ExpenseClaimItemCreate,
ExpenseClaimItemActionResponse,
ExpenseClaimItemUpdate,
ExpenseClaimRead,
ReimbursementCreate,
@@ -113,6 +118,238 @@ def update_expense_claim_item(
return claim
@router.post(
    "/claims/{claim_id}/items",
    response_model=ExpenseClaimRead,
    summary="新增草稿费用明细",
    description="在草稿报销单中新增一条费用明细,供用户继续补充附件与字段。",
    responses={
        status.HTTP_404_NOT_FOUND: {
            "model": ErrorResponse,
            "description": "报销单不存在。",
        },
        status.HTTP_400_BAD_REQUEST: {
            "model": ErrorResponse,
            "description": "草稿状态校验失败或字段校验失败。",
        },
    },
)
def create_expense_claim_item(
    claim_id: str,
    payload: ExpenseClaimItemCreate | None,
    db: DbSession,
    current_user: CurrentUser,
) -> ExpenseClaimRead:
    """Add a new expense item to a draft claim and return the updated claim.

    Responds with 400 when the service raises ``ValueError`` and with 404
    when the service reports that the claim does not exist (returns ``None``).
    """
    service = ExpenseClaimService(db)
    try:
        updated_claim = service.create_claim_item(
            claim_id=claim_id,
            payload=payload,
            current_user=current_user,
        )
    except ValueError as error:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(error),
        ) from error

    if updated_claim is not None:
        return updated_claim
    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found")
@router.delete(
    "/claims/{claim_id}/items/{item_id}",
    response_model=ExpenseClaimItemActionResponse,
    summary="删除草稿费用明细",
    description="删除草稿报销单中的一条费用明细,并同步清理该行已上传的附件与 OCR 元数据。",
    responses={
        status.HTTP_404_NOT_FOUND: {
            "model": ErrorResponse,
            "description": "报销单或费用明细不存在。",
        },
        status.HTTP_400_BAD_REQUEST: {
            "model": ErrorResponse,
            "description": "草稿状态校验失败。",
        },
    },
)
def delete_expense_claim_item(
    claim_id: str,
    item_id: str,
    db: DbSession,
    current_user: CurrentUser,
) -> ExpenseClaimItemActionResponse:
    """Delete one expense item from a draft claim.

    Error mapping:
      * ``LookupError`` (item not found) -> 404
      * ``FileNotFoundError`` (the item's stored attachment key cannot be
        resolved while cleaning up its files) -> 404, matching the other
        attachment endpoints in this router instead of leaking a 500
      * ``ValueError`` (draft-state validation) -> 400
      * service returns ``None`` (claim not found) -> 404
    """
    service = ExpenseClaimService(db)
    try:
        payload = service.delete_claim_item(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
    except LookupError as error:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error
    except FileNotFoundError as error:
        # Deleting an item also removes its attachment files; an unresolvable
        # storage key is reported the same way the attachment routes report it.
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error
    except ValueError as error:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(error)) from error
    if payload is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found")
    return ExpenseClaimItemActionResponse(**payload)
@router.post(
    "/claims/{claim_id}/items/{item_id}/attachment",
    response_model=ExpenseClaimAttachmentActionResponse,
    summary="上传费用明细附件",
    description="为草稿费用明细上传真实附件文件,并返回附件元信息与 AI 校验结果。",
    responses={
        status.HTTP_404_NOT_FOUND: {
            "model": ErrorResponse,
            "description": "报销单或费用明细不存在。",
        },
        status.HTTP_400_BAD_REQUEST: {
            "model": ErrorResponse,
            "description": "草稿状态校验失败或文件不合法。",
        },
    },
)
async def upload_expense_claim_item_attachment(
    claim_id: str,
    item_id: str,
    file: Annotated[UploadFile, File(description="待上传的附件文件。")],
    db: DbSession,
    current_user: CurrentUser,
) -> ExpenseClaimAttachmentActionResponse:
    """Store an uploaded file as the attachment of a draft claim item.

    The whole upload is read into memory and handed to the service.
    ``LookupError`` maps to 404, ``ValueError`` to 400, and a ``None``
    result (claim not found) to 404.
    """
    service = ExpenseClaimService(db)
    try:
        # Fall back to a generic name when the client sent no filename.
        upload_name = str(file.filename or "attachment.bin")
        raw_bytes = await file.read()
        result = service.upload_claim_item_attachment(
            claim_id=claim_id,
            item_id=item_id,
            filename=upload_name,
            content=raw_bytes,
            media_type=file.content_type,
            current_user=current_user,
        )
    except LookupError as error:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(error),
        ) from error
    except ValueError as error:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(error),
        ) from error

    if result is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found")
    return ExpenseClaimAttachmentActionResponse(**result)
@router.get(
    "/claims/{claim_id}/items/{item_id}/attachment/meta",
    response_model=ExpenseClaimAttachmentRead,
    summary="读取费用明细附件元信息",
    description="返回当前费用明细已上传附件的文件信息与 AI 校验结果。",
    responses={
        status.HTTP_404_NOT_FOUND: {
            "model": ErrorResponse,
            "description": "报销单、费用明细或附件不存在。",
        },
    },
)
def get_expense_claim_item_attachment_meta(
    claim_id: str,
    item_id: str,
    db: DbSession,
    current_user: CurrentUser,
) -> ExpenseClaimAttachmentRead:
    """Return file metadata and the stored analysis for an item's attachment.

    Any missing claim, item or attachment is reported as 404.
    """
    service = ExpenseClaimService(db)
    try:
        meta = service.get_claim_item_attachment_meta(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
    except (LookupError, FileNotFoundError) as error:
        # Both "item not found" and "attachment not found" are 404s.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(error),
        ) from error

    if meta is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found")
    return ExpenseClaimAttachmentRead(**meta)
@router.get(
    "/claims/{claim_id}/items/{item_id}/attachment",
    response_class=FileResponse,
    summary="读取费用明细附件内容",
    description="用于详情页预览当前费用明细已上传的附件文件。",
    responses={
        status.HTTP_404_NOT_FOUND: {
            "model": ErrorResponse,
            "description": "报销单、费用明细或附件不存在。",
        },
    },
)
def get_expense_claim_item_attachment(
    claim_id: str,
    item_id: str,
    db: DbSession,
    current_user: CurrentUser,
) -> FileResponse:
    """Stream the stored attachment file of a claim item.

    Any missing claim, item or attachment is reported as 404.
    """
    service = ExpenseClaimService(db)
    try:
        located = service.get_claim_item_attachment_content(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
    except (LookupError, FileNotFoundError) as error:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(error),
        ) from error

    if located is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found")

    # The service returns (path on disk, media type, download filename).
    path_on_disk, content_type, download_name = located
    return FileResponse(path_on_disk, media_type=content_type, filename=download_name)
@router.delete(
    "/claims/{claim_id}/items/{item_id}/attachment",
    response_model=ExpenseClaimAttachmentActionResponse,
    summary="删除费用明细附件",
    description="删除草稿费用明细当前已上传的附件文件,并清空票据关联。",
    responses={
        status.HTTP_404_NOT_FOUND: {
            "model": ErrorResponse,
            "description": "报销单、费用明细或附件不存在。",
        },
        status.HTTP_400_BAD_REQUEST: {
            "model": ErrorResponse,
            "description": "当前状态不允许删除附件。",
        },
    },
)
def delete_expense_claim_item_attachment(
    claim_id: str,
    item_id: str,
    db: DbSession,
    current_user: CurrentUser,
) -> ExpenseClaimAttachmentActionResponse:
    """Remove the attachment of a draft claim item.

    ``LookupError`` and ``FileNotFoundError`` map to 404, ``ValueError``
    to 400, and a ``None`` result (claim not found) to 404.
    """
    service = ExpenseClaimService(db)
    try:
        result = service.delete_claim_item_attachment(
            claim_id=claim_id,
            item_id=item_id,
            current_user=current_user,
        )
    except (LookupError, FileNotFoundError) as error:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=str(error),
        ) from error
    except ValueError as error:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(error),
        ) from error

    if result is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Claim not found")
    return ExpenseClaimAttachmentActionResponse(**result)
@router.post(
"/claims/{claim_id}/submit",
response_model=ExpenseClaimRead,

View File

@@ -45,6 +45,25 @@ class ExpenseClaimItemRead(BaseModel):
updated_at: datetime
class ExpenseClaimAttachmentAnalysisRead(BaseModel):
    # Read model for the automated check result attached to an uploaded file.
    # The service in this commit emits severity values "pass", "low",
    # "medium" and "high".
    severity: str
    # Short badge text shown next to the severity.
    label: str
    headline: str
    summary: str
    # Individual findings; empty when nothing was flagged.
    points: list[str] = Field(default_factory=list)
    suggestion: str = Field(default="")
class ExpenseClaimAttachmentRead(BaseModel):
    # Read model describing the file currently attached to a claim item.
    file_name: str
    # Storage key of the file (the service stores a path relative to the
    # attachment storage root).
    storage_key: str
    media_type: str
    size_bytes: int
    uploaded_at: datetime | None = Field(default=None)
    # Whether the client may preview the file inline.
    previewable: bool = Field(default=True)
    # Automated check result; None when no analysis was stored.
    analysis: ExpenseClaimAttachmentAnalysisRead | None = Field(default=None)
class ExpenseClaimItemUpdate(BaseModel):
item_date: date | None = None
item_type: str | None = None
@@ -54,6 +73,15 @@ class ExpenseClaimItemUpdate(BaseModel):
invoice_id: str | None = None
class ExpenseClaimItemCreate(BaseModel):
    # Request body for adding an item to a draft claim. Every field is
    # optional; the service fills in defaults for the ones left unset.
    item_date: date | None = Field(default=None)
    item_type: str | None = Field(default=None)
    item_reason: str | None = Field(default=None)
    item_location: str | None = Field(default=None)
    item_amount: Decimal | None = Field(default=None)
    invoice_id: str | None = Field(default=None)
class ExpenseClaimRead(BaseModel):
model_config = ConfigDict(from_attributes=True)
@@ -84,3 +112,17 @@ class ExpenseClaimActionResponse(BaseModel):
message: str
claim_id: str
status: str | None = None
class ExpenseClaimAttachmentActionResponse(BaseModel):
    # Response for attachment upload / delete actions on a claim item.
    message: str
    claim_id: str
    item_id: str
    # Invoice / storage reference of the item after the action.
    invoice_id: str | None = Field(default=None)
    # Attachment details after the action; None once it has been deleted.
    attachment: ExpenseClaimAttachmentRead | None = Field(default=None)
class ExpenseClaimItemActionResponse(BaseModel):
    # Response for item-level actions (currently item deletion).
    # Human-readable result message.
    message: str
    # Identifier of the claim the item belonged to.
    claim_id: str
    # Identifier of the affected item.
    item_id: str

View File

@@ -1,19 +1,27 @@
from __future__ import annotations
import json
import mimetypes
import re
import shutil
from datetime import UTC, date, datetime
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.config import get_settings
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import ExpenseClaimItemUpdate
from app.services.audit import AuditLogService
from app.schemas.reimbursement import ExpenseClaimItemCreate, ExpenseClaimItemUpdate
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.ocr import OcrService
EXPENSE_TYPE_LABELS = {
"travel": "差旅",
@@ -22,10 +30,64 @@ EXPENSE_TYPE_LABELS = {
"meal": "餐费",
"meeting": "会务",
"entertainment": "招待",
"office": "办公",
"training": "培训",
"communication": "通讯",
"welfare": "福利",
}
# Role codes listed as privileged for claim handling.
PRIVILEGED_CLAIM_ROLE_CODES = {
    "manager",
    "finance",
    "approver",
    "auditor",
    "executive",
}

# Draft claim limit per user.
MAX_DRAFT_CLAIMS_PER_USER = 3

# Expense types listed as requiring a location.
LOCATION_REQUIRED_EXPENSE_TYPES = {
    "travel", "hotel", "transport",
    "meal", "meeting", "entertainment",
}

# Keywords that, when found in text, suggest the text belongs to a scene.
# Keys are expense-type codes; values keep their original order.
EXPENSE_SCENE_KEYWORDS = {
    "travel": ("差旅", "出差", "行程"),
    "hotel": ("酒店", "住宿", "房费", "客房", "入住", "离店"),
    "transport": (
        "交通", "打车", "出租车", "网约车", "滴滴", "出行",
        "高铁", "动车", "火车", "机票", "航班", "行程单",
        "登机", "客票", "公交", "地铁", "过路费", "通行费",
        "停车",
    ),
    "meal": (
        "餐饮", "餐费", "用餐", "外卖", "快餐",
        "酒楼", "饭店", "饭馆", "食品", "咖啡",
    ),
    "entertainment": ("招待", "宴请", "接待", "客户餐", "商务餐", "业务招待"),
    "office": (
        "办公", "办公用品", "文具", "耗材", "打印", "纸张",
        "硒鼓", "墨盒", "鼠标", "键盘", "电脑",
    ),
    "meeting": ("会议", "会务", "会展", "会议室", "会场", "场地费", "论坛"),
    "training": ("培训", "课程", "讲师", "教材", "学费", "认证"),
}

# For each expense type, the document scenes accepted as matching it.
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = {
    "travel": {"travel", "hotel", "transport", "meal"},
    "hotel": {"hotel"},
    "transport": {"transport", "travel"},
    "meal": {"meal", "entertainment"},
    "entertainment": {"entertainment", "meal"},
    "office": {"office"},
    "meeting": {"meeting"},
    "training": {"training"},
}
class ExpenseClaimService:
@@ -90,6 +152,7 @@ class ExpenseClaimService:
if payload.invoice_id is not None:
item.invoice_id = self._normalize_optional_text(payload.invoice_id, allow_empty=True)
self._refresh_item_attachment_analysis(item)
self._sync_claim_from_items(claim)
self.db.commit()
self.db.refresh(claim)
@@ -105,6 +168,279 @@ class ExpenseClaimService:
return claim
def create_claim_item(
    self,
    *,
    claim_id: str,
    payload: ExpenseClaimItemCreate | None,
    current_user: CurrentUserContext,
) -> ExpenseClaim | None:
    """Append a new item to a draft claim and return the refreshed claim.

    Returns ``None`` when the claim lookup yields nothing. Raises
    ``ValueError`` when the supplied amount is negative (and whatever
    ``_ensure_draft_claim`` raises for a non-draft claim).
    """
    claim = self.get_claim(claim_id, current_user)
    if claim is None:
        return None
    self._ensure_draft_claim(claim)

    snapshot_before = self._serialize_claim(claim)
    data = payload or ExpenseClaimItemCreate()

    # The claim's own date is the default for the item date.
    if claim.occurred_at is not None:
        occurred_at = claim.occurred_at
    else:
        occurred_at = datetime.now(UTC)

    if data.item_amount is None:
        amount = Decimal("0.00")
    else:
        amount = data.item_amount.quantize(Decimal("0.01"))
        if amount < Decimal("0.00"):
            raise ValueError("费用金额不能小于 0。")

    # The item type falls back to the claim's expense type, then to "other".
    default_type = str(claim.expense_type or "").strip() or "other"
    resolved_date = data.item_date or occurred_at.date()
    resolved_type = self._normalize_optional_text(data.item_type, fallback=default_type) or "other"
    resolved_reason = self._normalize_optional_text(data.item_reason, fallback="") or ""
    resolved_location = self._normalize_optional_text(data.item_location, fallback="") or ""
    resolved_invoice = self._normalize_optional_text(data.invoice_id, allow_empty=True)

    new_item = ExpenseClaimItem(
        claim_id=claim.id,
        item_date=resolved_date,
        item_type=resolved_type,
        item_reason=resolved_reason,
        item_location=resolved_location,
        item_amount=amount,
        invoice_id=resolved_invoice,
    )
    claim.items.append(new_item)
    self.db.add(new_item)

    self._sync_claim_from_items(claim)
    self.db.commit()
    self.db.refresh(claim)

    self.audit_service.log_action(
        actor=current_user.name or current_user.username,
        action="expense_claim.item_create",
        resource_type="expense_claim",
        resource_id=claim.id,
        before_json=snapshot_before,
        after_json=self._serialize_claim(claim),
    )
    return claim
def delete_claim_item(
    self,
    *,
    claim_id: str,
    item_id: str,
    current_user: CurrentUserContext,
) -> dict[str, Any] | None:
    """Delete one item from a draft claim together with its attachment files.

    Returns ``None`` when the claim lookup yields nothing, otherwise a dict
    with ``message``, ``claim_id`` and ``item_id``. ``LookupError`` propagates
    from ``_get_claim_item_or_raise`` when the item is not part of the claim.
    """
    claim, item = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    if claim is None:
        return None
    self._ensure_draft_claim(claim)

    before_json = self._serialize_claim(claim)
    # Capture everything needed for the response now: once the row is deleted
    # and the session committed, the ORM instance should not be read again.
    deleted_item_id = item.id
    item_label = str(item.item_reason or "").strip() or self._resolve_expense_type_label(item.item_type)

    self._delete_item_attachment_files(item)
    claim.items = [entry for entry in claim.items if entry.id != deleted_item_id]
    self.db.delete(item)
    self._sync_claim_from_items(claim)
    self.db.commit()
    self.db.refresh(claim)

    self.audit_service.log_action(
        actor=current_user.name or current_user.username,
        action="expense_claim.item_delete",
        resource_type="expense_claim",
        resource_id=claim.id,
        before_json=before_json,
        after_json=self._serialize_claim(claim),
    )
    return {
        "message": f"费用明细“{item_label}”已删除。",
        "claim_id": claim.id,
        "item_id": deleted_item_id,
    }
def upload_claim_item_attachment(
    self,
    *,
    claim_id: str,
    item_id: str,
    filename: str,
    content: bytes,
    media_type: str | None,
    current_user: CurrentUserContext,
) -> dict[str, Any] | None:
    """Save an uploaded file as the single attachment of a draft claim item.

    Steps: replace the item's attachment directory with the new file, run OCR
    on a best-effort basis, write a ``.meta.json`` sidecar with file details
    and the analysis, point ``item.invoice_id`` at the stored file, commit,
    and write an audit log entry.

    Returns ``None`` when the claim lookup yields nothing. Raises
    ``ValueError`` for empty content.
    """
    claim, item = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    if claim is None:
        return None
    self._ensure_draft_claim(claim)

    safe_name = self._normalize_attachment_filename(filename)
    if not content:
        raise ValueError("上传文件不能为空。")

    snapshot_before = self._serialize_claim(claim)

    # Each item keeps at most one attachment: wipe the directory first.
    target_dir = self._build_item_attachment_dir(claim.id, item.id)
    shutil.rmtree(target_dir, ignore_errors=True)
    target_dir.mkdir(parents=True, exist_ok=True)
    stored_path = target_dir / safe_name
    stored_path.write_bytes(content)

    # Start from the "no recognition result" analysis; OCR may replace it.
    analysis = self._build_fallback_attachment_analysis(
        media_type=media_type,
        item=item,
    )
    recognized = None
    ocr_status, ocr_error = "empty", ""
    try:
        ocr_input = [(safe_name, content, media_type or "application/octet-stream")]
        ocr_result = OcrService().recognize_files(ocr_input)
        recognized_documents = list(ocr_result.documents or [])
        if recognized_documents:
            recognized = recognized_documents[0]
            ocr_status = "recognized"
            analysis = self._build_attachment_analysis(
                document=recognized,
                item=item,
            )
    except Exception as exc:  # pragma: no cover - fallback path depends on OCR runtime
        # OCR is best-effort: keep the upload and record the failure.
        ocr_status = "failed"
        ocr_error = str(exc)
        analysis = self._build_failed_ocr_attachment_analysis(
            media_type=media_type,
            error_message=ocr_error,
            item=item,
        )

    item.invoice_id = self._to_attachment_storage_key(stored_path)
    sidecar = {
        "file_name": safe_name,
        "storage_key": item.invoice_id,
        "media_type": self._resolve_attachment_media_type(
            safe_name,
            fallback=media_type,
        ),
        "size_bytes": len(content),
        "uploaded_at": datetime.now(UTC).isoformat(),
        "previewable": self._is_previewable_media_type(media_type, safe_name),
        "analysis": analysis,
        "ocr_status": ocr_status,
        "ocr_error": ocr_error,
        "ocr_text": str(getattr(recognized, "text", "") or ""),
        "ocr_summary": str(getattr(recognized, "summary", "") or ""),
        "ocr_avg_score": float(getattr(recognized, "avg_score", 0.0) or 0.0),
        "ocr_line_count": int(getattr(recognized, "line_count", 0) or 0),
        "ocr_warnings": [str(warning) for warning in getattr(recognized, "warnings", []) or []],
    }
    self._write_attachment_meta(stored_path, sidecar)

    self._sync_claim_from_items(claim)
    self.db.commit()
    self.db.refresh(claim)

    self.audit_service.log_action(
        actor=current_user.name or current_user.username,
        action="expense_claim.attachment_upload",
        resource_type="expense_claim",
        resource_id=claim.id,
        before_json=snapshot_before,
        after_json=self._serialize_claim(claim),
    )
    return {
        "message": f"{safe_name} 已上传并关联到当前费用明细。",
        "claim_id": claim.id,
        "item_id": item.id,
        "invoice_id": item.invoice_id,
        "attachment": self._build_attachment_payload(item),
    }
def get_claim_item_attachment_meta(
    self,
    *,
    claim_id: str,
    item_id: str,
    current_user: CurrentUserContext,
) -> dict[str, Any] | None:
    """Return the attachment payload of a claim item, or ``None`` without a claim.

    ``LookupError`` / ``FileNotFoundError`` propagate from the helpers when
    the item or its attachment cannot be found.
    """
    located = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    claim, item = located
    return None if claim is None else self._build_attachment_payload(item)
def get_claim_item_attachment_content(
    self,
    *,
    claim_id: str,
    item_id: str,
    current_user: CurrentUserContext,
) -> tuple[Path, str, str] | None:
    """Return ``(file path, media type, filename)`` for an item's attachment.

    Returns ``None`` when the claim lookup yields nothing.
    ``LookupError`` / ``FileNotFoundError`` propagate from the helpers.
    """
    located = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    claim, item = located
    return None if claim is None else self._resolve_item_attachment_content(item)
def delete_claim_item_attachment(
    self,
    *,
    claim_id: str,
    item_id: str,
    current_user: CurrentUserContext,
) -> dict[str, Any] | None:
    """Remove the attachment files of a draft claim item and clear its reference.

    Returns ``None`` when the claim lookup yields nothing, otherwise a dict
    describing the outcome (``attachment`` is always ``None`` afterwards).
    """
    claim, item = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    if claim is None:
        return None
    self._ensure_draft_claim(claim)

    snapshot_before = self._serialize_claim(claim)
    # Remember the file name for the message before the reference is cleared.
    removed_name = self._resolve_attachment_display_name(item.invoice_id)

    self._delete_item_attachment_files(item)
    item.invoice_id = None
    self._sync_claim_from_items(claim)
    self.db.commit()
    self.db.refresh(claim)

    self.audit_service.log_action(
        actor=current_user.name or current_user.username,
        action="expense_claim.attachment_delete",
        resource_type="expense_claim",
        resource_id=claim.id,
        before_json=snapshot_before,
        after_json=self._serialize_claim(claim),
    )

    result: dict[str, Any] = {
        "message": f"{removed_name or '附件'} 已删除。",
        "claim_id": claim.id,
        "item_id": item.id,
        "invoice_id": item.invoice_id,
        "attachment": None,
    }
    return result
def submit_claim(self, claim_id: str, current_user: CurrentUserContext) -> ExpenseClaim | None:
claim = self.get_claim(claim_id, current_user)
if claim is None:
@@ -144,6 +480,7 @@ class ExpenseClaimService:
before_json = self._serialize_claim(claim)
resource_id = claim.id
self._delete_claim_attachment_root(claim.id)
self.db.delete(claim)
self.db.commit()
@@ -545,6 +882,14 @@ class ExpenseClaimService:
return "meal"
if "会务" in compact:
return "meeting"
if any(word in compact for word in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")):
return "office"
if any(word in compact for word in ("培训费", "培训", "讲师费", "课时费", "课程费")):
return "training"
if any(word in compact for word in ("通讯费", "话费", "流量费", "宽带费")):
return "communication"
if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")):
return "welfare"
for item in entities:
if item.type == "expense_type":
normalized = item.normalized_value.strip()
@@ -669,6 +1014,381 @@ class ExpenseClaimService:
except (TypeError, ValueError):
return 0
def _get_claim_item_or_raise(
    self,
    *,
    claim_id: str,
    item_id: str,
    current_user: CurrentUserContext,
) -> tuple[ExpenseClaim | None, ExpenseClaimItem]:
    """Look up a claim and one of its items.

    Returns ``(None, None)`` when the claim lookup yields nothing, so callers
    must check the first element before using the second. Raises
    ``LookupError`` when the claim exists but has no item with ``item_id``.
    """
    claim = self.get_claim(claim_id, current_user)
    if claim is None:
        return None, None  # type: ignore[return-value]

    for candidate in claim.items:
        if candidate.id == item_id:
            return claim, candidate
    raise LookupError("Item not found")
def _get_attachment_storage_root(self) -> Path:
    """Return the resolved ``expense_claims`` directory under the storage root."""
    storage_root = get_settings().resolved_storage_root_dir
    claims_root = storage_root / "expense_claims"
    return claims_root.resolve()
def _build_item_attachment_dir(self, claim_id: str, item_id: str) -> Path:
    """Return the resolved attachment directory ``<root>/<claim_id>/<item_id>``."""
    root = self._get_attachment_storage_root()
    item_dir = root / claim_id / item_id
    return item_dir.resolve()
def _delete_claim_attachment_root(self, claim_id: str) -> None:
    """Remove the claim's whole attachment directory, ignoring errors."""
    claim_dir = (self._get_attachment_storage_root() / claim_id).resolve()
    shutil.rmtree(claim_dir, ignore_errors=True)
@staticmethod
def _normalize_attachment_filename(filename: str | None) -> str:
normalized = Path(str(filename or "").strip()).name
normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._")
suffix = Path(normalized).suffix
if normalized:
return normalized
return f"attachment{suffix or '.bin'}"
def _resolve_attachment_path(self, storage_key: str | None) -> Path | None:
    """Turn a storage key into an absolute path inside the storage root.

    Returns ``None`` for an empty key. Raises ``FileNotFoundError`` when the
    resolved path falls outside the storage root.
    """
    key = str(storage_key or "").strip()
    if key == "":
        return None

    storage_root = self._get_attachment_storage_root()
    candidate = (storage_root / key).resolve()
    try:
        # relative_to raises ValueError when candidate is not under the root.
        candidate.relative_to(storage_root)
    except ValueError as exc:
        raise FileNotFoundError("Attachment path is invalid") from exc
    return candidate
def _to_attachment_storage_key(self, file_path: Path) -> str:
    """Return ``file_path`` relative to the storage root, using ``/`` separators."""
    storage_root = self._get_attachment_storage_root()
    relative = file_path.resolve().relative_to(storage_root)
    return relative.as_posix()
def _resolve_item_attachment_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]:
    """Return ``(file path, media type, filename)`` for the item's attachment.

    Raises ``FileNotFoundError`` when the item has no attachment reference
    or the referenced file does not exist.
    """
    stored_path = self._resolve_attachment_path(item.invoice_id)
    if stored_path is None or not stored_path.exists():
        raise FileNotFoundError("Attachment not found")

    sidecar = self._read_attachment_meta(stored_path)
    # Prefer the recorded upload name; fall back to the name on disk.
    display_name = str(sidecar.get("file_name") or stored_path.name)
    recorded_type = str(sidecar.get("media_type") or "")
    resolved_type = self._resolve_attachment_media_type(display_name, fallback=recorded_type)
    return stored_path, resolved_type, display_name
def _delete_item_attachment_files(self, item: ExpenseClaimItem) -> None:
    """Delete the files stored for the item's attachment, if any.

    ``item.invoice_id`` is used as the storage key, and that field can also
    be written through the item create/update payloads, so the resolved path
    is not trusted blindly:

    * a path directly under the storage root is removed as a single file
      plus its ``.meta.json`` sidecar (directories are left alone);
    * a path inside this item's own attachment directory removes that
      directory;
    * any other path is ignored, so a crafted key cannot remove another
      item's or another claim's attachment directory.

    ``FileNotFoundError`` propagates from ``_resolve_attachment_path`` when
    the key resolves outside the storage root.
    """
    file_path = self._resolve_attachment_path(item.invoice_id)
    if file_path is None:
        return

    root = self._get_attachment_storage_root()
    if file_path.parent == root:
        # unlink() raises on directories (e.g. a key equal to a claim id).
        if file_path.is_dir():
            return
        file_path.unlink(missing_ok=True)
        self._attachment_meta_path(file_path).unlink(missing_ok=True)
        return

    item_dir = self._build_item_attachment_dir(item.claim_id, item.id)
    if file_path.parent != item_dir:
        # The reference points outside this item's directory: not ours to delete.
        return
    shutil.rmtree(item_dir, ignore_errors=True)
@staticmethod
def _attachment_meta_path(file_path: Path) -> Path:
return file_path.with_name(f"{file_path.name}.meta.json")
def _write_attachment_meta(self, file_path: Path, payload: dict[str, Any]) -> None:
    """Write ``payload`` as indented UTF-8 JSON to the attachment's sidecar file."""
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    self._attachment_meta_path(file_path).write_text(serialized, encoding="utf-8")
def _read_attachment_meta(self, file_path: Path) -> dict[str, Any]:
    """Load the attachment's sidecar metadata.

    Returns an empty dict when the sidecar is missing, unreadable, not valid
    JSON, or does not contain a JSON object.
    """
    sidecar = self._attachment_meta_path(file_path)
    if sidecar.exists():
        try:
            decoded = json.loads(sidecar.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            return {}
        if isinstance(decoded, dict):
            return decoded
    return {}
def _build_attachment_payload(self, item: ExpenseClaimItem) -> dict[str, Any]:
    """Build the attachment description dict for a claim item.

    Values come from the sidecar metadata when present and fall back to what
    can be derived from the file itself. ``FileNotFoundError`` propagates
    when the item has no stored attachment.
    """
    stored_path, resolved_type, resolved_name = self._resolve_item_attachment_content(item)
    sidecar = self._read_attachment_meta(stored_path)

    # Parse the recorded upload time; anything unparsable becomes None.
    uploaded_at = None
    raw_uploaded_at = sidecar.get("uploaded_at")
    if isinstance(raw_uploaded_at, str) and raw_uploaded_at.strip():
        try:
            uploaded_at = datetime.fromisoformat(raw_uploaded_at)
        except ValueError:
            uploaded_at = None

    stored_analysis = sidecar.get("analysis")
    analysis = stored_analysis if isinstance(stored_analysis, dict) else None

    if "previewable" in sidecar:
        previewable = bool(sidecar["previewable"])
    else:
        previewable = bool(self._is_previewable_media_type(resolved_type, resolved_name))

    return {
        "file_name": str(sidecar.get("file_name") or resolved_name),
        "storage_key": str(item.invoice_id or ""),
        "media_type": str(sidecar.get("media_type") or resolved_type),
        "size_bytes": int(sidecar.get("size_bytes") or stored_path.stat().st_size),
        "uploaded_at": uploaded_at,
        "previewable": previewable,
        "analysis": analysis,
    }
@staticmethod
def _resolve_attachment_media_type(filename: str, *, fallback: str | None = None) -> str:
guessed = mimetypes.guess_type(filename)[0]
return str(guessed or fallback or "application/octet-stream")
@staticmethod
def _is_previewable_media_type(media_type: str | None, filename: str) -> bool:
resolved = str(media_type or "").strip() or (mimetypes.guess_type(filename)[0] or "")
return resolved.startswith("image/") or resolved == "application/pdf"
@staticmethod
def _resolve_attachment_display_name(storage_key: str | None) -> str:
return Path(str(storage_key or "").strip()).name
@staticmethod
def _extract_amount_candidates(text: str) -> list[Decimal]:
values: list[Decimal] = []
seen: set[Decimal] = set()
def append_candidate(raw: str) -> None:
compact = str(raw or "").replace(",", ".").strip()
if not compact:
return
try:
candidate = Decimal(compact).quantize(Decimal("0.01"))
except (InvalidOperation, ValueError):
return
if candidate in seen:
return
seen.add(candidate)
values.append(candidate)
for pattern in (
r"(?:金额|价税合计|合计|小写|实收金额|支付金额|订单金额|总额|票价|房费|餐费)[:\s¥¥]*([0-9]{1,6}(?:[.,][0-9]{1,2})?)",
r"[¥¥]\s*([0-9]{1,6}(?:[.,][0-9]{1,2})?)",
r"([0-9]{1,6}(?:[.,][0-9]{1,2})?)\s*元",
):
for raw in re.findall(pattern, text, flags=re.IGNORECASE):
append_candidate(raw)
if values:
return values
for raw in re.findall(r"(?<!\d)(\d{1,6}\.\d{1,2})(?!\d)", text):
append_candidate(raw)
return values
@staticmethod
def _has_date_like_text(text: str) -> bool:
return bool(re.search(r"(20\d{2}[年/\-.]\d{1,2}[月/\-.]\d{1,2}日?)", text))
@staticmethod
def _normalize_match_text(text: str) -> str:
return re.sub(r"\s+", "", str(text or "")).lower()
@staticmethod
def _resolve_expense_type_label(expense_type: str | None) -> str:
    """Return the display label for an expense type code, or "其他" when unknown."""
    code = str(expense_type or "").strip().lower()
    if code in EXPENSE_TYPE_LABELS:
        return EXPENSE_TYPE_LABELS[code]
    return "其他"
@staticmethod
def _resolve_allowed_document_scenes(expense_type: str | None) -> set[str]:
    """Return a fresh set of document scenes accepted for the expense type.

    Unknown types yield an empty set. A copy is returned so callers cannot
    mutate the module-level mapping.
    """
    code = str(expense_type or "").strip().lower()
    allowed = EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES.get(code, set())
    return set(allowed)
def _detect_expense_scenes(self, text: str) -> dict[str, list[str]]:
    """Map each scene found in ``text`` to up to three of its matched keywords.

    Matching is a substring test against the whitespace-free, lower-cased
    text. Scenes without any hit are left out.
    """
    haystack = self._normalize_match_text(text)
    if not haystack:
        return {}
    return {
        scene: hits[:3]
        for scene, keywords in EXPENSE_SCENE_KEYWORDS.items()
        if (hits := [keyword for keyword in keywords if keyword in haystack])
    }
def _format_scene_labels(self, scene_codes: set[str]) -> str:
    """Render scene codes as a readable, de-duplicated label list.

    Codes are processed in sorted order so the text does not depend on set
    iteration order, and labels are separated with "、" (the enumeration mark
    already used in this module's messages) instead of being run together.
    Returns "其他" when no label is available.
    """
    labels = [self._resolve_expense_type_label(code) for code in sorted(scene_codes)]
    # dict.fromkeys keeps first-seen order while dropping duplicate labels.
    unique_labels = list(dict.fromkeys(label for label in labels if label))
    return "、".join(unique_labels) if unique_labels else "其他"
def _build_purpose_mismatch_point(
    self,
    *,
    item: ExpenseClaimItem,
    document_scenes: set[str],
) -> str | None:
    """Describe a mismatch between the document's scenes and the item.

    Checks, in order:
      1. the scenes detected in the item's reason text share nothing with
         the document's scenes;
      2. the scenes allowed for the item's expense type share nothing with
         the document's scenes.
    Returns the message for the first check that fails, or ``None`` when the
    document has no detected scene or nothing conflicts.
    """
    if not document_scenes:
        return None

    scenes_for_type = self._resolve_allowed_document_scenes(item.item_type)
    reason = str(item.item_reason or "").strip()
    scenes_in_reason = set(self._detect_expense_scenes(reason).keys())
    document_labels = self._format_scene_labels(document_scenes)

    reason_conflicts = bool(scenes_in_reason) and document_scenes.isdisjoint(scenes_in_reason)
    if reason_conflicts:
        return (
            f"用途字段:用户填写用途“{reason[:24]}”与票据内容不一致,"
            f"当前附件更像{document_labels}相关材料。"
        )

    type_conflicts = bool(scenes_for_type) and document_scenes.isdisjoint(scenes_for_type)
    if type_conflicts:
        type_label = self._resolve_expense_type_label(item.item_type)
        return f"用途字段:当前费用项目为{type_label},但附件内容更像{document_labels}相关票据。"

    return None
def _build_fallback_attachment_analysis(
    self,
    *,
    media_type: str | None,
    item: ExpenseClaimItem,
) -> dict[str, Any]:
    """Build the medium-risk analysis used when no recognition result exists."""
    format_label = self._resolve_attachment_media_type("attachment", fallback=media_type)
    findings = [
        f"附件格式:{format_label}",
        f"费用金额:当前明细金额为 {item.item_amount}",
    ]
    analysis: dict[str, Any] = {
        "severity": "medium",
        "label": "中风险",
        "headline": "AI提示附件已上传待识别结果",
        "summary": "附件已成功保存,但当前尚未拿到有效识别结果,建议人工先核对票据内容。",
        "points": findings,
        "suggestion": "建议打开附件确认金额、日期和票据类型是否完整,再继续提交审批。",
    }
    return analysis
def _build_failed_ocr_attachment_analysis(
    self,
    *,
    media_type: str | None,
    error_message: str,
    item: ExpenseClaimItem,
) -> dict[str, Any]:
    """Build the medium-risk analysis used when OCR raised an error."""
    failure_reason = error_message or "OCR 服务暂不可用"
    format_label = self._resolve_attachment_media_type("attachment", fallback=media_type)
    findings = [
        f"识别异常:{failure_reason}",
        f"费用金额:当前明细金额为 {item.item_amount}",
        f"附件格式:{format_label}",
    ]
    analysis: dict[str, Any] = {
        "severity": "medium",
        "label": "中风险",
        "headline": "AI提示附件已上传但识别失败",
        "summary": "文件已经保存成功,但本次 AI 识别未完成,因此无法给出完整票据核验结论。",
        "points": findings,
        "suggestion": "建议重新上传更清晰的票据图片,或稍后重试识别后再提交。",
    }
    return analysis
def _build_attachment_analysis(self, *, document: Any, item: ExpenseClaimItem) -> dict[str, Any]:
    """Grade a recognized document against the claim item it is attached to.

    ``document`` is read through ``getattr`` for ``summary``, ``text``,
    ``avg_score``, ``line_count`` and ``warnings``. The result dict has the
    keys ``severity`` ("pass", "low", "medium" or "high"), ``label``,
    ``headline``, ``summary``, ``points`` and ``suggestion``.

    Findings cover: OCR warnings, missing text, missing ticket keywords,
    missing or mismatching amount (tolerance of 1.00), missing date, purpose
    mismatch, and low OCR confidence (below 0.72).
    """
    warnings = [
        str(value).strip()
        for value in list(getattr(document, "warnings", []) or [])
        if str(value).strip()
    ]
    text = " ".join(
        [
            str(getattr(document, "summary", "") or "").strip(),
            str(getattr(document, "text", "") or "").strip(),
        ]
    ).strip()
    compact_text = text.replace(" ", "")
    avg_score = float(getattr(document, "avg_score", 0.0) or 0.0)
    line_count = int(getattr(document, "line_count", 0) or 0)
    # A score of 0 means "no score available", not "low confidence".
    low_confidence = bool(avg_score and avg_score < 0.72)

    document_scene_matches = self._detect_expense_scenes(text)
    purpose_mismatch_point = self._build_purpose_mismatch_point(
        item=item,
        document_scenes=set(document_scene_matches.keys()),
    )
    has_ticket_keyword = any(
        keyword in compact_text
        for keyword in (
            "发票",
            "票据",
            "增值税",
            "电子行程单",
            "购买方",
            "销售方",
            "税额",
            "价税",
            "票号",
            "发票代码",
            "凭证",
        )
    )
    amount_candidates = self._extract_amount_candidates(text)
    item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01"))
    has_matching_amount = any(
        abs(candidate - item_amount) <= Decimal("1.00") for candidate in amount_candidates
    )
    has_date_text = self._has_date_like_text(text)
    # A zero item amount is treated as "not filled in yet", never a mismatch.
    amount_mismatch = bool(amount_candidates) and item_amount > Decimal("0.00") and not has_matching_amount

    points: list[str] = []
    if warnings:
        points.append(f"识别提示:{warnings[0]}")
    if line_count == 0 or not compact_text:
        points.append("附件内容:未识别到有效文字,当前附件更像普通图片或内容过于模糊。")
    if not has_ticket_keyword:
        points.append("票据类型:未识别到发票、票据、电子行程单等关键字。")
    if not amount_candidates:
        points.append("金额字段:未识别到可用于核对的金额。")
    elif amount_mismatch:
        # Separate the amounts so "12.00" and "34.00" do not read as "12.0034.00".
        candidate_text = "、".join(str(candidate) for candidate in amount_candidates[:3])
        points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。")
    if not has_date_text:
        points.append("日期字段:未识别到开票日期或业务发生日期。")
    if purpose_mismatch_point:
        points.append(purpose_mismatch_point)
    if low_confidence:
        # Uses the same "label:detail" shape as every other point.
        points.append(f"识别质量:OCR 置信度偏低({avg_score:.0%}),可能影响票据核验准确性。")

    issue_count = len(points)
    if issue_count == 0:
        return {
            "severity": "pass",
            "label": "AI提示符合条件",
            "headline": "AI提示附件符合基础校验条件",
            "summary": "已识别到票据关键字段,附件可继续进入人工复核与报销流程。",
            "points": [
                "票据类型:已识别到可用于报销核验的票据关键字。",
                f"金额字段:已识别到与当前明细接近的金额 {item_amount} 元。",
            ],
            "suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。",
        }

    severity = "low"
    label = "低风险"
    headline = "AI提示附件存在轻微待核对项"
    summary = "当前附件已识别出部分票据要素,但仍建议人工继续复核。"
    if (
        line_count == 0
        or not compact_text
        or (not has_ticket_keyword and issue_count >= 2)
        or (purpose_mismatch_point and amount_mismatch)
    ):
        severity = "high"
        label = "高风险"
        headline = "AI提示附件不符合票据校验条件"
        summary = "当前附件存在明显异常,票据内容与填写信息不一致,或无法作为有效报销材料。"
    elif purpose_mismatch_point or amount_mismatch or issue_count >= 2 or warnings or low_confidence:
        severity = "medium"
        label = "中风险"
        headline = "AI提示附件存在明显待整改项"
        summary = "当前附件可见部分内容,但金额、用途、日期或票据类型仍有缺失或不一致。"

    suggestion = {
        "high": "建议重新上传清晰的票据原件,确保包含发票抬头、金额、日期等核心字段。",
        "medium": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。",
        "low": "建议人工再次核对金额和业务说明,确认后可继续流转。",
    }[severity]
    return {
        "severity": severity,
        "label": label,
        "headline": headline,
        "summary": summary,
        "points": points,
        "suggestion": suggestion,
    }
@staticmethod
def _serialize_claim(claim: ExpenseClaim) -> dict[str, Any]:
return {
@@ -696,6 +1416,14 @@ class ExpenseClaimService:
return None
return fallback
@staticmethod
def _normalize_sort_datetime(value: datetime | None) -> datetime:
if value is None:
return datetime.max.replace(tzinfo=UTC)
if value.tzinfo is None:
return value.replace(tzinfo=UTC)
return value
@staticmethod
def _is_missing_value(value: Any) -> bool:
text = str(value or "").strip()
@@ -712,13 +1440,14 @@ class ExpenseClaimService:
if not claim.items:
claim.amount = Decimal("0.00")
claim.invoice_count = 0
claim.risk_flags_json = self._merge_claim_attachment_risk_flags(claim, [])
return
ordered_items = sorted(
claim.items,
key=lambda item: (
item.item_date or date.max,
item.created_at or datetime.max.replace(tzinfo=UTC),
self._normalize_sort_datetime(item.created_at),
),
)
primary_item = ordered_items[0]
@@ -740,11 +1469,94 @@ class ExpenseClaimService:
self._normalize_optional_text(primary_item.item_location, fallback=claim.location or "待补充")
or "待补充"
)
claim.risk_flags_json = self._merge_claim_attachment_risk_flags(
claim,
self._build_claim_attachment_risk_flags(ordered_items),
)
if str(claim.status or "").strip().lower() == "draft":
claim.approval_stage = "待提交"
def _refresh_item_attachment_analysis(self, item: ExpenseClaimItem) -> None:
    """Rebuild and persist the analysis stored in an item's attachment metadata.

    The attachment path is resolved from ``item.invoice_id``. When no path
    is resolved or the file does not exist, the method returns without
    doing anything. Otherwise the metadata is read, a new analysis is built
    from the recorded OCR state, stored under the ``"analysis"`` key and the
    metadata is written back.

    Args:
        item: The expense claim item whose attachment should be re-analysed.
    """
    attachment_path = self._resolve_attachment_path(item.invoice_id)
    if attachment_path is None or not attachment_path.exists():
        return

    meta = self._read_attachment_meta(attachment_path)
    # Fall back to a media type derived from the file name when the
    # metadata does not record one.
    raw_media_type = meta.get("media_type") or self._resolve_attachment_media_type(attachment_path.name)
    media_type = str(raw_media_type).strip()
    ocr_status = str(meta.get("ocr_status") or "").strip().lower()

    if ocr_status == "failed":
        analysis = self._build_failed_ocr_attachment_analysis(
            media_type=media_type,
            error_message=str(meta.get("ocr_error") or ""),
            item=item,
        )
    else:
        # Metadata without an explicit "recognized" status is still treated
        # as recognised when it carries any OCR payload. The payload is only
        # probed when the status alone does not settle the question.
        has_ocr_result = ocr_status == "recognized"
        if not has_ocr_result:
            ocr_payload = (
                str(meta.get("ocr_text") or "").strip(),
                str(meta.get("ocr_summary") or "").strip(),
                int(meta.get("ocr_line_count") or 0),
                list(meta.get("ocr_warnings") or []),
            )
            has_ocr_result = any(ocr_payload)

        if has_ocr_result:
            ocr_text = str(meta.get("ocr_text") or "")
            ocr_summary = str(meta.get("ocr_summary") or "")
            ocr_avg_score = float(meta.get("ocr_avg_score") or 0.0)
            ocr_line_count = int(meta.get("ocr_line_count") or 0)
            # Drop warnings that are blank once rendered as text.
            ocr_warnings = []
            for raw_warning in list(meta.get("ocr_warnings") or []):
                rendered = str(raw_warning)
                if rendered.strip():
                    ocr_warnings.append(rendered)
            # Lightweight stand-in exposing the OCR attributes set here.
            document = SimpleNamespace(
                text=ocr_text,
                summary=ocr_summary,
                avg_score=ocr_avg_score,
                line_count=ocr_line_count,
                warnings=ocr_warnings,
            )
            analysis = self._build_attachment_analysis(document=document, item=item)
        else:
            analysis = self._build_fallback_attachment_analysis(media_type=media_type, item=item)

    meta["analysis"] = analysis
    self._write_attachment_meta(attachment_path, meta)
def _build_claim_attachment_risk_flags(self, ordered_items: list[ExpenseClaimItem]) -> list[dict[str, Any]]:
    """Derive claim-level risk flags from the stored attachment analyses.

    Args:
        ordered_items: Claim items in display order. The 1-based position in
            this list is embedded in each flag message, and skipped items
            still consume a position.

    Returns:
        One flag dict per item whose attachment analysis has a severity
        other than empty, ``"pass"`` or ``"low"``. Items with no resolvable
        existing attachment file, or with no dict-shaped analysis in the
        metadata, contribute nothing.
    """
    non_reportable = {"", "pass", "low"}
    flags: list[dict[str, Any]] = []

    for position, claim_item in enumerate(ordered_items, start=1):
        attachment_path = self._resolve_attachment_path(claim_item.invoice_id)
        if attachment_path is None or not attachment_path.exists():
            continue

        analysis = self._read_attachment_meta(attachment_path).get("analysis")
        if not isinstance(analysis, dict):
            continue

        severity = str(analysis.get("severity") or "").strip().lower()
        if severity in non_reportable:
            continue

        # Prefer the summary, then the headline, then a generic message.
        summary_text = str(analysis.get("summary") or analysis.get("headline") or "").strip()
        if not summary_text:
            summary_text = "附件存在待核对风险。"

        # Use the stored label when present; otherwise derive it from severity.
        if severity == "high":
            default_label = "高风险"
        else:
            default_label = "中风险"
        label_text = str(analysis.get("label") or default_label).strip()

        flags.append(
            {
                "source": "attachment_analysis",
                "item_id": claim_item.id,
                "severity": severity,
                "label": label_text,
                "message": f"费用明细第 {position} 条：{summary_text}",
            }
        )

    return flags
@staticmethod
def _merge_claim_attachment_risk_flags(
claim: ExpenseClaim,
attachment_risk_flags: list[dict[str, Any]],
) -> list[Any]:
preserved_flags = [
flag
for flag in list(claim.risk_flags_json or [])
if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "attachment_analysis")
]
return preserved_flags + attachment_risk_flags
def _validate_claim_for_submission(self, claim: ExpenseClaim) -> list[str]:
issues: list[str] = []
claim_location_required = self._is_location_required_expense_type(claim.expense_type)
if self._is_missing_value(claim.employee_name):
issues.append("申请人未完善")
@@ -754,7 +1566,7 @@ class ExpenseClaimService:
issues.append("报销类型未完善")
if self._is_missing_value(claim.reason):
issues.append("报销事由未完善")
if self._is_missing_value(claim.location):
if claim_location_required and self._is_missing_value(claim.location):
issues.append("业务地点未完善")
if claim.amount is None or claim.amount <= Decimal("0.00"):
issues.append("报销金额未完善")
@@ -765,13 +1577,14 @@ class ExpenseClaimService:
for index, item in enumerate(claim.items, start=1):
prefix = f"费用明细第 {index}"
item_location_required = self._is_location_required_expense_type(item.item_type or claim.expense_type)
if item.item_date is None:
issues.append(f"{prefix}缺少日期")
if self._is_missing_value(item.item_type):
issues.append(f"{prefix}缺少费用项目")
if self._is_missing_value(item.item_reason):
issues.append(f"{prefix}缺少说明")
if self._is_missing_value(item.item_location):
if item_location_required and self._is_missing_value(item.item_location):
issues.append(f"{prefix}缺少地点")
if item.item_amount is None or item.item_amount <= Decimal("0.00"):
issues.append(f"{prefix}缺少金额")
@@ -780,6 +1593,10 @@ class ExpenseClaimService:
return issues
@staticmethod
def _is_location_required_expense_type(expense_type: str | None) -> bool:
    """Tell whether an expense type is listed as requiring a location.

    Args:
        expense_type: Expense type code; ``None`` is treated as empty.

    Returns:
        ``True`` when the stripped, lower-cased code is a member of
        ``LOCATION_REQUIRED_EXPENSE_TYPES``.
    """
    normalized_type = str(expense_type or "").strip().lower()
    return normalized_type in LOCATION_REQUIRED_EXPENSE_TYPES
@staticmethod
def _has_privileged_claim_access(current_user: CurrentUserContext) -> bool:
if current_user.is_admin:

View File

@@ -169,6 +169,19 @@ EXPENSE_TYPE_KEYWORDS = {
"招待费": "entertainment",
"招待": "entertainment",
"宴请": "entertainment",
"办公费": "office",
"办公用品": "office",
"文具": "office",
"耗材": "office",
"办公耗材": "office",
"打印纸": "office",
"办公设备": "office",
"培训费": "training",
"培训": "training",
"通讯费": "communication",
"话费": "communication",
"福利费": "welfare",
"团建": "welfare",
}
EXPENSE_NARRATIVE_KEYWORDS = (
@@ -1117,6 +1130,21 @@ class SemanticOntologyService:
):
upsert(self._make_entity("expense_type", "餐费", "meal", role="filter", confidence=0.84))
if any(
keyword in query
for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")
):
upsert(self._make_entity("expense_type", "办公费", "office", role="filter", confidence=0.87))
if any(keyword in query for keyword in ("培训", "讲师费", "课时费", "课程费")):
upsert(self._make_entity("expense_type", "培训费", "training", role="filter", confidence=0.84))
if any(keyword in query for keyword in ("通讯费", "话费", "流量费", "宽带费")):
upsert(self._make_entity("expense_type", "通讯费", "communication", role="filter", confidence=0.84))
if any(keyword in query for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")):
upsert(self._make_entity("expense_type", "福利费", "welfare", role="filter", confidence=0.84))
for amount in self._extract_amount_entities(query):
upsert(amount)

View File

@@ -65,6 +65,10 @@ EXPENSE_TYPE_LABELS = {
"meal": "餐费",
"meeting": "会务费",
"entertainment": "业务招待费",
"office": "办公费",
"training": "培训费",
"communication": "通讯费",
"welfare": "福利费",
"other": "其他费用",
}
@@ -74,6 +78,10 @@ GROUP_SCENE_LABELS = {
"meal": "伙食费",
"transport": "交通费",
"hotel": "住宿费",
"office": "办公费",
"training": "培训费",
"communication": "通讯费",
"welfare": "福利费",
"other": "其他费用",
}
@@ -1825,6 +1833,14 @@ class UserAgentService:
return "meal", "餐费"
if "会务" in compact:
return "meeting", "会务费"
if any(keyword in compact for keyword in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")):
return "office", "办公费"
if any(keyword in compact for keyword in ("培训费", "培训", "讲师费", "课时费", "课程费")):
return "training", "培训费"
if any(keyword in compact for keyword in ("通讯费", "话费", "流量费", "宽带费")):
return "communication", "通讯费"
if any(keyword in compact for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")):
return "welfare", "福利费"
return "other", str(value or "").strip() or "其他费用"
def _resolve_required_review_keys(
@@ -1951,6 +1967,13 @@ class UserAgentService:
"group_code": group_code,
"scene_label": "餐饮票据",
}
if any(keyword in compact for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "键盘", "鼠标", "白板", "墨盒", "硒鼓")):
return {
"document_type": "other",
"expense_type": "office",
"group_code": "office",
"scene_label": "办公用品票据",
}
return {
"document_type": "other",
"expense_type": expense_type_code or "other",
@@ -1962,7 +1985,7 @@ class UserAgentService:
def _normalize_group_code(expense_type_code: str) -> str:
if expense_type_code in {"travel", "hotel", "transport"}:
return "travel"
if expense_type_code in {"entertainment", "meal"}:
if expense_type_code in {"entertainment", "meal", "office", "training", "communication", "welfare"}:
return expense_type_code
return "other"