feat: 新增票据夹模块并优化 OCR 与员工画像服务

后端新增票据夹端点、数据模型和服务模块,优化 OCR 端点
Schema 和附件操作逻辑,完善员工行为画像服务和辅助函数,
前端新增票据夹视图和服务层,优化文档中心样式和侧边栏导
航,完善员工画像详情弹窗和权限控制,补充单元测试。
This commit is contained in:
caoxiaozhu
2026-05-29 14:51:18 +08:00
parent 678f64d772
commit 4c59941ec6
33 changed files with 2855 additions and 551 deletions

View File

@@ -9,6 +9,7 @@ from sqlalchemy.orm import Session
from app.api.deps import CurrentUserContext, get_current_user, get_db
from app.models.employee import Employee
from app.schemas.employee_profile import EmployeeProfileLatestRead
from app.services.account_behavior_profile import AccountBehaviorProfileService
from app.services.employee_behavior_profile_service import EmployeeBehaviorProfileService
router = APIRouter(prefix="/employee-profiles")
@@ -31,13 +32,13 @@ def get_current_employee_latest_profile(
) -> EmployeeProfileLatestRead:
employee = _resolve_current_employee(db, current_user)
if employee is None:
return EmployeeProfileLatestRead(
employee_id=current_user.username,
employee_name=current_user.name,
return AccountBehaviorProfileService(db).get_latest_account_profile(
account_id=current_user.username,
account_name=current_user.name,
identifiers=_current_account_identifiers(current_user),
scene=scene,
window_days=window_days,
expense_type_scope=expense_type_scope,
empty_reason="当前登录用户未匹配到员工目录,暂无法形成用户画像。",
)
service = EmployeeBehaviorProfileService(db)
@@ -47,7 +48,7 @@ def get_current_employee_latest_profile(
window_days=window_days,
expense_type_scope=expense_type_scope,
)
if latest.empty_reason:
if latest.empty_reason or _missing_usage_duration_metric(latest):
service.refresh_employee_profiles(
employee_id=employee.id,
window_days=(window_days,),
@@ -115,3 +116,24 @@ def _resolve_current_employee(
stmt = select(Employee).where(or_(*conditions)).order_by(Employee.created_at.asc()).limit(1)
return db.scalars(stmt).first()
def _missing_usage_duration_metric(latest: EmployeeProfileLatestRead) -> bool:
if latest.scene != "operations":
return False
for profile in latest.profiles:
if profile.profile_type == "ai_usage":
return "ai_run_duration_ms" not in profile.metrics
return False
def _current_account_identifiers(current_user: CurrentUserContext) -> set[str]:
return {
item
for item in (
current_user.username,
current_user.name,
)
if str(item or "").strip()
}

View File

@@ -2,13 +2,14 @@ from __future__ import annotations
from typing import Annotated
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
from sqlalchemy.orm import Session
from app.api.deps import CurrentUserContext, get_current_user, get_db
from app.schemas.common import ErrorResponse
from app.schemas.ocr import OcrRecognizeBatchRead
from app.services.ocr import OcrService
from app.services.receipt_folder import ReceiptFolderService
router = APIRouter(prefix="/ocr")
@@ -35,8 +36,9 @@ router = APIRouter(prefix="/ocr")
)
async def recognize_ocr_documents(
files: Annotated[list[UploadFile], File(description="待识别的票据图片或 PDF。")],
_: Annotated[CurrentUserContext, Depends(get_current_user)],
current_user: Annotated[CurrentUserContext, Depends(get_current_user)],
db: Annotated[Session, Depends(get_db)],
receipt_ids: Annotated[list[str] | None, Form(description="可选,来源于票据夹的持久化票据 ID。")] = None,
) -> OcrRecognizeBatchRead:
try:
payload = []
@@ -48,7 +50,13 @@ async def recognize_ocr_documents(
upload.content_type,
)
)
return OcrService(db).recognize_files(payload)
result = OcrService(db).recognize_files(payload)
return ReceiptFolderService().persist_ocr_batch(
files=payload,
result=result,
current_user=current_user,
receipt_ids=receipt_ids or [],
)
except ValueError as exc:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
except RuntimeError as exc:

View File

@@ -0,0 +1,108 @@
from __future__ import annotations
from typing import Annotated
from fastapi import APIRouter, Depends, HTTPException, Query, status
from fastapi.responses import FileResponse
from app.api.deps import CurrentUserContext, get_current_user
from app.schemas.common import ErrorResponse
from app.schemas.receipt_folder import (
ReceiptFolderDeleteResponse,
ReceiptFolderDetailRead,
ReceiptFolderItemRead,
ReceiptFolderUpdate,
)
from app.services.receipt_folder import ReceiptFolderService
router = APIRouter(prefix="/receipt-folder")
CurrentUser = Annotated[CurrentUserContext, Depends(get_current_user)]
@router.get(
    "",
    response_model=list[ReceiptFolderItemRead],
    summary="查询票据夹列表",
    description="返回当前登录用户上传并持久化的票据列表。",
)
def list_receipts(
    current_user: CurrentUser,
    status_filter: Annotated[str, Query(alias="status")] = "all",
) -> list[ReceiptFolderItemRead]:
    # List the caller's receipts; the `status` query parameter (default
    # "all") is forwarded to the service unchanged.
    folder = ReceiptFolderService()
    receipts = folder.list_receipts(
        current_user=current_user,
        status_filter=status_filter,
    )
    return receipts
@router.get(
    "/{receipt_id}",
    response_model=ReceiptFolderDetailRead,
    summary="读取票据详情",
    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据不存在。"}},
)
def get_receipt(receipt_id: str, current_user: CurrentUser) -> ReceiptFolderDetailRead:
    # Return the detail view of one receipt owned by the caller; a
    # FileNotFoundError from the service is translated into HTTP 404.
    try:
        detail = ReceiptFolderService().get_receipt(receipt_id, current_user)
    except FileNotFoundError as missing:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt not found") from missing
    return detail
@router.patch(
    "/{receipt_id}",
    response_model=ReceiptFolderDetailRead,
    summary="更新票据基础识别信息",
    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据不存在。"}},
)
def update_receipt(
    receipt_id: str,
    payload: ReceiptFolderUpdate,
    current_user: CurrentUser,
) -> ReceiptFolderDetailRead:
    # Apply a partial update to a receipt and return the refreshed detail
    # view; a FileNotFoundError from the service becomes HTTP 404.
    try:
        updated = ReceiptFolderService().update_receipt(
            receipt_id=receipt_id,
            payload=payload,
            current_user=current_user,
        )
    except FileNotFoundError as missing:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt not found") from missing
    return updated
@router.delete(
    "/{receipt_id}",
    response_model=ReceiptFolderDeleteResponse,
    summary="删除票据",
    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据不存在。"}},
)
def delete_receipt(receipt_id: str, current_user: CurrentUser) -> ReceiptFolderDeleteResponse:
    # Delete one of the caller's receipts; a FileNotFoundError from the
    # service becomes HTTP 404.
    try:
        outcome = ReceiptFolderService().delete_receipt(receipt_id=receipt_id, current_user=current_user)
    except FileNotFoundError as missing:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt not found") from missing
    return outcome
@router.get(
    "/{receipt_id}/preview",
    summary="预览票据原始文件",
    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据预览不存在。"}},
)
def preview_receipt(receipt_id: str, current_user: CurrentUser) -> FileResponse:
    # Stream the preview asset of a receipt; a FileNotFoundError from the
    # service becomes HTTP 404.
    # NOTE(review): passing `filename` makes FileResponse emit a
    # Content-Disposition header; confirm the default disposition type suits
    # an in-browser preview.
    try:
        resolved = ReceiptFolderService().resolve_preview(receipt_id, current_user)
    except FileNotFoundError as missing:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt preview not found") from missing
    preview_path, preview_type, preview_name = resolved
    return FileResponse(preview_path, media_type=preview_type, filename=preview_name)
@router.get(
    "/{receipt_id}/source",
    summary="读取票据源文件",
    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据源文件不存在。"}},
)
def source_receipt(receipt_id: str, current_user: CurrentUser) -> FileResponse:
    # Stream the originally uploaded file of a receipt; a FileNotFoundError
    # from the service becomes HTTP 404.
    try:
        resolved = ReceiptFolderService().resolve_source(receipt_id, current_user)
    except FileNotFoundError as missing:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt source not found") from missing
    source_path, source_type, source_name = resolved
    return FileResponse(source_path, media_type=source_type, filename=source_name)

View File

@@ -2,7 +2,7 @@ from __future__ import annotations
from typing import Annotated
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
from fastapi.responses import FileResponse
from sqlalchemy.orm import Session
@@ -372,6 +372,7 @@ async def upload_expense_claim_item_attachment(
file: Annotated[UploadFile, File(description="待上传的附件文件。")],
db: DbSession,
current_user: CurrentUser,
receipt_id: Annotated[str | None, Form(description="可选,来源于票据夹的持久化票据 ID。")] = None,
) -> ExpenseClaimAttachmentActionResponse:
service = ExpenseClaimService(db)
try:
@@ -382,6 +383,7 @@ async def upload_expense_claim_item_attachment(
content=await file.read(),
media_type=file.content_type,
current_user=current_user,
source_receipt_id=receipt_id or "",
)
except LookupError as error:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error

View File

@@ -13,6 +13,7 @@ from app.api.v1.endpoints.knowledge import router as knowledge_router
from app.api.v1.endpoints.ocr import router as ocr_router
from app.api.v1.endpoints.ontology import router as ontology_router
from app.api.v1.endpoints.orchestrator import router as orchestrator_router
from app.api.v1.endpoints.receipt_folder import router as receipt_folder_router
from app.api.v1.endpoints.reimbursements import router as reimbursements_router
from app.api.v1.endpoints.settings import router as settings_router
from app.api.v1.endpoints.system_logs import router as system_logs_router
@@ -29,6 +30,7 @@ router.include_router(knowledge_router, tags=["knowledge"])
router.include_router(ocr_router, tags=["ocr"])
router.include_router(ontology_router, tags=["ontology"])
router.include_router(orchestrator_router, tags=["orchestrator"])
router.include_router(receipt_folder_router, tags=["receipt-folder"])
router.include_router(employees_router, prefix="/employees", tags=["employees"])
router.include_router(employee_profiles_router, tags=["employee-profiles"])
router.include_router(reimbursements_router, prefix="/reimbursements", tags=["reimbursements"])

View File

@@ -39,6 +39,10 @@ class OcrRecognizeDocumentRead(BaseModel):
)
preview_kind: str = Field(default="", description="预览类型PDF 转图后通常为 image。")
preview_data_url: str = Field(default="", description="用于前端展示的图片预览 data URL。")
receipt_id: str = Field(default="", description="票据夹中的持久化票据 ID。")
receipt_status: str = Field(default="", description="票据夹关联状态unlinked / linked。")
receipt_preview_url: str = Field(default="", description="票据夹预览接口地址。")
receipt_source_url: str = Field(default="", description="票据夹原始文件接口地址。")
warnings: list[str] = Field(default_factory=list, description="该文件的识别提示或警告。")
lines: list[OcrRecognizeLineRead] = Field(default_factory=list, description="逐行识别结果。")

View File

@@ -0,0 +1,68 @@
from __future__ import annotations
from datetime import datetime
from typing import Any
from pydantic import BaseModel, Field
# One key/label/value entry of a receipt's structured field list.
# Every attribute defaults to an empty string.
class ReceiptFolderFieldRead(BaseModel):
    key: str = ""
    label: str = ""
    value: str = ""
# List-level view of one receipt stored in the receipt folder.
# Only `id` and `file_name` are required; every other attribute has a default.
class ReceiptFolderItemRead(BaseModel):
    # Identity and stored-file facts.
    id: str
    file_name: str
    media_type: str = "application/octet-stream"
    size_bytes: int = 0
    # Link state; defaults describe a receipt not attached to any claim.
    status: str = "unlinked"
    status_label: str = "未关联"
    # Classification; defaults fall back to the generic "other" bucket.
    document_type: str = "other"
    document_type_label: str = "其他单据"
    scene_code: str = "other"
    scene_label: str = "其他票据"
    # Recognised content, carried as plain strings.
    summary: str = ""
    amount: str = ""
    document_date: str = ""
    merchant_name: str = ""
    avg_score: float = 0.0
    # Timestamps and the claim this receipt is linked to, when any.
    uploaded_at: datetime | None = None
    linked_at: datetime | None = None
    linked_claim_id: str = ""
    linked_claim_no: str = ""
    # Preview/source access information.
    previewable: bool = False
    preview_kind: str = ""
    preview_url: str = ""
    source_url: str = ""
    warnings: list[str] = Field(default_factory=list)
# Detail view of a receipt: everything in ReceiptFolderItemRead plus OCR
# engine information, full text, classification details, structured fields
# and the raw stored metadata dictionary.
class ReceiptFolderDetailRead(ReceiptFolderItemRead):
    engine: str = ""
    model: str = ""
    ocr_text: str = ""
    line_count: int = 0
    page_count: int = 1
    classification_confidence: float = 0.0
    classification_evidence: list[str] = Field(default_factory=list)
    fields: list[ReceiptFolderFieldRead] = Field(default_factory=list)
    raw_meta: dict[str, Any] = Field(default_factory=dict)
# Partial-update payload for a receipt. Every attribute is optional and
# defaults to None, so a client may send only the values it wants to change.
class ReceiptFolderUpdate(BaseModel):
    document_type: str | None = None
    document_type_label: str | None = None
    scene_code: str | None = None
    scene_label: str | None = None
    summary: str | None = None
    amount: str | None = None
    document_date: str | None = None
    merchant_name: str | None = None
    fields: list[ReceiptFolderFieldRead] | None = None
# Response body for a receipt deletion: a message plus the deleted receipt id.
class ReceiptFolderDeleteResponse(BaseModel):
    message: str
    receipt_id: str

View File

@@ -0,0 +1,176 @@
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from decimal import Decimal
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session, selectinload
from app.algorithem.employee_behavior_profile import (
LEVEL_LABELS,
PROFILE_LABELS,
ProfileComponent,
evaluate_weighted_profile,
score_by_bands,
)
from app.algorithem.employee_behavior_profile_tags import build_profile_radar, build_profile_tags
from app.models.agent_run import AgentRun
from app.schemas.employee_profile import EmployeeProfileLatestRead, EmployeeProfileRead
from app.services.employee_behavior_profile_helpers import EmployeeBehaviorProfileMetricHelpers
class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
def __init__(self, db: Session) -> None:
    """Keep the SQLAlchemy session used by this service's queries."""
    self.db = db
def get_latest_account_profile(
    self,
    *,
    account_id: str,
    account_name: str,
    identifiers: set[str],
    scene: str,
    window_days: int,
    expense_type_scope: str,
) -> EmployeeProfileLatestRead:
    """Build an AI-usage profile for an account that has no employee record.

    Only the ``operations`` scene is supported. Other scenes, or an account
    with no agent runs inside the window, return an otherwise-empty result
    that carries an ``empty_reason``. The successful result contains a single
    ``ai_usage`` profile and a fixed ``normal`` review priority.
    """
    # Fields shared by every response variant below.
    shared = {
        "employee_id": account_id,
        "employee_name": account_name,
        "scene": scene,
        "window_days": window_days,
        "expense_type_scope": expense_type_scope,
    }
    if scene != "operations":
        return EmployeeProfileLatestRead(
            **shared,
            empty_reason="当前账号未匹配员工目录,无法形成审批场景员工画像。",
        )

    window_start = datetime.now(UTC) - timedelta(days=window_days)
    runs = self._fetch_account_runs(identifiers, window_start)
    if not runs:
        return EmployeeProfileLatestRead(
            **shared,
            empty_reason="当前账号暂无可统计的智能体运行记录。",
        )

    usage = self._calculate_account_ai_usage_profile(
        runs=runs,
        window_days=window_days,
        expense_type_scope=expense_type_scope,
    )
    payload = {
        "profile_type": usage.profile_type,
        "profile_label": usage.profile_label,
        "score": usage.profile_score,
        "level": usage.profile_level,
        "metrics": usage.metrics,
        "top_contributors": usage.top_contributors(),
    }
    tags = build_profile_tags([payload], scene=scene)
    radar = build_profile_radar([payload], tags, scene=scene)

    # Read back from the payload after the tag/radar builders have seen it.
    kind = payload["profile_type"]
    level = payload["level"]
    return EmployeeProfileLatestRead(
        **shared,
        calculated_at=datetime.now(UTC),
        review_priority_score=0,
        review_priority_level="normal",
        review_priority_label=LEVEL_LABELS["normal"],
        profiles=[
            EmployeeProfileRead(
                profile_type=kind,
                profile_label=PROFILE_LABELS.get(kind, kind),
                score=payload["score"],
                level=level,
                level_label=LEVEL_LABELS.get(level, level),
                metrics=payload["metrics"],
                top_contributors=payload["top_contributors"],
            )
        ],
        profile_tags=tags,
        radar=radar,
    )
def _calculate_account_ai_usage_profile(
    self,
    *,
    runs: list[AgentRun],
    window_days: int,
    expense_type_scope: str,
):
    """Score the account's AI usage from its agent runs.

    Flattens the tool calls of all runs, counts the ones whose status is not
    "success"/"ok", estimates tokens and total run duration through the
    inherited helpers, and passes four weighted components plus a metrics
    dictionary to ``evaluate_weighted_profile``.
    """
    tool_calls = [tool for run in runs for tool in run.tool_calls]
    # A missing status is treated as a failure, because "" is not in the
    # accepted set.
    failed_calls = [
        tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
    ]
    estimated_tokens = self._estimate_tokens(runs)
    duration_ms = self._sum_agent_run_duration_ms(runs)
    token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
    # NOTE(review): the four weights below add up to 0.80, not 1.00 — confirm
    # evaluate_weighted_profile tolerates or normalises this.
    return evaluate_weighted_profile(
        "ai_usage",
        [
            ProfileComponent(
                "ai_call_count_score",
                "AI 调用次数",
                score_by_bands(len(runs), [(0, 0), (3, 25), (10, 65), (20, 100)]),
                len(runs),
                "",
                Decimal("0.25"),
            ),
            ProfileComponent(
                "token_cost_score",
                "Token 使用强度",
                score_by_bands(
                    estimated_tokens, [(0, 0), (2000, 25), (8000, 65), (20000, 100)]
                ),
                estimated_tokens,
                "tokens",
                Decimal("0.25"),
            ),
            # NOTE(review): this "ratio" component is fed the raw run count,
            # presumably as a proxy because no claim data exists for an
            # unmatched account — confirm.
            ProfileComponent(
                "ai_generated_claim_ratio_score",
                "AI 生成申请比例",
                score_by_bands(len(runs), [(0, 0), (2, 20), (8, 60), (16, 90)]),
                len(runs),
                "",
                Decimal("0.20"),
            ),
            ProfileComponent(
                "failed_ai_call_score",
                "AI 调用失败",
                score_by_bands(len(failed_calls), [(0, 0), (1, 35), (3, 80)]),
                len(failed_calls),
                "",
                Decimal("0.10"),
            ),
        ],
        metrics={
            "window_days": window_days,
            "expense_type_scope": expense_type_scope,
            "peer_sample_size": 0,
            "ai_run_count": len(runs),
            "tool_call_count": len(tool_calls),
            "failed_tool_call_count": len(failed_calls),
            "token_count_mode": token_mode,
            "estimated_token_count": estimated_tokens,
            "exact_token_count": None,
            "ai_run_duration_ms": duration_ms,
            "ai_run_duration_mode": "elapsed_or_tool_call_fallback",
        },
    )
def _fetch_account_runs(self, identifiers: set[str], cutoff: datetime) -> list[AgentRun]:
normalized = {item for item in identifiers if str(item or "").strip()}
if not normalized:
return []
stmt = (
select(AgentRun)
.options(selectinload(AgentRun.tool_calls))
.where(AgentRun.started_at >= cutoff, AgentRun.user_id.in_(normalized))
)
return list(self.db.scalars(stmt).all())

View File

@@ -171,6 +171,22 @@ class EmployeeBehaviorProfileMetricHelpers:
total += max(0, len(text) // 4)
return total
def _sum_agent_run_duration_ms(self, runs: list[AgentRun]) -> int:
return sum(self._agent_run_duration_ms(run) for run in runs)
def _agent_run_duration_ms(self, run: AgentRun) -> int:
if run.started_at is not None and run.finished_at is not None:
try:
if run.finished_at > run.started_at:
return min(
int((run.finished_at - run.started_at).total_seconds() * 1000),
24 * 60 * 60 * 1000,
)
except TypeError:
pass
return sum(max(0, int(tool.duration_ms or 0)) for tool in run.tool_calls)
@staticmethod
def _is_missing_value(value: Any) -> bool:
text = str(value or "").strip()

View File

@@ -466,6 +466,7 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
]
estimated_tokens = self._estimate_tokens(runs)
duration_ms = self._sum_agent_run_duration_ms(runs)
override_score = 0
token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
@@ -524,6 +525,8 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
"token_count_mode": token_mode,
"estimated_token_count": estimated_tokens,
"exact_token_count": None,
"ai_run_duration_ms": duration_ms,
"ai_run_duration_mode": "elapsed_or_tool_call_fallback",
},
)

View File

@@ -108,6 +108,7 @@ from app.services.expense_rule_runtime import (
resolve_document_type_label,
)
from app.services.ocr import OcrService
from app.services.receipt_folder import ReceiptFolderService
class ExpenseClaimAttachmentOperationsMixin:
@@ -120,6 +121,7 @@ class ExpenseClaimAttachmentOperationsMixin:
content: bytes,
media_type: str | None,
current_user: CurrentUserContext,
source_receipt_id: str = "",
) -> dict[str, Any] | None:
claim, item = self._get_claim_item_or_raise(
claim_id=claim_id,
@@ -240,6 +242,16 @@ class ExpenseClaimAttachmentOperationsMixin:
"ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []],
}
self._attachment_storage.write_meta(file_path, meta)
ReceiptFolderService().save_linked_attachment(
file_path=file_path,
media_type=resolved_media_type,
document=ocr_document,
current_user=current_user,
claim_id=claim.id,
claim_no=claim.claim_no,
item_id=item.id,
source_receipt_id=source_receipt_id,
)
self._sync_claim_from_items(claim)
self.db.commit()

View File

@@ -0,0 +1,532 @@
from __future__ import annotations
import json
import mimetypes
import re
import shutil
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from uuid import uuid4
from app.api.deps import CurrentUserContext
from app.core.config import get_settings
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead
from app.schemas.receipt_folder import (
ReceiptFolderDeleteResponse,
ReceiptFolderDetailRead,
ReceiptFolderFieldRead,
ReceiptFolderItemRead,
ReceiptFolderUpdate,
)
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.ocr import SUPPORTED_SUFFIXES
class ReceiptFolderService:
def __init__(self) -> None:
    """Resolve the receipt storage root: ``<storage root>/receipt_folder``."""
    self.root = (get_settings().resolved_storage_root_dir / "receipt_folder").resolve()
def persist_ocr_batch(
    self,
    *,
    files: list[tuple[str, bytes, str | None]],
    result: OcrRecognizeBatchRead,
    current_user: CurrentUserContext,
    receipt_ids: list[str] | None = None,
) -> OcrRecognizeBatchRead:
    """Attach receipt-folder references to each recognised OCR document.

    Documents are matched to ``files`` and ``receipt_ids`` by position. For
    each document an existing receipt (looked up by the id at the same
    position) is reused when found; otherwise the uploaded file is saved as
    a new receipt if it is persistable. Documents with no matching file, or
    whose file is not persisted, are returned unchanged.
    """
    enriched: list[OcrRecognizeDocumentRead] = []
    for position, document in enumerate(list(result.documents or [])):
        receipt = None
        if position < len(files):
            requested_id = (
                receipt_ids[position] if receipt_ids and position < len(receipt_ids) else ""
            )
            receipt = self._resolve_existing_item(requested_id, current_user)
            if receipt is None:
                filename, content, media_type = files[position]
                if self._should_persist_source(filename, content):
                    receipt = self.save_receipt(
                        filename=filename,
                        content=content,
                        media_type=media_type or document.media_type,
                        document=document,
                        current_user=current_user,
                    )

        if receipt is None:
            enriched.append(document)
        else:
            enriched.append(
                document.model_copy(
                    update={
                        "receipt_id": receipt.id,
                        "receipt_status": receipt.status,
                        "receipt_preview_url": receipt.preview_url,
                        "receipt_source_url": receipt.source_url,
                    }
                )
            )
    return result.model_copy(update={"documents": enriched})
def save_receipt(
    self,
    *,
    filename: str,
    content: bytes,
    media_type: str | None,
    document: Any | None,
    current_user: CurrentUserContext,
    linked_claim_id: str = "",
    linked_claim_no: str = "",
    linked_item_id: str = "",
) -> ReceiptFolderItemRead:
    """Store an uploaded file as a new receipt and return its list view.

    Creates ``<root>/<owner>/<uuid>/``, writes the source bytes, optionally
    writes a preview asset, and writes the metadata sidecar. The receipt is
    marked ``linked`` when a non-blank ``linked_claim_id`` is supplied,
    otherwise ``unlinked``.
    """
    owner_key = self._owner_key(current_user)
    receipt_id = str(uuid4())
    receipt_dir = self._owner_root(owner_key) / receipt_id
    receipt_dir.mkdir(parents=True, exist_ok=True)
    normalized_name = self.normalize_filename(filename)
    # The metadata sidecar lives in the same directory. An upload with the
    # same name would be overwritten by it, so give the source a distinct
    # name (compared case-insensitively for case-folding filesystems).
    if normalized_name.lower() == self._meta_path(receipt_dir).name.lower():
        normalized_name = f"source_{normalized_name}"
    source_path = receipt_dir / normalized_name
    source_path.write_bytes(content)
    resolved_media_type = self.resolve_media_type(normalized_name, media_type)
    preview_meta = self._write_preview_asset(
        receipt_dir=receipt_dir,
        source_path=source_path,
        media_type=resolved_media_type,
        document=document,
    )
    now = datetime.now(UTC)
    linked = bool(str(linked_claim_id or "").strip())
    meta = {
        "id": receipt_id,
        "owner_key": owner_key,
        "file_name": normalized_name,
        "source_file_name": normalized_name,
        "media_type": resolved_media_type,
        "size_bytes": len(content),
        "uploaded_at": now.isoformat(),
        "status": "linked" if linked else "unlinked",
        "linked_claim_id": str(linked_claim_id or "").strip(),
        "linked_claim_no": str(linked_claim_no or "").strip(),
        "linked_item_id": str(linked_item_id or "").strip(),
        "linked_at": now.isoformat() if linked else "",
        **self._build_document_meta(document),
        **preview_meta,
    }
    self._write_meta(receipt_dir, meta)
    return self._build_item(meta)
def save_linked_attachment(
    self,
    *,
    file_path: Path,
    media_type: str,
    document: Any | None,
    current_user: CurrentUserContext,
    claim_id: str,
    claim_no: str,
    item_id: str,
    source_receipt_id: str = "",
) -> ReceiptFolderItemRead | None:
    """Record a claim attachment in the receipt folder as a linked receipt.

    When ``source_receipt_id`` names an existing receipt of the caller, that
    receipt is marked linked and returned. Otherwise the attachment file is
    copied into the folder as a new linked receipt. Returns ``None`` when the
    file is missing or lies outside the configured storage root.
    """
    if not file_path.exists() or not file_path.is_file():
        return None
    if str(source_receipt_id or "").strip():
        try:
            return self.mark_receipt_linked(
                receipt_id=source_receipt_id,
                current_user=current_user,
                claim_id=claim_id,
                claim_no=claim_no,
                item_id=item_id,
            )
        except FileNotFoundError:
            # Best effort: an unknown receipt id falls through to storing
            # the attachment as a fresh receipt below.
            pass
    # Resolve the root as well: the file path is resolved before comparison,
    # so an unresolved root (symlinked or relative) would never match and
    # every attachment would be dropped silently.
    storage_root = Path(get_settings().resolved_storage_root_dir).resolve()
    try:
        file_path.resolve().relative_to(storage_root)
    except ValueError:
        return None
    return self.save_receipt(
        filename=file_path.name,
        content=file_path.read_bytes(),
        media_type=media_type,
        document=document,
        current_user=current_user,
        linked_claim_id=claim_id,
        linked_claim_no=claim_no,
        linked_item_id=item_id,
    )
def mark_receipt_linked(
    self,
    *,
    receipt_id: str,
    current_user: CurrentUserContext,
    claim_id: str,
    claim_no: str,
    item_id: str,
) -> ReceiptFolderItemRead:
    """Flag one of the caller's receipts as linked to a claim item.

    Rewrites the stored metadata with status ``linked``, the trimmed
    claim/item identifiers and the current UTC time, then returns the
    updated list view.
    """
    target_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
    meta = self._read_meta(target_dir)
    meta.update(
        {
            "status": "linked",
            "linked_claim_id": str(claim_id or "").strip(),
            "linked_claim_no": str(claim_no or "").strip(),
            "linked_item_id": str(item_id or "").strip(),
            "linked_at": datetime.now(UTC).isoformat(),
        }
    )
    self._write_meta(target_dir, meta)
    return self._build_item(meta)
def list_receipts(
    self,
    *,
    current_user: CurrentUserContext,
    status_filter: str = "all",
) -> list[ReceiptFolderItemRead]:
    """Return the caller's receipts, newest upload first.

    ``status_filter`` is trimmed and lower-cased before matching. Receipts
    without an upload time sort as the oldest.
    """
    wanted = str(status_filter or "all").strip().lower()
    receipts = []
    for meta in self._iter_owner_meta(self._owner_key(current_user)):
        if self._matches_status(meta, wanted):
            receipts.append(self._build_item(meta))

    oldest = datetime.min.replace(tzinfo=UTC)
    receipts.sort(key=lambda entry: entry.uploaded_at or oldest, reverse=True)
    return receipts
def get_receipt(self, receipt_id: str, current_user: CurrentUserContext) -> ReceiptFolderDetailRead:
    """Return the detail view of one of the caller's receipts.

    Combines the list-view fields with OCR details read from the stored
    metadata; the metadata dictionary itself is exposed as ``raw_meta``.
    """
    meta = self._read_receipt_meta(receipt_id, current_user)
    summary_fields = self._build_item(meta).model_dump()
    detail_fields = {
        "engine": str(meta.get("engine") or ""),
        "model": str(meta.get("model") or ""),
        "ocr_text": str(meta.get("ocr_text") or ""),
        "line_count": int(meta.get("ocr_line_count") or 0),
        # A stored page count below one is reported as one.
        "page_count": max(1, int(meta.get("page_count") or 1)),
        "classification_confidence": float(meta.get("ocr_classification_confidence") or 0.0),
        "classification_evidence": [
            str(entry)
            for entry in list(meta.get("ocr_classification_evidence") or [])
            if str(entry).strip()
        ],
        "fields": self._resolve_fields(meta),
        "raw_meta": meta,
    }
    return ReceiptFolderDetailRead(**summary_fields, **detail_fields)
def update_receipt(
    self,
    *,
    receipt_id: str,
    payload: ReceiptFolderUpdate,
    current_user: CurrentUserContext,
) -> ReceiptFolderDetailRead:
    """Apply a partial update to a receipt's metadata and return its detail.

    Only values that were explicitly set and are not ``None`` are applied.
    Classification/summary values overwrite the top-level metadata keys;
    amount, date and merchant go into ``editable_fields``; ``fields``
    replaces ``document_fields`` wholesale. ``updated_at`` is always
    refreshed.
    """
    receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
    meta = self._read_meta(receipt_dir)
    changes = payload.model_dump(exclude_unset=True)

    for name in ("document_type", "document_type_label", "scene_code", "scene_label", "summary"):
        value = changes.get(name)
        if value is not None:
            meta[name] = str(value or "").strip()

    editable = dict(meta.get("editable_fields") or {})
    for name in ("amount", "document_date", "merchant_name"):
        value = changes.get(name)
        if value is not None:
            editable[name] = str(value or "").strip()

    if changes.get("fields") is not None:
        replaced = []
        for field in payload.fields or []:
            if isinstance(field, ReceiptFolderFieldRead):
                replaced.append(field.model_dump())
            else:
                replaced.append(dict(field))
        meta["document_fields"] = replaced

    meta["editable_fields"] = editable
    meta["updated_at"] = datetime.now(UTC).isoformat()
    self._write_meta(receipt_dir, meta)
    return self.get_receipt(receipt_id, current_user)
def delete_receipt(
    self,
    *,
    receipt_id: str,
    current_user: CurrentUserContext,
) -> ReceiptFolderDeleteResponse:
    """Remove a receipt's directory and everything in it.

    Raises ``FileNotFoundError`` (via ``_receipt_dir``) when the receipt does
    not exist for the caller.
    """
    target_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
    shutil.rmtree(target_dir)
    return ReceiptFolderDeleteResponse(receipt_id=receipt_id, message="票据已删除。")
def resolve_source(self, receipt_id: str, current_user: CurrentUserContext) -> tuple[Path, str, str]:
    """Locate the originally uploaded file of a receipt.

    Returns:
        ``(path, media_type, download_name)`` for the stored source file.

    Raises:
        FileNotFoundError: the receipt does not exist, its metadata records
            no source file name, or the recorded path is not a regular file.
    """
    meta = self._read_receipt_meta(receipt_id, current_user)
    receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
    file_name = str(meta.get("source_file_name") or meta.get("file_name") or "").strip()
    # An empty name would resolve to the receipt directory itself, which
    # exists but cannot be served as a file.
    if not file_name:
        raise FileNotFoundError("Receipt source not found")
    path = self._assert_child(receipt_dir / file_name)
    if not path.is_file():
        raise FileNotFoundError("Receipt source not found")
    media_type = self.resolve_media_type(path.name, str(meta.get("media_type") or ""))
    return path, media_type, str(meta.get("file_name") or path.name)
def resolve_preview(self, receipt_id: str, current_user: CurrentUserContext) -> tuple[Path, str, str]:
    """Locate the file to serve as a receipt's preview.

    Prefers the stored preview asset named in the metadata when it exists;
    otherwise falls back to the source file if its media type is
    previewable. Raises ``FileNotFoundError`` when neither applies.
    """
    meta = self._read_receipt_meta(receipt_id, current_user)
    receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)

    stored_name = str(meta.get("preview_file_name") or "").strip()
    stored_path = self._assert_child(receipt_dir / stored_name) if stored_name else None
    if stored_path is not None and stored_path.exists():
        declared_type = str(meta.get("preview_media_type") or "")
        resolved_type = self.resolve_media_type(stored_path.name, declared_type)
        return (stored_path, resolved_type, stored_path.name)

    source_path, source_media_type, source_name = self.resolve_source(receipt_id, current_user)
    if not self._is_previewable(source_media_type):
        raise FileNotFoundError("Receipt preview not found")
    return source_path, source_media_type, source_name
@staticmethod
def normalize_filename(filename: str | None) -> str:
normalized = Path(str(filename or "").strip()).name
normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._")
return normalized or "receipt.bin"
@staticmethod
def resolve_media_type(filename: str, fallback: str | None = None) -> str:
return str(mimetypes.guess_type(filename)[0] or fallback or "application/octet-stream")
def _owner_root(self, owner_key: str) -> Path:
    """Return the validated per-owner directory beneath the storage root."""
    candidate = self.root / owner_key
    return self._assert_child(candidate)
def _receipt_dir(self, owner_key: str, receipt_id: str) -> Path:
normalized = str(receipt_id or "").strip()
if not re.fullmatch(r"[0-9a-fA-F-]{32,36}", normalized):
raise FileNotFoundError("Receipt not found")
path = self._assert_child(self._owner_root(owner_key) / normalized)
if not path.exists() or not path.is_dir():
raise FileNotFoundError("Receipt not found")
return path
def _assert_child(self, path: Path) -> Path:
self.root.mkdir(parents=True, exist_ok=True)
resolved = path.resolve()
try:
resolved.relative_to(self.root)
except ValueError as exc:
raise FileNotFoundError("Receipt path is invalid") from exc
return resolved
@staticmethod
def _owner_key(current_user: CurrentUserContext) -> str:
raw = str(current_user.username or current_user.name or "anonymous").strip().lower()
normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", raw).strip("._")
return normalized or "anonymous"
@staticmethod
def _should_persist_source(filename: str, content: bytes) -> bool:
if not content:
return False
return Path(str(filename or "")).suffix.lower() in SUPPORTED_SUFFIXES
def _write_preview_asset(
    self,
    *,
    receipt_dir: Path,
    source_path: Path,
    media_type: str,
    document: Any | None,
) -> dict[str, Any]:
    """Persist a preview for a receipt and describe it for the metadata.

    When the OCR document carries a decodable ``preview_data_url`` its bytes
    are written next to the source as ``preview<ext>``. Otherwise the source
    file itself is used as the preview if its media type is previewable.

    Returns:
        The ``previewable`` / ``preview_kind`` / ``preview_file_name`` /
        ``preview_media_type`` entries to merge into the receipt metadata.
    """
    preview_data_url = str(getattr(document, "preview_data_url", "") or "").strip()
    decoded = ExpenseClaimAttachmentPresentation.decode_data_url(preview_data_url)
    if decoded is not None:
        preview_media_type, preview_content = decoded
        suffix = mimetypes.guess_extension(preview_media_type) or ".bin"
        preview_name = f"preview{suffix}"
        # An upload that is itself called "preview<ext>" must not be
        # overwritten by the rendered preview (compared case-insensitively
        # for case-folding filesystems).
        if preview_name.lower() == source_path.name.lower():
            preview_name = f"preview_render{suffix}"
        preview_path = receipt_dir / preview_name
        preview_path.write_bytes(preview_content)
        return {
            "previewable": True,
            "preview_kind": "image",
            "preview_file_name": preview_name,
            "preview_media_type": preview_media_type,
        }
    if self._is_previewable(media_type):
        return {
            "previewable": True,
            "preview_kind": "image" if media_type.startswith("image/") else "pdf",
            "preview_file_name": source_path.name,
            "preview_media_type": media_type,
        }
    return {
        "previewable": False,
        "preview_kind": "",
        "preview_file_name": "",
        "preview_media_type": "",
    }
@staticmethod
def _is_previewable(media_type: str) -> bool:
return str(media_type or "").startswith("image/") or str(media_type or "") == "application/pdf"
@staticmethod
def _build_document_meta(document: Any | None) -> dict[str, Any]:
fields = []
for field in list(getattr(document, "document_fields", []) or []):
if isinstance(field, dict):
fields.append(
{
"key": str(field.get("key") or "").strip(),
"label": str(field.get("label") or "").strip(),
"value": str(field.get("value") or "").strip(),
}
)
else:
fields.append(
{
"key": str(getattr(field, "key", "") or "").strip(),
"label": str(getattr(field, "label", "") or "").strip(),
"value": str(getattr(field, "value", "") or "").strip(),
}
)
fields = [field for field in fields if field["label"] and field["value"]]
return {
"engine": str(getattr(document, "engine", "") or ""),
"model": str(getattr(document, "model", "") or ""),
"ocr_text": str(getattr(document, "text", "") or ""),
"summary": str(getattr(document, "summary", "") or ""),
"ocr_avg_score": float(getattr(document, "avg_score", 0.0) or 0.0),
"ocr_line_count": int(getattr(document, "line_count", 0) or 0),
"page_count": int(getattr(document, "page_count", 1) or 1),
"document_type": str(getattr(document, "document_type", "") or "other"),
"document_type_label": str(getattr(document, "document_type_label", "") or "其他单据"),
"scene_code": str(getattr(document, "scene_code", "") or "other"),
"scene_label": str(getattr(document, "scene_label", "") or "其他票据"),
"ocr_classification_source": str(getattr(document, "classification_source", "") or ""),
"ocr_classification_confidence": float(getattr(document, "classification_confidence", 0.0) or 0.0),
"ocr_classification_evidence": [
str(value) for value in list(getattr(document, "classification_evidence", []) or []) if str(value).strip()
],
"document_fields": fields,
"editable_fields": {},
"ocr_warnings": [str(value) for value in list(getattr(document, "warnings", []) or []) if str(value).strip()],
}
def _iter_owner_meta(self, owner_key: str) -> list[dict[str, Any]]:
owner_root = self._owner_root(owner_key)
if not owner_root.exists():
return []
metas = []
for meta_path in owner_root.glob("*/meta.json"):
meta = self._read_meta(meta_path.parent)
if meta:
metas.append(meta)
return metas
def _read_receipt_meta(self, receipt_id: str, current_user: CurrentUserContext) -> dict[str, Any]:
return self._read_meta(self._receipt_dir(self._owner_key(current_user), receipt_id))
def _resolve_existing_item(
self,
receipt_id: str | None,
current_user: CurrentUserContext,
) -> ReceiptFolderItemRead | None:
normalized = str(receipt_id or "").strip()
if not normalized:
return None
try:
return self._build_item(self._read_receipt_meta(normalized, current_user))
except FileNotFoundError:
return None
@staticmethod
def _meta_path(receipt_dir: Path) -> Path:
return receipt_dir / "meta.json"
def _read_meta(self, receipt_dir: Path) -> dict[str, Any]:
meta_path = self._meta_path(receipt_dir)
if not meta_path.exists():
raise FileNotFoundError("Receipt not found")
try:
payload = json.loads(meta_path.read_text(encoding="utf-8"))
except (OSError, json.JSONDecodeError) as exc:
raise FileNotFoundError("Receipt metadata not found") from exc
return payload if isinstance(payload, dict) else {}
def _write_meta(self, receipt_dir: Path, payload: dict[str, Any]) -> None:
self._meta_path(receipt_dir).write_text(
json.dumps(payload, ensure_ascii=False, indent=2),
encoding="utf-8",
)
@staticmethod
def _matches_status(meta: dict[str, Any], status_filter: str) -> bool:
if status_filter in {"", "all"}:
return True
return str(meta.get("status") or "unlinked").strip().lower() == status_filter
def _build_item(self, meta: dict[str, Any]) -> ReceiptFolderItemRead:
    """Convert stored receipt metadata into a list-item response model.

    Missing or falsy metadata entries fall back to neutral defaults. Amount,
    document date and merchant name prefer user-edited values over
    recognised fields. URLs are only produced when a receipt id is present,
    and the preview URL additionally requires the ``previewable`` flag.
    """

    def _text(name: str, fallback: str = "") -> str:
        return str(meta.get(name) or fallback)

    receipt_id = _text("id").strip()
    status_value = _text("status", "unlinked").strip() or "unlinked"
    can_preview = bool(meta.get("previewable"))
    status_label = "已关联" if status_value == "linked" else "未关联"
    preview_url = f"/receipt-folder/{receipt_id}/preview" if can_preview and receipt_id else ""
    source_url = f"/receipt-folder/{receipt_id}/source" if receipt_id else ""

    return ReceiptFolderItemRead(
        id=receipt_id,
        file_name=_text("file_name"),
        media_type=_text("media_type", "application/octet-stream"),
        size_bytes=int(meta.get("size_bytes") or 0),
        status=status_value,
        status_label=status_label,
        document_type=_text("document_type", "other"),
        document_type_label=_text("document_type_label", "其他单据"),
        scene_code=_text("scene_code", "other"),
        scene_label=_text("scene_label", "其他票据"),
        summary=_text("summary"),
        amount=self._resolve_editable_or_field(meta, "amount", labels=("金额", "价税合计", "票价")),
        document_date=self._resolve_editable_or_field(meta, "document_date", labels=("日期", "开票日期", "乘车日期")),
        merchant_name=self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方")),
        avg_score=float(meta.get("ocr_avg_score") or 0.0),
        uploaded_at=self._parse_datetime(meta.get("uploaded_at")),
        linked_at=self._parse_datetime(meta.get("linked_at")),
        linked_claim_id=_text("linked_claim_id"),
        linked_claim_no=_text("linked_claim_no"),
        previewable=can_preview,
        preview_kind=_text("preview_kind"),
        preview_url=preview_url,
        source_url=source_url,
        warnings=[str(entry) for entry in list(meta.get("ocr_warnings") or []) if str(entry).strip()],
    )
def _resolve_fields(self, meta: dict[str, Any]) -> list[ReceiptFolderFieldRead]:
return [
ReceiptFolderFieldRead(
key=str(field.get("key") or ""),
label=str(field.get("label") or ""),
value=str(field.get("value") or ""),
)
for field in list(meta.get("document_fields") or [])
if isinstance(field, dict) and str(field.get("label") or "").strip()
]
def _resolve_editable_or_field(self, meta: dict[str, Any], key: str, *, labels: tuple[str, ...]) -> str:
editable = meta.get("editable_fields")
if isinstance(editable, dict):
value = str(editable.get(key) or "").strip()
if value:
return value
label_set = set(labels)
for field in self._resolve_fields(meta):
if field.label in label_set or field.key == key:
return field.value
return ""
@staticmethod
def _parse_datetime(value: Any) -> datetime | None:
raw = str(value or "").strip()
if not raw:
return None
try:
return datetime.fromisoformat(raw)
except ValueError:
return None

View File

@@ -264,6 +264,74 @@ def test_current_employee_profile_endpoint_resolves_login_user() -> None:
payload = response.json()
assert payload["employee_id"] == "emp-main"
assert {item["profile_type"] for item in payload["profiles"]} >= {"expense", "ai_usage"}
ai_profile = next(item for item in payload["profiles"] if item["profile_type"] == "ai_usage")
assert ai_profile["metrics"]["ai_run_duration_ms"] == 120
assert payload["profile_tags"]
assert payload["radar"]["dimensions"]
def test_current_admin_profile_endpoint_returns_account_usage_profile() -> None:
    """The "me/latest" endpoint returns an AI-usage profile for the admin account.

    Twelve successful two-second agent runs are seeded for user ``admin``;
    the response must then expose a single ``ai_usage`` profile whose
    metrics aggregate those runs.
    """
    session_factory = build_session_factory()
    with session_factory() as db:
        seed_profile_data(db)
        reference_time = datetime.now(UTC)
        for offset in range(12):
            run_id = f"run-admin-usage-{offset}"
            started_at = reference_time - timedelta(days=1, minutes=offset)
            tool_call = AgentToolCall(
                run_id=run_id,
                tool_type="database",
                tool_name="agent_runs.list",
                request_json={"limit": 20},
                response_json={"ok": True},
                status="success",
                duration_ms=120,
            )
            agent_run = AgentRun(
                run_id=run_id,
                agent="user_agent",
                source="user_message",
                user_id="admin",
                status="success",
                result_summary="管理员查看运行概览。",
                started_at=started_at,
                finished_at=started_at + timedelta(seconds=2),
                tool_calls=[tool_call],
            )
            db.add(agent_run)
        db.commit()

    app = create_app()

    def override_db() -> Generator[Session, None, None]:
        session = session_factory()
        try:
            yield session
        finally:
            session.close()

    app.dependency_overrides[get_db] = override_db
    client = TestClient(app)

    query = {
        "scene": "operations",
        "window_days": 90,
        "expense_type_scope": "overall",
    }
    admin_headers = {"x-auth-username": "admin", "x-auth-name": "admin", "x-auth-is-admin": "true"}
    response = client.get(
        "/api/v1/employee-profiles/me/latest",
        params=query,
        headers=admin_headers,
    )

    assert response.status_code == 200
    payload = response.json()
    assert payload["employee_id"] == "admin"
    assert payload["empty_reason"] == ""
    assert [item["profile_type"] for item in payload["profiles"]] == ["ai_usage"]
    metrics = payload["profiles"][0]["metrics"]
    assert metrics["ai_run_count"] == 12
    assert metrics["ai_run_duration_ms"] == 24000
    assert payload["profile_tags"]
    assert payload["radar"]["dimensions"]

View File

@@ -8,6 +8,7 @@ from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import StaticPool
from app.api.deps import get_db
from app.core.config import get_settings
from app.db.base import Base
from app.main import create_app
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeFieldRead, OcrRecognizeLineRead
@@ -35,7 +36,7 @@ def build_client() -> TestClient:
return TestClient(app)
def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch, tmp_path) -> None:
def fake_recognize(
self,
files: list[tuple[str, bytes, str | None]],
@@ -76,21 +77,84 @@ def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
],
)
monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
get_settings.cache_clear()
monkeypatch.setattr(OcrService, "recognize_files", fake_recognize)
client = build_client()
try:
client = build_client()
auth_headers = {"x-auth-username": "pytest", "x-auth-name": "Py Test"}
response = client.post(
"/api/v1/ocr/recognize",
headers={"x-auth-username": "pytest", "x-auth-name": "Py Test"},
files=[("files", ("invoice.png", b"fake-image", "image/png"))],
)
response = client.post(
"/api/v1/ocr/recognize",
headers=auth_headers,
files=[("files", ("invoice.png", b"fake-image", "image/png"))],
)
assert response.status_code == 200
payload = response.json()
assert payload["engine"] == "paddleocr_mobile"
assert payload["success_count"] == 1
assert payload["documents"][0]["filename"] == "invoice.png"
assert payload["documents"][0]["summary"] == "增值税电子发票,金额 100 元。"
assert payload["documents"][0]["document_type"] == "vat_invoice"
assert payload["documents"][0]["document_type_label"] == "增值税发票"
assert payload["documents"][0]["document_fields"][0]["label"] == "金额"
assert response.status_code == 200
payload = response.json()
document = payload["documents"][0]
assert payload["engine"] == "paddleocr_mobile"
assert payload["success_count"] == 1
assert document["filename"] == "invoice.png"
assert document["summary"] == "增值税电子发票,金额 100 元。"
assert document["document_type"] == "vat_invoice"
assert document["document_type_label"] == "增值税发票"
assert document["document_fields"][0]["label"] == "金额"
assert document["receipt_id"]
assert document["receipt_status"] == "unlinked"
assert document["receipt_preview_url"].endswith(f"/receipt-folder/{document['receipt_id']}/preview")
assert document["receipt_source_url"].endswith(f"/receipt-folder/{document['receipt_id']}/source")
receipt_id = document["receipt_id"]
list_response = client.get("/api/v1/receipt-folder?status=unlinked", headers=auth_headers)
assert list_response.status_code == 200
receipt_list = list_response.json()
assert len(receipt_list) == 1
assert receipt_list[0]["id"] == receipt_id
assert receipt_list[0]["amount"] == "100元"
repeated_response = client.post(
"/api/v1/ocr/recognize",
headers=auth_headers,
data={"receipt_ids": receipt_id},
files=[("files", ("invoice.png", b"fake-image", "image/png"))],
)
assert repeated_response.status_code == 200
repeated_document = repeated_response.json()["documents"][0]
assert repeated_document["receipt_id"] == receipt_id
all_receipts_response = client.get("/api/v1/receipt-folder?status=all", headers=auth_headers)
assert all_receipts_response.status_code == 200
assert len(all_receipts_response.json()) == 1
detail_response = client.get(f"/api/v1/receipt-folder/{receipt_id}", headers=auth_headers)
assert detail_response.status_code == 200
detail_payload = detail_response.json()
assert detail_payload["file_name"] == "invoice.png"
assert detail_payload["fields"][0]["label"] == "金额"
update_response = client.patch(
f"/api/v1/receipt-folder/{receipt_id}",
headers=auth_headers,
json={
"document_type_label": "电子发票",
"amount": "108元",
"fields": [{"key": "amount", "label": "金额", "value": "108元"}],
},
)
assert update_response.status_code == 200
assert update_response.json()["document_type_label"] == "电子发票"
assert update_response.json()["amount"] == "108元"
preview_response = client.get(f"/api/v1/receipt-folder/{receipt_id}/preview", headers=auth_headers)
assert preview_response.status_code == 200
assert preview_response.content == b"fake-image"
delete_response = client.delete(f"/api/v1/receipt-folder/{receipt_id}", headers=auth_headers)
assert delete_response.status_code == 200
assert delete_response.json()["receipt_id"] == receipt_id
deleted_response = client.get(f"/api/v1/receipt-folder/{receipt_id}", headers=auth_headers)
assert deleted_response.status_code == 404
finally:
get_settings.cache_clear()