feat: 新增票据夹模块并优化 OCR 与员工画像服务
后端新增票据夹端点、数据模型和服务模块,优化 OCR 端点 Schema 和附件操作逻辑,完善员工行为画像服务和辅助函数。前端新增票据夹视图和服务层,优化文档中心样式和侧边栏导航,完善员工画像详情弹窗和权限控制,补充单元测试。
This commit is contained in:
@@ -9,6 +9,7 @@ from sqlalchemy.orm import Session
|
||||
from app.api.deps import CurrentUserContext, get_current_user, get_db
|
||||
from app.models.employee import Employee
|
||||
from app.schemas.employee_profile import EmployeeProfileLatestRead
|
||||
from app.services.account_behavior_profile import AccountBehaviorProfileService
|
||||
from app.services.employee_behavior_profile_service import EmployeeBehaviorProfileService
|
||||
|
||||
router = APIRouter(prefix="/employee-profiles")
|
||||
@@ -31,13 +32,13 @@ def get_current_employee_latest_profile(
|
||||
) -> EmployeeProfileLatestRead:
|
||||
employee = _resolve_current_employee(db, current_user)
|
||||
if employee is None:
|
||||
return EmployeeProfileLatestRead(
|
||||
employee_id=current_user.username,
|
||||
employee_name=current_user.name,
|
||||
return AccountBehaviorProfileService(db).get_latest_account_profile(
|
||||
account_id=current_user.username,
|
||||
account_name=current_user.name,
|
||||
identifiers=_current_account_identifiers(current_user),
|
||||
scene=scene,
|
||||
window_days=window_days,
|
||||
expense_type_scope=expense_type_scope,
|
||||
empty_reason="当前登录用户未匹配到员工目录,暂无法形成用户画像。",
|
||||
)
|
||||
|
||||
service = EmployeeBehaviorProfileService(db)
|
||||
@@ -47,7 +48,7 @@ def get_current_employee_latest_profile(
|
||||
window_days=window_days,
|
||||
expense_type_scope=expense_type_scope,
|
||||
)
|
||||
if latest.empty_reason:
|
||||
if latest.empty_reason or _missing_usage_duration_metric(latest):
|
||||
service.refresh_employee_profiles(
|
||||
employee_id=employee.id,
|
||||
window_days=(window_days,),
|
||||
@@ -115,3 +116,24 @@ def _resolve_current_employee(
|
||||
|
||||
stmt = select(Employee).where(or_(*conditions)).order_by(Employee.created_at.asc()).limit(1)
|
||||
return db.scalars(stmt).first()
|
||||
|
||||
|
||||
def _missing_usage_duration_metric(latest: EmployeeProfileLatestRead) -> bool:
    """Tell whether an operations-scene result lacks the AI run-duration metric.

    Only the ``"operations"`` scene is inspected. The first profile whose
    ``profile_type`` is ``"ai_usage"`` decides the answer: ``True`` when its
    ``metrics`` mapping has no ``"ai_run_duration_ms"`` key. Any other scene,
    or a result without an ``ai_usage`` profile, yields ``False``.
    """
    if latest.scene == "operations":
        ai_usage = next(
            (entry for entry in latest.profiles if entry.profile_type == "ai_usage"),
            None,
        )
        if ai_usage is not None:
            return "ai_run_duration_ms" not in ai_usage.metrics
    return False
|
||||
|
||||
|
||||
def _current_account_identifiers(current_user: CurrentUserContext) -> set[str]:
    """Collect the non-blank identifiers of the logged-in account.

    The username and display name are the candidates. A candidate is kept
    (unchanged, not stripped) when its string form contains at least one
    non-whitespace character.
    """
    candidates = (current_user.username, current_user.name)
    identifiers: set[str] = set()
    for candidate in candidates:
        if str(candidate or "").strip():
            identifiers.add(candidate)
    return identifiers
|
||||
|
||||
@@ -2,13 +2,14 @@ from __future__ import annotations
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.deps import CurrentUserContext, get_current_user, get_db
|
||||
from app.schemas.common import ErrorResponse
|
||||
from app.schemas.ocr import OcrRecognizeBatchRead
|
||||
from app.services.ocr import OcrService
|
||||
from app.services.receipt_folder import ReceiptFolderService
|
||||
|
||||
router = APIRouter(prefix="/ocr")
|
||||
|
||||
@@ -35,8 +36,9 @@ router = APIRouter(prefix="/ocr")
|
||||
)
|
||||
async def recognize_ocr_documents(
|
||||
files: Annotated[list[UploadFile], File(description="待识别的票据图片或 PDF。")],
|
||||
_: Annotated[CurrentUserContext, Depends(get_current_user)],
|
||||
current_user: Annotated[CurrentUserContext, Depends(get_current_user)],
|
||||
db: Annotated[Session, Depends(get_db)],
|
||||
receipt_ids: Annotated[list[str] | None, Form(description="可选,来源于票据夹的持久化票据 ID。")] = None,
|
||||
) -> OcrRecognizeBatchRead:
|
||||
try:
|
||||
payload = []
|
||||
@@ -48,7 +50,13 @@ async def recognize_ocr_documents(
|
||||
upload.content_type,
|
||||
)
|
||||
)
|
||||
return OcrService(db).recognize_files(payload)
|
||||
result = OcrService(db).recognize_files(payload)
|
||||
return ReceiptFolderService().persist_ocr_batch(
|
||||
files=payload,
|
||||
result=result,
|
||||
current_user=current_user,
|
||||
receipt_ids=receipt_ids or [],
|
||||
)
|
||||
except ValueError as exc:
|
||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
|
||||
except RuntimeError as exc:
|
||||
|
||||
108
server/src/app/api/v1/endpoints/receipt_folder.py
Normal file
108
server/src/app/api/v1/endpoints/receipt_folder.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||||
from fastapi.responses import FileResponse
|
||||
|
||||
from app.api.deps import CurrentUserContext, get_current_user
|
||||
from app.schemas.common import ErrorResponse
|
||||
from app.schemas.receipt_folder import (
|
||||
ReceiptFolderDeleteResponse,
|
||||
ReceiptFolderDetailRead,
|
||||
ReceiptFolderItemRead,
|
||||
ReceiptFolderUpdate,
|
||||
)
|
||||
from app.services.receipt_folder import ReceiptFolderService
|
||||
|
||||
# Router for the receipt-folder endpoints; every route declared on it is
# served under the "/receipt-folder" prefix.
router = APIRouter(prefix="/receipt-folder")
# Annotated alias that injects the authenticated user via `get_current_user`.
CurrentUser = Annotated[CurrentUserContext, Depends(get_current_user)]
|
||||
|
||||
|
||||
# GET "" — list the caller's persisted receipts.
# The `status` query parameter (default "all") is forwarded to the service as
# `status_filter`; the service decides which values it understands.
@router.get(
    "",
    response_model=list[ReceiptFolderItemRead],
    summary="查询票据夹列表",
    description="返回当前登录用户上传并持久化的票据列表。",
)
def list_receipts(
    current_user: CurrentUser,
    status_filter: Annotated[str, Query(alias="status")] = "all",
) -> list[ReceiptFolderItemRead]:
    service = ReceiptFolderService()
    receipts = service.list_receipts(current_user=current_user, status_filter=status_filter)
    return receipts
|
||||
|
||||
|
||||
# GET "/{receipt_id}" — return the detail view of one receipt.
# A FileNotFoundError from the service is translated into HTTP 404.
@router.get(
    "/{receipt_id}",
    response_model=ReceiptFolderDetailRead,
    summary="读取票据详情",
    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据不存在。"}},
)
def get_receipt(receipt_id: str, current_user: CurrentUser) -> ReceiptFolderDetailRead:
    try:
        detail = ReceiptFolderService().get_receipt(receipt_id, current_user)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt not found") from exc
    else:
        return detail
|
||||
|
||||
|
||||
# PATCH "/{receipt_id}" — apply a partial update to a receipt's basic
# recognition data and return the refreshed detail view.
# A FileNotFoundError from the service is translated into HTTP 404.
@router.patch(
    "/{receipt_id}",
    response_model=ReceiptFolderDetailRead,
    summary="更新票据基础识别信息",
    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据不存在。"}},
)
def update_receipt(
    receipt_id: str,
    payload: ReceiptFolderUpdate,
    current_user: CurrentUser,
) -> ReceiptFolderDetailRead:
    try:
        updated = ReceiptFolderService().update_receipt(
            receipt_id=receipt_id,
            payload=payload,
            current_user=current_user,
        )
    except FileNotFoundError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt not found") from exc
    else:
        return updated
|
||||
|
||||
|
||||
# DELETE "/{receipt_id}" — remove one receipt and return a confirmation.
# A FileNotFoundError from the service is translated into HTTP 404.
@router.delete(
    "/{receipt_id}",
    response_model=ReceiptFolderDeleteResponse,
    summary="删除票据",
    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据不存在。"}},
)
def delete_receipt(receipt_id: str, current_user: CurrentUser) -> ReceiptFolderDeleteResponse:
    try:
        outcome = ReceiptFolderService().delete_receipt(receipt_id=receipt_id, current_user=current_user)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt not found") from exc
    else:
        return outcome
|
||||
|
||||
|
||||
# GET "/{receipt_id}/preview" — stream the file the service resolves as the
# receipt's preview. Only the resolution step is guarded: a FileNotFoundError
# raised there becomes HTTP 404.
@router.get(
    "/{receipt_id}/preview",
    summary="预览票据原始文件",
    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据预览不存在。"}},
)
def preview_receipt(receipt_id: str, current_user: CurrentUser) -> FileResponse:
    try:
        resolved = ReceiptFolderService().resolve_preview(receipt_id, current_user)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt preview not found") from exc
    preview_path, preview_media_type, download_name = resolved
    return FileResponse(preview_path, media_type=preview_media_type, filename=download_name)
|
||||
|
||||
|
||||
# GET "/{receipt_id}/source" — stream the receipt's stored source file.
# Only the resolution step is guarded: a FileNotFoundError raised there
# becomes HTTP 404.
@router.get(
    "/{receipt_id}/source",
    summary="读取票据源文件",
    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据源文件不存在。"}},
)
def source_receipt(receipt_id: str, current_user: CurrentUser) -> FileResponse:
    try:
        resolved = ReceiptFolderService().resolve_source(receipt_id, current_user)
    except FileNotFoundError as exc:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt source not found") from exc
    source_path, source_media_type, download_name = resolved
    return FileResponse(source_path, media_type=source_media_type, filename=download_name)
|
||||
@@ -2,7 +2,7 @@ from __future__ import annotations
|
||||
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
|
||||
from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -372,6 +372,7 @@ async def upload_expense_claim_item_attachment(
|
||||
file: Annotated[UploadFile, File(description="待上传的附件文件。")],
|
||||
db: DbSession,
|
||||
current_user: CurrentUser,
|
||||
receipt_id: Annotated[str | None, Form(description="可选,来源于票据夹的持久化票据 ID。")] = None,
|
||||
) -> ExpenseClaimAttachmentActionResponse:
|
||||
service = ExpenseClaimService(db)
|
||||
try:
|
||||
@@ -382,6 +383,7 @@ async def upload_expense_claim_item_attachment(
|
||||
content=await file.read(),
|
||||
media_type=file.content_type,
|
||||
current_user=current_user,
|
||||
source_receipt_id=receipt_id or "",
|
||||
)
|
||||
except LookupError as error:
|
||||
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error
|
||||
|
||||
@@ -13,6 +13,7 @@ from app.api.v1.endpoints.knowledge import router as knowledge_router
|
||||
from app.api.v1.endpoints.ocr import router as ocr_router
|
||||
from app.api.v1.endpoints.ontology import router as ontology_router
|
||||
from app.api.v1.endpoints.orchestrator import router as orchestrator_router
|
||||
from app.api.v1.endpoints.receipt_folder import router as receipt_folder_router
|
||||
from app.api.v1.endpoints.reimbursements import router as reimbursements_router
|
||||
from app.api.v1.endpoints.settings import router as settings_router
|
||||
from app.api.v1.endpoints.system_logs import router as system_logs_router
|
||||
@@ -29,6 +30,7 @@ router.include_router(knowledge_router, tags=["knowledge"])
|
||||
router.include_router(ocr_router, tags=["ocr"])
|
||||
router.include_router(ontology_router, tags=["ontology"])
|
||||
router.include_router(orchestrator_router, tags=["orchestrator"])
|
||||
router.include_router(receipt_folder_router, tags=["receipt-folder"])
|
||||
router.include_router(employees_router, prefix="/employees", tags=["employees"])
|
||||
router.include_router(employee_profiles_router, tags=["employee-profiles"])
|
||||
router.include_router(reimbursements_router, prefix="/reimbursements", tags=["reimbursements"])
|
||||
|
||||
@@ -39,6 +39,10 @@ class OcrRecognizeDocumentRead(BaseModel):
|
||||
)
|
||||
preview_kind: str = Field(default="", description="预览类型,PDF 转图后通常为 image。")
|
||||
preview_data_url: str = Field(default="", description="用于前端展示的图片预览 data URL。")
|
||||
receipt_id: str = Field(default="", description="票据夹中的持久化票据 ID。")
|
||||
receipt_status: str = Field(default="", description="票据夹关联状态,unlinked / linked。")
|
||||
receipt_preview_url: str = Field(default="", description="票据夹预览接口地址。")
|
||||
receipt_source_url: str = Field(default="", description="票据夹原始文件接口地址。")
|
||||
warnings: list[str] = Field(default_factory=list, description="该文件的识别提示或警告。")
|
||||
lines: list[OcrRecognizeLineRead] = Field(default_factory=list, description="逐行识别结果。")
|
||||
|
||||
|
||||
68
server/src/app/schemas/receipt_folder.py
Normal file
68
server/src/app/schemas/receipt_folder.py
Normal file
@@ -0,0 +1,68 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# One key / label / value triple describing a single receipt field.
# Used both in the detail response and as an element of the update payload's
# `fields` list. All three members default to the empty string.
class ReceiptFolderFieldRead(BaseModel):
    key: str = ""
    label: str = ""
    value: str = ""
|
||||
|
||||
|
||||
# List-level view of one receipt stored in the receipt folder.
# Only `id` and `file_name` are required; every other member has a default.
class ReceiptFolderItemRead(BaseModel):
    # Identity and stored file.
    id: str
    file_name: str
    media_type: str = "application/octet-stream"
    size_bytes: int = 0
    # Link state: the service writes "linked" or "unlinked" here.
    status: str = "unlinked"
    status_label: str = "未关联"
    # Classification codes with their display labels.
    document_type: str = "other"
    document_type_label: str = "其他单据"
    scene_code: str = "other"
    scene_label: str = "其他票据"
    # Headline values kept as plain strings.
    summary: str = ""
    amount: str = ""
    document_date: str = ""
    merchant_name: str = ""
    avg_score: float = 0.0
    # Timestamps; None when not recorded.
    uploaded_at: datetime | None = None
    linked_at: datetime | None = None
    # Expense claim this receipt is attached to; empty while unlinked.
    linked_claim_id: str = ""
    linked_claim_no: str = ""
    # Preview / download information for the client.
    previewable: bool = False
    preview_kind: str = ""
    preview_url: str = ""
    source_url: str = ""
    warnings: list[str] = Field(default_factory=list)
|
||||
|
||||
|
||||
# Detail view of a receipt: every list-level member of ReceiptFolderItemRead
# plus OCR output, classification data, field triples and the stored metadata.
class ReceiptFolderDetailRead(ReceiptFolderItemRead):
    # OCR engine / model identifiers and the recognised text.
    engine: str = ""
    model: str = ""
    ocr_text: str = ""
    line_count: int = 0
    # Defaults to 1; the service also clamps the stored value to at least 1.
    page_count: int = 1
    classification_confidence: float = 0.0
    classification_evidence: list[str] = Field(default_factory=list)
    fields: list[ReceiptFolderFieldRead] = Field(default_factory=list)
    # The service passes the whole stored metadata dict through unchanged.
    raw_meta: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
|
||||
# Partial-update payload for a receipt. Every member is optional and defaults
# to None; the service skips members that are unset or explicitly None.
class ReceiptFolderUpdate(BaseModel):
    # Written directly onto the stored metadata by the service.
    document_type: str | None = None
    document_type_label: str | None = None
    scene_code: str | None = None
    scene_label: str | None = None
    summary: str | None = None
    # Stored by the service under the metadata's "editable_fields" mapping.
    amount: str | None = None
    document_date: str | None = None
    merchant_name: str | None = None
    # When provided, replaces the stored "document_fields" list as a whole.
    fields: list[ReceiptFolderFieldRead] | None = None
|
||||
|
||||
|
||||
# Confirmation returned after a receipt is deleted: a human-readable message
# and the id of the receipt that was removed.
class ReceiptFolderDeleteResponse(BaseModel):
    message: str
    receipt_id: str
|
||||
176
server/src/app/services/account_behavior_profile.py
Normal file
176
server/src/app/services/account_behavior_profile.py
Normal file
@@ -0,0 +1,176 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
|
||||
from app.algorithem.employee_behavior_profile import (
|
||||
LEVEL_LABELS,
|
||||
PROFILE_LABELS,
|
||||
ProfileComponent,
|
||||
evaluate_weighted_profile,
|
||||
score_by_bands,
|
||||
)
|
||||
from app.algorithem.employee_behavior_profile_tags import build_profile_radar, build_profile_tags
|
||||
from app.models.agent_run import AgentRun
|
||||
from app.schemas.employee_profile import EmployeeProfileLatestRead, EmployeeProfileRead
|
||||
from app.services.employee_behavior_profile_helpers import EmployeeBehaviorProfileMetricHelpers
|
||||
|
||||
|
||||
class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
    """Compute an on-the-fly AI-usage profile for a login account.

    The profile is derived from the account's ``AgentRun`` rows and returned
    in the ``EmployeeProfileLatestRead`` shape. Nothing is written to the
    database by the methods in this class.
    """

    def __init__(self, db: Session) -> None:
        """Keep the SQLAlchemy session used for all queries."""
        self.db = db

    def get_latest_account_profile(
        self,
        *,
        account_id: str,
        account_name: str,
        identifiers: set[str],
        scene: str,
        window_days: int,
        expense_type_scope: str,
    ) -> EmployeeProfileLatestRead:
        """Build the latest profile for an account.

        Args:
            account_id: Value reported as ``employee_id`` in the result.
            account_name: Value reported as ``employee_name`` in the result.
            identifiers: Candidate values matched against ``AgentRun.user_id``.
            scene: Profile scene; only ``"operations"`` produces a profile.
            window_days: Look-back window, in days, counted from now (UTC).
            expense_type_scope: Echoed into the result and the metrics.

        Returns:
            A result carrying ``empty_reason`` when the scene is not
            ``"operations"`` or no runs were found; otherwise a result with a
            single ``ai_usage`` profile plus its tags and radar data.
        """
        # Fields shared by every result variant returned below.
        common: dict[str, Any] = {
            "employee_id": account_id,
            "employee_name": account_name,
            "scene": scene,
            "window_days": window_days,
            "expense_type_scope": expense_type_scope,
        }

        if scene != "operations":
            return EmployeeProfileLatestRead(
                **common,
                empty_reason="当前账号未匹配员工目录,无法形成审批场景员工画像。",
            )

        cutoff = datetime.now(UTC) - timedelta(days=window_days)
        runs = self._fetch_account_runs(identifiers, cutoff)
        if not runs:
            return EmployeeProfileLatestRead(
                **common,
                empty_reason="当前账号暂无可统计的智能体运行记录。",
            )

        result = self._calculate_account_ai_usage_profile(
            runs=runs,
            window_days=window_days,
            expense_type_scope=expense_type_scope,
        )
        payload = {
            "profile_type": result.profile_type,
            "profile_label": result.profile_label,
            "score": result.profile_score,
            "level": result.profile_level,
            "metrics": result.metrics,
            "top_contributors": result.top_contributors(),
        }
        tags = build_profile_tags([payload], scene=scene)
        radar = build_profile_radar([payload], tags, scene=scene)

        # The account path never raises review priority: it is fixed at "normal".
        return EmployeeProfileLatestRead(
            **common,
            calculated_at=datetime.now(UTC),
            review_priority_score=0,
            review_priority_level="normal",
            review_priority_label=LEVEL_LABELS["normal"],
            profiles=[
                EmployeeProfileRead(
                    profile_type=payload["profile_type"],
                    profile_label=PROFILE_LABELS.get(payload["profile_type"], payload["profile_type"]),
                    score=payload["score"],
                    level=payload["level"],
                    level_label=LEVEL_LABELS.get(payload["level"], payload["level"]),
                    metrics=payload["metrics"],
                    top_contributors=payload["top_contributors"],
                )
            ],
            profile_tags=tags,
            radar=radar,
        )

    def _calculate_account_ai_usage_profile(
        self,
        *,
        runs: list[AgentRun],
        window_days: int,
        expense_type_scope: str,
    ):
        """Score the ``ai_usage`` profile from a list of agent runs.

        Returns whatever ``evaluate_weighted_profile`` returns for the four
        components below and the accompanying metrics dict.
        """
        run_count = len(runs)

        tool_calls = []
        for run in runs:
            tool_calls.extend(run.tool_calls)
        # Any status other than "success"/"ok" (case-insensitive) counts as failed.
        failed_calls = [
            call for call in tool_calls if str(call.status or "").lower() not in {"success", "ok"}
        ]
        failed_count = len(failed_calls)

        estimated_tokens = self._estimate_tokens(runs)
        duration_ms = self._sum_agent_run_duration_ms(runs)
        token_mode = "estimated_token_count" if estimated_tokens else "unavailable"

        # NOTE(review): the four weights below sum to 0.80 — confirm that
        # evaluate_weighted_profile is expected to handle a partial weight total.
        call_count_component = ProfileComponent(
            "ai_call_count_score",
            "AI 调用次数",
            score_by_bands(run_count, [(0, 0), (3, 25), (10, 65), (20, 100)]),
            run_count,
            "次",
            Decimal("0.25"),
        )
        token_component = ProfileComponent(
            "token_cost_score",
            "Token 使用强度",
            score_by_bands(
                estimated_tokens, [(0, 0), (2000, 25), (8000, 65), (20000, 100)]
            ),
            estimated_tokens,
            "tokens",
            Decimal("0.25"),
        )
        # This component is fed with the run count as well.
        generated_claim_component = ProfileComponent(
            "ai_generated_claim_ratio_score",
            "AI 生成申请比例",
            score_by_bands(run_count, [(0, 0), (2, 20), (8, 60), (16, 90)]),
            run_count,
            "次",
            Decimal("0.20"),
        )
        failure_component = ProfileComponent(
            "failed_ai_call_score",
            "AI 调用失败",
            score_by_bands(failed_count, [(0, 0), (1, 35), (3, 80)]),
            failed_count,
            "次",
            Decimal("0.10"),
        )

        metrics = {
            "window_days": window_days,
            "expense_type_scope": expense_type_scope,
            "peer_sample_size": 0,
            "ai_run_count": run_count,
            "tool_call_count": len(tool_calls),
            "failed_tool_call_count": failed_count,
            "token_count_mode": token_mode,
            "estimated_token_count": estimated_tokens,
            "exact_token_count": None,
            "ai_run_duration_ms": duration_ms,
            "ai_run_duration_mode": "elapsed_or_tool_call_fallback",
        }

        return evaluate_weighted_profile(
            "ai_usage",
            [
                call_count_component,
                token_component,
                generated_claim_component,
                failure_component,
            ],
            metrics=metrics,
        )

    def _fetch_account_runs(self, identifiers: set[str], cutoff: datetime) -> list[AgentRun]:
        """Load agent runs started at or after ``cutoff`` for the given user ids.

        Blank identifiers are dropped first; when none remain no query is
        issued and an empty list is returned. Tool calls are eager-loaded.
        """
        normalized = set()
        for candidate in identifiers:
            if str(candidate or "").strip():
                normalized.add(candidate)
        if not normalized:
            return []

        started_in_window = AgentRun.started_at >= cutoff
        owned_by_account = AgentRun.user_id.in_(normalized)
        query = (
            select(AgentRun)
            .options(selectinload(AgentRun.tool_calls))
            .where(started_in_window, owned_by_account)
        )
        return list(self.db.scalars(query).all())
|
||||
@@ -171,6 +171,22 @@ class EmployeeBehaviorProfileMetricHelpers:
|
||||
total += max(0, len(text) // 4)
|
||||
return total
|
||||
|
||||
def _sum_agent_run_duration_ms(self, runs: list[AgentRun]) -> int:
    """Add up the per-run durations (milliseconds) of all given runs."""
    total_ms = 0
    for run in runs:
        total_ms += self._agent_run_duration_ms(run)
    return total_ms
|
||||
|
||||
def _agent_run_duration_ms(self, run: AgentRun) -> int:
    """Return one run's duration in milliseconds.

    When both timestamps are set and the run finished strictly after it
    started, the elapsed time is used, capped at 24 hours. In every other
    case — a missing timestamp, a non-positive interval, or timestamps that
    cannot be compared (TypeError) — the result is the sum of the run's
    tool-call ``duration_ms`` values, each treated as at least 0.
    """
    max_elapsed_ms = 24 * 60 * 60 * 1000

    if run.started_at is not None and run.finished_at is not None:
        try:
            if run.finished_at > run.started_at:
                elapsed = run.finished_at - run.started_at
                elapsed_ms = int(elapsed.total_seconds() * 1000)
                return min(elapsed_ms, max_elapsed_ms)
        except TypeError:
            # Timestamps could not be compared; use the tool-call fallback.
            pass

    fallback_ms = 0
    for call in run.tool_calls:
        fallback_ms += max(0, int(call.duration_ms or 0))
    return fallback_ms
|
||||
|
||||
@staticmethod
|
||||
def _is_missing_value(value: Any) -> bool:
|
||||
text = str(value or "").strip()
|
||||
|
||||
@@ -466,6 +466,7 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
|
||||
]
|
||||
estimated_tokens = self._estimate_tokens(runs)
|
||||
duration_ms = self._sum_agent_run_duration_ms(runs)
|
||||
override_score = 0
|
||||
|
||||
token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
|
||||
@@ -524,6 +525,8 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
"token_count_mode": token_mode,
|
||||
"estimated_token_count": estimated_tokens,
|
||||
"exact_token_count": None,
|
||||
"ai_run_duration_ms": duration_ms,
|
||||
"ai_run_duration_mode": "elapsed_or_tool_call_fallback",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -108,6 +108,7 @@ from app.services.expense_rule_runtime import (
|
||||
resolve_document_type_label,
|
||||
)
|
||||
from app.services.ocr import OcrService
|
||||
from app.services.receipt_folder import ReceiptFolderService
|
||||
|
||||
|
||||
class ExpenseClaimAttachmentOperationsMixin:
|
||||
@@ -120,6 +121,7 @@ class ExpenseClaimAttachmentOperationsMixin:
|
||||
content: bytes,
|
||||
media_type: str | None,
|
||||
current_user: CurrentUserContext,
|
||||
source_receipt_id: str = "",
|
||||
) -> dict[str, Any] | None:
|
||||
claim, item = self._get_claim_item_or_raise(
|
||||
claim_id=claim_id,
|
||||
@@ -240,6 +242,16 @@ class ExpenseClaimAttachmentOperationsMixin:
|
||||
"ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []],
|
||||
}
|
||||
self._attachment_storage.write_meta(file_path, meta)
|
||||
ReceiptFolderService().save_linked_attachment(
|
||||
file_path=file_path,
|
||||
media_type=resolved_media_type,
|
||||
document=ocr_document,
|
||||
current_user=current_user,
|
||||
claim_id=claim.id,
|
||||
claim_no=claim.claim_no,
|
||||
item_id=item.id,
|
||||
source_receipt_id=source_receipt_id,
|
||||
)
|
||||
|
||||
self._sync_claim_from_items(claim)
|
||||
self.db.commit()
|
||||
|
||||
532
server/src/app/services/receipt_folder.py
Normal file
532
server/src/app/services/receipt_folder.py
Normal file
@@ -0,0 +1,532 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import mimetypes
|
||||
import re
|
||||
import shutil
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from app.core.config import get_settings
|
||||
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead
|
||||
from app.schemas.receipt_folder import (
|
||||
ReceiptFolderDeleteResponse,
|
||||
ReceiptFolderDetailRead,
|
||||
ReceiptFolderFieldRead,
|
||||
ReceiptFolderItemRead,
|
||||
ReceiptFolderUpdate,
|
||||
)
|
||||
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
|
||||
from app.services.ocr import SUPPORTED_SUFFIXES
|
||||
|
||||
|
||||
class ReceiptFolderService:
|
||||
def __init__(self) -> None:
    """Point ``self.root`` at the resolved ``receipt_folder`` directory under the storage root."""
    storage_root = get_settings().resolved_storage_root_dir
    receipt_root = storage_root / "receipt_folder"
    self.root = receipt_root.resolve()
|
||||
|
||||
def persist_ocr_batch(
    self,
    *,
    files: list[tuple[str, bytes, str | None]],
    result: OcrRecognizeBatchRead,
    current_user: CurrentUserContext,
    receipt_ids: list[str] | None = None,
) -> OcrRecognizeBatchRead:
    """Attach receipt-folder references to the documents of an OCR batch.

    Documents are paired with ``files`` and ``receipt_ids`` by position. For
    each document:

    * with no file at its position, it is passed through unchanged;
    * when the receipt id at its position resolves to an existing receipt,
      that receipt is referenced and nothing new is stored;
    * when ``_should_persist_source`` declines the upload, it is passed
      through unchanged;
    * otherwise the upload is saved as a new receipt and referenced.

    Returns:
        A copy of ``result`` whose ``documents`` list is the enriched list.
    """

    def with_receipt(document, receipt):
        # Copy of the document carrying the receipt's id, status and URLs.
        return document.model_copy(
            update={
                "receipt_id": receipt.id,
                "receipt_status": receipt.status,
                "receipt_preview_url": receipt.preview_url,
                "receipt_source_url": receipt.source_url,
            }
        )

    requested_ids = receipt_ids or []
    enriched: list[OcrRecognizeDocumentRead] = []

    for position, document in enumerate(list(result.documents or [])):
        if position >= len(files):
            enriched.append(document)
            continue

        requested_id = requested_ids[position] if position < len(requested_ids) else ""
        existing_receipt = self._resolve_existing_item(requested_id, current_user)
        if existing_receipt is not None:
            enriched.append(with_receipt(document, existing_receipt))
            continue

        filename, content, media_type = files[position]
        if not self._should_persist_source(filename, content):
            enriched.append(document)
            continue

        saved_receipt = self.save_receipt(
            filename=filename,
            content=content,
            media_type=media_type or document.media_type,
            document=document,
            current_user=current_user,
        )
        enriched.append(with_receipt(document, saved_receipt))

    return result.model_copy(update={"documents": enriched})
|
||||
|
||||
def save_receipt(
|
||||
self,
|
||||
*,
|
||||
filename: str,
|
||||
content: bytes,
|
||||
media_type: str | None,
|
||||
document: Any | None,
|
||||
current_user: CurrentUserContext,
|
||||
linked_claim_id: str = "",
|
||||
linked_claim_no: str = "",
|
||||
linked_item_id: str = "",
|
||||
) -> ReceiptFolderItemRead:
|
||||
owner_key = self._owner_key(current_user)
|
||||
receipt_id = str(uuid4())
|
||||
receipt_dir = self._owner_root(owner_key) / receipt_id
|
||||
receipt_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
normalized_name = self.normalize_filename(filename)
|
||||
source_path = receipt_dir / normalized_name
|
||||
source_path.write_bytes(content)
|
||||
resolved_media_type = self.resolve_media_type(normalized_name, media_type)
|
||||
preview_meta = self._write_preview_asset(
|
||||
receipt_dir=receipt_dir,
|
||||
source_path=source_path,
|
||||
media_type=resolved_media_type,
|
||||
document=document,
|
||||
)
|
||||
now = datetime.now(UTC)
|
||||
linked = bool(str(linked_claim_id or "").strip())
|
||||
meta = {
|
||||
"id": receipt_id,
|
||||
"owner_key": owner_key,
|
||||
"file_name": normalized_name,
|
||||
"source_file_name": normalized_name,
|
||||
"media_type": resolved_media_type,
|
||||
"size_bytes": len(content),
|
||||
"uploaded_at": now.isoformat(),
|
||||
"status": "linked" if linked else "unlinked",
|
||||
"linked_claim_id": str(linked_claim_id or "").strip(),
|
||||
"linked_claim_no": str(linked_claim_no or "").strip(),
|
||||
"linked_item_id": str(linked_item_id or "").strip(),
|
||||
"linked_at": now.isoformat() if linked else "",
|
||||
**self._build_document_meta(document),
|
||||
**preview_meta,
|
||||
}
|
||||
self._write_meta(receipt_dir, meta)
|
||||
return self._build_item(meta)
|
||||
|
||||
def save_linked_attachment(
    self,
    *,
    file_path: Path,
    media_type: str,
    document: Any | None,
    current_user: CurrentUserContext,
    claim_id: str,
    claim_no: str,
    item_id: str,
    source_receipt_id: str = "",
) -> ReceiptFolderItemRead | None:
    """Record a claim attachment in the receipt folder as a linked receipt.

    Args:
        file_path: Location of the attachment that was already stored.
        media_type: Media type of the attachment.
        document: OCR document for the attachment, if any.
        current_user: Owner of the receipt.
        claim_id: Id of the expense claim the attachment belongs to.
        claim_no: Number of that expense claim.
        item_id: Id of the claim item the attachment belongs to.
        source_receipt_id: Id of an existing receipt the upload came from.

    Returns:
        The linked receipt item, or ``None`` when ``file_path`` is not an
        existing regular file or lies outside the storage root.
    """
    if not file_path.exists() or not file_path.is_file():
        return None

    # Use the stripped id for the lookup too, not only for the emptiness check.
    receipt_id = str(source_receipt_id or "").strip()
    if receipt_id:
        try:
            return self.mark_receipt_linked(
                receipt_id=receipt_id,
                current_user=current_user,
                claim_id=claim_id,
                claim_no=claim_no,
                item_id=item_id,
            )
        except FileNotFoundError:
            # Deliberate best effort: the referenced receipt is gone, so fall
            # through and store the attachment as a new linked receipt.
            pass

    # Resolve the root as well: `file_path` is resolved below, and comparing a
    # resolved path against an unresolved (relative or symlinked) root would
    # wrongly reject files that do live inside the storage root.
    storage_root = Path(get_settings().resolved_storage_root_dir).resolve()
    try:
        file_path.resolve().relative_to(storage_root)
    except ValueError:
        return None

    return self.save_receipt(
        filename=file_path.name,
        content=file_path.read_bytes(),
        media_type=media_type,
        document=document,
        current_user=current_user,
        linked_claim_id=claim_id,
        linked_claim_no=claim_no,
        linked_item_id=item_id,
    )
|
||||
|
||||
def mark_receipt_linked(
|
||||
self,
|
||||
*,
|
||||
receipt_id: str,
|
||||
current_user: CurrentUserContext,
|
||||
claim_id: str,
|
||||
claim_no: str,
|
||||
item_id: str,
|
||||
) -> ReceiptFolderItemRead:
|
||||
owner_key = self._owner_key(current_user)
|
||||
receipt_dir = self._receipt_dir(owner_key, receipt_id)
|
||||
meta = self._read_meta(receipt_dir)
|
||||
meta["status"] = "linked"
|
||||
meta["linked_claim_id"] = str(claim_id or "").strip()
|
||||
meta["linked_claim_no"] = str(claim_no or "").strip()
|
||||
meta["linked_item_id"] = str(item_id or "").strip()
|
||||
meta["linked_at"] = datetime.now(UTC).isoformat()
|
||||
self._write_meta(receipt_dir, meta)
|
||||
return self._build_item(meta)
|
||||
|
||||
def list_receipts(
|
||||
self,
|
||||
*,
|
||||
current_user: CurrentUserContext,
|
||||
status_filter: str = "all",
|
||||
) -> list[ReceiptFolderItemRead]:
|
||||
status_filter = str(status_filter or "all").strip().lower()
|
||||
items = [
|
||||
self._build_item(meta)
|
||||
for meta in self._iter_owner_meta(self._owner_key(current_user))
|
||||
if self._matches_status(meta, status_filter)
|
||||
]
|
||||
return sorted(items, key=lambda item: item.uploaded_at or datetime.min.replace(tzinfo=UTC), reverse=True)
|
||||
|
||||
def get_receipt(self, receipt_id: str, current_user: CurrentUserContext) -> ReceiptFolderDetailRead:
    """Return the detail view of one of the caller's receipts.

    The list-level item built from the stored metadata is extended with OCR
    and classification values read from the same metadata; the metadata dict
    itself is exposed as ``raw_meta``.
    """
    meta = self._read_receipt_meta(receipt_id, current_user)
    base_fields = self._build_item(meta).model_dump()

    # Keep only evidence entries that are non-blank once converted to text.
    evidence = []
    for value in list(meta.get("ocr_classification_evidence") or []):
        if str(value).strip():
            evidence.append(str(value))

    detail_fields = {
        "engine": str(meta.get("engine") or ""),
        "model": str(meta.get("model") or ""),
        "ocr_text": str(meta.get("ocr_text") or ""),
        "line_count": int(meta.get("ocr_line_count") or 0),
        # A stored page count below 1 is reported as 1.
        "page_count": max(1, int(meta.get("page_count") or 1)),
        "classification_confidence": float(meta.get("ocr_classification_confidence") or 0.0),
        "classification_evidence": evidence,
        "fields": self._resolve_fields(meta),
        "raw_meta": meta,
    }
    return ReceiptFolderDetailRead(**base_fields, **detail_fields)
|
||||
|
||||
def update_receipt(
|
||||
self,
|
||||
*,
|
||||
receipt_id: str,
|
||||
payload: ReceiptFolderUpdate,
|
||||
current_user: CurrentUserContext,
|
||||
) -> ReceiptFolderDetailRead:
|
||||
owner_key = self._owner_key(current_user)
|
||||
receipt_dir = self._receipt_dir(owner_key, receipt_id)
|
||||
meta = self._read_meta(receipt_dir)
|
||||
updates = payload.model_dump(exclude_unset=True)
|
||||
for key in ("document_type", "document_type_label", "scene_code", "scene_label", "summary"):
|
||||
if key in updates and updates[key] is not None:
|
||||
meta[key] = str(updates[key] or "").strip()
|
||||
|
||||
editable = dict(meta.get("editable_fields") or {})
|
||||
for key in ("amount", "document_date", "merchant_name"):
|
||||
if key in updates and updates[key] is not None:
|
||||
editable[key] = str(updates[key] or "").strip()
|
||||
if "fields" in updates and updates["fields"] is not None:
|
||||
meta["document_fields"] = [
|
||||
field.model_dump() if isinstance(field, ReceiptFolderFieldRead) else dict(field)
|
||||
for field in payload.fields or []
|
||||
]
|
||||
meta["editable_fields"] = editable
|
||||
meta["updated_at"] = datetime.now(UTC).isoformat()
|
||||
self._write_meta(receipt_dir, meta)
|
||||
return self.get_receipt(receipt_id, current_user)
|
||||
|
||||
def delete_receipt(
    self,
    *,
    receipt_id: str,
    current_user: CurrentUserContext,
) -> ReceiptFolderDeleteResponse:
    """Remove the receipt's directory tree for the current user.

    ``_receipt_dir`` validates the id and raises ``FileNotFoundError`` when the
    receipt does not exist, so nothing is removed in that case.
    """
    target_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
    shutil.rmtree(target_dir)
    return ReceiptFolderDeleteResponse(receipt_id=receipt_id, message="票据已删除。")
|
||||
|
||||
def resolve_source(self, receipt_id: str, current_user: CurrentUserContext) -> tuple[Path, str, str]:
    """Locate the stored source file of a receipt.

    Returns:
        A ``(path, media_type, download_name)`` tuple. The download name is the
        ``file_name`` recorded in the metadata, falling back to the on-disk name.

    Raises:
        FileNotFoundError: if the receipt is unknown, the metadata records no
            file name, or the recorded file is missing or not a regular file.
    """
    meta = self._read_receipt_meta(receipt_id, current_user)
    receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
    file_name = str(meta.get("source_file_name") or meta.get("file_name") or "").strip()
    if not file_name:
        # An empty name would make ``receipt_dir / ""`` resolve to the receipt
        # directory itself, which exists and would be returned as the "file".
        raise FileNotFoundError("Receipt source not found")
    path = self._assert_child(receipt_dir / file_name)
    if not path.is_file():
        raise FileNotFoundError("Receipt source not found")
    media_type = self.resolve_media_type(path.name, str(meta.get("media_type") or ""))
    return path, media_type, str(meta.get("file_name") or path.name)
|
||||
|
||||
def resolve_preview(self, receipt_id: str, current_user: CurrentUserContext) -> tuple[Path, str, str]:
    """Locate the file to serve as the receipt preview.

    The preview file named in the metadata wins when it exists on disk;
    otherwise the source file is used, provided its media type is previewable.

    Raises:
        FileNotFoundError: if neither a preview file nor a previewable source
            is available.
    """
    meta = self._read_receipt_meta(receipt_id, current_user)
    receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)

    recorded_name = str(meta.get("preview_file_name") or "").strip()
    candidate = self._assert_child(receipt_dir / recorded_name) if recorded_name else None
    if candidate is not None and candidate.exists():
        recorded_type = str(meta.get("preview_media_type") or "")
        return candidate, self.resolve_media_type(candidate.name, recorded_type), candidate.name

    # No usable preview file: fall back to the original upload.
    source_path, source_media_type, source_name = self.resolve_source(receipt_id, current_user)
    if not self._is_previewable(source_media_type):
        raise FileNotFoundError("Receipt preview not found")
    return source_path, source_media_type, source_name
|
||||
|
||||
@staticmethod
|
||||
def normalize_filename(filename: str | None) -> str:
|
||||
normalized = Path(str(filename or "").strip()).name
|
||||
normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._")
|
||||
return normalized or "receipt.bin"
|
||||
|
||||
@staticmethod
|
||||
def resolve_media_type(filename: str, fallback: str | None = None) -> str:
|
||||
return str(mimetypes.guess_type(filename)[0] or fallback or "application/octet-stream")
|
||||
|
||||
def _owner_root(self, owner_key: str) -> Path:
|
||||
return self._assert_child(self.root / owner_key)
|
||||
|
||||
def _receipt_dir(self, owner_key: str, receipt_id: str) -> Path:
|
||||
normalized = str(receipt_id or "").strip()
|
||||
if not re.fullmatch(r"[0-9a-fA-F-]{32,36}", normalized):
|
||||
raise FileNotFoundError("Receipt not found")
|
||||
path = self._assert_child(self._owner_root(owner_key) / normalized)
|
||||
if not path.exists() or not path.is_dir():
|
||||
raise FileNotFoundError("Receipt not found")
|
||||
return path
|
||||
|
||||
def _assert_child(self, path: Path) -> Path:
|
||||
self.root.mkdir(parents=True, exist_ok=True)
|
||||
resolved = path.resolve()
|
||||
try:
|
||||
resolved.relative_to(self.root)
|
||||
except ValueError as exc:
|
||||
raise FileNotFoundError("Receipt path is invalid") from exc
|
||||
return resolved
|
||||
|
||||
@staticmethod
|
||||
def _owner_key(current_user: CurrentUserContext) -> str:
|
||||
raw = str(current_user.username or current_user.name or "anonymous").strip().lower()
|
||||
normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", raw).strip("._")
|
||||
return normalized or "anonymous"
|
||||
|
||||
@staticmethod
|
||||
def _should_persist_source(filename: str, content: bytes) -> bool:
|
||||
if not content:
|
||||
return False
|
||||
return Path(str(filename or "")).suffix.lower() in SUPPORTED_SUFFIXES
|
||||
|
||||
def _write_preview_asset(
    self,
    *,
    receipt_dir: Path,
    source_path: Path,
    media_type: str,
    document: Any | None,
) -> dict[str, Any]:
    """Decide how the receipt is previewed and return the preview metadata.

    When ``document`` carries a decodable ``preview_data_url`` its bytes are
    written to ``preview<ext>`` inside ``receipt_dir``. Otherwise the source
    file doubles as the preview if its media type is previewable. The returned
    dict always has the keys ``previewable``, ``preview_kind``,
    ``preview_file_name`` and ``preview_media_type``.
    """
    data_url = str(getattr(document, "preview_data_url", "") or "").strip()
    decoded = ExpenseClaimAttachmentPresentation.decode_data_url(data_url)

    if decoded is None:
        if not self._is_previewable(media_type):
            return {
                "previewable": False,
                "preview_kind": "",
                "preview_file_name": "",
                "preview_media_type": "",
            }
        # The upload itself can be shown; no separate preview file is written.
        return {
            "previewable": True,
            "preview_kind": "image" if media_type.startswith("image/") else "pdf",
            "preview_file_name": source_path.name,
            "preview_media_type": media_type,
        }

    embedded_type, embedded_bytes = decoded
    extension = mimetypes.guess_extension(embedded_type) or ".bin"
    preview_name = f"preview{extension}"
    (receipt_dir / preview_name).write_bytes(embedded_bytes)
    return {
        "previewable": True,
        "preview_kind": "image",
        "preview_file_name": preview_name,
        "preview_media_type": embedded_type,
    }
|
||||
|
||||
@staticmethod
|
||||
def _is_previewable(media_type: str) -> bool:
|
||||
return str(media_type or "").startswith("image/") or str(media_type or "") == "application/pdf"
|
||||
|
||||
@staticmethod
|
||||
def _build_document_meta(document: Any | None) -> dict[str, Any]:
|
||||
fields = []
|
||||
for field in list(getattr(document, "document_fields", []) or []):
|
||||
if isinstance(field, dict):
|
||||
fields.append(
|
||||
{
|
||||
"key": str(field.get("key") or "").strip(),
|
||||
"label": str(field.get("label") or "").strip(),
|
||||
"value": str(field.get("value") or "").strip(),
|
||||
}
|
||||
)
|
||||
else:
|
||||
fields.append(
|
||||
{
|
||||
"key": str(getattr(field, "key", "") or "").strip(),
|
||||
"label": str(getattr(field, "label", "") or "").strip(),
|
||||
"value": str(getattr(field, "value", "") or "").strip(),
|
||||
}
|
||||
)
|
||||
fields = [field for field in fields if field["label"] and field["value"]]
|
||||
return {
|
||||
"engine": str(getattr(document, "engine", "") or ""),
|
||||
"model": str(getattr(document, "model", "") or ""),
|
||||
"ocr_text": str(getattr(document, "text", "") or ""),
|
||||
"summary": str(getattr(document, "summary", "") or ""),
|
||||
"ocr_avg_score": float(getattr(document, "avg_score", 0.0) or 0.0),
|
||||
"ocr_line_count": int(getattr(document, "line_count", 0) or 0),
|
||||
"page_count": int(getattr(document, "page_count", 1) or 1),
|
||||
"document_type": str(getattr(document, "document_type", "") or "other"),
|
||||
"document_type_label": str(getattr(document, "document_type_label", "") or "其他单据"),
|
||||
"scene_code": str(getattr(document, "scene_code", "") or "other"),
|
||||
"scene_label": str(getattr(document, "scene_label", "") or "其他票据"),
|
||||
"ocr_classification_source": str(getattr(document, "classification_source", "") or ""),
|
||||
"ocr_classification_confidence": float(getattr(document, "classification_confidence", 0.0) or 0.0),
|
||||
"ocr_classification_evidence": [
|
||||
str(value) for value in list(getattr(document, "classification_evidence", []) or []) if str(value).strip()
|
||||
],
|
||||
"document_fields": fields,
|
||||
"editable_fields": {},
|
||||
"ocr_warnings": [str(value) for value in list(getattr(document, "warnings", []) or []) if str(value).strip()],
|
||||
}
|
||||
|
||||
def _iter_owner_meta(self, owner_key: str) -> list[dict[str, Any]]:
|
||||
owner_root = self._owner_root(owner_key)
|
||||
if not owner_root.exists():
|
||||
return []
|
||||
metas = []
|
||||
for meta_path in owner_root.glob("*/meta.json"):
|
||||
meta = self._read_meta(meta_path.parent)
|
||||
if meta:
|
||||
metas.append(meta)
|
||||
return metas
|
||||
|
||||
def _read_receipt_meta(self, receipt_id: str, current_user: CurrentUserContext) -> dict[str, Any]:
|
||||
return self._read_meta(self._receipt_dir(self._owner_key(current_user), receipt_id))
|
||||
|
||||
def _resolve_existing_item(
|
||||
self,
|
||||
receipt_id: str | None,
|
||||
current_user: CurrentUserContext,
|
||||
) -> ReceiptFolderItemRead | None:
|
||||
normalized = str(receipt_id or "").strip()
|
||||
if not normalized:
|
||||
return None
|
||||
try:
|
||||
return self._build_item(self._read_receipt_meta(normalized, current_user))
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _meta_path(receipt_dir: Path) -> Path:
|
||||
return receipt_dir / "meta.json"
|
||||
|
||||
def _read_meta(self, receipt_dir: Path) -> dict[str, Any]:
|
||||
meta_path = self._meta_path(receipt_dir)
|
||||
if not meta_path.exists():
|
||||
raise FileNotFoundError("Receipt not found")
|
||||
try:
|
||||
payload = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError) as exc:
|
||||
raise FileNotFoundError("Receipt metadata not found") from exc
|
||||
return payload if isinstance(payload, dict) else {}
|
||||
|
||||
def _write_meta(self, receipt_dir: Path, payload: dict[str, Any]) -> None:
|
||||
self._meta_path(receipt_dir).write_text(
|
||||
json.dumps(payload, ensure_ascii=False, indent=2),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _matches_status(meta: dict[str, Any], status_filter: str) -> bool:
|
||||
if status_filter in {"", "all"}:
|
||||
return True
|
||||
return str(meta.get("status") or "unlinked").strip().lower() == status_filter
|
||||
|
||||
def _build_item(self, meta: dict[str, Any]) -> ReceiptFolderItemRead:
    """Build the list-view representation of a receipt from its metadata.

    Missing values fall back to empty strings, zero, or the "other" document
    type and scene. Amount, date and merchant prefer user-edited values and
    otherwise come from the recognized fields. Preview and source URLs are only
    produced when the metadata carries an id.
    """

    def text(name: str, fallback: str = "") -> str:
        return str(meta.get(name) or fallback)

    receipt_id = text("id").strip()
    status_value = text("status", "unlinked").strip() or "unlinked"
    has_preview = bool(meta.get("previewable"))

    preview_url = f"/receipt-folder/{receipt_id}/preview" if has_preview and receipt_id else ""
    source_url = f"/receipt-folder/{receipt_id}/source" if receipt_id else ""
    warnings = [str(value) for value in list(meta.get("ocr_warnings") or []) if str(value).strip()]

    return ReceiptFolderItemRead(
        id=receipt_id,
        file_name=text("file_name"),
        media_type=text("media_type", "application/octet-stream"),
        size_bytes=int(meta.get("size_bytes") or 0),
        status=status_value,
        status_label="已关联" if status_value == "linked" else "未关联",
        document_type=text("document_type", "other"),
        document_type_label=text("document_type_label", "其他单据"),
        scene_code=text("scene_code", "other"),
        scene_label=text("scene_label", "其他票据"),
        summary=text("summary"),
        amount=self._resolve_editable_or_field(meta, "amount", labels=("金额", "价税合计", "票价")),
        document_date=self._resolve_editable_or_field(meta, "document_date", labels=("日期", "开票日期", "乘车日期")),
        merchant_name=self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方")),
        avg_score=float(meta.get("ocr_avg_score") or 0.0),
        uploaded_at=self._parse_datetime(meta.get("uploaded_at")),
        linked_at=self._parse_datetime(meta.get("linked_at")),
        linked_claim_id=text("linked_claim_id"),
        linked_claim_no=text("linked_claim_no"),
        previewable=has_preview,
        preview_kind=text("preview_kind"),
        preview_url=preview_url,
        source_url=source_url,
        warnings=warnings,
    )
|
||||
|
||||
def _resolve_fields(self, meta: dict[str, Any]) -> list[ReceiptFolderFieldRead]:
    """Turn the stored ``document_fields`` into field models.

    Entries that are not dicts or whose label is blank are ignored.
    """
    resolved = []
    for entry in list(meta.get("document_fields") or []):
        if not isinstance(entry, dict):
            continue
        label = str(entry.get("label") or "")
        if not label.strip():
            continue
        resolved.append(
            ReceiptFolderFieldRead(
                key=str(entry.get("key") or ""),
                label=label,
                value=str(entry.get("value") or ""),
            )
        )
    return resolved
|
||||
|
||||
def _resolve_editable_or_field(self, meta: dict[str, Any], key: str, *, labels: tuple[str, ...]) -> str:
|
||||
editable = meta.get("editable_fields")
|
||||
if isinstance(editable, dict):
|
||||
value = str(editable.get(key) or "").strip()
|
||||
if value:
|
||||
return value
|
||||
label_set = set(labels)
|
||||
for field in self._resolve_fields(meta):
|
||||
if field.label in label_set or field.key == key:
|
||||
return field.value
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _parse_datetime(value: Any) -> datetime | None:
|
||||
raw = str(value or "").strip()
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
return datetime.fromisoformat(raw)
|
||||
except ValueError:
|
||||
return None
|
||||
@@ -264,6 +264,74 @@ def test_current_employee_profile_endpoint_resolves_login_user() -> None:
|
||||
payload = response.json()
|
||||
assert payload["employee_id"] == "emp-main"
|
||||
assert {item["profile_type"] for item in payload["profiles"]} >= {"expense", "ai_usage"}
|
||||
ai_profile = next(item for item in payload["profiles"] if item["profile_type"] == "ai_usage")
|
||||
assert ai_profile["metrics"]["ai_run_duration_ms"] == 120
|
||||
assert payload["profile_tags"]
|
||||
assert payload["radar"]["dimensions"]
|
||||
|
||||
|
||||
def test_current_admin_profile_endpoint_returns_account_usage_profile() -> None:
    """The "me/latest" profile endpoint returns an AI-usage profile for the admin login.

    Twelve successful agent runs are recorded for user "admin"; the response is
    expected to carry exactly one "ai_usage" profile that aggregates them.
    """
    session_factory = build_session_factory()
    with session_factory() as db:
        seed_profile_data(db)
        now = datetime.now(UTC)
        # Each run lasts two seconds and started one day (plus a few minutes)
        # ago, well inside the 90-day window requested below.
        for index in range(12):
            run_id = f"run-admin-usage-{index}"
            started_at = now - timedelta(days=1, minutes=index)
            db.add(
                AgentRun(
                    run_id=run_id,
                    agent="user_agent",
                    source="user_message",
                    user_id="admin",
                    status="success",
                    result_summary="管理员查看运行概览。",
                    started_at=started_at,
                    finished_at=started_at + timedelta(seconds=2),
                    tool_calls=[
                        AgentToolCall(
                            run_id=run_id,
                            tool_type="database",
                            tool_name="agent_runs.list",
                            request_json={"limit": 20},
                            response_json={"ok": True},
                            status="success",
                            duration_ms=120,
                        )
                    ],
                )
            )
        db.commit()

    app = create_app()

    # Serve requests from the seeded session factory instead of the app's own database.
    def override_db() -> Generator[Session, None, None]:
        db = session_factory()
        try:
            yield db
        finally:
            db.close()

    app.dependency_overrides[get_db] = override_db
    client = TestClient(app)
    response = client.get(
        "/api/v1/employee-profiles/me/latest",
        params={
            "scene": "operations",
            "window_days": 90,
            "expense_type_scope": "overall",
        },
        headers={"x-auth-username": "admin", "x-auth-name": "admin", "x-auth-is-admin": "true"},
    )

    assert response.status_code == 200
    payload = response.json()
    # The login name is reported in the employee_id field.
    assert payload["employee_id"] == "admin"
    assert payload["empty_reason"] == ""
    assert [item["profile_type"] for item in payload["profiles"]] == ["ai_usage"]
    metrics = payload["profiles"][0]["metrics"]
    assert metrics["ai_run_count"] == 12
    # 12 runs x 2 seconds each = 24000 ms.
    assert metrics["ai_run_duration_ms"] == 24000
    assert payload["profile_tags"]
    assert payload["radar"]["dimensions"]
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@ from sqlalchemy.orm import Session, sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.api.deps import get_db
|
||||
from app.core.config import get_settings
|
||||
from app.db.base import Base
|
||||
from app.main import create_app
|
||||
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeFieldRead, OcrRecognizeLineRead
|
||||
@@ -35,7 +36,7 @@ def build_client() -> TestClient:
|
||||
return TestClient(app)
|
||||
|
||||
|
||||
def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
|
||||
def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch, tmp_path) -> None:
|
||||
def fake_recognize(
|
||||
self,
|
||||
files: list[tuple[str, bytes, str | None]],
|
||||
@@ -76,21 +77,84 @@ def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
|
||||
],
|
||||
)
|
||||
|
||||
monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
|
||||
get_settings.cache_clear()
|
||||
monkeypatch.setattr(OcrService, "recognize_files", fake_recognize)
|
||||
client = build_client()
|
||||
try:
|
||||
client = build_client()
|
||||
auth_headers = {"x-auth-username": "pytest", "x-auth-name": "Py Test"}
|
||||
|
||||
response = client.post(
|
||||
"/api/v1/ocr/recognize",
|
||||
headers={"x-auth-username": "pytest", "x-auth-name": "Py Test"},
|
||||
files=[("files", ("invoice.png", b"fake-image", "image/png"))],
|
||||
)
|
||||
response = client.post(
|
||||
"/api/v1/ocr/recognize",
|
||||
headers=auth_headers,
|
||||
files=[("files", ("invoice.png", b"fake-image", "image/png"))],
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
payload = response.json()
|
||||
assert payload["engine"] == "paddleocr_mobile"
|
||||
assert payload["success_count"] == 1
|
||||
assert payload["documents"][0]["filename"] == "invoice.png"
|
||||
assert payload["documents"][0]["summary"] == "增值税电子发票,金额 100 元。"
|
||||
assert payload["documents"][0]["document_type"] == "vat_invoice"
|
||||
assert payload["documents"][0]["document_type_label"] == "增值税发票"
|
||||
assert payload["documents"][0]["document_fields"][0]["label"] == "金额"
|
||||
assert response.status_code == 200
|
||||
payload = response.json()
|
||||
document = payload["documents"][0]
|
||||
assert payload["engine"] == "paddleocr_mobile"
|
||||
assert payload["success_count"] == 1
|
||||
assert document["filename"] == "invoice.png"
|
||||
assert document["summary"] == "增值税电子发票,金额 100 元。"
|
||||
assert document["document_type"] == "vat_invoice"
|
||||
assert document["document_type_label"] == "增值税发票"
|
||||
assert document["document_fields"][0]["label"] == "金额"
|
||||
assert document["receipt_id"]
|
||||
assert document["receipt_status"] == "unlinked"
|
||||
assert document["receipt_preview_url"].endswith(f"/receipt-folder/{document['receipt_id']}/preview")
|
||||
assert document["receipt_source_url"].endswith(f"/receipt-folder/{document['receipt_id']}/source")
|
||||
|
||||
receipt_id = document["receipt_id"]
|
||||
list_response = client.get("/api/v1/receipt-folder?status=unlinked", headers=auth_headers)
|
||||
assert list_response.status_code == 200
|
||||
receipt_list = list_response.json()
|
||||
assert len(receipt_list) == 1
|
||||
assert receipt_list[0]["id"] == receipt_id
|
||||
assert receipt_list[0]["amount"] == "100元"
|
||||
|
||||
repeated_response = client.post(
|
||||
"/api/v1/ocr/recognize",
|
||||
headers=auth_headers,
|
||||
data={"receipt_ids": receipt_id},
|
||||
files=[("files", ("invoice.png", b"fake-image", "image/png"))],
|
||||
)
|
||||
assert repeated_response.status_code == 200
|
||||
repeated_document = repeated_response.json()["documents"][0]
|
||||
assert repeated_document["receipt_id"] == receipt_id
|
||||
|
||||
all_receipts_response = client.get("/api/v1/receipt-folder?status=all", headers=auth_headers)
|
||||
assert all_receipts_response.status_code == 200
|
||||
assert len(all_receipts_response.json()) == 1
|
||||
|
||||
detail_response = client.get(f"/api/v1/receipt-folder/{receipt_id}", headers=auth_headers)
|
||||
assert detail_response.status_code == 200
|
||||
detail_payload = detail_response.json()
|
||||
assert detail_payload["file_name"] == "invoice.png"
|
||||
assert detail_payload["fields"][0]["label"] == "金额"
|
||||
|
||||
update_response = client.patch(
|
||||
f"/api/v1/receipt-folder/{receipt_id}",
|
||||
headers=auth_headers,
|
||||
json={
|
||||
"document_type_label": "电子发票",
|
||||
"amount": "108元",
|
||||
"fields": [{"key": "amount", "label": "金额", "value": "108元"}],
|
||||
},
|
||||
)
|
||||
assert update_response.status_code == 200
|
||||
assert update_response.json()["document_type_label"] == "电子发票"
|
||||
assert update_response.json()["amount"] == "108元"
|
||||
|
||||
preview_response = client.get(f"/api/v1/receipt-folder/{receipt_id}/preview", headers=auth_headers)
|
||||
assert preview_response.status_code == 200
|
||||
assert preview_response.content == b"fake-image"
|
||||
|
||||
delete_response = client.delete(f"/api/v1/receipt-folder/{receipt_id}", headers=auth_headers)
|
||||
assert delete_response.status_code == 200
|
||||
assert delete_response.json()["receipt_id"] == receipt_id
|
||||
|
||||
deleted_response = client.get(f"/api/v1/receipt-folder/{receipt_id}", headers=auth_headers)
|
||||
assert deleted_response.status_code == 404
|
||||
finally:
|
||||
get_settings.cache_clear()
|
||||
|
||||
Reference in New Issue
Block a user