feat: 新增票据夹模块并优化 OCR 与员工画像服务
后端新增票据夹端点、数据模型和服务模块,优化 OCR 端点 Schema 和附件操作逻辑,完善员工行为画像服务和辅助函数;前端新增票据夹视图和服务层,优化文档中心样式和侧边栏导航,完善员工画像详情弹窗和权限控制,补充单元测试。
This commit is contained in:
176
server/src/app/services/account_behavior_profile.py
Normal file
176
server/src/app/services/account_behavior_profile.py
Normal file
@@ -0,0 +1,176 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, selectinload
|
||||
|
||||
from app.algorithem.employee_behavior_profile import (
|
||||
LEVEL_LABELS,
|
||||
PROFILE_LABELS,
|
||||
ProfileComponent,
|
||||
evaluate_weighted_profile,
|
||||
score_by_bands,
|
||||
)
|
||||
from app.algorithem.employee_behavior_profile_tags import build_profile_radar, build_profile_tags
|
||||
from app.models.agent_run import AgentRun
|
||||
from app.schemas.employee_profile import EmployeeProfileLatestRead, EmployeeProfileRead
|
||||
from app.services.employee_behavior_profile_helpers import EmployeeBehaviorProfileMetricHelpers
|
||||
|
||||
|
||||
class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
def __init__(self, db: Session) -> None:
|
||||
self.db = db
|
||||
|
||||
def get_latest_account_profile(
    self,
    *,
    account_id: str,
    account_name: str,
    identifiers: set[str],
    scene: str,
    window_days: int,
    expense_type_scope: str,
) -> EmployeeProfileLatestRead:
    """Build the latest AI-usage profile for an account without an employee record.

    Only the ``"operations"`` scene is profiled; any other scene returns an
    empty result carrying an explanatory ``empty_reason``. An empty result is
    also returned when no agent runs are found inside the window.

    Args:
        account_id: Reported as ``employee_id`` in the response.
        account_name: Reported as ``employee_name`` in the response.
        identifiers: Values matched against ``AgentRun.user_id``.
        scene: Profile scene requested by the caller.
        window_days: Look-back window, in days, counted from now (UTC).
        expense_type_scope: Echoed into the response and the metrics.

    Returns:
        A populated profile, or an empty one with ``empty_reason`` set.
    """
    # Fields shared by every response variant produced below.
    common = {
        "employee_id": account_id,
        "employee_name": account_name,
        "scene": scene,
        "window_days": window_days,
        "expense_type_scope": expense_type_scope,
    }

    if scene != "operations":
        return EmployeeProfileLatestRead(
            **common,
            empty_reason="当前账号未匹配员工目录,无法形成审批场景员工画像。",
        )

    window_start = datetime.now(UTC) - timedelta(days=window_days)
    runs = self._fetch_account_runs(identifiers, window_start)
    if not runs:
        return EmployeeProfileLatestRead(
            **common,
            empty_reason="当前账号暂无可统计的智能体运行记录。",
        )

    result = self._calculate_account_ai_usage_profile(
        runs=runs,
        window_days=window_days,
        expense_type_scope=expense_type_scope,
    )
    payload = {
        "profile_type": result.profile_type,
        "profile_label": result.profile_label,
        "score": result.profile_score,
        "level": result.profile_level,
        "metrics": result.metrics,
        "top_contributors": result.top_contributors(),
    }
    tags = build_profile_tags([payload], scene=scene)
    radar = build_profile_radar([payload], tags, scene=scene)

    calculated_at = datetime.now(UTC)
    profile_type = payload["profile_type"]
    level = payload["level"]
    profile = EmployeeProfileRead(
        profile_type=profile_type,
        # The label is looked up from the shared table rather than taken from
        # the calculation result; unknown keys fall back to the raw value.
        profile_label=PROFILE_LABELS.get(profile_type, profile_type),
        score=payload["score"],
        level=level,
        level_label=LEVEL_LABELS.get(level, level),
        metrics=payload["metrics"],
        top_contributors=payload["top_contributors"],
    )
    # Review priority is reported as a fixed "normal" for account profiles.
    return EmployeeProfileLatestRead(
        **common,
        calculated_at=calculated_at,
        review_priority_score=0,
        review_priority_level="normal",
        review_priority_label=LEVEL_LABELS["normal"],
        profiles=[profile],
        profile_tags=tags,
        radar=radar,
    )
|
||||
|
||||
def _calculate_account_ai_usage_profile(
    self,
    *,
    runs: list[AgentRun],
    window_days: int,
    expense_type_scope: str,
):
    """Score an account's AI usage from its agent runs.

    Four components are passed to ``evaluate_weighted_profile``: run count,
    estimated token volume, a second run-count based score, and the number of
    tool calls whose status is neither ``success`` nor ``ok``.

    Args:
        runs: Agent runs with their ``tool_calls`` available.
        window_days: Echoed into the metrics.
        expense_type_scope: Echoed into the metrics.

    Returns:
        Whatever ``evaluate_weighted_profile`` returns for the ``ai_usage`` profile.
    """
    accepted_statuses = {"success", "ok"}

    tool_calls = []
    for run in runs:
        tool_calls.extend(run.tool_calls)
    failed_calls = [
        call for call in tool_calls if str(call.status or "").lower() not in accepted_statuses
    ]
    estimated_tokens = self._estimate_tokens(runs)
    duration_ms = self._sum_agent_run_duration_ms(runs)
    token_mode = "unavailable" if not estimated_tokens else "estimated_token_count"

    run_count = len(runs)
    failed_count = len(failed_calls)

    # NOTE(review): the weights below sum to 0.80, not 1.00 — confirm that
    # evaluate_weighted_profile normalizes, or that this is intended.
    components = [
        ProfileComponent(
            "ai_call_count_score",
            "AI 调用次数",
            score_by_bands(run_count, [(0, 0), (3, 25), (10, 65), (20, 100)]),
            run_count,
            "次",
            Decimal("0.25"),
        ),
        ProfileComponent(
            "token_cost_score",
            "Token 使用强度",
            score_by_bands(estimated_tokens, [(0, 0), (2000, 25), (8000, 65), (20000, 100)]),
            estimated_tokens,
            "tokens",
            Decimal("0.25"),
        ),
        # NOTE(review): labelled as a claim ratio but fed with the run count —
        # verify this is the intended input.
        ProfileComponent(
            "ai_generated_claim_ratio_score",
            "AI 生成申请比例",
            score_by_bands(run_count, [(0, 0), (2, 20), (8, 60), (16, 90)]),
            run_count,
            "次",
            Decimal("0.20"),
        ),
        ProfileComponent(
            "failed_ai_call_score",
            "AI 调用失败",
            score_by_bands(failed_count, [(0, 0), (1, 35), (3, 80)]),
            failed_count,
            "次",
            Decimal("0.10"),
        ),
    ]
    metrics = {
        "window_days": window_days,
        "expense_type_scope": expense_type_scope,
        "peer_sample_size": 0,
        "ai_run_count": run_count,
        "tool_call_count": len(tool_calls),
        "failed_tool_call_count": failed_count,
        "token_count_mode": token_mode,
        "estimated_token_count": estimated_tokens,
        "exact_token_count": None,
        "ai_run_duration_ms": duration_ms,
        "ai_run_duration_mode": "elapsed_or_tool_call_fallback",
    }
    return evaluate_weighted_profile("ai_usage", components, metrics=metrics)
|
||||
|
||||
def _fetch_account_runs(self, identifiers: set[str], cutoff: datetime) -> list[AgentRun]:
|
||||
normalized = {item for item in identifiers if str(item or "").strip()}
|
||||
if not normalized:
|
||||
return []
|
||||
stmt = (
|
||||
select(AgentRun)
|
||||
.options(selectinload(AgentRun.tool_calls))
|
||||
.where(AgentRun.started_at >= cutoff, AgentRun.user_id.in_(normalized))
|
||||
)
|
||||
return list(self.db.scalars(stmt).all())
|
||||
@@ -171,6 +171,22 @@ class EmployeeBehaviorProfileMetricHelpers:
|
||||
total += max(0, len(text) // 4)
|
||||
return total
|
||||
|
||||
def _sum_agent_run_duration_ms(self, runs: list[AgentRun]) -> int:
|
||||
return sum(self._agent_run_duration_ms(run) for run in runs)
|
||||
|
||||
def _agent_run_duration_ms(self, run: AgentRun) -> int:
|
||||
if run.started_at is not None and run.finished_at is not None:
|
||||
try:
|
||||
if run.finished_at > run.started_at:
|
||||
return min(
|
||||
int((run.finished_at - run.started_at).total_seconds() * 1000),
|
||||
24 * 60 * 60 * 1000,
|
||||
)
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
return sum(max(0, int(tool.duration_ms or 0)) for tool in run.tool_calls)
|
||||
|
||||
@staticmethod
|
||||
def _is_missing_value(value: Any) -> bool:
|
||||
text = str(value or "").strip()
|
||||
|
||||
@@ -466,6 +466,7 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
|
||||
]
|
||||
estimated_tokens = self._estimate_tokens(runs)
|
||||
duration_ms = self._sum_agent_run_duration_ms(runs)
|
||||
override_score = 0
|
||||
|
||||
token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
|
||||
@@ -524,6 +525,8 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
|
||||
"token_count_mode": token_mode,
|
||||
"estimated_token_count": estimated_tokens,
|
||||
"exact_token_count": None,
|
||||
"ai_run_duration_ms": duration_ms,
|
||||
"ai_run_duration_mode": "elapsed_or_tool_call_fallback",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@@ -108,6 +108,7 @@ from app.services.expense_rule_runtime import (
|
||||
resolve_document_type_label,
|
||||
)
|
||||
from app.services.ocr import OcrService
|
||||
from app.services.receipt_folder import ReceiptFolderService
|
||||
|
||||
|
||||
class ExpenseClaimAttachmentOperationsMixin:
|
||||
@@ -120,6 +121,7 @@ class ExpenseClaimAttachmentOperationsMixin:
|
||||
content: bytes,
|
||||
media_type: str | None,
|
||||
current_user: CurrentUserContext,
|
||||
source_receipt_id: str = "",
|
||||
) -> dict[str, Any] | None:
|
||||
claim, item = self._get_claim_item_or_raise(
|
||||
claim_id=claim_id,
|
||||
@@ -240,6 +242,16 @@ class ExpenseClaimAttachmentOperationsMixin:
|
||||
"ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []],
|
||||
}
|
||||
self._attachment_storage.write_meta(file_path, meta)
|
||||
ReceiptFolderService().save_linked_attachment(
|
||||
file_path=file_path,
|
||||
media_type=resolved_media_type,
|
||||
document=ocr_document,
|
||||
current_user=current_user,
|
||||
claim_id=claim.id,
|
||||
claim_no=claim.claim_no,
|
||||
item_id=item.id,
|
||||
source_receipt_id=source_receipt_id,
|
||||
)
|
||||
|
||||
self._sync_claim_from_items(claim)
|
||||
self.db.commit()
|
||||
|
||||
532
server/src/app/services/receipt_folder.py
Normal file
532
server/src/app/services/receipt_folder.py
Normal file
@@ -0,0 +1,532 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import mimetypes
|
||||
import re
|
||||
import shutil
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from uuid import uuid4
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from app.core.config import get_settings
|
||||
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead
|
||||
from app.schemas.receipt_folder import (
|
||||
ReceiptFolderDeleteResponse,
|
||||
ReceiptFolderDetailRead,
|
||||
ReceiptFolderFieldRead,
|
||||
ReceiptFolderItemRead,
|
||||
ReceiptFolderUpdate,
|
||||
)
|
||||
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
|
||||
from app.services.ocr import SUPPORTED_SUFFIXES
|
||||
|
||||
|
||||
class ReceiptFolderService:
|
||||
def __init__(self) -> None:
    """Resolve the ``receipt_folder`` directory under the configured storage root."""
    storage_root = get_settings().resolved_storage_root_dir
    self.root = (storage_root / "receipt_folder").resolve()
|
||||
|
||||
def persist_ocr_batch(
    self,
    *,
    files: list[tuple[str, bytes, str | None]],
    result: OcrRecognizeBatchRead,
    current_user: CurrentUserContext,
    receipt_ids: list[str] | None = None,
) -> OcrRecognizeBatchRead:
    """Store OCR-recognized uploads in the receipt folder and annotate the result.

    Documents are paired with ``files`` by position. For each pair, an
    existing receipt named in ``receipt_ids`` (same position) is reused;
    otherwise the file is saved as a new receipt when it has content and a
    supported suffix. Documents without a matching file, or whose file is not
    persisted, are passed through unchanged.

    Returns:
        A copy of ``result`` whose documents carry receipt id/status/URLs
        where a receipt was resolved or created.
    """

    def attach(document, receipt):
        # Copy the document with the receipt reference fields filled in.
        return document.model_copy(
            update={
                "receipt_id": receipt.id,
                "receipt_status": receipt.status,
                "receipt_preview_url": receipt.preview_url,
                "receipt_source_url": receipt.source_url,
            }
        )

    known_ids = receipt_ids or []
    enriched: list[OcrRecognizeDocumentRead] = []
    for position, document in enumerate(list(result.documents or [])):
        if position >= len(files):
            enriched.append(document)
            continue

        requested_id = known_ids[position] if position < len(known_ids) else ""
        existing_receipt = self._resolve_existing_item(requested_id, current_user)
        if existing_receipt is not None:
            enriched.append(attach(document, existing_receipt))
            continue

        filename, content, media_type = files[position]
        if not self._should_persist_source(filename, content):
            enriched.append(document)
            continue

        created = self.save_receipt(
            filename=filename,
            content=content,
            media_type=media_type or document.media_type,
            document=document,
            current_user=current_user,
        )
        enriched.append(attach(document, created))

    return result.model_copy(update={"documents": enriched})
|
||||
|
||||
def save_receipt(
|
||||
self,
|
||||
*,
|
||||
filename: str,
|
||||
content: bytes,
|
||||
media_type: str | None,
|
||||
document: Any | None,
|
||||
current_user: CurrentUserContext,
|
||||
linked_claim_id: str = "",
|
||||
linked_claim_no: str = "",
|
||||
linked_item_id: str = "",
|
||||
) -> ReceiptFolderItemRead:
|
||||
owner_key = self._owner_key(current_user)
|
||||
receipt_id = str(uuid4())
|
||||
receipt_dir = self._owner_root(owner_key) / receipt_id
|
||||
receipt_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
normalized_name = self.normalize_filename(filename)
|
||||
source_path = receipt_dir / normalized_name
|
||||
source_path.write_bytes(content)
|
||||
resolved_media_type = self.resolve_media_type(normalized_name, media_type)
|
||||
preview_meta = self._write_preview_asset(
|
||||
receipt_dir=receipt_dir,
|
||||
source_path=source_path,
|
||||
media_type=resolved_media_type,
|
||||
document=document,
|
||||
)
|
||||
now = datetime.now(UTC)
|
||||
linked = bool(str(linked_claim_id or "").strip())
|
||||
meta = {
|
||||
"id": receipt_id,
|
||||
"owner_key": owner_key,
|
||||
"file_name": normalized_name,
|
||||
"source_file_name": normalized_name,
|
||||
"media_type": resolved_media_type,
|
||||
"size_bytes": len(content),
|
||||
"uploaded_at": now.isoformat(),
|
||||
"status": "linked" if linked else "unlinked",
|
||||
"linked_claim_id": str(linked_claim_id or "").strip(),
|
||||
"linked_claim_no": str(linked_claim_no or "").strip(),
|
||||
"linked_item_id": str(linked_item_id or "").strip(),
|
||||
"linked_at": now.isoformat() if linked else "",
|
||||
**self._build_document_meta(document),
|
||||
**preview_meta,
|
||||
}
|
||||
self._write_meta(receipt_dir, meta)
|
||||
return self._build_item(meta)
|
||||
|
||||
def save_linked_attachment(
    self,
    *,
    file_path: Path,
    media_type: str,
    document: Any | None,
    current_user: CurrentUserContext,
    claim_id: str,
    claim_no: str,
    item_id: str,
    source_receipt_id: str = "",
) -> ReceiptFolderItemRead | None:
    """Record a claim attachment in the receipt folder as a linked receipt.

    When ``source_receipt_id`` names an existing receipt of the caller, that
    receipt is marked as linked. Otherwise the attachment file is copied into
    a new linked receipt, provided it lives under the storage root.

    Returns:
        The linked receipt item, or ``None`` when ``file_path`` is not an
        existing regular file or lies outside the storage root.
    """
    if not (file_path.exists() and file_path.is_file()):
        return None

    requested_existing = bool(str(source_receipt_id or "").strip())
    if requested_existing:
        try:
            return self.mark_receipt_linked(
                receipt_id=source_receipt_id,
                current_user=current_user,
                claim_id=claim_id,
                claim_no=claim_no,
                item_id=item_id,
            )
        except FileNotFoundError:
            # Unknown receipt id: fall back to storing a fresh copy below.
            pass

    # NOTE(review): the storage root is compared without calling .resolve()
    # here — presumably the setting is already resolved; confirm.
    storage_root = get_settings().resolved_storage_root_dir
    try:
        file_path.resolve().relative_to(storage_root)
    except ValueError:
        return None

    return self.save_receipt(
        filename=file_path.name,
        content=file_path.read_bytes(),
        media_type=media_type,
        document=document,
        current_user=current_user,
        linked_claim_id=claim_id,
        linked_claim_no=claim_no,
        linked_item_id=item_id,
    )
|
||||
|
||||
def mark_receipt_linked(
|
||||
self,
|
||||
*,
|
||||
receipt_id: str,
|
||||
current_user: CurrentUserContext,
|
||||
claim_id: str,
|
||||
claim_no: str,
|
||||
item_id: str,
|
||||
) -> ReceiptFolderItemRead:
|
||||
owner_key = self._owner_key(current_user)
|
||||
receipt_dir = self._receipt_dir(owner_key, receipt_id)
|
||||
meta = self._read_meta(receipt_dir)
|
||||
meta["status"] = "linked"
|
||||
meta["linked_claim_id"] = str(claim_id or "").strip()
|
||||
meta["linked_claim_no"] = str(claim_no or "").strip()
|
||||
meta["linked_item_id"] = str(item_id or "").strip()
|
||||
meta["linked_at"] = datetime.now(UTC).isoformat()
|
||||
self._write_meta(receipt_dir, meta)
|
||||
return self._build_item(meta)
|
||||
|
||||
def list_receipts(
|
||||
self,
|
||||
*,
|
||||
current_user: CurrentUserContext,
|
||||
status_filter: str = "all",
|
||||
) -> list[ReceiptFolderItemRead]:
|
||||
status_filter = str(status_filter or "all").strip().lower()
|
||||
items = [
|
||||
self._build_item(meta)
|
||||
for meta in self._iter_owner_meta(self._owner_key(current_user))
|
||||
if self._matches_status(meta, status_filter)
|
||||
]
|
||||
return sorted(items, key=lambda item: item.uploaded_at or datetime.min.replace(tzinfo=UTC), reverse=True)
|
||||
|
||||
def get_receipt(self, receipt_id: str, current_user: CurrentUserContext) -> ReceiptFolderDetailRead:
    """Return the detail view of one of the caller's receipts.

    The detail view is the list-item view plus OCR details, the stored
    document fields and the raw metadata.

    Raises:
        FileNotFoundError: Propagated from the metadata lookup.
    """
    meta = self._read_receipt_meta(receipt_id, current_user)
    summary = self._build_item(meta).model_dump()
    details = {
        "engine": str(meta.get("engine") or ""),
        "model": str(meta.get("model") or ""),
        "ocr_text": str(meta.get("ocr_text") or ""),
        "line_count": int(meta.get("ocr_line_count") or 0),
        # A receipt always reports at least one page.
        "page_count": max(1, int(meta.get("page_count") or 1)),
        "classification_confidence": float(meta.get("ocr_classification_confidence") or 0.0),
        "classification_evidence": [
            str(value)
            for value in list(meta.get("ocr_classification_evidence") or [])
            if str(value).strip()
        ],
        "fields": self._resolve_fields(meta),
        "raw_meta": meta,
    }
    return ReceiptFolderDetailRead(**summary, **details)
|
||||
|
||||
def update_receipt(
|
||||
self,
|
||||
*,
|
||||
receipt_id: str,
|
||||
payload: ReceiptFolderUpdate,
|
||||
current_user: CurrentUserContext,
|
||||
) -> ReceiptFolderDetailRead:
|
||||
owner_key = self._owner_key(current_user)
|
||||
receipt_dir = self._receipt_dir(owner_key, receipt_id)
|
||||
meta = self._read_meta(receipt_dir)
|
||||
updates = payload.model_dump(exclude_unset=True)
|
||||
for key in ("document_type", "document_type_label", "scene_code", "scene_label", "summary"):
|
||||
if key in updates and updates[key] is not None:
|
||||
meta[key] = str(updates[key] or "").strip()
|
||||
|
||||
editable = dict(meta.get("editable_fields") or {})
|
||||
for key in ("amount", "document_date", "merchant_name"):
|
||||
if key in updates and updates[key] is not None:
|
||||
editable[key] = str(updates[key] or "").strip()
|
||||
if "fields" in updates and updates["fields"] is not None:
|
||||
meta["document_fields"] = [
|
||||
field.model_dump() if isinstance(field, ReceiptFolderFieldRead) else dict(field)
|
||||
for field in payload.fields or []
|
||||
]
|
||||
meta["editable_fields"] = editable
|
||||
meta["updated_at"] = datetime.now(UTC).isoformat()
|
||||
self._write_meta(receipt_dir, meta)
|
||||
return self.get_receipt(receipt_id, current_user)
|
||||
|
||||
def delete_receipt(
    self,
    *,
    receipt_id: str,
    current_user: CurrentUserContext,
) -> ReceiptFolderDeleteResponse:
    """Remove one of the caller's receipts together with all its files.

    Raises:
        FileNotFoundError: Propagated from the directory lookup when the
            receipt does not exist for this user.
    """
    target_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
    shutil.rmtree(target_dir)
    return ReceiptFolderDeleteResponse(message="票据已删除。", receipt_id=receipt_id)
|
||||
|
||||
def resolve_source(self, receipt_id: str, current_user: CurrentUserContext) -> tuple[Path, str, str]:
    """Locate the original uploaded file of one of the caller's receipts.

    Returns:
        ``(path, media_type, download_name)`` for the stored source file.

    Raises:
        FileNotFoundError: If the receipt is unknown, its metadata names no
            source file, or the named entry is not a regular file.
    """
    meta = self._read_receipt_meta(receipt_id, current_user)
    receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
    file_name = str(meta.get("source_file_name") or meta.get("file_name") or "").strip()
    if not file_name:
        # Joining an empty name would yield the receipt directory itself.
        raise FileNotFoundError("Receipt source not found")
    path = self._assert_child(receipt_dir / file_name)
    # is_file() rather than exists(): a directory must never be served.
    if not path.is_file():
        raise FileNotFoundError("Receipt source not found")
    media_type = self.resolve_media_type(path.name, str(meta.get("media_type") or ""))
    return path, media_type, str(meta.get("file_name") or path.name)
|
||||
|
||||
def resolve_preview(self, receipt_id: str, current_user: CurrentUserContext) -> tuple[Path, str, str]:
    """Locate the file used to preview one of the caller's receipts.

    The stored preview asset is preferred. When it is absent, the source file
    is used if its media type is an image or a PDF.

    Returns:
        ``(path, media_type, file_name)`` of the preview.

    Raises:
        FileNotFoundError: If the receipt is unknown or nothing previewable
            is available.
    """
    meta = self._read_receipt_meta(receipt_id, current_user)
    receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
    preview_name = str(meta.get("preview_file_name") or "").strip()
    if preview_name:
        preview_path = self._assert_child(receipt_dir / preview_name)
        # is_file() rather than exists(): a directory must never be served.
        if preview_path.is_file():
            return (
                preview_path,
                self.resolve_media_type(preview_path.name, str(meta.get("preview_media_type") or "")),
                preview_path.name,
            )

    source_path, source_media_type, source_name = self.resolve_source(receipt_id, current_user)
    if self._is_previewable(source_media_type):
        return source_path, source_media_type, source_name
    raise FileNotFoundError("Receipt preview not found")
|
||||
|
||||
@staticmethod
|
||||
def normalize_filename(filename: str | None) -> str:
|
||||
normalized = Path(str(filename or "").strip()).name
|
||||
normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._")
|
||||
return normalized or "receipt.bin"
|
||||
|
||||
@staticmethod
|
||||
def resolve_media_type(filename: str, fallback: str | None = None) -> str:
|
||||
return str(mimetypes.guess_type(filename)[0] or fallback or "application/octet-stream")
|
||||
|
||||
def _owner_root(self, owner_key: str) -> Path:
|
||||
return self._assert_child(self.root / owner_key)
|
||||
|
||||
def _receipt_dir(self, owner_key: str, receipt_id: str) -> Path:
|
||||
normalized = str(receipt_id or "").strip()
|
||||
if not re.fullmatch(r"[0-9a-fA-F-]{32,36}", normalized):
|
||||
raise FileNotFoundError("Receipt not found")
|
||||
path = self._assert_child(self._owner_root(owner_key) / normalized)
|
||||
if not path.exists() or not path.is_dir():
|
||||
raise FileNotFoundError("Receipt not found")
|
||||
return path
|
||||
|
||||
def _assert_child(self, path: Path) -> Path:
|
||||
self.root.mkdir(parents=True, exist_ok=True)
|
||||
resolved = path.resolve()
|
||||
try:
|
||||
resolved.relative_to(self.root)
|
||||
except ValueError as exc:
|
||||
raise FileNotFoundError("Receipt path is invalid") from exc
|
||||
return resolved
|
||||
|
||||
@staticmethod
|
||||
def _owner_key(current_user: CurrentUserContext) -> str:
|
||||
raw = str(current_user.username or current_user.name or "anonymous").strip().lower()
|
||||
normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", raw).strip("._")
|
||||
return normalized or "anonymous"
|
||||
|
||||
@staticmethod
|
||||
def _should_persist_source(filename: str, content: bytes) -> bool:
|
||||
if not content:
|
||||
return False
|
||||
return Path(str(filename or "")).suffix.lower() in SUPPORTED_SUFFIXES
|
||||
|
||||
def _write_preview_asset(
    self,
    *,
    receipt_dir: Path,
    source_path: Path,
    media_type: str,
    document: Any | None,
) -> dict[str, Any]:
    """Store a preview for a receipt and return the preview metadata keys.

    When the OCR document carries a decodable ``preview_data_url``, its bytes
    are written as ``preview<ext>`` in the receipt directory. Otherwise an
    image or PDF source doubles as its own preview. In every other case the
    receipt is reported as not previewable.
    """
    data_url = str(getattr(document, "preview_data_url", "") or "").strip()
    decoded = ExpenseClaimAttachmentPresentation.decode_data_url(data_url)
    if decoded is not None:
        preview_media_type, preview_bytes = decoded
        extension = mimetypes.guess_extension(preview_media_type) or ".bin"
        preview_name = f"preview{extension}"
        (receipt_dir / preview_name).write_bytes(preview_bytes)
        return {
            "previewable": True,
            "preview_kind": "image",
            "preview_file_name": preview_name,
            "preview_media_type": preview_media_type,
        }

    if not self._is_previewable(media_type):
        return {
            "previewable": False,
            "preview_kind": "",
            "preview_file_name": "",
            "preview_media_type": "",
        }

    # The source itself is viewable; no separate asset is written.
    preview_kind = "image" if media_type.startswith("image/") else "pdf"
    return {
        "previewable": True,
        "preview_kind": preview_kind,
        "preview_file_name": source_path.name,
        "preview_media_type": media_type,
    }
|
||||
|
||||
@staticmethod
|
||||
def _is_previewable(media_type: str) -> bool:
|
||||
return str(media_type or "").startswith("image/") or str(media_type or "") == "application/pdf"
|
||||
|
||||
@staticmethod
|
||||
def _build_document_meta(document: Any | None) -> dict[str, Any]:
|
||||
fields = []
|
||||
for field in list(getattr(document, "document_fields", []) or []):
|
||||
if isinstance(field, dict):
|
||||
fields.append(
|
||||
{
|
||||
"key": str(field.get("key") or "").strip(),
|
||||
"label": str(field.get("label") or "").strip(),
|
||||
"value": str(field.get("value") or "").strip(),
|
||||
}
|
||||
)
|
||||
else:
|
||||
fields.append(
|
||||
{
|
||||
"key": str(getattr(field, "key", "") or "").strip(),
|
||||
"label": str(getattr(field, "label", "") or "").strip(),
|
||||
"value": str(getattr(field, "value", "") or "").strip(),
|
||||
}
|
||||
)
|
||||
fields = [field for field in fields if field["label"] and field["value"]]
|
||||
return {
|
||||
"engine": str(getattr(document, "engine", "") or ""),
|
||||
"model": str(getattr(document, "model", "") or ""),
|
||||
"ocr_text": str(getattr(document, "text", "") or ""),
|
||||
"summary": str(getattr(document, "summary", "") or ""),
|
||||
"ocr_avg_score": float(getattr(document, "avg_score", 0.0) or 0.0),
|
||||
"ocr_line_count": int(getattr(document, "line_count", 0) or 0),
|
||||
"page_count": int(getattr(document, "page_count", 1) or 1),
|
||||
"document_type": str(getattr(document, "document_type", "") or "other"),
|
||||
"document_type_label": str(getattr(document, "document_type_label", "") or "其他单据"),
|
||||
"scene_code": str(getattr(document, "scene_code", "") or "other"),
|
||||
"scene_label": str(getattr(document, "scene_label", "") or "其他票据"),
|
||||
"ocr_classification_source": str(getattr(document, "classification_source", "") or ""),
|
||||
"ocr_classification_confidence": float(getattr(document, "classification_confidence", 0.0) or 0.0),
|
||||
"ocr_classification_evidence": [
|
||||
str(value) for value in list(getattr(document, "classification_evidence", []) or []) if str(value).strip()
|
||||
],
|
||||
"document_fields": fields,
|
||||
"editable_fields": {},
|
||||
"ocr_warnings": [str(value) for value in list(getattr(document, "warnings", []) or []) if str(value).strip()],
|
||||
}
|
||||
|
||||
def _iter_owner_meta(self, owner_key: str) -> list[dict[str, Any]]:
|
||||
owner_root = self._owner_root(owner_key)
|
||||
if not owner_root.exists():
|
||||
return []
|
||||
metas = []
|
||||
for meta_path in owner_root.glob("*/meta.json"):
|
||||
meta = self._read_meta(meta_path.parent)
|
||||
if meta:
|
||||
metas.append(meta)
|
||||
return metas
|
||||
|
||||
def _read_receipt_meta(self, receipt_id: str, current_user: CurrentUserContext) -> dict[str, Any]:
|
||||
return self._read_meta(self._receipt_dir(self._owner_key(current_user), receipt_id))
|
||||
|
||||
def _resolve_existing_item(
|
||||
self,
|
||||
receipt_id: str | None,
|
||||
current_user: CurrentUserContext,
|
||||
) -> ReceiptFolderItemRead | None:
|
||||
normalized = str(receipt_id or "").strip()
|
||||
if not normalized:
|
||||
return None
|
||||
try:
|
||||
return self._build_item(self._read_receipt_meta(normalized, current_user))
|
||||
except FileNotFoundError:
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _meta_path(receipt_dir: Path) -> Path:
|
||||
return receipt_dir / "meta.json"
|
||||
|
||||
def _read_meta(self, receipt_dir: Path) -> dict[str, Any]:
|
||||
meta_path = self._meta_path(receipt_dir)
|
||||
if not meta_path.exists():
|
||||
raise FileNotFoundError("Receipt not found")
|
||||
try:
|
||||
payload = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError) as exc:
|
||||
raise FileNotFoundError("Receipt metadata not found") from exc
|
||||
return payload if isinstance(payload, dict) else {}
|
||||
|
||||
def _write_meta(self, receipt_dir: Path, payload: dict[str, Any]) -> None:
|
||||
self._meta_path(receipt_dir).write_text(
|
||||
json.dumps(payload, ensure_ascii=False, indent=2),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _matches_status(meta: dict[str, Any], status_filter: str) -> bool:
|
||||
if status_filter in {"", "all"}:
|
||||
return True
|
||||
return str(meta.get("status") or "unlinked").strip().lower() == status_filter
|
||||
|
||||
def _build_item(self, meta: dict[str, Any]) -> ReceiptFolderItemRead:
    """Convert stored receipt metadata into the list-item view.

    Missing values fall back to neutral defaults. Amount, date and merchant
    prefer user-edited values over OCR fields. Preview and source URLs are
    only produced when the metadata carries an id.
    """
    receipt_id = str(meta.get("id") or "").strip()
    status_value = str(meta.get("status") or "unlinked").strip() or "unlinked"
    status_label = "已关联" if status_value == "linked" else "未关联"
    previewable = bool(meta.get("previewable"))

    preview_url = f"/receipt-folder/{receipt_id}/preview" if previewable and receipt_id else ""
    source_url = f"/receipt-folder/{receipt_id}/source" if receipt_id else ""
    warnings = [str(value) for value in list(meta.get("ocr_warnings") or []) if str(value).strip()]

    return ReceiptFolderItemRead(
        id=receipt_id,
        file_name=str(meta.get("file_name") or ""),
        media_type=str(meta.get("media_type") or "application/octet-stream"),
        size_bytes=int(meta.get("size_bytes") or 0),
        status=status_value,
        status_label=status_label,
        document_type=str(meta.get("document_type") or "other"),
        document_type_label=str(meta.get("document_type_label") or "其他单据"),
        scene_code=str(meta.get("scene_code") or "other"),
        scene_label=str(meta.get("scene_label") or "其他票据"),
        summary=str(meta.get("summary") or ""),
        amount=self._resolve_editable_or_field(meta, "amount", labels=("金额", "价税合计", "票价")),
        document_date=self._resolve_editable_or_field(
            meta, "document_date", labels=("日期", "开票日期", "乘车日期")
        ),
        merchant_name=self._resolve_editable_or_field(
            meta, "merchant_name", labels=("商户", "销售方", "收款方")
        ),
        avg_score=float(meta.get("ocr_avg_score") or 0.0),
        uploaded_at=self._parse_datetime(meta.get("uploaded_at")),
        linked_at=self._parse_datetime(meta.get("linked_at")),
        linked_claim_id=str(meta.get("linked_claim_id") or ""),
        linked_claim_no=str(meta.get("linked_claim_no") or ""),
        previewable=previewable,
        preview_kind=str(meta.get("preview_kind") or ""),
        preview_url=preview_url,
        source_url=source_url,
        warnings=warnings,
    )
|
||||
|
||||
def _resolve_fields(self, meta: dict[str, Any]) -> list[ReceiptFolderFieldRead]:
    """Return the stored document fields as schema objects.

    Entries that are not dicts or that have a blank label are ignored.
    """
    resolved = []
    for field in list(meta.get("document_fields") or []):
        if not isinstance(field, dict):
            continue
        if not str(field.get("label") or "").strip():
            continue
        resolved.append(
            ReceiptFolderFieldRead(
                key=str(field.get("key") or ""),
                label=str(field.get("label") or ""),
                value=str(field.get("value") or ""),
            )
        )
    return resolved
|
||||
|
||||
def _resolve_editable_or_field(self, meta: dict[str, Any], key: str, *, labels: tuple[str, ...]) -> str:
|
||||
editable = meta.get("editable_fields")
|
||||
if isinstance(editable, dict):
|
||||
value = str(editable.get(key) or "").strip()
|
||||
if value:
|
||||
return value
|
||||
label_set = set(labels)
|
||||
for field in self._resolve_fields(meta):
|
||||
if field.label in label_set or field.key == key:
|
||||
return field.value
|
||||
return ""
|
||||
|
||||
@staticmethod
|
||||
def _parse_datetime(value: Any) -> datetime | None:
|
||||
raw = str(value or "").strip()
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
return datetime.fromisoformat(raw)
|
||||
except ValueError:
|
||||
return None
|
||||
Reference in New Issue
Block a user