后端新增票据夹端点、数据模型和服务模块,优化 OCR 端点 Schema 和附件操作逻辑,完善员工行为画像服务和辅助函数,前端新增票据夹视图和服务层,优化文档中心样式和侧边栏导航,完善员工画像详情弹窗和权限控制,补充单元测试。
533 lines
22 KiB
Python
533 lines
22 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import mimetypes
|
|
import re
|
|
import shutil
|
|
from datetime import UTC, datetime
|
|
from pathlib import Path
|
|
from typing import Any
|
|
from uuid import uuid4
|
|
|
|
from app.api.deps import CurrentUserContext
|
|
from app.core.config import get_settings
|
|
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead
|
|
from app.schemas.receipt_folder import (
|
|
ReceiptFolderDeleteResponse,
|
|
ReceiptFolderDetailRead,
|
|
ReceiptFolderFieldRead,
|
|
ReceiptFolderItemRead,
|
|
ReceiptFolderUpdate,
|
|
)
|
|
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
|
|
from app.services.ocr import SUPPORTED_SUFFIXES
|
|
|
|
|
|
class ReceiptFolderService:
|
|
def __init__(self) -> None:
|
|
self.root = (get_settings().resolved_storage_root_dir / "receipt_folder").resolve()
|
|
|
|
def persist_ocr_batch(
|
|
self,
|
|
*,
|
|
files: list[tuple[str, bytes, str | None]],
|
|
result: OcrRecognizeBatchRead,
|
|
current_user: CurrentUserContext,
|
|
receipt_ids: list[str] | None = None,
|
|
) -> OcrRecognizeBatchRead:
|
|
documents = list(result.documents or [])
|
|
enriched: list[OcrRecognizeDocumentRead] = []
|
|
for index, document in enumerate(documents):
|
|
if index >= len(files):
|
|
enriched.append(document)
|
|
continue
|
|
existing_receipt = self._resolve_existing_item(
|
|
receipt_ids[index] if receipt_ids and index < len(receipt_ids) else "",
|
|
current_user,
|
|
)
|
|
if existing_receipt is not None:
|
|
enriched.append(
|
|
document.model_copy(
|
|
update={
|
|
"receipt_id": existing_receipt.id,
|
|
"receipt_status": existing_receipt.status,
|
|
"receipt_preview_url": existing_receipt.preview_url,
|
|
"receipt_source_url": existing_receipt.source_url,
|
|
}
|
|
)
|
|
)
|
|
continue
|
|
filename, content, media_type = files[index]
|
|
if not self._should_persist_source(filename, content):
|
|
enriched.append(document)
|
|
continue
|
|
receipt = self.save_receipt(
|
|
filename=filename,
|
|
content=content,
|
|
media_type=media_type or document.media_type,
|
|
document=document,
|
|
current_user=current_user,
|
|
)
|
|
enriched.append(
|
|
document.model_copy(
|
|
update={
|
|
"receipt_id": receipt.id,
|
|
"receipt_status": receipt.status,
|
|
"receipt_preview_url": receipt.preview_url,
|
|
"receipt_source_url": receipt.source_url,
|
|
}
|
|
)
|
|
)
|
|
return result.model_copy(update={"documents": enriched})
|
|
|
|
def save_receipt(
|
|
self,
|
|
*,
|
|
filename: str,
|
|
content: bytes,
|
|
media_type: str | None,
|
|
document: Any | None,
|
|
current_user: CurrentUserContext,
|
|
linked_claim_id: str = "",
|
|
linked_claim_no: str = "",
|
|
linked_item_id: str = "",
|
|
) -> ReceiptFolderItemRead:
|
|
owner_key = self._owner_key(current_user)
|
|
receipt_id = str(uuid4())
|
|
receipt_dir = self._owner_root(owner_key) / receipt_id
|
|
receipt_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
normalized_name = self.normalize_filename(filename)
|
|
source_path = receipt_dir / normalized_name
|
|
source_path.write_bytes(content)
|
|
resolved_media_type = self.resolve_media_type(normalized_name, media_type)
|
|
preview_meta = self._write_preview_asset(
|
|
receipt_dir=receipt_dir,
|
|
source_path=source_path,
|
|
media_type=resolved_media_type,
|
|
document=document,
|
|
)
|
|
now = datetime.now(UTC)
|
|
linked = bool(str(linked_claim_id or "").strip())
|
|
meta = {
|
|
"id": receipt_id,
|
|
"owner_key": owner_key,
|
|
"file_name": normalized_name,
|
|
"source_file_name": normalized_name,
|
|
"media_type": resolved_media_type,
|
|
"size_bytes": len(content),
|
|
"uploaded_at": now.isoformat(),
|
|
"status": "linked" if linked else "unlinked",
|
|
"linked_claim_id": str(linked_claim_id or "").strip(),
|
|
"linked_claim_no": str(linked_claim_no or "").strip(),
|
|
"linked_item_id": str(linked_item_id or "").strip(),
|
|
"linked_at": now.isoformat() if linked else "",
|
|
**self._build_document_meta(document),
|
|
**preview_meta,
|
|
}
|
|
self._write_meta(receipt_dir, meta)
|
|
return self._build_item(meta)
|
|
|
|
def save_linked_attachment(
|
|
self,
|
|
*,
|
|
file_path: Path,
|
|
media_type: str,
|
|
document: Any | None,
|
|
current_user: CurrentUserContext,
|
|
claim_id: str,
|
|
claim_no: str,
|
|
item_id: str,
|
|
source_receipt_id: str = "",
|
|
) -> ReceiptFolderItemRead | None:
|
|
if not file_path.exists() or not file_path.is_file():
|
|
return None
|
|
if str(source_receipt_id or "").strip():
|
|
try:
|
|
return self.mark_receipt_linked(
|
|
receipt_id=source_receipt_id,
|
|
current_user=current_user,
|
|
claim_id=claim_id,
|
|
claim_no=claim_no,
|
|
item_id=item_id,
|
|
)
|
|
except FileNotFoundError:
|
|
pass
|
|
storage_root = get_settings().resolved_storage_root_dir
|
|
try:
|
|
file_path.resolve().relative_to(storage_root)
|
|
except ValueError:
|
|
return None
|
|
return self.save_receipt(
|
|
filename=file_path.name,
|
|
content=file_path.read_bytes(),
|
|
media_type=media_type,
|
|
document=document,
|
|
current_user=current_user,
|
|
linked_claim_id=claim_id,
|
|
linked_claim_no=claim_no,
|
|
linked_item_id=item_id,
|
|
)
|
|
|
|
def mark_receipt_linked(
|
|
self,
|
|
*,
|
|
receipt_id: str,
|
|
current_user: CurrentUserContext,
|
|
claim_id: str,
|
|
claim_no: str,
|
|
item_id: str,
|
|
) -> ReceiptFolderItemRead:
|
|
owner_key = self._owner_key(current_user)
|
|
receipt_dir = self._receipt_dir(owner_key, receipt_id)
|
|
meta = self._read_meta(receipt_dir)
|
|
meta["status"] = "linked"
|
|
meta["linked_claim_id"] = str(claim_id or "").strip()
|
|
meta["linked_claim_no"] = str(claim_no or "").strip()
|
|
meta["linked_item_id"] = str(item_id or "").strip()
|
|
meta["linked_at"] = datetime.now(UTC).isoformat()
|
|
self._write_meta(receipt_dir, meta)
|
|
return self._build_item(meta)
|
|
|
|
def list_receipts(
|
|
self,
|
|
*,
|
|
current_user: CurrentUserContext,
|
|
status_filter: str = "all",
|
|
) -> list[ReceiptFolderItemRead]:
|
|
status_filter = str(status_filter or "all").strip().lower()
|
|
items = [
|
|
self._build_item(meta)
|
|
for meta in self._iter_owner_meta(self._owner_key(current_user))
|
|
if self._matches_status(meta, status_filter)
|
|
]
|
|
return sorted(items, key=lambda item: item.uploaded_at or datetime.min.replace(tzinfo=UTC), reverse=True)
|
|
|
|
def get_receipt(self, receipt_id: str, current_user: CurrentUserContext) -> ReceiptFolderDetailRead:
|
|
meta = self._read_receipt_meta(receipt_id, current_user)
|
|
item = self._build_item(meta)
|
|
return ReceiptFolderDetailRead(
|
|
**item.model_dump(),
|
|
engine=str(meta.get("engine") or ""),
|
|
model=str(meta.get("model") or ""),
|
|
ocr_text=str(meta.get("ocr_text") or ""),
|
|
line_count=int(meta.get("ocr_line_count") or 0),
|
|
page_count=max(1, int(meta.get("page_count") or 1)),
|
|
classification_confidence=float(meta.get("ocr_classification_confidence") or 0.0),
|
|
classification_evidence=[
|
|
str(value) for value in list(meta.get("ocr_classification_evidence") or []) if str(value).strip()
|
|
],
|
|
fields=self._resolve_fields(meta),
|
|
raw_meta=meta,
|
|
)
|
|
|
|
def update_receipt(
|
|
self,
|
|
*,
|
|
receipt_id: str,
|
|
payload: ReceiptFolderUpdate,
|
|
current_user: CurrentUserContext,
|
|
) -> ReceiptFolderDetailRead:
|
|
owner_key = self._owner_key(current_user)
|
|
receipt_dir = self._receipt_dir(owner_key, receipt_id)
|
|
meta = self._read_meta(receipt_dir)
|
|
updates = payload.model_dump(exclude_unset=True)
|
|
for key in ("document_type", "document_type_label", "scene_code", "scene_label", "summary"):
|
|
if key in updates and updates[key] is not None:
|
|
meta[key] = str(updates[key] or "").strip()
|
|
|
|
editable = dict(meta.get("editable_fields") or {})
|
|
for key in ("amount", "document_date", "merchant_name"):
|
|
if key in updates and updates[key] is not None:
|
|
editable[key] = str(updates[key] or "").strip()
|
|
if "fields" in updates and updates["fields"] is not None:
|
|
meta["document_fields"] = [
|
|
field.model_dump() if isinstance(field, ReceiptFolderFieldRead) else dict(field)
|
|
for field in payload.fields or []
|
|
]
|
|
meta["editable_fields"] = editable
|
|
meta["updated_at"] = datetime.now(UTC).isoformat()
|
|
self._write_meta(receipt_dir, meta)
|
|
return self.get_receipt(receipt_id, current_user)
|
|
|
|
def delete_receipt(
|
|
self,
|
|
*,
|
|
receipt_id: str,
|
|
current_user: CurrentUserContext,
|
|
) -> ReceiptFolderDeleteResponse:
|
|
owner_key = self._owner_key(current_user)
|
|
receipt_dir = self._receipt_dir(owner_key, receipt_id)
|
|
shutil.rmtree(receipt_dir)
|
|
return ReceiptFolderDeleteResponse(message="票据已删除。", receipt_id=receipt_id)
|
|
|
|
def resolve_source(self, receipt_id: str, current_user: CurrentUserContext) -> tuple[Path, str, str]:
|
|
meta = self._read_receipt_meta(receipt_id, current_user)
|
|
receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
|
|
file_name = str(meta.get("source_file_name") or meta.get("file_name") or "").strip()
|
|
path = self._assert_child(receipt_dir / file_name)
|
|
if not path.exists():
|
|
raise FileNotFoundError("Receipt source not found")
|
|
media_type = self.resolve_media_type(path.name, str(meta.get("media_type") or ""))
|
|
return path, media_type, str(meta.get("file_name") or path.name)
|
|
|
|
def resolve_preview(self, receipt_id: str, current_user: CurrentUserContext) -> tuple[Path, str, str]:
|
|
meta = self._read_receipt_meta(receipt_id, current_user)
|
|
receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
|
|
preview_name = str(meta.get("preview_file_name") or "").strip()
|
|
if preview_name:
|
|
preview_path = self._assert_child(receipt_dir / preview_name)
|
|
if preview_path.exists():
|
|
return (
|
|
preview_path,
|
|
self.resolve_media_type(preview_path.name, str(meta.get("preview_media_type") or "")),
|
|
preview_path.name,
|
|
)
|
|
|
|
source_path, source_media_type, source_name = self.resolve_source(receipt_id, current_user)
|
|
if self._is_previewable(source_media_type):
|
|
return source_path, source_media_type, source_name
|
|
raise FileNotFoundError("Receipt preview not found")
|
|
|
|
@staticmethod
|
|
def normalize_filename(filename: str | None) -> str:
|
|
normalized = Path(str(filename or "").strip()).name
|
|
normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._")
|
|
return normalized or "receipt.bin"
|
|
|
|
@staticmethod
|
|
def resolve_media_type(filename: str, fallback: str | None = None) -> str:
|
|
return str(mimetypes.guess_type(filename)[0] or fallback or "application/octet-stream")
|
|
|
|
def _owner_root(self, owner_key: str) -> Path:
|
|
return self._assert_child(self.root / owner_key)
|
|
|
|
def _receipt_dir(self, owner_key: str, receipt_id: str) -> Path:
|
|
normalized = str(receipt_id or "").strip()
|
|
if not re.fullmatch(r"[0-9a-fA-F-]{32,36}", normalized):
|
|
raise FileNotFoundError("Receipt not found")
|
|
path = self._assert_child(self._owner_root(owner_key) / normalized)
|
|
if not path.exists() or not path.is_dir():
|
|
raise FileNotFoundError("Receipt not found")
|
|
return path
|
|
|
|
def _assert_child(self, path: Path) -> Path:
|
|
self.root.mkdir(parents=True, exist_ok=True)
|
|
resolved = path.resolve()
|
|
try:
|
|
resolved.relative_to(self.root)
|
|
except ValueError as exc:
|
|
raise FileNotFoundError("Receipt path is invalid") from exc
|
|
return resolved
|
|
|
|
@staticmethod
|
|
def _owner_key(current_user: CurrentUserContext) -> str:
|
|
raw = str(current_user.username or current_user.name or "anonymous").strip().lower()
|
|
normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", raw).strip("._")
|
|
return normalized or "anonymous"
|
|
|
|
@staticmethod
|
|
def _should_persist_source(filename: str, content: bytes) -> bool:
|
|
if not content:
|
|
return False
|
|
return Path(str(filename or "")).suffix.lower() in SUPPORTED_SUFFIXES
|
|
|
|
def _write_preview_asset(
|
|
self,
|
|
*,
|
|
receipt_dir: Path,
|
|
source_path: Path,
|
|
media_type: str,
|
|
document: Any | None,
|
|
) -> dict[str, Any]:
|
|
preview_data_url = str(getattr(document, "preview_data_url", "") or "").strip()
|
|
decoded = ExpenseClaimAttachmentPresentation.decode_data_url(preview_data_url)
|
|
if decoded is not None:
|
|
preview_media_type, preview_content = decoded
|
|
suffix = mimetypes.guess_extension(preview_media_type) or ".bin"
|
|
preview_name = f"preview{suffix}"
|
|
preview_path = receipt_dir / preview_name
|
|
preview_path.write_bytes(preview_content)
|
|
return {
|
|
"previewable": True,
|
|
"preview_kind": "image",
|
|
"preview_file_name": preview_name,
|
|
"preview_media_type": preview_media_type,
|
|
}
|
|
if self._is_previewable(media_type):
|
|
return {
|
|
"previewable": True,
|
|
"preview_kind": "image" if media_type.startswith("image/") else "pdf",
|
|
"preview_file_name": source_path.name,
|
|
"preview_media_type": media_type,
|
|
}
|
|
return {
|
|
"previewable": False,
|
|
"preview_kind": "",
|
|
"preview_file_name": "",
|
|
"preview_media_type": "",
|
|
}
|
|
|
|
@staticmethod
|
|
def _is_previewable(media_type: str) -> bool:
|
|
return str(media_type or "").startswith("image/") or str(media_type or "") == "application/pdf"
|
|
|
|
@staticmethod
|
|
def _build_document_meta(document: Any | None) -> dict[str, Any]:
|
|
fields = []
|
|
for field in list(getattr(document, "document_fields", []) or []):
|
|
if isinstance(field, dict):
|
|
fields.append(
|
|
{
|
|
"key": str(field.get("key") or "").strip(),
|
|
"label": str(field.get("label") or "").strip(),
|
|
"value": str(field.get("value") or "").strip(),
|
|
}
|
|
)
|
|
else:
|
|
fields.append(
|
|
{
|
|
"key": str(getattr(field, "key", "") or "").strip(),
|
|
"label": str(getattr(field, "label", "") or "").strip(),
|
|
"value": str(getattr(field, "value", "") or "").strip(),
|
|
}
|
|
)
|
|
fields = [field for field in fields if field["label"] and field["value"]]
|
|
return {
|
|
"engine": str(getattr(document, "engine", "") or ""),
|
|
"model": str(getattr(document, "model", "") or ""),
|
|
"ocr_text": str(getattr(document, "text", "") or ""),
|
|
"summary": str(getattr(document, "summary", "") or ""),
|
|
"ocr_avg_score": float(getattr(document, "avg_score", 0.0) or 0.0),
|
|
"ocr_line_count": int(getattr(document, "line_count", 0) or 0),
|
|
"page_count": int(getattr(document, "page_count", 1) or 1),
|
|
"document_type": str(getattr(document, "document_type", "") or "other"),
|
|
"document_type_label": str(getattr(document, "document_type_label", "") or "其他单据"),
|
|
"scene_code": str(getattr(document, "scene_code", "") or "other"),
|
|
"scene_label": str(getattr(document, "scene_label", "") or "其他票据"),
|
|
"ocr_classification_source": str(getattr(document, "classification_source", "") or ""),
|
|
"ocr_classification_confidence": float(getattr(document, "classification_confidence", 0.0) or 0.0),
|
|
"ocr_classification_evidence": [
|
|
str(value) for value in list(getattr(document, "classification_evidence", []) or []) if str(value).strip()
|
|
],
|
|
"document_fields": fields,
|
|
"editable_fields": {},
|
|
"ocr_warnings": [str(value) for value in list(getattr(document, "warnings", []) or []) if str(value).strip()],
|
|
}
|
|
|
|
def _iter_owner_meta(self, owner_key: str) -> list[dict[str, Any]]:
|
|
owner_root = self._owner_root(owner_key)
|
|
if not owner_root.exists():
|
|
return []
|
|
metas = []
|
|
for meta_path in owner_root.glob("*/meta.json"):
|
|
meta = self._read_meta(meta_path.parent)
|
|
if meta:
|
|
metas.append(meta)
|
|
return metas
|
|
|
|
def _read_receipt_meta(self, receipt_id: str, current_user: CurrentUserContext) -> dict[str, Any]:
|
|
return self._read_meta(self._receipt_dir(self._owner_key(current_user), receipt_id))
|
|
|
|
def _resolve_existing_item(
|
|
self,
|
|
receipt_id: str | None,
|
|
current_user: CurrentUserContext,
|
|
) -> ReceiptFolderItemRead | None:
|
|
normalized = str(receipt_id or "").strip()
|
|
if not normalized:
|
|
return None
|
|
try:
|
|
return self._build_item(self._read_receipt_meta(normalized, current_user))
|
|
except FileNotFoundError:
|
|
return None
|
|
|
|
@staticmethod
|
|
def _meta_path(receipt_dir: Path) -> Path:
|
|
return receipt_dir / "meta.json"
|
|
|
|
def _read_meta(self, receipt_dir: Path) -> dict[str, Any]:
|
|
meta_path = self._meta_path(receipt_dir)
|
|
if not meta_path.exists():
|
|
raise FileNotFoundError("Receipt not found")
|
|
try:
|
|
payload = json.loads(meta_path.read_text(encoding="utf-8"))
|
|
except (OSError, json.JSONDecodeError) as exc:
|
|
raise FileNotFoundError("Receipt metadata not found") from exc
|
|
return payload if isinstance(payload, dict) else {}
|
|
|
|
def _write_meta(self, receipt_dir: Path, payload: dict[str, Any]) -> None:
|
|
self._meta_path(receipt_dir).write_text(
|
|
json.dumps(payload, ensure_ascii=False, indent=2),
|
|
encoding="utf-8",
|
|
)
|
|
|
|
@staticmethod
|
|
def _matches_status(meta: dict[str, Any], status_filter: str) -> bool:
|
|
if status_filter in {"", "all"}:
|
|
return True
|
|
return str(meta.get("status") or "unlinked").strip().lower() == status_filter
|
|
|
|
def _build_item(self, meta: dict[str, Any]) -> ReceiptFolderItemRead:
|
|
receipt_id = str(meta.get("id") or "").strip()
|
|
status_value = str(meta.get("status") or "unlinked").strip() or "unlinked"
|
|
return ReceiptFolderItemRead(
|
|
id=receipt_id,
|
|
file_name=str(meta.get("file_name") or ""),
|
|
media_type=str(meta.get("media_type") or "application/octet-stream"),
|
|
size_bytes=int(meta.get("size_bytes") or 0),
|
|
status=status_value,
|
|
status_label="已关联" if status_value == "linked" else "未关联",
|
|
document_type=str(meta.get("document_type") or "other"),
|
|
document_type_label=str(meta.get("document_type_label") or "其他单据"),
|
|
scene_code=str(meta.get("scene_code") or "other"),
|
|
scene_label=str(meta.get("scene_label") or "其他票据"),
|
|
summary=str(meta.get("summary") or ""),
|
|
amount=self._resolve_editable_or_field(meta, "amount", labels=("金额", "价税合计", "票价")),
|
|
document_date=self._resolve_editable_or_field(meta, "document_date", labels=("日期", "开票日期", "乘车日期")),
|
|
merchant_name=self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方")),
|
|
avg_score=float(meta.get("ocr_avg_score") or 0.0),
|
|
uploaded_at=self._parse_datetime(meta.get("uploaded_at")),
|
|
linked_at=self._parse_datetime(meta.get("linked_at")),
|
|
linked_claim_id=str(meta.get("linked_claim_id") or ""),
|
|
linked_claim_no=str(meta.get("linked_claim_no") or ""),
|
|
previewable=bool(meta.get("previewable")),
|
|
preview_kind=str(meta.get("preview_kind") or ""),
|
|
preview_url=f"/receipt-folder/{receipt_id}/preview" if bool(meta.get("previewable")) and receipt_id else "",
|
|
source_url=f"/receipt-folder/{receipt_id}/source" if receipt_id else "",
|
|
warnings=[str(value) for value in list(meta.get("ocr_warnings") or []) if str(value).strip()],
|
|
)
|
|
|
|
def _resolve_fields(self, meta: dict[str, Any]) -> list[ReceiptFolderFieldRead]:
|
|
return [
|
|
ReceiptFolderFieldRead(
|
|
key=str(field.get("key") or ""),
|
|
label=str(field.get("label") or ""),
|
|
value=str(field.get("value") or ""),
|
|
)
|
|
for field in list(meta.get("document_fields") or [])
|
|
if isinstance(field, dict) and str(field.get("label") or "").strip()
|
|
]
|
|
|
|
def _resolve_editable_or_field(self, meta: dict[str, Any], key: str, *, labels: tuple[str, ...]) -> str:
|
|
editable = meta.get("editable_fields")
|
|
if isinstance(editable, dict):
|
|
value = str(editable.get(key) or "").strip()
|
|
if value:
|
|
return value
|
|
label_set = set(labels)
|
|
for field in self._resolve_fields(meta):
|
|
if field.label in label_set or field.key == key:
|
|
return field.value
|
|
return ""
|
|
|
|
@staticmethod
|
|
def _parse_datetime(value: Any) -> datetime | None:
|
|
raw = str(value or "").strip()
|
|
if not raw:
|
|
return None
|
|
try:
|
|
return datetime.fromisoformat(raw)
|
|
except ValueError:
|
|
return None
|