feat: 新增票据夹模块并优化 OCR 与员工画像服务

后端：新增票据夹端点、数据模型和服务模块，优化 OCR 端点 Schema 和附件操作逻辑，完善员工行为画像服务和辅助函数；前端：新增票据夹视图和服务层，优化文档中心样式和侧边栏导航，完善员工画像详情弹窗和权限控制；并补充单元测试。
2026-05-29 14:51:18 +08:00
parent 678f64d772
commit 4c59941ec6
33 changed files with 2855 additions and 551 deletions
--- a/server/src/app/api/v1/endpoints/employee_profiles.py
+++ b/server/src/app/api/v1/endpoints/employee_profiles.py
@@ -9,6 +9,7 @@ from sqlalchemy.orm import Session
 from app.api.deps import CurrentUserContext, get_current_user, get_db
 from app.models.employee import Employee
 from app.schemas.employee_profile import EmployeeProfileLatestRead
+from app.services.account_behavior_profile import AccountBehaviorProfileService
 from app.services.employee_behavior_profile_service import EmployeeBehaviorProfileService

 router = APIRouter(prefix="/employee-profiles")
@@ -31,13 +32,13 @@ def get_current_employee_latest_profile(
 ) -> EmployeeProfileLatestRead:
    employee = _resolve_current_employee(db, current_user)
    if employee is None:
-        return EmployeeProfileLatestRead(
-            employee_id=current_user.username,
-            employee_name=current_user.name,
+        return AccountBehaviorProfileService(db).get_latest_account_profile(
+            account_id=current_user.username,
+            account_name=current_user.name,
+            identifiers=_current_account_identifiers(current_user),
            scene=scene,
            window_days=window_days,
            expense_type_scope=expense_type_scope,
-            empty_reason="当前登录用户未匹配到员工目录，暂无法形成用户画像。",
        )

    service = EmployeeBehaviorProfileService(db)
@@ -47,7 +48,7 @@ def get_current_employee_latest_profile(
        window_days=window_days,
        expense_type_scope=expense_type_scope,
    )
-    if latest.empty_reason:
+    if latest.empty_reason or _missing_usage_duration_metric(latest):
        service.refresh_employee_profiles(
            employee_id=employee.id,
            window_days=(window_days,),
@@ -115,3 +116,24 @@ def _resolve_current_employee(

    stmt = select(Employee).where(or_(*conditions)).order_by(Employee.created_at.asc()).limit(1)
    return db.scalars(stmt).first()
+
+
+def _missing_usage_duration_metric(latest: EmployeeProfileLatestRead) -> bool:  # True when an operations-scene "ai_usage" profile lacks the "ai_run_duration_ms" metric
+    if latest.scene != "operations":  # other scenes are never reported as missing the metric
+        return False
+
+    for profile in latest.profiles:
+        if profile.profile_type == "ai_usage":
+            return "ai_run_duration_ms" not in profile.metrics  # decided by the first ai_usage profile found
+    return False  # no ai_usage profile present -> not treated as missing
+
+
+def _current_account_identifiers(current_user: CurrentUserContext) -> set[str]:  # non-blank username/name values of the logged-in user, de-duplicated
+    return {
+        item  # stored as-is; strip() below is only used for the blank check
+        for item in (
+            current_user.username,
+            current_user.name,
+        )
+        if str(item or "").strip()  # drops None, empty and whitespace-only values
+    }
--- a/server/src/app/api/v1/endpoints/ocr.py
+++ b/server/src/app/api/v1/endpoints/ocr.py
@@ -2,13 +2,14 @@ from __future__ import annotations

 from typing import Annotated

-from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
+from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
 from sqlalchemy.orm import Session

 from app.api.deps import CurrentUserContext, get_current_user, get_db
 from app.schemas.common import ErrorResponse
 from app.schemas.ocr import OcrRecognizeBatchRead
 from app.services.ocr import OcrService
+from app.services.receipt_folder import ReceiptFolderService

 router = APIRouter(prefix="/ocr")

@@ -35,8 +36,9 @@ router = APIRouter(prefix="/ocr")
 )
 async def recognize_ocr_documents(
    files: Annotated[list[UploadFile], File(description="待识别的票据图片或 PDF。")],
-    _: Annotated[CurrentUserContext, Depends(get_current_user)],
+    current_user: Annotated[CurrentUserContext, Depends(get_current_user)],
    db: Annotated[Session, Depends(get_db)],
+    receipt_ids: Annotated[list[str] | None, Form(description="可选，来源于票据夹的持久化票据 ID。")] = None,
 ) -> OcrRecognizeBatchRead:
    try:
        payload = []
@@ -48,7 +50,13 @@ async def recognize_ocr_documents(
                    upload.content_type,
                )
            )
-        return OcrService(db).recognize_files(payload)
+        result = OcrService(db).recognize_files(payload)
+        return ReceiptFolderService().persist_ocr_batch(
+            files=payload,
+            result=result,
+            current_user=current_user,
+            receipt_ids=receipt_ids or [],
+        )
    except ValueError as exc:
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc)) from exc
    except RuntimeError as exc:
--- a/server/src/app/api/v1/endpoints/receipt_folder.py
+++ b/server/src/app/api/v1/endpoints/receipt_folder.py
@@ -0,0 +1,108 @@
+from __future__ import annotations
+
+from typing import Annotated
+
+from fastapi import APIRouter, Depends, HTTPException, Query, status
+from fastapi.responses import FileResponse
+
+from app.api.deps import CurrentUserContext, get_current_user
+from app.schemas.common import ErrorResponse
+from app.schemas.receipt_folder import (
+    ReceiptFolderDeleteResponse,
+    ReceiptFolderDetailRead,
+    ReceiptFolderItemRead,
+    ReceiptFolderUpdate,
+)
+from app.services.receipt_folder import ReceiptFolderService
+
+router = APIRouter(prefix="/receipt-folder")
+CurrentUser = Annotated[CurrentUserContext, Depends(get_current_user)]
+
+
+@router.get(
+    "",
+    response_model=list[ReceiptFolderItemRead],
+    summary="查询票据夹列表",
+    description="返回当前登录用户上传并持久化的票据列表。",
+)
+def list_receipts(  # list the current user's receipts by delegating to ReceiptFolderService.list_receipts
+    current_user: CurrentUser,
+    status_filter: Annotated[str, Query(alias="status")] = "all",  # read from the `status` query parameter; defaults to "all"
+) -> list[ReceiptFolderItemRead]:
+    return ReceiptFolderService().list_receipts(
+        current_user=current_user,
+        status_filter=status_filter,
+    )
+
+
+@router.get(
+    "/{receipt_id}",
+    response_model=ReceiptFolderDetailRead,
+    summary="读取票据详情",
+    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据不存在。"}},
+)
+def get_receipt(receipt_id: str, current_user: CurrentUser) -> ReceiptFolderDetailRead:  # return the detail view of one receipt for the current user
+    try:
+        return ReceiptFolderService().get_receipt(receipt_id, current_user)
+    except FileNotFoundError as exc:  # the service's FileNotFoundError is translated into HTTP 404
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt not found") from exc
+
+
+@router.patch(
+    "/{receipt_id}",
+    response_model=ReceiptFolderDetailRead,
+    summary="更新票据基础识别信息",
+    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据不存在。"}},
+)
+def update_receipt(  # apply a ReceiptFolderUpdate payload to one receipt and return the updated detail view
+    receipt_id: str,
+    payload: ReceiptFolderUpdate,
+    current_user: CurrentUser,
+) -> ReceiptFolderDetailRead:
+    try:
+        return ReceiptFolderService().update_receipt(
+            receipt_id=receipt_id,
+            payload=payload,
+            current_user=current_user,
+        )
+    except FileNotFoundError as exc:  # the service's FileNotFoundError is translated into HTTP 404
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt not found") from exc
+
+
+@router.delete(
+    "/{receipt_id}",
+    response_model=ReceiptFolderDeleteResponse,
+    summary="删除票据",
+    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据不存在。"}},
+)
+def delete_receipt(receipt_id: str, current_user: CurrentUser) -> ReceiptFolderDeleteResponse:  # delete one receipt via ReceiptFolderService.delete_receipt
+    try:
+        return ReceiptFolderService().delete_receipt(receipt_id=receipt_id, current_user=current_user)
+    except FileNotFoundError as exc:  # the service's FileNotFoundError is translated into HTTP 404
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt not found") from exc
+
+
+@router.get(
+    "/{receipt_id}/preview",
+    summary="预览票据原始文件",
+    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据预览不存在。"}},
+)
+def preview_receipt(receipt_id: str, current_user: CurrentUser) -> FileResponse:  # return the receipt's preview file as a FileResponse
+    try:
+        file_path, media_type, file_name = ReceiptFolderService().resolve_preview(receipt_id, current_user)  # service returns a (path, media type, file name) triple
+    except FileNotFoundError as exc:  # the service's FileNotFoundError is translated into HTTP 404
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt preview not found") from exc
+    return FileResponse(file_path, media_type=media_type, filename=file_name)
+
+
+@router.get(
+    "/{receipt_id}/source",
+    summary="读取票据源文件",
+    responses={status.HTTP_404_NOT_FOUND: {"model": ErrorResponse, "description": "票据源文件不存在。"}},
+)
+def source_receipt(receipt_id: str, current_user: CurrentUser) -> FileResponse:  # return the receipt's stored source file as a FileResponse
+    try:
+        file_path, media_type, file_name = ReceiptFolderService().resolve_source(receipt_id, current_user)  # service returns a (path, media type, file name) triple
+    except FileNotFoundError as exc:  # the service's FileNotFoundError is translated into HTTP 404
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Receipt source not found") from exc
+    return FileResponse(file_path, media_type=media_type, filename=file_name)
--- a/server/src/app/api/v1/endpoints/reimbursements.py
+++ b/server/src/app/api/v1/endpoints/reimbursements.py
@@ -2,7 +2,7 @@ from __future__ import annotations

 from typing import Annotated

-from fastapi import APIRouter, Depends, File, HTTPException, UploadFile, status
+from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
 from fastapi.responses import FileResponse
 from sqlalchemy.orm import Session

@@ -372,6 +372,7 @@ async def upload_expense_claim_item_attachment(
    file: Annotated[UploadFile, File(description="待上传的附件文件。")],
    db: DbSession,
    current_user: CurrentUser,
+    receipt_id: Annotated[str | None, Form(description="可选，来源于票据夹的持久化票据 ID。")] = None,
 ) -> ExpenseClaimAttachmentActionResponse:
    service = ExpenseClaimService(db)
    try:
@@ -382,6 +383,7 @@ async def upload_expense_claim_item_attachment(
            content=await file.read(),
            media_type=file.content_type,
            current_user=current_user,
+            source_receipt_id=receipt_id or "",
        )
    except LookupError as error:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(error)) from error
--- a/server/src/app/api/v1/router.py
+++ b/server/src/app/api/v1/router.py
@@ -13,6 +13,7 @@ from app.api.v1.endpoints.knowledge import router as knowledge_router
 from app.api.v1.endpoints.ocr import router as ocr_router
 from app.api.v1.endpoints.ontology import router as ontology_router
 from app.api.v1.endpoints.orchestrator import router as orchestrator_router
+from app.api.v1.endpoints.receipt_folder import router as receipt_folder_router
 from app.api.v1.endpoints.reimbursements import router as reimbursements_router
 from app.api.v1.endpoints.settings import router as settings_router
 from app.api.v1.endpoints.system_logs import router as system_logs_router
@@ -29,6 +30,7 @@ router.include_router(knowledge_router, tags=["knowledge"])
 router.include_router(ocr_router, tags=["ocr"])
 router.include_router(ontology_router, tags=["ontology"])
 router.include_router(orchestrator_router, tags=["orchestrator"])
+router.include_router(receipt_folder_router, tags=["receipt-folder"])
 router.include_router(employees_router, prefix="/employees", tags=["employees"])
 router.include_router(employee_profiles_router, tags=["employee-profiles"])
 router.include_router(reimbursements_router, prefix="/reimbursements", tags=["reimbursements"])
--- a/server/src/app/schemas/ocr.py
+++ b/server/src/app/schemas/ocr.py
@@ -39,6 +39,10 @@ class OcrRecognizeDocumentRead(BaseModel):
    )
    preview_kind: str = Field(default="", description="预览类型，PDF 转图后通常为 image。")
    preview_data_url: str = Field(default="", description="用于前端展示的图片预览 data URL。")
+    receipt_id: str = Field(default="", description="票据夹中的持久化票据 ID。")
+    receipt_status: str = Field(default="", description="票据夹关联状态，unlinked / linked。")
+    receipt_preview_url: str = Field(default="", description="票据夹预览接口地址。")
+    receipt_source_url: str = Field(default="", description="票据夹原始文件接口地址。")
    warnings: list[str] = Field(default_factory=list, description="该文件的识别提示或警告。")
    lines: list[OcrRecognizeLineRead] = Field(default_factory=list, description="逐行识别结果。")

--- a/server/src/app/schemas/receipt_folder.py
+++ b/server/src/app/schemas/receipt_folder.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+class ReceiptFolderFieldRead(BaseModel):  # one key/label/value entry; element type of the `fields` lists in this module
+    key: str = ""
+    label: str = ""
+    value: str = ""
+
+
+class ReceiptFolderItemRead(BaseModel):  # list-level view of a stored receipt; only id and file_name are required
+    id: str
+    file_name: str
+    media_type: str = "application/octet-stream"
+    size_bytes: int = 0
+    status: str = "unlinked"  # ReceiptFolderService.save_receipt writes "linked" or "unlinked"
+    status_label: str = "未关联"
+    document_type: str = "other"
+    document_type_label: str = "其他单据"
+    scene_code: str = "other"
+    scene_label: str = "其他票据"
+    summary: str = ""
+    amount: str = ""  # kept as text, not a numeric type
+    document_date: str = ""  # kept as text, not a date type
+    merchant_name: str = ""
+    avg_score: float = 0.0
+    uploaded_at: datetime | None = None  # ReceiptFolderService.list_receipts sorts on this, newest first
+    linked_at: datetime | None = None
+    linked_claim_id: str = ""
+    linked_claim_no: str = ""
+    previewable: bool = False
+    preview_kind: str = ""
+    preview_url: str = ""
+    source_url: str = ""
+    warnings: list[str] = Field(default_factory=list)
+
+
+class ReceiptFolderDetailRead(ReceiptFolderItemRead):  # item view plus OCR/classification detail; built by ReceiptFolderService.get_receipt
+    engine: str = ""
+    model: str = ""
+    ocr_text: str = ""
+    line_count: int = 0
+    page_count: int = 1  # get_receipt clamps the stored value to at least 1
+    classification_confidence: float = 0.0
+    classification_evidence: list[str] = Field(default_factory=list)
+    fields: list[ReceiptFolderFieldRead] = Field(default_factory=list)
+    raw_meta: dict[str, Any] = Field(default_factory=dict)  # get_receipt passes the receipt's stored meta dict here
+
+
+class ReceiptFolderUpdate(BaseModel):  # partial-update payload for a receipt; every field is optional and defaults to None
+    document_type: str | None = None
+    document_type_label: str | None = None
+    scene_code: str | None = None
+    scene_label: str | None = None
+    summary: str | None = None
+    amount: str | None = None
+    document_date: str | None = None
+    merchant_name: str | None = None
+    fields: list[ReceiptFolderFieldRead] | None = None
+
+
+class ReceiptFolderDeleteResponse(BaseModel):  # response body of the receipt delete endpoint
+    message: str
+    receipt_id: str
--- a/server/src/app/services/account_behavior_profile.py
+++ b/server/src/app/services/account_behavior_profile.py
@@ -0,0 +1,176 @@
+from __future__ import annotations
+
+from datetime import UTC, datetime, timedelta
+from decimal import Decimal
+from typing import Any
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session, selectinload
+
+from app.algorithem.employee_behavior_profile import (
+    LEVEL_LABELS,
+    PROFILE_LABELS,
+    ProfileComponent,
+    evaluate_weighted_profile,
+    score_by_bands,
+)
+from app.algorithem.employee_behavior_profile_tags import build_profile_radar, build_profile_tags
+from app.models.agent_run import AgentRun
+from app.schemas.employee_profile import EmployeeProfileLatestRead, EmployeeProfileRead
+from app.services.employee_behavior_profile_helpers import EmployeeBehaviorProfileMetricHelpers
+
+
+class AccountBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):  # builds an "ai_usage" profile for a login account directly from its AgentRun rows
+    def __init__(self, db: Session) -> None:
+        self.db = db
+
+    def get_latest_account_profile(  # compute the account's latest profile on the fly; nothing is written to the database here
+        self,
+        *,
+        account_id: str,
+        account_name: str,
+        identifiers: set[str],
+        scene: str,
+        window_days: int,
+        expense_type_scope: str,
+    ) -> EmployeeProfileLatestRead:
+        if scene != "operations":  # only the operations scene is computed; other scenes get an empty result with a reason
+            return EmployeeProfileLatestRead(
+                employee_id=account_id,
+                employee_name=account_name,
+                scene=scene,
+                window_days=window_days,
+                expense_type_scope=expense_type_scope,
+                empty_reason="当前账号未匹配员工目录，无法形成审批场景员工画像。",
+            )
+
+        runs = self._fetch_account_runs(identifiers, datetime.now(UTC) - timedelta(days=window_days))  # window starts window_days before now (UTC)
+        if not runs:  # nothing to score -> empty result with a reason
+            return EmployeeProfileLatestRead(
+                employee_id=account_id,
+                employee_name=account_name,
+                scene=scene,
+                window_days=window_days,
+                expense_type_scope=expense_type_scope,
+                empty_reason="当前账号暂无可统计的智能体运行记录。",
+            )
+
+        result = self._calculate_account_ai_usage_profile(
+            runs=runs,
+            window_days=window_days,
+            expense_type_scope=expense_type_scope,
+        )
+        payload = {  # dict form of the result, fed to the tag/radar builders and the response below
+            "profile_type": result.profile_type,
+            "profile_label": result.profile_label,
+            "score": result.profile_score,
+            "level": result.profile_level,
+            "metrics": result.metrics,
+            "top_contributors": result.top_contributors(),
+        }
+        tags = build_profile_tags([payload], scene=scene)
+        radar = build_profile_radar([payload], tags, scene=scene)
+
+        return EmployeeProfileLatestRead(
+            employee_id=account_id,
+            employee_name=account_name,
+            scene=scene,
+            window_days=window_days,
+            expense_type_scope=expense_type_scope,
+            calculated_at=datetime.now(UTC),
+            review_priority_score=0,  # account profiles always report a fixed score of 0 ...
+            review_priority_level="normal",  # ... and a fixed "normal" priority level
+            review_priority_label=LEVEL_LABELS["normal"],
+            profiles=[
+                EmployeeProfileRead(
+                    profile_type=payload["profile_type"],
+                    profile_label=PROFILE_LABELS.get(payload["profile_type"], payload["profile_type"]),  # falls back to the raw type when no label is registered
+                    score=payload["score"],
+                    level=payload["level"],
+                    level_label=LEVEL_LABELS.get(payload["level"], payload["level"]),  # falls back to the raw level when no label is registered
+                    metrics=payload["metrics"],
+                    top_contributors=payload["top_contributors"],
+                )
+            ],
+            profile_tags=tags,
+            radar=radar,
+        )
+
+    def _calculate_account_ai_usage_profile(  # score the runs as a weighted "ai_usage" profile; returns whatever evaluate_weighted_profile returns
+        self,
+        *,
+        runs: list[AgentRun],
+        window_days: int,
+        expense_type_scope: str,
+    ):
+        tool_calls = [tool for run in runs for tool in run.tool_calls]  # flatten tool calls across all runs
+        failed_calls = [
+            tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}  # any other status, including a missing one, counts as failed
+        ]
+        estimated_tokens = self._estimate_tokens(runs)
+        duration_ms = self._sum_agent_run_duration_ms(runs)
+        token_mode = "estimated_token_count" if estimated_tokens else "unavailable"  # "unavailable" when the estimate is 0
+
+        return evaluate_weighted_profile(
+            "ai_usage",
+            [
+                ProfileComponent(
+                    "ai_call_count_score",
+                    "AI 调用次数",
+                    score_by_bands(len(runs), [(0, 0), (3, 25), (10, 65), (20, 100)]),
+                    len(runs),
+                    "次",
+                    Decimal("0.25"),
+                ),
+                ProfileComponent(
+                    "token_cost_score",
+                    "Token 使用强度",
+                    score_by_bands(
+                        estimated_tokens, [(0, 0), (2000, 25), (8000, 65), (20000, 100)]
+                    ),
+                    estimated_tokens,
+                    "tokens",
+                    Decimal("0.25"),
+                ),
+                ProfileComponent(
+                    "ai_generated_claim_ratio_score",
+                    "AI 生成申请比例",
+                    score_by_bands(len(runs), [(0, 0), (2, 20), (8, 60), (16, 90)]),  # NOTE(review): scored from the run count, not an actual claim ratio — confirm this proxy is intended
+                    len(runs),
+                    "次",
+                    Decimal("0.20"),
+                ),
+                ProfileComponent(
+                    "failed_ai_call_score",
+                    "AI 调用失败",
+                    score_by_bands(len(failed_calls), [(0, 0), (1, 35), (3, 80)]),
+                    len(failed_calls),
+                    "次",
+                    Decimal("0.10"),  # NOTE(review): the four weights sum to 0.80 — confirm evaluate_weighted_profile copes with a total below 1
+                ),
+            ],
+            metrics={
+                "window_days": window_days,
+                "expense_type_scope": expense_type_scope,
+                "peer_sample_size": 0,  # fixed at 0 for account profiles
+                "ai_run_count": len(runs),
+                "tool_call_count": len(tool_calls),
+                "failed_tool_call_count": len(failed_calls),
+                "token_count_mode": token_mode,
+                "estimated_token_count": estimated_tokens,
+                "exact_token_count": None,  # only an estimate is produced here
+                "ai_run_duration_ms": duration_ms,
+                "ai_run_duration_mode": "elapsed_or_tool_call_fallback",
+            },
+        )
+
+    def _fetch_account_runs(self, identifiers: set[str], cutoff: datetime) -> list[AgentRun]:  # load runs (with tool calls) started at/after cutoff for the given user ids
+        normalized = {item for item in identifiers if str(item or "").strip()}  # drops blank identifiers; kept values are not stripped
+        if not normalized:  # no usable identifier -> skip the query entirely
+            return []
+        stmt = (
+            select(AgentRun)
+            .options(selectinload(AgentRun.tool_calls))  # eager-load tool calls, which the scoring code iterates per run
+            .where(AgentRun.started_at >= cutoff, AgentRun.user_id.in_(normalized))
+        )
+        return list(self.db.scalars(stmt).all())
--- a/server/src/app/services/employee_behavior_profile_helpers.py
+++ b/server/src/app/services/employee_behavior_profile_helpers.py
@@ -171,6 +171,22 @@ class EmployeeBehaviorProfileMetricHelpers:
            total += max(0, len(text) // 4)
        return total

+    def _sum_agent_run_duration_ms(self, runs: list[AgentRun]) -> int:  # total duration in ms over all runs (0 for an empty list)
+        return sum(self._agent_run_duration_ms(run) for run in runs)
+
+    def _agent_run_duration_ms(self, run: AgentRun) -> int:  # duration of one run in ms: elapsed time when usable, else the sum of its tool-call durations
+        if run.started_at is not None and run.finished_at is not None:
+            try:
+                if run.finished_at > run.started_at:  # a zero or negative elapsed time falls through to the tool-call fallback
+                    return min(
+                        int((run.finished_at - run.started_at).total_seconds() * 1000),
+                        24 * 60 * 60 * 1000,  # cap at 24 hours, expressed in ms
+                    )
+            except TypeError:  # presumably naive vs. aware datetimes that cannot be compared — TODO confirm
+                pass
+
+        return sum(max(0, int(tool.duration_ms or 0)) for tool in run.tool_calls)  # missing durations count as 0; negative ones are clamped to 0
+
    @staticmethod
    def _is_missing_value(value: Any) -> bool:
        text = str(value or "").strip()
--- a/server/src/app/services/employee_behavior_profile_service.py
+++ b/server/src/app/services/employee_behavior_profile_service.py
@@ -466,6 +466,7 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
            tool for tool in tool_calls if str(tool.status or "").lower() not in {"success", "ok"}
        ]
        estimated_tokens = self._estimate_tokens(runs)
+        duration_ms = self._sum_agent_run_duration_ms(runs)
        override_score = 0

        token_mode = "estimated_token_count" if estimated_tokens else "unavailable"
@@ -524,6 +525,8 @@ class EmployeeBehaviorProfileService(EmployeeBehaviorProfileMetricHelpers):
                "token_count_mode": token_mode,
                "estimated_token_count": estimated_tokens,
                "exact_token_count": None,
+                "ai_run_duration_ms": duration_ms,
+                "ai_run_duration_mode": "elapsed_or_tool_call_fallback",
            },
        )

--- a/server/src/app/services/expense_claim_attachment_operations.py
+++ b/server/src/app/services/expense_claim_attachment_operations.py
@@ -108,6 +108,7 @@ from app.services.expense_rule_runtime import (
    resolve_document_type_label,
 )
 from app.services.ocr import OcrService
+from app.services.receipt_folder import ReceiptFolderService


 class ExpenseClaimAttachmentOperationsMixin:
@@ -120,6 +121,7 @@ class ExpenseClaimAttachmentOperationsMixin:
        content: bytes,
        media_type: str | None,
        current_user: CurrentUserContext,
+        source_receipt_id: str = "",
    ) -> dict[str, Any] | None:
        claim, item = self._get_claim_item_or_raise(
            claim_id=claim_id,
@@ -240,6 +242,16 @@ class ExpenseClaimAttachmentOperationsMixin:
            "ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []],
        }
        self._attachment_storage.write_meta(file_path, meta)
+        ReceiptFolderService().save_linked_attachment(
+            file_path=file_path,
+            media_type=resolved_media_type,
+            document=ocr_document,
+            current_user=current_user,
+            claim_id=claim.id,
+            claim_no=claim.claim_no,
+            item_id=item.id,
+            source_receipt_id=source_receipt_id,
+        )

        self._sync_claim_from_items(claim)
        self.db.commit()
--- a/server/src/app/services/receipt_folder.py
+++ b/server/src/app/services/receipt_folder.py
@@ -0,0 +1,532 @@
+from __future__ import annotations
+
+import json
+import mimetypes
+import re
+import shutil
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+from uuid import uuid4
+
+from app.api.deps import CurrentUserContext
+from app.core.config import get_settings
+from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead
+from app.schemas.receipt_folder import (
+    ReceiptFolderDeleteResponse,
+    ReceiptFolderDetailRead,
+    ReceiptFolderFieldRead,
+    ReceiptFolderItemRead,
+    ReceiptFolderUpdate,
+)
+from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
+from app.services.ocr import SUPPORTED_SUFFIXES
+
+
+class ReceiptFolderService:
+    def __init__(self) -> None:  # self.root = <configured storage root>/receipt_folder, resolved to an absolute path
+        self.root = (get_settings().resolved_storage_root_dir / "receipt_folder").resolve()
+
+    def persist_ocr_batch(  # attach receipt-folder id/status/URLs to each OCR document, saving a new receipt where none exists yet
+        self,
+        *,
+        files: list[tuple[str, bytes, str | None]],  # (filename, content, media type) tuples, matched to documents by position
+        result: OcrRecognizeBatchRead,
+        current_user: CurrentUserContext,
+        receipt_ids: list[str] | None = None,  # optional existing receipt ids, also matched by position
+    ) -> OcrRecognizeBatchRead:
+        documents = list(result.documents or [])
+        enriched: list[OcrRecognizeDocumentRead] = []
+        for index, document in enumerate(documents):
+            if index >= len(files):  # no uploaded file at this position -> pass the document through unchanged
+                enriched.append(document)
+                continue
+            existing_receipt = self._resolve_existing_item(
+                receipt_ids[index] if receipt_ids and index < len(receipt_ids) else "",  # "" when no id was supplied for this position
+                current_user,
+            )
+            if existing_receipt is not None:  # reuse the already stored receipt instead of saving another copy
+                enriched.append(
+                    document.model_copy(
+                        update={
+                            "receipt_id": existing_receipt.id,
+                            "receipt_status": existing_receipt.status,
+                            "receipt_preview_url": existing_receipt.preview_url,
+                            "receipt_source_url": existing_receipt.source_url,
+                        }
+                    )
+                )
+                continue
+            filename, content, media_type = files[index]
+            if not self._should_persist_source(filename, content):  # helper vetoes storage -> document is returned without receipt fields
+                enriched.append(document)
+                continue
+            receipt = self.save_receipt(
+                filename=filename,
+                content=content,
+                media_type=media_type or document.media_type,  # fall back to the media type reported on the OCR document
+                document=document,
+                current_user=current_user,
+            )
+            enriched.append(
+                document.model_copy(
+                    update={
+                        "receipt_id": receipt.id,
+                        "receipt_status": receipt.status,
+                        "receipt_preview_url": receipt.preview_url,
+                        "receipt_source_url": receipt.source_url,
+                    }
+                )
+            )
+        return result.model_copy(update={"documents": enriched})  # returns a copy; `result` itself is not modified
+
+    def save_receipt(  # write the source file, a preview asset and a meta record into a new per-owner receipt directory
+        self,
+        *,
+        filename: str,
+        content: bytes,
+        media_type: str | None,
+        document: Any | None,
+        current_user: CurrentUserContext,
+        linked_claim_id: str = "",
+        linked_claim_no: str = "",
+        linked_item_id: str = "",
+    ) -> ReceiptFolderItemRead:
+        owner_key = self._owner_key(current_user)
+        receipt_id = str(uuid4())  # a fresh random id for every saved receipt
+        receipt_dir = self._owner_root(owner_key) / receipt_id
+        receipt_dir.mkdir(parents=True, exist_ok=True)
+
+        normalized_name = self.normalize_filename(filename)
+        source_path = receipt_dir / normalized_name
+        source_path.write_bytes(content)
+        resolved_media_type = self.resolve_media_type(normalized_name, media_type)
+        preview_meta = self._write_preview_asset(
+            receipt_dir=receipt_dir,
+            source_path=source_path,
+            media_type=resolved_media_type,
+            document=document,
+        )
+        now = datetime.now(UTC)
+        linked = bool(str(linked_claim_id or "").strip())  # a non-blank claim id marks the receipt as linked
+        meta = {
+            "id": receipt_id,
+            "owner_key": owner_key,
+            "file_name": normalized_name,
+            "source_file_name": normalized_name,
+            "media_type": resolved_media_type,
+            "size_bytes": len(content),
+            "uploaded_at": now.isoformat(),
+            "status": "linked" if linked else "unlinked",
+            "linked_claim_id": str(linked_claim_id or "").strip(),
+            "linked_claim_no": str(linked_claim_no or "").strip(),
+            "linked_item_id": str(linked_item_id or "").strip(),
+            "linked_at": now.isoformat() if linked else "",  # empty string when the receipt is not linked
+            **self._build_document_meta(document),  # later keys win: document meta can override the keys above ...
+            **preview_meta,  # ... and preview meta can override both
+        }
+        self._write_meta(receipt_dir, meta)
+        return self._build_item(meta)
+
+    def save_linked_attachment(  # record a claim attachment in the receipt folder: mark an existing receipt linked, else save a new linked copy
+        self,
+        *,
+        file_path: Path,
+        media_type: str,
+        document: Any | None,
+        current_user: CurrentUserContext,
+        claim_id: str,
+        claim_no: str,
+        item_id: str,
+        source_receipt_id: str = "",
+    ) -> ReceiptFolderItemRead | None:
+        if not file_path.exists() or not file_path.is_file():  # nothing to record when the attachment file is missing; returns None
+            return None
+        if str(source_receipt_id or "").strip():
+            try:
+                return self.mark_receipt_linked(
+                    receipt_id=source_receipt_id,
+                    current_user=current_user,
+                    claim_id=claim_id,
+                    claim_no=claim_no,
+                    item_id=item_id,
+                )
+            except FileNotFoundError:  # source receipt not found -> fall back to saving a new linked receipt below
+                pass
+        storage_root = get_settings().resolved_storage_root_dir  # NOTE(review): not .resolve()d here while file_path is — confirm this setting is already a resolved path
+        try:
+            file_path.resolve().relative_to(storage_root)  # only files located under the storage root are copied
+        except ValueError:  # relative_to raises when file_path is outside storage_root; returns None
+            return None
+        return self.save_receipt(
+            filename=file_path.name,
+            content=file_path.read_bytes(),
+            media_type=media_type,
+            document=document,
+            current_user=current_user,
+            linked_claim_id=claim_id,
+            linked_claim_no=claim_no,
+            linked_item_id=item_id,
+        )
+
+    def mark_receipt_linked(  # rewrite an existing receipt's meta as linked to the given claim/item and return the updated item
+        self,
+        *,
+        receipt_id: str,
+        current_user: CurrentUserContext,
+        claim_id: str,
+        claim_no: str,
+        item_id: str,
+    ) -> ReceiptFolderItemRead:
+        owner_key = self._owner_key(current_user)
+        receipt_dir = self._receipt_dir(owner_key, receipt_id)  # looked up under the current user's owner key
+        meta = self._read_meta(receipt_dir)
+        meta["status"] = "linked"
+        meta["linked_claim_id"] = str(claim_id or "").strip()
+        meta["linked_claim_no"] = str(claim_no or "").strip()
+        meta["linked_item_id"] = str(item_id or "").strip()
+        meta["linked_at"] = datetime.now(UTC).isoformat()  # overwritten on every call, replacing any earlier link time
+        self._write_meta(receipt_dir, meta)
+        return self._build_item(meta)
+
+    def list_receipts(  # return the current owner's receipts that match status_filter, newest upload first
+        self,
+        *,
+        current_user: CurrentUserContext,
+        status_filter: str = "all",
+    ) -> list[ReceiptFolderItemRead]:
+        status_filter = str(status_filter or "all").strip().lower()  # None/empty becomes "all"; compared in lowercase
+        items = [
+            self._build_item(meta)
+            for meta in self._iter_owner_meta(self._owner_key(current_user))
+            if self._matches_status(meta, status_filter)
+        ]
+        return sorted(items, key=lambda item: item.uploaded_at or datetime.min.replace(tzinfo=UTC), reverse=True)  # items without uploaded_at sort last
+
+    def get_receipt(self, receipt_id: str, current_user: CurrentUserContext) -> ReceiptFolderDetailRead:
+        """Return the detail view of one of the caller's receipts.
+
+        The detail view is the list-item view extended with OCR engine/model
+        info, the recognised text, classification data, the structured fields
+        and the raw metadata dictionary.
+        """
+        meta = self._read_receipt_meta(receipt_id, current_user)
+        summary_values = self._build_item(meta).model_dump()
+
+        line_total = int(meta.get("ocr_line_count") or 0)
+        # A stored page count below one is reported as a single page.
+        page_total = max(1, int(meta.get("page_count") or 1))
+        confidence = float(meta.get("ocr_classification_confidence") or 0.0)
+        evidence = []
+        for entry in list(meta.get("ocr_classification_evidence") or []):
+            if str(entry).strip():
+                evidence.append(str(entry))
+
+        return ReceiptFolderDetailRead(
+            **summary_values,
+            engine=str(meta.get("engine") or ""),
+            model=str(meta.get("model") or ""),
+            ocr_text=str(meta.get("ocr_text") or ""),
+            line_count=line_total,
+            page_count=page_total,
+            classification_confidence=confidence,
+            classification_evidence=evidence,
+            fields=self._resolve_fields(meta),
+            raw_meta=meta,
+        )
+
+    def update_receipt(
+        self,
+        *,
+        receipt_id: str,
+        payload: ReceiptFolderUpdate,
+        current_user: CurrentUserContext,
+    ) -> ReceiptFolderDetailRead:
+        """Apply a partial update to one of the caller's receipts.
+
+        Only keys that were explicitly set on ``payload`` and are not ``None``
+        are applied. Classification/summary keys are written directly onto the
+        metadata; ``amount``, ``document_date`` and ``merchant_name`` are stored
+        under ``editable_fields``; a provided ``fields`` list replaces
+        ``document_fields`` wholesale.
+
+        Returns:
+            The detail view re-read from storage after the write.
+
+        Raises:
+            FileNotFoundError: Propagated from the receipt lookup or metadata read.
+        """
+        owner_key = self._owner_key(current_user)
+        receipt_dir = self._receipt_dir(owner_key, receipt_id)
+        meta = self._read_meta(receipt_dir)
+        # exclude_unset keeps omitted keys out of ``updates`` entirely.
+        updates = payload.model_dump(exclude_unset=True)
+        for key in ("document_type", "document_type_label", "scene_code", "scene_label", "summary"):
+            if key in updates and updates[key] is not None:
+                meta[key] = str(updates[key] or "").strip()
+
+        # Copy the stored overrides so they are only replaced on the metadata once complete.
+        editable = dict(meta.get("editable_fields") or {})
+        for key in ("amount", "document_date", "merchant_name"):
+            if key in updates and updates[key] is not None:
+                editable[key] = str(updates[key] or "").strip()
+        if "fields" in updates and updates["fields"] is not None:
+            # Read from the payload object (not the dumped dict) and normalise each entry to a plain dict.
+            meta["document_fields"] = [
+                field.model_dump() if isinstance(field, ReceiptFolderFieldRead) else dict(field)
+                for field in payload.fields or []
+            ]
+        meta["editable_fields"] = editable
+        meta["updated_at"] = datetime.now(UTC).isoformat()
+        self._write_meta(receipt_dir, meta)
+        return self.get_receipt(receipt_id, current_user)
+
+    def delete_receipt(
+        self,
+        *,
+        receipt_id: str,
+        current_user: CurrentUserContext,
+    ) -> ReceiptFolderDeleteResponse:
+        """Remove one of the caller's receipts together with its whole directory.
+
+        Raises:
+            FileNotFoundError: Propagated from the receipt lookup when the
+                receipt does not exist for this user.
+        """
+        target_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
+        shutil.rmtree(target_dir)
+        return ReceiptFolderDeleteResponse(receipt_id=receipt_id, message="票据已删除。")
+
+    def resolve_source(self, receipt_id: str, current_user: CurrentUserContext) -> tuple[Path, str, str]:
+        """Locate the stored source file of one of the caller's receipts.
+
+        Returns:
+            A ``(path, media_type, download_name)`` tuple. The download name is
+            the stored ``file_name`` when present, otherwise the file's own name.
+
+        Raises:
+            FileNotFoundError: If the receipt is unknown, the metadata names no
+                source file, or the named path is not an existing regular file.
+        """
+        meta = self._read_receipt_meta(receipt_id, current_user)
+        receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
+        file_name = str(meta.get("source_file_name") or meta.get("file_name") or "").strip()
+        # An empty name would make ``receipt_dir / ""`` resolve to the receipt
+        # directory itself, which exists but is not a servable file.
+        if not file_name:
+            raise FileNotFoundError("Receipt source not found")
+        path = self._assert_child(receipt_dir / file_name)
+        if not path.is_file():
+            raise FileNotFoundError("Receipt source not found")
+        media_type = self.resolve_media_type(path.name, str(meta.get("media_type") or ""))
+        return path, media_type, str(meta.get("file_name") or path.name)
+
+    def resolve_preview(self, receipt_id: str, current_user: CurrentUserContext) -> tuple[Path, str, str]:
+        """Locate the file to show as a preview for one of the caller's receipts.
+
+        A dedicated preview file recorded in the metadata wins when it exists
+        on disk; otherwise the source file is used, provided its media type is
+        previewable.
+
+        Returns:
+            A ``(path, media_type, file_name)`` tuple.
+
+        Raises:
+            FileNotFoundError: If neither a preview file nor a previewable
+                source is available.
+        """
+        meta = self._read_receipt_meta(receipt_id, current_user)
+        receipt_dir = self._receipt_dir(self._owner_key(current_user), receipt_id)
+
+        stored_name = str(meta.get("preview_file_name") or "").strip()
+        candidate = self._assert_child(receipt_dir / stored_name) if stored_name else None
+        if candidate is not None and candidate.exists():
+            declared_type = str(meta.get("preview_media_type") or "")
+            resolved_type = self.resolve_media_type(candidate.name, declared_type)
+            return (candidate, resolved_type, candidate.name)
+
+        # No usable preview asset: fall back to the original upload.
+        source_path, source_media_type, source_name = self.resolve_source(receipt_id, current_user)
+        if not self._is_previewable(source_media_type):
+            raise FileNotFoundError("Receipt preview not found")
+        return source_path, source_media_type, source_name
+
+    @staticmethod
+    def normalize_filename(filename: str | None) -> str:
+        """Reduce an uploaded file name to a safe single path component.
+
+        Directory parts are dropped, every run of characters outside word
+        characters, ``.``, ``-`` and CJK ideographs becomes one ``_``, and
+        leading/trailing dots and underscores are trimmed. An empty result
+        falls back to ``receipt.bin``.
+        """
+        base_name = Path(str(filename or "").strip()).name
+        cleaned = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", base_name)
+        cleaned = cleaned.strip("._")
+        if cleaned:
+            return cleaned
+        return "receipt.bin"
+
+    @staticmethod
+    def resolve_media_type(filename: str, fallback: str | None = None) -> str:
+        """Pick a media type: guessed from the file name, else ``fallback``, else a generic binary type."""
+        guessed, _encoding = mimetypes.guess_type(filename)
+        return str(guessed or fallback or "application/octet-stream")
+
+    def _owner_root(self, owner_key: str) -> Path:
+        """Return the validated directory that holds all receipts of one owner."""
+        owner_dir = self.root / owner_key
+        return self._assert_child(owner_dir)
+
+    def _receipt_dir(self, owner_key: str, receipt_id: str) -> Path:
+        """Return the existing directory of a receipt belonging to ``owner_key``.
+
+        Raises:
+            FileNotFoundError: If the id is not 32-36 hex/dash characters or no
+                such directory exists for this owner.
+        """
+        receipt_key = str(receipt_id or "").strip()
+        # Rejecting anything that is not hex digits or dashes keeps path separators out of the join below.
+        if re.fullmatch(r"[0-9a-fA-F-]{32,36}", receipt_key) is None:
+            raise FileNotFoundError("Receipt not found")
+        candidate = self._assert_child(self._owner_root(owner_key) / receipt_key)
+        if not (candidate.exists() and candidate.is_dir()):
+            raise FileNotFoundError("Receipt not found")
+        return candidate
+
+    def _assert_child(self, path: Path) -> Path:
+        """Resolve ``path`` and confirm it lies inside the storage root.
+
+        The root directory is created on demand before the check.
+
+        Raises:
+            FileNotFoundError: If the resolved path is not under ``self.root``.
+        """
+        self.root.mkdir(parents=True, exist_ok=True)
+        candidate = path.resolve()
+        try:
+            candidate.relative_to(self.root)
+        except ValueError as exc:
+            raise FileNotFoundError("Receipt path is invalid") from exc
+        else:
+            return candidate
+
+    @staticmethod
+    def _owner_key(current_user: CurrentUserContext) -> str:
+        """Derive the per-user storage directory name from the login identity.
+
+        Uses the username, then the display name, then ``anonymous``; the value
+        is lower-cased and sanitised with the same character rules as file names.
+        """
+        identity = current_user.username or current_user.name or "anonymous"
+        lowered = str(identity).strip().lower()
+        slug = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", lowered).strip("._")
+        return slug if slug else "anonymous"
+
+    @staticmethod
+    def _should_persist_source(filename: str, content: bytes) -> bool:
+        """Tell whether an upload is worth storing: non-empty content with a supported file suffix."""
+        if content:
+            suffix = Path(str(filename or "")).suffix.lower()
+            return suffix in SUPPORTED_SUFFIXES
+        return False
+
+    def _write_preview_asset(
+        self,
+        *,
+        receipt_dir: Path,
+        source_path: Path,
+        media_type: str,
+        document: Any | None,
+    ) -> dict[str, Any]:
+        """Decide how a receipt can be previewed and return the matching metadata keys.
+
+        If ``document`` carries a decodable ``preview_data_url``, its content is
+        written into ``receipt_dir`` as ``preview<suffix>`` and described as an
+        image preview. Otherwise the source file itself is used when its media
+        type is an image or a PDF. If neither applies the receipt is marked as
+        not previewable.
+
+        Returns:
+            A dict with ``previewable``, ``preview_kind``, ``preview_file_name``
+            and ``preview_media_type``.
+        """
+        preview_data_url = str(getattr(document, "preview_data_url", "") or "").strip()
+        # Presumably returns None for anything that is not a valid data URL — confirm against the helper.
+        decoded = ExpenseClaimAttachmentPresentation.decode_data_url(preview_data_url)
+        if decoded is not None:
+            preview_media_type, preview_content = decoded
+            suffix = mimetypes.guess_extension(preview_media_type) or ".bin"
+            preview_name = f"preview{suffix}"
+            # NOTE(review): if the source file were itself stored as "preview<suffix>" this write
+            # would overwrite it — confirm how the caller names the source file.
+            preview_path = receipt_dir / preview_name
+            preview_path.write_bytes(preview_content)
+            return {
+                "previewable": True,
+                "preview_kind": "image",
+                "preview_file_name": preview_name,
+                "preview_media_type": preview_media_type,
+            }
+        if self._is_previewable(media_type):
+            # No separate asset: the preview points back at the source file.
+            return {
+                "previewable": True,
+                "preview_kind": "image" if media_type.startswith("image/") else "pdf",
+                "preview_file_name": source_path.name,
+                "preview_media_type": media_type,
+            }
+        return {
+            "previewable": False,
+            "preview_kind": "",
+            "preview_file_name": "",
+            "preview_media_type": "",
+        }
+
+    @staticmethod
+    def _is_previewable(media_type: str) -> bool:
+        """Tell whether a media type can be previewed directly: any image type, or PDF."""
+        kind = str(media_type or "")
+        if kind == "application/pdf":
+            return True
+        return kind.startswith("image/")
+
+    @staticmethod
+    def _build_document_meta(document: Any | None) -> dict[str, Any]:
+        """Flatten an OCR document result into the metadata keys stored with a receipt.
+
+        Attributes are read with ``getattr`` so ``None`` or a partial document
+        yields defaults. Structured fields may be dicts or objects exposing
+        ``key``/``label``/``value``; entries lacking a label or a value are
+        dropped. ``editable_fields`` always starts empty.
+        """
+
+        def read_part(field: Any, name: str) -> str:
+            # Dict-shaped and attribute-shaped fields are normalised the same way.
+            raw = field.get(name) if isinstance(field, dict) else getattr(field, name, "")
+            return str(raw or "").strip()
+
+        def text(name: str, fallback: str = "") -> str:
+            return str(getattr(document, name, "") or fallback)
+
+        def non_blank(name: str) -> list[str]:
+            return [str(entry) for entry in list(getattr(document, name, []) or []) if str(entry).strip()]
+
+        normalized_fields = [
+            {part: read_part(field, part) for part in ("key", "label", "value")}
+            for field in list(getattr(document, "document_fields", []) or [])
+        ]
+        kept_fields = [entry for entry in normalized_fields if entry["label"] and entry["value"]]
+
+        return {
+            "engine": text("engine"),
+            "model": text("model"),
+            "ocr_text": text("text"),
+            "summary": text("summary"),
+            "ocr_avg_score": float(getattr(document, "avg_score", 0.0) or 0.0),
+            "ocr_line_count": int(getattr(document, "line_count", 0) or 0),
+            "page_count": int(getattr(document, "page_count", 1) or 1),
+            "document_type": text("document_type", "other"),
+            "document_type_label": text("document_type_label", "其他单据"),
+            "scene_code": text("scene_code", "other"),
+            "scene_label": text("scene_label", "其他票据"),
+            "ocr_classification_source": text("classification_source"),
+            "ocr_classification_confidence": float(getattr(document, "classification_confidence", 0.0) or 0.0),
+            "ocr_classification_evidence": non_blank("classification_evidence"),
+            "document_fields": kept_fields,
+            "editable_fields": {},
+            "ocr_warnings": non_blank("warnings"),
+        }
+
+    def _iter_owner_meta(self, owner_key: str) -> list[dict[str, Any]]:
+        """Collect the metadata of every readable receipt stored for one owner.
+
+        Receipts whose ``meta.json`` cannot be read (corrupt content, or removed
+        between the directory scan and the read) are skipped so that a single
+        bad entry does not make the whole listing fail. Empty metadata is
+        skipped as well.
+
+        Returns:
+            A list of metadata dicts, in directory-scan order; empty when the
+            owner has no storage directory yet.
+        """
+        owner_root = self._owner_root(owner_key)
+        if not owner_root.exists():
+            return []
+        metas = []
+        for meta_path in owner_root.glob("*/meta.json"):
+            try:
+                meta = self._read_meta(meta_path.parent)
+            except FileNotFoundError:
+                # _read_meta reports both missing and undecodable metadata this way.
+                continue
+            if meta:
+                metas.append(meta)
+        return metas
+
+    def _read_receipt_meta(self, receipt_id: str, current_user: CurrentUserContext) -> dict[str, Any]:
+        """Load the metadata of a receipt that belongs to the given user."""
+        owner_key = self._owner_key(current_user)
+        receipt_dir = self._receipt_dir(owner_key, receipt_id)
+        return self._read_meta(receipt_dir)
+
+    def _resolve_existing_item(
+        self,
+        receipt_id: str | None,
+        current_user: CurrentUserContext,
+    ) -> ReceiptFolderItemRead | None:
+        """Return the list-item view of an existing receipt, or ``None`` if it cannot be found.
+
+        A blank id and a lookup that raises ``FileNotFoundError`` both yield ``None``.
+        """
+        receipt_key = str(receipt_id or "").strip()
+        if receipt_key:
+            try:
+                meta = self._read_receipt_meta(receipt_key, current_user)
+                return self._build_item(meta)
+            except FileNotFoundError:
+                return None
+        return None
+
+    @staticmethod
+    def _meta_path(receipt_dir: Path) -> Path:
+        """Return the location of the ``meta.json`` file inside a receipt directory."""
+        return receipt_dir.joinpath("meta.json")
+
+    def _read_meta(self, receipt_dir: Path) -> dict[str, Any]:
+        """Load and parse the ``meta.json`` of a receipt directory.
+
+        Returns:
+            The parsed metadata dict, or an empty dict when the file holds
+            valid JSON that is not an object.
+
+        Raises:
+            FileNotFoundError: If the file is missing, unreadable, not valid
+                UTF-8, or not valid JSON.
+        """
+        meta_path = self._meta_path(receipt_dir)
+        if not meta_path.exists():
+            raise FileNotFoundError("Receipt not found")
+        try:
+            payload = json.loads(meta_path.read_text(encoding="utf-8"))
+        except (OSError, ValueError) as exc:
+            # ValueError covers json.JSONDecodeError and also UnicodeDecodeError,
+            # which read_text raises for bytes that are not valid UTF-8.
+            raise FileNotFoundError("Receipt metadata not found") from exc
+        return payload if isinstance(payload, dict) else {}
+
+    def _write_meta(self, receipt_dir: Path, payload: dict[str, Any]) -> None:
+        """Serialise ``payload`` as indented UTF-8 JSON into the receipt's ``meta.json``."""
+        target = self._meta_path(receipt_dir)
+        serialized = json.dumps(payload, ensure_ascii=False, indent=2)
+        target.write_text(serialized, encoding="utf-8")
+
+    @staticmethod
+    def _matches_status(meta: dict[str, Any], status_filter: str) -> bool:
+        """Tell whether a receipt passes the status filter.
+
+        An empty filter or ``all`` accepts everything; otherwise the stored
+        status (``unlinked`` when absent) must equal the filter after trimming
+        and lower-casing.
+        """
+        if status_filter not in {"", "all"}:
+            stored = str(meta.get("status") or "unlinked").strip().lower()
+            return stored == status_filter
+        return True
+
+    def _build_item(self, meta: dict[str, Any]) -> ReceiptFolderItemRead:
+        """Build the list-item view of a receipt from its stored metadata.
+
+        Missing or empty metadata values fall back to the defaults written
+        inline below. ``amount``, ``document_date`` and ``merchant_name`` prefer
+        a user-edited value and otherwise come from a matching OCR field.
+        """
+        receipt_id = str(meta.get("id") or "").strip()
+        # A missing or blank status is treated as "unlinked".
+        status_value = str(meta.get("status") or "unlinked").strip() or "unlinked"
+        return ReceiptFolderItemRead(
+            id=receipt_id,
+            file_name=str(meta.get("file_name") or ""),
+            media_type=str(meta.get("media_type") or "application/octet-stream"),
+            size_bytes=int(meta.get("size_bytes") or 0),
+            status=status_value,
+            # Any status other than "linked" is labelled as not linked.
+            status_label="已关联" if status_value == "linked" else "未关联",
+            document_type=str(meta.get("document_type") or "other"),
+            document_type_label=str(meta.get("document_type_label") or "其他单据"),
+            scene_code=str(meta.get("scene_code") or "other"),
+            scene_label=str(meta.get("scene_label") or "其他票据"),
+            summary=str(meta.get("summary") or ""),
+            amount=self._resolve_editable_or_field(meta, "amount", labels=("金额", "价税合计", "票价")),
+            document_date=self._resolve_editable_or_field(meta, "document_date", labels=("日期", "开票日期", "乘车日期")),
+            merchant_name=self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方")),
+            avg_score=float(meta.get("ocr_avg_score") or 0.0),
+            uploaded_at=self._parse_datetime(meta.get("uploaded_at")),
+            linked_at=self._parse_datetime(meta.get("linked_at")),
+            linked_claim_id=str(meta.get("linked_claim_id") or ""),
+            linked_claim_no=str(meta.get("linked_claim_no") or ""),
+            previewable=bool(meta.get("previewable")),
+            preview_kind=str(meta.get("preview_kind") or ""),
+            # URLs are left empty when there is no id (or, for the preview, when nothing is previewable).
+            preview_url=f"/receipt-folder/{receipt_id}/preview" if bool(meta.get("previewable")) and receipt_id else "",
+            source_url=f"/receipt-folder/{receipt_id}/source" if receipt_id else "",
+            warnings=[str(value) for value in list(meta.get("ocr_warnings") or []) if str(value).strip()],
+        )
+
+    def _resolve_fields(self, meta: dict[str, Any]) -> list[ReceiptFolderFieldRead]:
+        """Convert the stored ``document_fields`` entries into field models.
+
+        Entries that are not dicts, or whose label is blank, are left out.
+        """
+        resolved = []
+        for entry in list(meta.get("document_fields") or []):
+            if not isinstance(entry, dict):
+                continue
+            if not str(entry.get("label") or "").strip():
+                continue
+            resolved.append(
+                ReceiptFolderFieldRead(
+                    key=str(entry.get("key") or ""),
+                    label=str(entry.get("label") or ""),
+                    value=str(entry.get("value") or ""),
+                )
+            )
+        return resolved
+
+    def _resolve_editable_or_field(self, meta: dict[str, Any], key: str, *, labels: tuple[str, ...]) -> str:
+        """Resolve a display value, preferring a user edit over the OCR fields.
+
+        A non-blank entry under ``editable_fields[key]`` wins. Otherwise the
+        value of the first structured field whose label is one of ``labels`` or
+        whose key equals ``key`` is returned, or an empty string if none match.
+        """
+        overrides = meta.get("editable_fields")
+        if isinstance(overrides, dict):
+            edited = str(overrides.get(key) or "").strip()
+            if edited:
+                return edited
+        accepted_labels = set(labels)
+        candidates = [
+            field.value
+            for field in self._resolve_fields(meta)
+            if field.label in accepted_labels or field.key == key
+        ]
+        return candidates[0] if candidates else ""
+
+    @staticmethod
+    def _parse_datetime(value: Any) -> datetime | None:
+        """Parse a stored ISO-8601 timestamp into a timezone-aware datetime.
+
+        Timestamps without an offset are interpreted as UTC. This keeps every
+        parsed value comparable with the aware fallback used when receipts are
+        sorted by upload time; comparing naive and aware datetimes raises
+        ``TypeError``.
+
+        Returns:
+            The parsed datetime, or ``None`` when the value is empty or not a
+            valid ISO-8601 string.
+        """
+        raw = str(value or "").strip()
+        if not raw:
+            return None
+        try:
+            parsed = datetime.fromisoformat(raw)
+        except ValueError:
+            return None
+        if parsed.tzinfo is None:
+            # Timestamps written by this service carry an offset; a naive one is assumed to be UTC.
+            return parsed.replace(tzinfo=UTC)
+        return parsed
--- a/server/tests/test_employee_behavior_profile_service.py
+++ b/server/tests/test_employee_behavior_profile_service.py
@@ -264,6 +264,74 @@ def test_current_employee_profile_endpoint_resolves_login_user() -> None:
    payload = response.json()
    assert payload["employee_id"] == "emp-main"
    assert {item["profile_type"] for item in payload["profiles"]} >= {"expense", "ai_usage"}
+    ai_profile = next(item for item in payload["profiles"] if item["profile_type"] == "ai_usage")
+    assert ai_profile["metrics"]["ai_run_duration_ms"] == 120
+    assert payload["profile_tags"]
+    assert payload["radar"]["dimensions"]
+
+
+def test_current_admin_profile_endpoint_returns_account_usage_profile() -> None:
+    """The "me/latest" profile endpoint returns an AI-usage-only profile for the admin login.
+
+    Twelve successful agent runs are recorded for user "admin"; the response is
+    expected to be keyed by the account name, carry no empty reason, and expose
+    a single ``ai_usage`` profile with the run count and total run duration.
+    """
+    session_factory = build_session_factory()
+    with session_factory() as db:
+        seed_profile_data(db)
+        now = datetime.now(UTC)
+        # Twelve runs inside the 90-day window, each lasting two seconds.
+        for index in range(12):
+            run_id = f"run-admin-usage-{index}"
+            started_at = now - timedelta(days=1, minutes=index)
+            db.add(
+                AgentRun(
+                    run_id=run_id,
+                    agent="user_agent",
+                    source="user_message",
+                    user_id="admin",
+                    status="success",
+                    result_summary="管理员查看运行概览。",
+                    started_at=started_at,
+                    finished_at=started_at + timedelta(seconds=2),
+                    tool_calls=[
+                        AgentToolCall(
+                            run_id=run_id,
+                            tool_type="database",
+                            tool_name="agent_runs.list",
+                            request_json={"limit": 20},
+                            response_json={"ok": True},
+                            status="success",
+                            duration_ms=120,
+                        )
+                    ],
+                )
+            )
+        db.commit()
+
+    app = create_app()
+
+    # Route the app's database dependency to the in-test session factory.
+    def override_db() -> Generator[Session, None, None]:
+        db = session_factory()
+        try:
+            yield db
+        finally:
+            db.close()
+
+    app.dependency_overrides[get_db] = override_db
+    client = TestClient(app)
+    response = client.get(
+        "/api/v1/employee-profiles/me/latest",
+        params={
+            "scene": "operations",
+            "window_days": 90,
+            "expense_type_scope": "overall",
+        },
+        headers={"x-auth-username": "admin", "x-auth-name": "admin", "x-auth-is-admin": "true"},
+    )
+
+    assert response.status_code == 200
+    payload = response.json()
+    # Presumably "admin" is not matched to an employee by the seeded data, so the account name is echoed back — confirm.
+    assert payload["employee_id"] == "admin"
+    assert payload["empty_reason"] == ""
+    assert [item["profile_type"] for item in payload["profiles"]] == ["ai_usage"]
+    metrics = payload["profiles"][0]["metrics"]
+    assert metrics["ai_run_count"] == 12
+    # 24000 ms is consistent with 12 runs x 2 s between started_at and finished_at.
+    assert metrics["ai_run_duration_ms"] == 24000
    assert payload["profile_tags"]
    assert payload["radar"]["dimensions"]

--- a/server/tests/test_ocr_endpoints.py
+++ b/server/tests/test_ocr_endpoints.py
@@ -8,6 +8,7 @@ from sqlalchemy.orm import Session, sessionmaker
 from sqlalchemy.pool import StaticPool

 from app.api.deps import get_db
+from app.core.config import get_settings
 from app.db.base import Base
 from app.main import create_app
 from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeFieldRead, OcrRecognizeLineRead
@@ -35,7 +36,7 @@ def build_client() -> TestClient:
    return TestClient(app)


-def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
+def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch, tmp_path) -> None:
    def fake_recognize(
        self,
        files: list[tuple[str, bytes, str | None]],
@@ -76,21 +77,84 @@ def test_ocr_recognize_endpoint_returns_structured_payload(monkeypatch) -> None:
            ],
        )

+    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
+    get_settings.cache_clear()
    monkeypatch.setattr(OcrService, "recognize_files", fake_recognize)
-    client = build_client()
+    try:
+        client = build_client()
+        auth_headers = {"x-auth-username": "pytest", "x-auth-name": "Py Test"}

-    response = client.post(
-        "/api/v1/ocr/recognize",
-        headers={"x-auth-username": "pytest", "x-auth-name": "Py Test"},
-        files=[("files", ("invoice.png", b"fake-image", "image/png"))],
-    )
+        response = client.post(
+            "/api/v1/ocr/recognize",
+            headers=auth_headers,
+            files=[("files", ("invoice.png", b"fake-image", "image/png"))],
+        )

-    assert response.status_code == 200
-    payload = response.json()
-    assert payload["engine"] == "paddleocr_mobile"
-    assert payload["success_count"] == 1
-    assert payload["documents"][0]["filename"] == "invoice.png"
-    assert payload["documents"][0]["summary"] == "增值税电子发票，金额 100 元。"
-    assert payload["documents"][0]["document_type"] == "vat_invoice"
-    assert payload["documents"][0]["document_type_label"] == "增值税发票"
-    assert payload["documents"][0]["document_fields"][0]["label"] == "金额"
+        assert response.status_code == 200
+        payload = response.json()
+        document = payload["documents"][0]
+        assert payload["engine"] == "paddleocr_mobile"
+        assert payload["success_count"] == 1
+        assert document["filename"] == "invoice.png"
+        assert document["summary"] == "增值税电子发票，金额 100 元。"
+        assert document["document_type"] == "vat_invoice"
+        assert document["document_type_label"] == "增值税发票"
+        assert document["document_fields"][0]["label"] == "金额"
+        assert document["receipt_id"]
+        assert document["receipt_status"] == "unlinked"
+        assert document["receipt_preview_url"].endswith(f"/receipt-folder/{document['receipt_id']}/preview")
+        assert document["receipt_source_url"].endswith(f"/receipt-folder/{document['receipt_id']}/source")
+
+        receipt_id = document["receipt_id"]
+        list_response = client.get("/api/v1/receipt-folder?status=unlinked", headers=auth_headers)
+        assert list_response.status_code == 200
+        receipt_list = list_response.json()
+        assert len(receipt_list) == 1
+        assert receipt_list[0]["id"] == receipt_id
+        assert receipt_list[0]["amount"] == "100元"
+
+        repeated_response = client.post(
+            "/api/v1/ocr/recognize",
+            headers=auth_headers,
+            data={"receipt_ids": receipt_id},
+            files=[("files", ("invoice.png", b"fake-image", "image/png"))],
+        )
+        assert repeated_response.status_code == 200
+        repeated_document = repeated_response.json()["documents"][0]
+        assert repeated_document["receipt_id"] == receipt_id
+
+        all_receipts_response = client.get("/api/v1/receipt-folder?status=all", headers=auth_headers)
+        assert all_receipts_response.status_code == 200
+        assert len(all_receipts_response.json()) == 1
+
+        detail_response = client.get(f"/api/v1/receipt-folder/{receipt_id}", headers=auth_headers)
+        assert detail_response.status_code == 200
+        detail_payload = detail_response.json()
+        assert detail_payload["file_name"] == "invoice.png"
+        assert detail_payload["fields"][0]["label"] == "金额"
+
+        update_response = client.patch(
+            f"/api/v1/receipt-folder/{receipt_id}",
+            headers=auth_headers,
+            json={
+                "document_type_label": "电子发票",
+                "amount": "108元",
+                "fields": [{"key": "amount", "label": "金额", "value": "108元"}],
+            },
+        )
+        assert update_response.status_code == 200
+        assert update_response.json()["document_type_label"] == "电子发票"
+        assert update_response.json()["amount"] == "108元"
+
+        preview_response = client.get(f"/api/v1/receipt-folder/{receipt_id}/preview", headers=auth_headers)
+        assert preview_response.status_code == 200
+        assert preview_response.content == b"fake-image"
+
+        delete_response = client.delete(f"/api/v1/receipt-folder/{receipt_id}", headers=auth_headers)
+        assert delete_response.status_code == 200
+        assert delete_response.json()["receipt_id"] == receipt_id
+
+        deleted_response = client.get(f"/api/v1/receipt-folder/{receipt_id}", headers=auth_headers)
+        assert deleted_response.status_code == 404
+    finally:
+        get_settings.cache_clear()