feat: 新增归档中心页面并完善知识库与报销查询能力

新增前端归档中心视图及相关工具函数；扩充知识库文档分类，并让提取器支持 xlsx、pptx 等多种格式；增强编排器报销查询的多维度检索；优化本体规则和用户代理审核消息；完善前端报销创建和审批详情的交互细节；补充单元测试覆盖。
2026-05-22 16:00:19 +08:00
parent 1f15699013
commit 88ff04bef8
120 changed files with 6236 additions and 643 deletions
--- a/server/src/app/api/v1/endpoints/reimbursements.py
+++ b/server/src/app/api/v1/endpoints/reimbursements.py
@@ -97,6 +97,16 @@ def list_expense_claim_approvals(db: DbSession, current_user: CurrentUser) -> li
    return ExpenseClaimService(db).list_approval_claims(current_user)


+@router.get(
+    "/claims/archives",
+    response_model=list[ExpenseClaimRead],
+    summary="查询归档中心报销单列表",
+    description="返回公司已归档入账的报销单据，供财务与审计角色集中查阅。",
+)
+def list_archived_expense_claims(db: DbSession, current_user: CurrentUser) -> list[ExpenseClaimRead]:
+    return ExpenseClaimService(db).list_archived_claims(current_user)
+
+
@router.get(
    "/claims/{claim_id}",
    response_model=ExpenseClaimRead,
--- a/server/src/app/schemas/user_agent.py
+++ b/server/src/app/schemas/user_agent.py
@@ -58,10 +58,12 @@ class UserAgentExpenseQueryRecord(BaseModel):
    occurred_at: str = Field(default="", description="业务发生日期。")
    reason: str = Field(default="", description="事由。")
    location: str = Field(default="", description="地点。")
+    risk_flags: list[dict[str, Any]] = Field(default_factory=list, description="该单据当前风险项。")


 class UserAgentQueryPayload(BaseModel):
    result_type: str = Field(default="expense_claim_list", description="结构化查询结果类型。")
+    title: str = Field(default="", description="查询结果标题。")
    scope_label: str = Field(default="报销单", description="当前查询范围名。")
    recent_window_applied: bool = Field(default=False, description="是否应用了近 10 日窗口。")
    window_days: int | None = Field(default=None, ge=1, description="近 N 日窗口天数。")
@@ -69,6 +71,7 @@ class UserAgentQueryPayload(BaseModel):
    window_end_date: str | None = Field(default=None, description="近 N 日窗口结束日期。")
    record_count: int = Field(default=0, ge=0, description="当前展示范围内的单据数。")
    preview_count: int = Field(default=0, ge=0, description="当前返回的单据数。")
+    preview_limit: int = Field(default=5, ge=1, description="默认展示条数上限。")
    older_record_count: int = Field(default=0, ge=0, description="超出近 10 日窗口的单据数。")
    has_more_in_window: bool = Field(default=False, description="当前展示范围内是否还有更多单据未返回。")
    total_amount: float = Field(default=0.0, ge=0.0, description="当前展示范围内金额合计。")
@@ -122,6 +125,7 @@ class UserAgentReviewDocumentCard(BaseModel):
    avg_score: float = Field(default=0.0, ge=0.0, le=1.0, description="OCR 平均得分。")
    preview_kind: str = Field(default="", description="票据预览类型，例如 image。")
    preview_data_url: str = Field(default="", description="票据预览图片 data URL。")
+    preview_url: str = Field(default="", description="票据预览图片地址。")
    warnings: list[str] = Field(default_factory=list, description="该票据的识别提示。")
    fields: list[UserAgentReviewDocumentField] = Field(
        default_factory=list,
--- a/server/src/app/services/agent_conversations.py
+++ b/server/src/app/services/agent_conversations.py
@@ -93,6 +93,12 @@ class AgentConversationService:
            if existing_session_type != incoming_session_type:
                normalized_id = ""
                conversation = None
+        if conversation is not None and self._has_draft_claim_scope_conflict(
+            conversation,
+            incoming_draft_claim_id,
+        ):
+            normalized_id = ""
+            conversation = None

        if conversation is None:
            conversation = AgentConversation(
@@ -241,6 +247,10 @@ class AgentConversationService:
        history_limit: int = 8,
    ) -> dict[str, Any]:
        merged = dict(context_json or {})
+        incoming_draft_claim_id = self._resolve_draft_claim_id(merged)
+        if self._has_draft_claim_scope_conflict(conversation, incoming_draft_claim_id):
+            return merged
+
        state_json = dict(conversation.state_json or {})
        should_hydrate_review_flow = self._should_hydrate_review_flow_context(
            context_json=merged,
@@ -641,6 +651,26 @@ class AgentConversationService:
            ).strip()
        return ""

+    @staticmethod
+    def _resolve_conversation_draft_claim_id(conversation: AgentConversation) -> str:
+        state_json = dict(conversation.state_json or {})
+        return str(
+            conversation.draft_claim_id
+            or state_json.get("draft_claim_id")
+            or ""
+        ).strip()
+
+    @staticmethod
+    def _has_draft_claim_scope_conflict(
+        conversation: AgentConversation,
+        incoming_draft_claim_id: str | None,
+    ) -> bool:
+        incoming_claim_id = str(incoming_draft_claim_id or "").strip()
+        if not incoming_claim_id:
+            return False
+        existing_claim_id = AgentConversationService._resolve_conversation_draft_claim_id(conversation)
+        return bool(existing_claim_id and existing_claim_id != incoming_claim_id)
+
    @staticmethod
    def _merge_state_json(
        current_state: dict[str, Any] | None,
--- a/server/src/app/services/expense_claim_access_policy.py
+++ b/server/src/app/services/expense_claim_access_policy.py
@@ -13,8 +13,10 @@ from app.models.organization import OrganizationUnit


 PRIVILEGED_CLAIM_ROLE_CODES = {"finance", "executive"}
+ARCHIVE_CENTER_ROLE_CODES = {"finance", "executive", "auditor"}
 APPROVAL_VISIBLE_CLAIM_ROLE_CODES = {"manager", "approver"}
 CLAIM_DELETE_ROLE_CODES = {"executive"}
+ARCHIVED_CLAIM_STATUSES = ("approved", "completed", "paid")


 class ExpenseClaimAccessPolicy:
@@ -27,6 +29,30 @@ class ExpenseClaimAccessPolicy:
            return True
        return bool(ExpenseClaimAccessPolicy.normalize_role_codes(current_user) & PRIVILEGED_CLAIM_ROLE_CODES)

+    @staticmethod
+    def has_archive_center_access(current_user: CurrentUserContext) -> bool:
+        if current_user.is_admin:
+            return True
+        return bool(ExpenseClaimAccessPolicy.normalize_role_codes(current_user) & ARCHIVE_CENTER_ROLE_CODES)
+
+    @staticmethod
+    def build_archived_claim_condition() -> Any:
+        normalized_status = func.lower(func.coalesce(ExpenseClaim.status, ""))
+        stage = func.coalesce(ExpenseClaim.approval_stage, "")
+        return or_(
+            stage == "归档入账",
+            stage == "completed",
+            and_(
+                normalized_status.in_(ARCHIVED_CLAIM_STATUSES),
+                or_(
+                    stage == "",
+                    stage.is_(None),
+                    stage == "归档入账",
+                    stage == "completed",
+                ),
+            ),
+        )
+
    @staticmethod
    def has_claim_delete_access(current_user: CurrentUserContext) -> bool:
        if current_user.is_admin:
@@ -374,7 +400,16 @@ class ExpenseClaimAccessPolicy:
        include_approval_scope: bool = False,
    ) -> Any:
        if self.has_privileged_claim_access(current_user):
-            return stmt
+            owned_conditions = self.build_personal_claim_conditions(current_user)
+            archived_condition = self.build_archived_claim_condition()
+            if owned_conditions:
+                return stmt.where(
+                    or_(
+                        ~archived_condition,
+                        and_(archived_condition, or_(*owned_conditions)),
+                    )
+                )
+            return stmt.where(~archived_condition)

        conditions = self.build_personal_claim_conditions(current_user)

@@ -386,6 +421,12 @@ class ExpenseClaimAccessPolicy:

        return stmt.where(or_(*conditions))

+    def apply_archived_claim_scope(self, stmt: Any, current_user: CurrentUserContext) -> Any:
+        if not self.has_archive_center_access(current_user):
+            return stmt.where(ExpenseClaim.id == "__no_visible_claim__")
+
+        return stmt.where(self.build_archived_claim_condition())
+
    @staticmethod
    def resolve_claim_manager_name(claim: ExpenseClaim) -> str:
        if claim.employee is not None:
--- a/server/src/app/services/expense_claim_attachment_analysis.py
+++ b/server/src/app/services/expense_claim_attachment_analysis.py
@@ -615,7 +615,7 @@ class ExpenseClaimAttachmentAnalysisMixin:
            severity = "high"
            label = "高风险"
            headline = "AI提示：住宿金额超出报销标准"
-            summary = "当前住宿票据金额超过规则中心差旅住宿标准，强行提交前需补充超标原因。"
+            summary = "当前住宿票据金额超过规则中心差旅住宿标准，已作为风险项保留在单据中；如需按特殊情况提交，请补充超标原因。"
        elif (
            line_count == 0
            or not compact_text
--- a/server/src/app/services/expense_claims.py
+++ b/server/src/app/services/expense_claims.py
@@ -169,6 +169,19 @@ class ExpenseClaimService(
        stmt = self._access_policy.apply_approval_claim_scope(stmt, current_user)
        return list(self.db.scalars(stmt).all())

+    def list_archived_claims(self, current_user: CurrentUserContext) -> list[ExpenseClaim]:
+        stmt = (
+            select(ExpenseClaim)
+            .options(
+                selectinload(ExpenseClaim.items),
+                selectinload(ExpenseClaim.employee).selectinload(Employee.manager),
+                selectinload(ExpenseClaim.employee).selectinload(Employee.roles),
+            )
+            .order_by(ExpenseClaim.updated_at.desc(), ExpenseClaim.submitted_at.desc(), ExpenseClaim.created_at.desc())
+        )
+        stmt = self._access_policy.apply_archived_claim_scope(stmt, current_user)
+        return list(self.db.scalars(stmt).all())
+
    def get_claim(self, claim_id: str, current_user: CurrentUserContext) -> ExpenseClaim | None:
        stmt = (
            select(ExpenseClaim)
--- a/server/src/app/services/knowledge.py
+++ b/server/src/app/services/knowledge.py
@@ -106,7 +106,7 @@ class KnowledgeService:
            KnowledgeFolderRead(
                name=folder_name,
                count=sum(1 for item in documents if item.folder == folder_name),
-                icon="mdi mdi-folder-open" if folder_name == "差旅规范" else "mdi mdi-folder",
+                icon="mdi mdi-folder",
            )
            for folder_name in FIXED_KNOWLEDGE_FOLDERS
        ]
--- a/server/src/app/services/knowledge_document_extractors.py
+++ b/server/src/app/services/knowledge_document_extractors.py
@@ -10,6 +10,12 @@ from zipfile import BadZipFile, ZipFile
 from app.services.knowledge_constants import IMAGE_EXTENSIONS, TEXT_EXTENSIONS
 from app.services.knowledge_file_utils import extract_extension

+MAX_EXTRACTED_XLSX_SHEETS = 12
+MAX_EXTRACTED_XLSX_ROWS_PER_SHEET = 300
+MAX_EXTRACTED_XLSX_COLUMNS = 40
+MAX_EXTRACTED_PPTX_SLIDES = 80
+
+
 def _read_text_preview(file_path: Path) -> str:
    encodings = ("utf-8", "utf-8-sig", "gbk")
    for encoding in encodings:
@@ -19,6 +25,7 @@ def _read_text_preview(file_path: Path) -> str:
            continue
    return "当前文本文件编码暂不支持在线解析。"

+
 def _extract_docx_text(file_path: Path) -> str:
    try:
        with ZipFile(file_path) as archive:
@@ -30,6 +37,7 @@ def _extract_docx_text(file_path: Path) -> str:
    texts = [node.text.strip() for node in root.iter() if node.tag.endswith("}t") and node.text]
    return "\n".join(texts)

+
 def _extract_document_text_from_path(
    *,
    file_path: Path,
@@ -41,6 +49,20 @@ def _extract_document_text_from_path(
        return _normalize_extracted_text(_read_text_preview(file_path))
    if extension == "docx":
        return _normalize_extracted_text(_extract_docx_text(file_path))
+    if extension == "xlsx":
+        return _normalize_extracted_text(
+            _build_xlsx_markdown(
+                original_name=original_name,
+                sheets=_extract_xlsx_sheets(file_path),
+            )
+        )
+    if extension == "pptx":
+        return _normalize_extracted_text(
+            _build_pptx_markdown(
+                original_name=original_name,
+                slides=_extract_pptx_slides(file_path),
+            )
+        )
    if extension == "pdf":
        text = _normalize_extracted_text(_extract_pdf_text(file_path))
        if text:
@@ -62,11 +84,13 @@ def _extract_document_text_from_path(
        )
    return ""

+
 def _normalize_extracted_text(text: str) -> str:
    normalized = str(text or "").replace("\r\n", "\n").replace("\r", "\n")
    normalized = re.sub(r"\n{3,}", "\n\n", normalized)
    return normalized.strip()

+
 def _extract_pdf_text(file_path: Path) -> str:
    pdftotext_bin = shutil.which("pdftotext")
    if not pdftotext_bin:
@@ -83,6 +107,7 @@ def _extract_pdf_text(file_path: Path) -> str:
        return ""
    return str(completed.stdout or "")

+
 def _extract_text_with_ocr(
    *,
    file_path: Path,
@@ -92,9 +117,7 @@ def _extract_text_with_ocr(
    try:
        from app.services.ocr import OcrService

-        result = OcrService().recognize_files(
-            [(original_name, file_path.read_bytes(), mime_type)]
-        )
+        result = OcrService().recognize_files([(original_name, file_path.read_bytes(), mime_type)])
    except Exception:
        return ""

@@ -108,6 +131,7 @@ def _extract_text_with_ocr(
            parts.append(summary)
    return "\n\n".join(part for part in parts if part)

+
 def _extract_xlsx_sheets(file_path: Path) -> list[tuple[str, list[list[str]]]]:
    try:
        with ZipFile(file_path) as archive:
@@ -182,8 +206,13 @@ def _extract_xlsx_sheets(file_path: Path) -> list[tuple[str, list[list[str]]]]:
                        value_node = next((item for item in cell if item.tag.endswith("}v")), None)

                        if cell_type == "inlineStr":
-                            text_node = next((item for item in cell.iter() if item.tag.endswith("}t")), None)
-                            row_values.append((text_node.text or "").strip() if text_node is not None else "")
+                            text_node = next(
+                                (item for item in cell.iter() if item.tag.endswith("}t")),
+                                None,
+                            )
+                            row_values.append(
+                                (text_node.text or "").strip() if text_node is not None else ""
+                            )
                            continue

                        if value_node is None or value_node.text is None:
@@ -193,7 +222,9 @@ def _extract_xlsx_sheets(file_path: Path) -> list[tuple[str, list[list[str]]]]:
                        raw_value = value_node.text.strip()
                        if cell_type == "s" and raw_value.isdigit():
                            index = int(raw_value)
-                            row_values.append(shared_strings[index] if index < len(shared_strings) else raw_value)
+                            row_values.append(
+                                shared_strings[index] if index < len(shared_strings) else raw_value
+                            )
                        else:
                            row_values.append(raw_value)
                    if row_values:
@@ -205,6 +236,7 @@ def _extract_xlsx_sheets(file_path: Path) -> list[tuple[str, list[list[str]]]]:
    except (BadZipFile, ElementTree.ParseError, KeyError, ValueError):
        return []

+
 def _extract_pptx_slides(file_path: Path) -> list[list[str]]:
    try:
        with ZipFile(file_path) as archive:
@@ -216,8 +248,91 @@ def _extract_pptx_slides(file_path: Path) -> list[list[str]]:
            slides: list[list[str]] = []
            for slide_name in slide_names:
                root = ElementTree.fromstring(archive.read(slide_name))
-                texts = [node.text.strip() for node in root.iter() if node.tag.endswith("}t") and node.text]
+                texts = [
+                    node.text.strip()
+                    for node in root.iter()
+                    if node.tag.endswith("}t") and node.text
+                ]
                slides.append(texts)
            return slides
    except (BadZipFile, ElementTree.ParseError, KeyError):
        return []
+
+
+def _build_xlsx_markdown(
+    *,
+    original_name: str,
+    sheets: list[tuple[str, list[list[str]]]],
+) -> str:
+    if not sheets:
+        return ""
+
+    parts = [f"# Excel 工作簿：{original_name}"]
+    for sheet_index, (sheet_name, rows) in enumerate(sheets[:MAX_EXTRACTED_XLSX_SHEETS], start=1):
+        visible_rows = [
+            [_escape_markdown_cell(cell) for cell in row[:MAX_EXTRACTED_XLSX_COLUMNS]]
+            for row in rows[:MAX_EXTRACTED_XLSX_ROWS_PER_SHEET]
+            if any(str(cell or "").strip() for cell in row)
+        ]
+        if not visible_rows:
+            continue
+
+        column_count = max(len(row) for row in visible_rows)
+        normalized_rows = [row + [""] * (column_count - len(row)) for row in visible_rows]
+        header = [
+            cell or f"列{column_index + 1}" for column_index, cell in enumerate(normalized_rows[0])
+        ]
+
+        parts.append(f"## 工作表 {sheet_index}：{sheet_name}")
+        parts.append(_format_markdown_table(header, normalized_rows[1:]))
+        parts.append("### 行级检索线索")
+        for row_number, row in enumerate(normalized_rows[1:], start=2):
+            pairs = [
+                f"{header[column_index]}={value}" for column_index, value in enumerate(row) if value
+            ]
+            if pairs:
+                parts.append(f"- {sheet_name} 第 {row_number} 行：" + "；".join(pairs))
+
+        if len(rows) > MAX_EXTRACTED_XLSX_ROWS_PER_SHEET:
+            parts.append(
+                f"- {sheet_name} 还有 {len(rows) - MAX_EXTRACTED_XLSX_ROWS_PER_SHEET} 行未展开。"
+            )
+
+    return "\n\n".join(part for part in parts if part).strip()
+
+
+def _build_pptx_markdown(
+    *,
+    original_name: str,
+    slides: list[list[str]],
+) -> str:
+    if not slides:
+        return ""
+
+    parts = [f"# PowerPoint 演示文稿：{original_name}"]
+    for slide_index, slide_lines in enumerate(slides[:MAX_EXTRACTED_PPTX_SLIDES], start=1):
+        lines = [line.strip() for line in slide_lines if str(line or "").strip()]
+        if not lines:
+            continue
+        parts.append(f"## 幻灯片 {slide_index}")
+        parts.extend(f"- {line}" for line in lines)
+    if len(slides) > MAX_EXTRACTED_PPTX_SLIDES:
+        parts.append(f"- 还有 {len(slides) - MAX_EXTRACTED_PPTX_SLIDES} 页未展开。")
+    return "\n\n".join(part for part in parts if part).strip()
+
+
+def _format_markdown_table(header: list[str], rows: list[list[str]]) -> str:
+    table_rows = [header] + rows
+    separator = ["---"] * len(header)
+    lines = [
+        "| " + " | ".join(table_rows[0]) + " |",
+        "| " + " | ".join(separator) + " |",
+    ]
+    lines.extend("| " + " | ".join(row) + " |" for row in table_rows[1:])
+    return "\n".join(lines)
+
+
+def _escape_markdown_cell(value: str) -> str:
+    text = str(value or "").replace("\r\n", " ").replace("\r", " ").replace("\n", " ")
+    text = re.sub(r"\s+", " ", text).strip()
+    return text.replace("|", "\\|")
--- a/server/src/app/services/knowledge_preview.py
+++ b/server/src/app/services/knowledge_preview.py
@@ -16,6 +16,7 @@ from app.services.knowledge_document_extractors import (
 )
 from app.services.knowledge_file_utils import extract_extension, format_size

+
 def build_preview(
    entry: dict[str, Any],
    *,
@@ -52,7 +53,9 @@ def build_preview(
                subtitle="当前格式暂不支持在线解析预览。",
                stats=[
                    KnowledgePreviewStatRead(label="文件格式", value=extension.upper() or "FILE"),
-                    KnowledgePreviewStatRead(label="文件大小", value=format_size(entry["size_bytes"])),
+                    KnowledgePreviewStatRead(
+                        label="文件大小", value=format_size(entry["size_bytes"])
+                    ),
                    KnowledgePreviewStatRead(label="建议操作", value="下载后查看"),
                ],
                blocks=[
@@ -68,9 +71,8 @@ def build_preview(
        ],
    )

-def _build_text_preview_page(
-    entry: dict[str, Any], text: str
-) -> KnowledgePreviewPageRead:
+
+def _build_text_preview_page(entry: dict[str, Any], text: str) -> KnowledgePreviewPageRead:
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    if not lines:
        lines = ["文件内容为空，或当前文档未提取到可展示文本。"]
@@ -92,10 +94,9 @@ def _build_text_preview_page(
        blocks=blocks,
    )

-def _build_xlsx_preview_pages(
-    entry: dict[str, Any], file_path
-) -> list[KnowledgePreviewPageRead]:
-    sheets = self._extract_xlsx_sheets(file_path)
+
+def _build_xlsx_preview_pages(entry: dict[str, Any], file_path) -> list[KnowledgePreviewPageRead]:
+    sheets = _extract_xlsx_sheets(file_path)
    if not sheets:
        sheets = [("Sheet 1", [["未提取到表格内容。"]])]

@@ -118,7 +119,9 @@ def _build_xlsx_preview_pages(
                stats=[
                    KnowledgePreviewStatRead(label="工作表数量", value=str(sheet_count)),
                    KnowledgePreviewStatRead(label="预览行数", value=str(len(visible_rows))),
-                    KnowledgePreviewStatRead(label="文件大小", value=format_size(entry["size_bytes"])),
+                    KnowledgePreviewStatRead(
+                        label="文件大小", value=format_size(entry["size_bytes"])
+                    ),
                ],
                blocks=blocks,
            )
@@ -126,10 +129,9 @@ def _build_xlsx_preview_pages(

    return preview_pages

-def _build_pptx_preview_pages(
-    entry: dict[str, Any], file_path
-) -> list[KnowledgePreviewPageRead]:
-    slides = self._extract_pptx_slides(file_path)
+
+def _build_pptx_preview_pages(entry: dict[str, Any], file_path) -> list[KnowledgePreviewPageRead]:
+    slides = _extract_pptx_slides(file_path)
    if not slides:
        slides = [["未提取到幻灯片文本。"]]

@@ -154,4 +156,3 @@ def _build_pptx_preview_pages(
        )

    return pages
-
--- a/server/src/app/services/ontology_detection.py
+++ b/server/src/app/services/ontology_detection.py
@@ -114,6 +114,20 @@ class OntologyDetectionMixin:
            return "query", 0.24
        if any(keyword in compact_query for keyword in DRAFT_KEYWORDS):
            return "draft", 0.26
+        if scenario == "expense" and "报销" in compact_query and any(
+            item.type == "expense_type"
+            and str(item.normalized_value or item.value or "").strip()
+            for item in entities
+        ) and not any(
+            keyword in compact_query
+            for keyword in (
+                *QUERY_KEYWORDS,
+                *COMPARE_KEYWORDS,
+                *EXPLAIN_KEYWORDS,
+                *RISK_KEYWORDS,
+            )
+        ):
+            return "draft", 0.25
        if scenario == "expense" and self._is_generic_expense_prompt(compact_query):
            return "draft", 0.24
        if any(keyword in compact_query for keyword in COMPARE_KEYWORDS):
@@ -220,7 +234,11 @@ class OntologyDetectionMixin:
        has_expense_signal = any(
            keyword in compact_query for keyword in EXPENSE_NARRATIVE_KEYWORDS
        ) or "expense_type" in entity_types
-        has_context_signal = bool(time_range.start_date) or "amount" in entity_types
+        has_context_signal = (
+            bool(time_range.start_date)
+            or "amount" in entity_types
+            or ("报销" in compact_query and "expense_type" in entity_types)
+        )

        return has_expense_signal and has_context_signal

--- a/server/src/app/services/ontology_extraction.py
+++ b/server/src/app/services/ontology_extraction.py
@@ -186,7 +186,21 @@ class OntologyExtractionMixin:

        if any(
            keyword in query
-            for keyword in ("打车", "网约车", "出租车", "车费", "乘车", "用车", "叫车", "车资", "停车费", "过路费")
+            for keyword in (
+                "打车",
+                "网约车",
+                "出租车",
+                "出租车票",
+                "车费",
+                "乘车",
+                "用车",
+                "叫车",
+                "车资",
+                "的士",
+                "的士票",
+                "停车费",
+                "过路费",
+            )
        ):
            upsert(self._make_entity("expense_type", "交通", "transport", role="filter", confidence=0.9))

--- a/server/src/app/services/ontology_rules.py
+++ b/server/src/app/services/ontology_rules.py
@@ -137,11 +137,14 @@ EXPENSE_TYPE_KEYWORDS = {
    "打车": "transport",
    "网约车": "transport",
    "出租车": "transport",
+    "出租车票": "transport",
    "乘车": "transport",
    "乘车费": "transport",
    "用车": "transport",
    "叫车": "transport",
    "车资": "transport",
+    "的士": "transport",
+    "的士票": "transport",
    "停车费": "transport",
    "餐费": "meal",
    "用餐": "meal",
@@ -180,6 +183,9 @@ EXPENSE_NARRATIVE_KEYWORDS = (
    "用车",
    "叫车",
    "车资",
+    "的士",
+    "的士票",
+    "出租车票",
    "餐费",
    "吃饭",
    "用餐",
@@ -232,6 +238,9 @@ STATUS_KEYWORDS = {
    "已审批": "approved",
    "已通过": "approved",
    "已审核": "approved",
+    "归档": "archived",
+    "已归档": "archived",
+    "入账": "archived",
    "已入账": "paid",
    "已付款": "paid",
    "未付款": "unpaid",
--- a/server/src/app/services/orchestrator_expense_query.py
+++ b/server/src/app/services/orchestrator_expense_query.py
@@ -17,14 +17,50 @@ from app.schemas.ontology import OntologyParseResult
 PRIVILEGED_EXPENSE_QUERY_ROLE_CODES = {"finance"}
 SELF_REFERENCE_KEYWORDS = ("我的", "我自己", "本人", "我名下", "给我查", "我提交", "我申请")
 EXPENSE_QUERY_RECENT_WINDOW_DAYS = 10
-EXPENSE_QUERY_PREVIEW_LIMIT = 20
+EXPENSE_QUERY_PREVIEW_LIMIT = 5
 EXPENSE_STATUS_LABELS = {
+    "archived": "归档",
    "draft": "草稿",
+    "supplement": "待补充",
+    "returned": "已退回",
    "submitted": "已提交",
    "review": "审核中",
    "approved": "已通过",
    "rejected": "已驳回",
-    "paid": "已付款",
+    "paid": "归档",
+}
+EXPENSE_QUERY_STATUS_KEYWORDS = (
+    (("归档", "已归档", "入账", "已入账", "已付款"), ("archived",)),
+    (("审批通过", "审核通过", "已通过", "已审核"), ("approved",)),
+    (("审批中", "审核中", "进行中", "流程中"), ("submitted", "review")),
+    (("已提交", "提交了"), ("submitted",)),
+    (("草稿", "待报销", "待提交"), ("draft",)),
+    (("待补充", "待完善", "退回", "已退回"), ("supplement", "returned")),
+    (("驳回", "已驳回", "拒绝"), ("rejected",)),
+)
+EXPENSE_STATUS_ALIASES = {
+    "归档": "archived",
+    "已归档": "archived",
+    "入账": "archived",
+    "已入账": "archived",
+    "已付款": "archived",
+    "已通过": "approved",
+    "审批通过": "approved",
+    "审核通过": "approved",
+    "已审核": "approved",
+    "审批中": "review",
+    "审核中": "review",
+    "进行中": "review",
+    "已提交": "submitted",
+    "草稿": "draft",
+    "待报销": "draft",
+    "待提交": "draft",
+    "待补充": "supplement",
+    "待完善": "supplement",
+    "已退回": "returned",
+    "退回": "returned",
+    "驳回": "rejected",
+    "已驳回": "rejected",
 }
 EXPENSE_STATUS_GROUP_LABELS = {
    "draft": "草稿",
@@ -33,6 +69,13 @@ EXPENSE_STATUS_GROUP_LABELS = {
    "other": "其他状态",
 }
 EXPENSE_STATUS_GROUP_ORDER = ("draft", "in_progress", "completed", "other")
+EXPENSE_RISK_LEVEL_LABELS = {
+    "high": "高风险",
+    "medium": "中风险",
+    "warning": "中风险",
+    "low": "低风险",
+    "info": "低风险",
+}
 EXPENSE_TYPE_LABELS = {
    "travel": "差旅费",
    "hotel": "住宿费",
@@ -95,7 +138,7 @@ class OrchestratorDatabaseQueryBuilder:
        total_count = int(self.db.scalar(count_stmt) or 0)
        total_amount = float(self.db.scalar(amount_stmt) or 0)

-        recent_window_applied = self._should_limit_expense_query_to_recent_window(ontology)
+        recent_window_applied = self._should_limit_expense_query_to_recent_window(ontology, message)
        display_count = total_count
        display_amount = total_amount
        older_record_count = 0
@@ -146,12 +189,14 @@ class OrchestratorDatabaseQueryBuilder:
            "record_count": display_count,
            "total_amount": round(display_amount, 2),
            "scope_label": scope_label,
+            "title": f"最近 {len(preview_claims)} 条{scope_label}" if preview_claims else f"{scope_label}筛选结果",
            "scoped_to_current_user": scoped_to_current_user,
            "recent_window_applied": recent_window_applied,
            "window_days": EXPENSE_QUERY_RECENT_WINDOW_DAYS if recent_window_applied else None,
            "window_start_date": window_start_date,
            "window_end_date": window_end_date,
            "preview_count": len(preview_claims),
+            "preview_limit": EXPENSE_QUERY_PREVIEW_LIMIT,
            "older_record_count": older_record_count,
            "records": [
                self._build_expense_query_record(claim)
@@ -199,6 +244,7 @@ class OrchestratorDatabaseQueryBuilder:
    @staticmethod
    def _should_limit_expense_query_to_recent_window(
        ontology: OntologyParseResult,
+        message: str = "",
    ) -> bool:
        has_explicit_claim_no = any(
            item.type == "expense_claim"
@@ -208,7 +254,12 @@ class OrchestratorDatabaseQueryBuilder:
        has_explicit_time_range = bool(
            ontology.time_range.start_date or ontology.time_range.end_date
        )
-        return not has_explicit_claim_no and not has_explicit_time_range
+        compact_message = str(message or "").replace(" ", "")
+        asks_recent_window = any(
+            keyword in compact_message
+            for keyword in ("近", "最近", "本周", "上周", "过去", "前几天", "这几天")
+        )
+        return asks_recent_window and not has_explicit_claim_no and not has_explicit_time_range

    @staticmethod
    def _resolve_reference_now(context_json: dict[str, Any]) -> datetime:
@@ -294,6 +345,12 @@ class OrchestratorDatabaseQueryBuilder:
    ) -> dict[str, Any]:
        status_group, status_group_label = self._resolve_expense_status_group(claim.status)
        document_datetime = self._resolve_expense_query_document_datetime(claim)
+        approval_stage = str(claim.approval_stage or "").strip()
+        status_label = (
+            "已归档"
+            if "归档" in approval_stage
+            else EXPENSE_STATUS_LABELS.get(claim.status, claim.status or "处理中")
+        )
        return {
            "claim_id": claim.id,
            "claim_no": claim.claim_no,
@@ -302,16 +359,63 @@ class OrchestratorDatabaseQueryBuilder:
            "expense_type_label": EXPENSE_TYPE_LABELS.get(claim.expense_type, claim.expense_type or "报销"),
            "amount": round(float(claim.amount), 2),
            "status": claim.status,
-            "status_label": EXPENSE_STATUS_LABELS.get(claim.status, claim.status or "处理中"),
+            "status_label": status_label,
            "status_group": status_group,
            "status_group_label": status_group_label,
-            "approval_stage": claim.approval_stage,
+            "approval_stage": approval_stage,
            "document_date": document_datetime.date().isoformat() if document_datetime else "",
            "occurred_at": claim.occurred_at.date().isoformat() if claim.occurred_at else "",
            "reason": claim.reason,
            "location": claim.location,
+            "risk_flags": self._normalize_expense_query_risk_flags(claim.risk_flags_json),
        }

+    @staticmethod
+    def _normalize_expense_query_risk_flags(raw_flags: Any) -> list[dict[str, str]]:
+        if not isinstance(raw_flags, list):
+            return []
+
+        normalized_flags: list[dict[str, str]] = []
+        for index, raw_flag in enumerate(raw_flags, start=1):
+            if isinstance(raw_flag, dict):
+                raw_level = str(raw_flag.get("severity") or raw_flag.get("level") or "").strip().lower()
+                level = raw_level if raw_level in EXPENSE_RISK_LEVEL_LABELS else "medium"
+                summary = str(
+                    raw_flag.get("message")
+                    or raw_flag.get("summary")
+                    or raw_flag.get("title")
+                    or raw_flag.get("label")
+                    or ""
+                ).strip()
+                detail = "；".join(
+                    str(point or "").strip()
+                    for point in list(raw_flag.get("points") or [])
+                    if str(point or "").strip()
+                )
+                title = str(raw_flag.get("label") or EXPENSE_RISK_LEVEL_LABELS[level]).strip()
+            else:
+                raw_text = str(raw_flag or "").strip()
+                if not raw_text:
+                    continue
+                level = "high" if any(keyword in raw_text for keyword in ("高风险", "超标", "重复", "异常")) else "medium"
+                summary = raw_text
+                detail = raw_text
+                title = EXPENSE_RISK_LEVEL_LABELS[level]
+
+            if not summary:
+                continue
+            normalized_flags.append(
+                {
+                    "key": f"risk-{index}",
+                    "level": level,
+                    "level_label": EXPENSE_RISK_LEVEL_LABELS.get(level, "中风险"),
+                    "title": title or EXPENSE_RISK_LEVEL_LABELS.get(level, "中风险"),
+                    "summary": summary,
+                    "detail": detail or summary,
+                }
+            )
+        return normalized_flags
+
    def _build_expense_query_scope(
        self,
        *,
@@ -344,12 +448,13 @@ class OrchestratorDatabaseQueryBuilder:
        )
        project_values = self._collect_expense_query_filter_values(ontology, "project")
        location_values = self._collect_expense_query_filter_values(ontology, "location")
-        status_values = list(
-            dict.fromkeys(
+        status_values = self._resolve_expense_query_status_values(
+            [
                str(item.value).strip()
                for item in ontology.constraints
                if item.field == "status" and item.operator == "=" and str(item.value).strip()
-            )
+            ],
+            message,
        )
        amount_constraints = [
            item
@@ -363,8 +468,16 @@ class OrchestratorDatabaseQueryBuilder:
            conditions.append(ExpenseClaim.claim_no.in_(expense_claim_nos))
        if expense_types:
            conditions.append(ExpenseClaim.expense_type.in_(expense_types))
-        if status_values:
-            conditions.append(ExpenseClaim.status.in_(status_values))
+        direct_status_values = [status for status in status_values if status != "archived"]
+        if "archived" in status_values:
+            conditions.append(
+                or_(
+                    ExpenseClaim.approval_stage.ilike("%归档%"),
+                    ExpenseClaim.status.in_(["approved", "paid"]),
+                )
+            )
+        if direct_status_values:
+            conditions.append(ExpenseClaim.status.in_(direct_status_values))
        if project_values:
            project_conditions = []
            for value in project_values:
@@ -438,7 +551,49 @@ class OrchestratorDatabaseQueryBuilder:
        else:
            scope_label = "全部报销单"

-        return conditions, scope_label, scoped_to_current_user
+        return conditions, self._compose_expense_scope_label(scope_label, status_values), scoped_to_current_user
+
+    @staticmethod
+    def _resolve_expense_query_status_values(
+        raw_values: list[str],
+        message: str,
+    ) -> list[str]:
+        """Merge explicit status values with statuses implied by keywords in ``message``.
+
+        Explicit values are mapped through ``EXPENSE_STATUS_ALIASES``; the merged result is
+        de-duplicated in first-seen order and restricted to ``EXPENSE_STATUS_LABELS`` entries.
+        """
+        stripped_values = (str(raw_value or "").strip() for raw_value in raw_values)
+        candidates = [EXPENSE_STATUS_ALIASES.get(text, text) for text in stripped_values if text]
+
+        # Only ASCII spaces are removed before the keyword scan.
+        squeezed_message = str(message or "").replace(" ", "")
+        for keywords, statuses in EXPENSE_QUERY_STATUS_KEYWORDS:
+            if any(keyword in squeezed_message for keyword in keywords):
+                candidates.extend(statuses)
+
+        # dict.fromkeys drops duplicates while keeping first-seen order.
+        unique_candidates = dict.fromkeys(candidates)
+        return [status for status in unique_candidates if status in EXPENSE_STATUS_LABELS]
+
+    @staticmethod
+    def _compose_expense_scope_label(scope_label: str, status_values: list[str]) -> str:
+        """Insert status display names before "报销单" in the label, else append them in brackets.
+
+        Statuses absent from ``EXPENSE_STATUS_LABELS`` are ignored; a blank label becomes "报销单".
+        """
+        base_label = str(scope_label or "").strip() or "报销单"
+        display_names = dict.fromkeys(
+            EXPENSE_STATUS_LABELS[status]
+            for status in (status_values or ())
+            if status in EXPENSE_STATUS_LABELS
+        )
+        if not display_names:
+            return base_label
+        joined_names = "或".join(display_names)
+        if "报销单" not in base_label:
+            return f"{base_label}（{joined_names}）"
+        return base_label.replace("报销单", f"{joined_names}报销单")

    @staticmethod
    def _collect_expense_query_filter_values(
--- a/server/src/app/services/user_agent_response.py
+++ b/server/src/app/services/user_agent_response.py
@@ -365,25 +365,13 @@ class UserAgentResponseMixin:
                    )
                return f"{window_prefix}没有查到{query_payload.scope_label}。你可以补充时间范围、单号或状态继续筛选。"

-            group_lines = [
-                f"{item.label} {item.count} 笔"
-                for item in query_payload.status_groups
-                if item.count > 0
-            ]
            answer_parts = [
-                f"我先为你列出{window_prefix}的{query_payload.scope_label}，"
-                f"共 {query_payload.record_count} 笔，金额合计 {query_payload.total_amount:.2f} 元。"
+                f"已按你的筛选条件查询{query_payload.scope_label}。",
+                f"下面先列出最近 {query_payload.preview_count} 条记录，点击任一单据即可查看详情。",
+                f"本次共命中 {query_payload.record_count} 笔，金额合计 {query_payload.total_amount:.2f} 元。",
            ]
-            if group_lines:
-                answer_parts.append(f"其中包括：{'、'.join(group_lines)}。")

            hint_parts: list[str] = []
-            if query_payload.has_more_in_window and query_payload.preview_count < query_payload.record_count:
-                hint_parts.append(
-                    f"下方先展示最近 {query_payload.preview_count} 笔，你可以直接点击单据查看详情。"
-                )
-            elif query_payload.records:
-                hint_parts.append("下方已列出本次命中的真实单据，可直接点击查看详情。")

            if query_payload.older_record_count > 0 and query_payload.window_days:
                hint_parts.append(
@@ -448,6 +436,11 @@ class UserAgentResponseMixin:
                    occurred_at=str(item.get("occurred_at") or "").strip(),
                    reason=str(item.get("reason") or "").strip(),
                    location=str(item.get("location") or "").strip(),
+                    risk_flags=[
+                        flag
+                        for flag in list(item.get("risk_flags") or [])
+                        if isinstance(flag, dict)
+                    ],
                )
            )

@@ -466,6 +459,7 @@ class UserAgentResponseMixin:
        return UserAgentQueryPayload(
            result_type="expense_claim_list",
            scope_label=str(payload.tool_payload.get("scope_label") or self._resolve_subject(payload)).strip() or "报销单",
+            title=str(payload.tool_payload.get("title") or "").strip(),
            recent_window_applied=bool(payload.tool_payload.get("recent_window_applied")),
            window_days=(
                int(payload.tool_payload["window_days"])
@@ -480,6 +474,7 @@ class UserAgentResponseMixin:
            ),
            record_count=max(0, int(payload.tool_payload.get("record_count") or 0)),
            preview_count=max(0, int(payload.tool_payload.get("preview_count") or len(records))),
+            preview_limit=max(1, int(payload.tool_payload.get("preview_limit") or 5)),
            older_record_count=max(0, int(payload.tool_payload.get("older_record_count") or 0)),
            has_more_in_window=bool(payload.tool_payload.get("has_more_in_window") or payload.tool_payload.get("has_more")),
            total_amount=round(float(payload.tool_payload.get("total_amount") or 0), 2),
@@ -670,18 +665,7 @@ class UserAgentResponseMixin:
            ]

        if payload.ontology.intent in {"query", "compare"}:
-            return [
-                UserAgentSuggestedAction(
-                    label="查看明细",
-                    action_type="open_detail",
-                    description="继续查看命中记录和过滤条件。",
-                ),
-                UserAgentSuggestedAction(
-                    label="生成处理意见",
-                    action_type="create_draft",
-                    description="把当前查询结果整理成可确认草稿。",
-                ),
-            ]
+            return []

        if payload.ontology.intent == "risk_check":
            return [
--- a/server/src/app/services/user_agent_review_core.py
+++ b/server/src/app/services/user_agent_review_core.py
@@ -322,6 +322,7 @@ class UserAgentReviewCoreMixin:
                    avg_score=float(item.get("avg_score") or 0.0),
                    preview_kind=str(item.get("preview_kind") or "").strip(),
                    preview_data_url=str(item.get("preview_data_url") or "").strip(),
+                    preview_url=str(item.get("preview_url") or "").strip(),
                    warnings=[str(warning) for warning in item.get("warnings", []) if str(warning).strip()],
                    fields=[
                        UserAgentReviewDocumentField(
@@ -411,16 +412,26 @@ class UserAgentReviewCoreMixin:
    ) -> list[UserAgentReviewRiskBrief]:
        briefs: list[UserAgentReviewRiskBrief] = []
        for reason in self._resolve_submission_blocked_reasons(payload):
+            needs_exception_explanation = self._is_submission_exception_explanation_reason(reason)
            briefs.append(
                UserAgentReviewRiskBrief(
                    title="提交风险提示",
                    level=self._resolve_submission_blocked_risk_level(reason),
                    content=reason,
                    detail=(
-                        "该项属于提交审批前的阻断条件。系统会先要求补齐基础字段、附件或业务说明，"
-                        "否则审批人无法判断成本归属、业务真实性或票据有效性。"
+                        "该项不是票据归集阻断条件，系统会保留费用明细并在详情中标记高风险；"
+                        "继续提交前需要补充特殊情况说明，便于审批人判断例外原因。"
+                        if needs_exception_explanation
+                        else (
+                            "该项属于提交审批前的阻断条件。系统会先要求补齐基础字段、附件或业务说明，"
+                            "否则审批人无法判断成本归属、业务真实性或票据有效性。"
+                        )
+                    ),
+                    suggestion=(
+                        "请在附加说明中写清超标或例外原因；确认业务真实后可继续提交给审批人重点复核。"
+                        if needs_exception_explanation
+                        else "按提示补齐对应信息；如果业务场景本身合理，请补充说明或佐证附件后再提交。"
                    ),
-                    suggestion="按提示补齐对应信息；如果业务场景本身合理，请补充说明或佐证附件后再提交。",
                )
            )

@@ -514,6 +525,16 @@ class UserAgentReviewCoreMixin:
        return "high" if any(keyword in normalized for keyword in amount_keywords) else "warning"


+    @staticmethod
+    def _is_submission_exception_explanation_reason(reason: str) -> bool:
+        """Return True when ``reason`` contains both an over-standard term and an explanation term."""
+        # Whitespace is removed first so terms broken up by spaces still match.
+        compact_reason = re.sub(r"\s+", "", str(reason or ""))
+        over_limit_terms = ("超标", "超出", "超标准", "差标")
+        explanation_terms = ("说明", "原因", "例外", "特殊")
+        return all(any(term in compact_reason for term in terms) for terms in (over_limit_terms, explanation_terms))
+
+
    @staticmethod
    def _filter_deprecated_review_risk_briefs(
        briefs: list[UserAgentReviewRiskBrief],
--- a/server/src/app/services/user_agent_review_messages.py
+++ b/server/src/app/services/user_agent_review_messages.py
@@ -183,9 +183,9 @@ class UserAgentReviewMessageMixin:
            if draft_payload is not None and draft_payload.claim_no:
                return (
                    f"已按您当前确认的信息保存为草稿 {draft_payload.claim_no}。"
-                    "后续您可以继续补充缺失项，或修改识别结果后再继续提交。"
+                    "后续上传附件或补充票据信息时，请关联这张草稿；补齐缺失项后再继续提交。"
                )
-            return "已按您当前确认的信息保存为草稿。后续您可以继续补充缺失项，或修改识别结果后再继续提交。"
+            return "已按您当前确认的信息保存为草稿。后续上传附件或补充票据信息时，请关联这张草稿；补齐缺失项后再继续提交。"
        if review_action == "link_to_existing_draft":
            document_count = self._resolve_review_document_count(payload)
            followup_copy = self._build_review_action_followup_copy(review_payload)
@@ -214,6 +214,12 @@ class UserAgentReviewMessageMixin:
                    reason_lines = "\n".join(
                        f"{index}. {reason}" for index, reason in enumerate(reasons, start=1)
                    )
+                    if all(self._is_submission_exception_explanation_reason(reason) for reason in reasons):
+                        return (
+                            "检测到当前单据存在需要说明的超标风险，但票据和费用明细会继续保留在单据中。\n"
+                            f"{reason_lines}\n"
+                            "如果确有特殊情况，请先在附加说明中补充原因；补充后可以继续提交给审批人重点复核。"
+                        )
                    return (
                        "AI预审暂未通过，所以还没有提交到审批人。\n"
                        f"{reason_lines}\n"
@@ -253,6 +259,12 @@ class UserAgentReviewMessageMixin:
        blocked_reasons = self._resolve_submission_blocked_reasons(payload)
        if blocked_reasons:
            reason_text = "；".join(dict.fromkeys(reason.strip("。；;") for reason in blocked_reasons if reason))
+            if all(self._is_submission_exception_explanation_reason(reason) for reason in blocked_reasons):
+                return (
+                    f"检测到当前单据存在需要说明的超标风险：{reason_text}。"
+                    "票据会先正常归集到单据中，并在费用明细前标记风险；"
+                    "如确有特殊情况，请在附加说明中补充原因后继续提交审批。"
+                )
            return (
                f"AI预审未通过：{reason_text}。"
                "请先根据风险提示补充原因、调整金额或更换附件，整改后再继续提交。"
@@ -670,4 +682,3 @@ class UserAgentReviewMessageMixin:
        if not claim_groups:
            return False
        return True
-