feat: 新增归档中心页面并完善知识库与报销查询能力

新增前端归档中心视图及相关工具函数，扩充知识库文档分类和提取器支持多种格式，增强编排器报销查询的多维度检索，优化本体规则和用户代理审核消息，前端完善报销创建和审批详情交互细节，补充单元测试覆盖。
2026-05-22 16:00:19 +08:00
parent 1f15699013
commit 88ff04bef8
120 changed files with 6236 additions and 643 deletions
--- a/server/tests/test_knowledge_document_extractors.py
+++ b/server/tests/test_knowledge_document_extractors.py
@@ -0,0 +1,96 @@
+from __future__ import annotations
+
+from zipfile import ZipFile
+
+from app.services.knowledge_document_extractors import _extract_document_text_from_path
+
+
+def test_extract_xlsx_document_text_builds_markdown_with_row_clues(tmp_path) -> None:
+    """Extracted .xlsx text contains a Markdown heading, the table header, and key=value row clues."""
+    workbook_path = tmp_path / "company-expense-rules.xlsx"
+    header = ["费用类型", "标准", "说明"]
+    lodging = ["住宿费", "500", "超标准需事前审批"]
+    transport = ["交通费", "据实", "保留发票"]
+    _write_minimal_xlsx(workbook_path, sheet_name="报销标准", rows=[header, lodging, transport])
+
+    markdown = _extract_document_text_from_path(
+        file_path=workbook_path,
+        original_name="公司支出管理办法.xlsx",
+        mime_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+    )
+    # The workbook heading is expected to carry original_name, not the on-disk file name.
+    expected_fragments = (
+        "# Excel 工作簿：公司支出管理办法.xlsx",
+        "## 工作表 1：报销标准",
+        "| 费用类型 | 标准 | 说明 |",
+        "费用类型=住宿费；标准=500；说明=超标准需事前审批",
+        "费用类型=交通费；标准=据实；说明=保留发票",
+    )
+    for fragment in expected_fragments:
+        assert fragment in markdown
+
+
+def test_extract_pptx_document_text_builds_markdown_slides(tmp_path) -> None:
+    """Extracted .pptx text contains a Markdown title, a slide heading, and a bullet for each text shape."""
+    deck_path = tmp_path / "training.pptx"
+    slide_markup = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<p:sld xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"
+       xmlns:p="http://schemas.openxmlformats.org/presentationml/2006/main">
+  <p:cSld>
+    <p:spTree>
+      <p:sp><p:txBody><a:p><a:r><a:t>差旅报销培训</a:t></a:r></a:p></p:txBody></p:sp>
+      <p:sp><p:txBody><a:p><a:r><a:t>发票、审批、预算三项要素必须齐全</a:t></a:r></a:p></p:txBody></p:sp>
+    </p:spTree>
+  </p:cSld>
+</p:sld>
+"""
+    with ZipFile(deck_path, "w") as pptx_archive:  # minimal package: a single slide part
+        pptx_archive.writestr("ppt/slides/slide1.xml", slide_markup)
+
+    markdown = _extract_document_text_from_path(
+        file_path=deck_path,
+        original_name="报销培训.pptx",
+        mime_type="application/vnd.openxmlformats-officedocument.presentationml.presentation",
+    )
+
+    expected = ("# PowerPoint 演示文稿：报销培训.pptx", "## 幻灯片 1", "- 差旅报销培训", "- 发票、审批、预算三项要素必须齐全")
+    for fragment in expected:
+        assert fragment in markdown
+
+
+def _write_minimal_xlsx(file_path, *, sheet_name: str, rows: list[list[str]]) -> None:
+    """Write a one-sheet .xlsx archive to file_path, storing every cell as an XML-escaped inline string."""
+    from xml.sax.saxutils import escape, quoteattr  # function-scope: keeps the file's import block as-is
+    # quoteattr() supplies the surrounding quotes, so a name containing &, < or " stays well-formed.
+    workbook_xml = f"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
+          xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
+  <sheets>
+    <sheet name={quoteattr(str(sheet_name))} sheetId="1" r:id="rId1"/>
+  </sheets>
+</workbook>
+"""
+    rels_xml = """<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
+  <Relationship Id="rId1"
+                Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"
+                Target="worksheets/sheet1.xml"/>
+</Relationships>
+"""
+    # Column letters come from chr(65 + index), so a row may hold at most 26 cells (A-Z).
+    cell_xml = '<c r="{}{}" t="inlineStr"><is><t>{}</t></is></c>'.format  # column letter, row number, text
+    row_xml = "\n".join(
+        f'<row r="{r}">' + "".join(cell_xml(chr(65 + c), r, escape(str(v))) for c, v in enumerate(row)) + "</row>"
+        for r, row in enumerate(rows, start=1)
+    )
+    sheet_xml = f"""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
+  <sheetData>
+    {row_xml}
+  </sheetData>
+</worksheet>
+"""
+    with ZipFile(file_path, "w") as package:  # only these three parts are written
+        package.writestr("xl/workbook.xml", workbook_xml)
+        package.writestr("xl/_rels/workbook.xml.rels", rels_xml)
+        package.writestr("xl/worksheets/sheet1.xml", sheet_xml)