feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重,优
化本体检测和规则匹配精度,前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式,新增日志详情组件和
知识入库日志模型,补充单元测试覆盖。
This commit is contained in:
caoxiaozhu
2026-05-22 23:47:28 +08:00
parent 88ff04bef8
commit 5b388d08c0
84 changed files with 10170 additions and 2599 deletions

View File

@@ -540,7 +540,11 @@ def test_user_agent_asks_for_type_when_trip_context_is_ambiguous() -> None:
"交通费",
"住宿费",
"业务招待费",
"办公",
"会务",
"办公用品费",
"培训费",
"通讯费",
"福利费",
"其他费用",
]
assert response.suggested_actions[0].payload["original_message"] == message
@@ -729,6 +733,9 @@ def test_user_agent_keeps_taxi_ticket_for_customer_dropoff_as_transport_expense(
assert "业务招待费" not in response.review_payload.intent_summary
assert "客户名称" not in response.review_payload.missing_slots
assert "参与人员" not in response.review_payload.missing_slots
edit_field_keys = {item.key for item in response.review_payload.edit_fields}
assert "merchant_name" not in edit_field_keys
assert "participants" not in edit_field_keys
def test_user_agent_keeps_travel_range_when_user_adds_receipts_after_text_context() -> None:
@@ -1000,6 +1007,9 @@ def test_user_agent_transport_flow_infers_reason_and_does_not_require_location_o
assert response.review_payload is not None
slot_map = {item.key: item for item in response.review_payload.slot_cards}
document_card = response.review_payload.document_cards[0]
assert document_card.scene_label == "出租车/网约车票据"
assert document_card.suggested_expense_type == "transport"
assert slot_map["reason"].value == "交通出行"
assert slot_map["reason"].status == "inferred"
assert "酒店/商户" not in response.review_payload.missing_slots
@@ -1189,8 +1199,15 @@ def test_user_agent_document_service_normalizes_ocr_fields_and_scene() -> None:
assert fields["列车出发时间"] == "2026-03-04"
assert "商户/酒店" not in fields
assert document_service.extract_amount_text_from_value("滴滴出行 支付金额 1 元,实付 13.4 元,订单号 12345678") == "13.40元"
taxi_classified = document_service.classify_document({"filename": "行程单_的士票.jpg", "summary": "的士 车费 48 元"})
assert taxi_classified["document_type"] == "taxi_receipt"
assert taxi_classified["expense_type"] == "transport"
assert taxi_classified["scene_label"] == "出租车/网约车票据"
ship_classified = document_service.classify_document({"filename": "轮船票.jpg", "summary": "轮船 船票 金额 180 元"})
assert ship_classified["document_type"] == "ship_ticket"
assert ship_classified["scene_label"] == "轮船票"
assert classified["document_type"] == "meal_receipt"
assert classified["expense_type"] == "entertainment"
assert classified["expense_type"] == "meal"
assert document_service.infer_expense_type_from_documents(
[{"filename": "客户餐饮发票.jpg", "summary": "餐饮发票 客户招待 金额 320 元"}],
expense_type_code="entertainment",
@@ -1262,11 +1279,13 @@ def test_user_agent_builds_review_payload_for_multi_document_expense_flow() -> N
assert response.review_payload is not None
assert len(response.review_payload.document_cards) == 2
assert len(response.review_payload.claim_groups) == 2
assert response.review_payload.missing_slots == ["参与人员"]
assert response.review_payload.missing_slots == ["参与人员", "酒店的报销票据待上传(必须)"]
assert [item.action_type for item in response.review_payload.confirmation_actions] == [
"save_draft",
]
assert any(item.scene_label == "业务招待费" for item in response.review_payload.document_cards)
assert any(item.scene_label == "餐饮发票" for item in response.review_payload.document_cards)
assert all(item.scene_label != "业务招待费" for item in response.review_payload.document_cards)
assert any(item.scene_label == "业务招待费" for item in response.review_payload.claim_groups)
assert f"时间:{yesterday}" in response.review_payload.intent_summary
slot_map = {item.key: item for item in response.review_payload.slot_cards}
assert slot_map["time_range"].value == yesterday
@@ -1899,7 +1918,58 @@ def test_user_agent_review_payload_prechecks_taxi_amount_against_rule_standard()
assert "单笔交通金额" in combined
assert "报销场景提交与附件标准" in combined
assert amount_brief.level == "high"
assert any(item.title == "附件金额测算结果" for item in response.review_payload.risk_briefs)
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算异常")
assert measurement.level == "warning"
assert "超出标准" in measurement.detail
def test_user_agent_review_payload_does_not_mark_compliant_taxi_amount_as_low_risk() -> None:
    """Check that a single 59.10 taxi receipt yields no amount-measurement risk brief.

    The test parses the user message with ``SemanticOntologyService``, feeds the
    resulting ontology into ``UserAgentService.respond`` in draft-only mode, and
    then verifies that the review payload carries neither the "measurement
    result" nor the "measurement anomaly" brief, and that no brief detail
    contains the "measurement passed" wording.

    NOTE(review): the test name treats 59.10 as within the rule standard; the
    standard itself is not visible in this function -- confirm against the
    rule fixtures.
    """
    user_id = "pytest-taxi-pass@example.com"
    message = "我上传一张的士票59.10元,帮我生成交通费报销草稿"

    # One OCR-recognised taxi receipt carrying a single amount field.
    taxi_document = {
        "filename": "的士1.jpg",
        "document_type": "taxi_receipt",
        "summary": "出租车/网约车票据 支付金额 59.10 元",
        "text": "的士 车费 59.10 元",
        "avg_score": 0.95,
        "document_fields": [
            {"key": "amount", "label": "支付金额", "value": "59.10"},
        ],
        "warnings": [],
    }
    request_context = {
        "name": "张三",
        "attachment_names": ["的士1.jpg"],
        "attachment_count": 1,
        "ocr_documents": [taxi_document],
    }

    open_session = build_session_factory()
    with open_session() as db:
        parse_request = OntologyParseRequest(
            query=message,
            user_id=user_id,
            context_json=request_context,
        )
        ontology = SemanticOntologyService(db).parse(parse_request)

        agent_request = UserAgentRequest(
            run_id=ontology.run_id,
            user_id=user_id,
            message=message,
            ontology=ontology,
            context_json=request_context,
            tool_payload={"draft_only": True},
        )
        response = UserAgentService(db).respond(agent_request)

        assert response.review_payload is not None
        briefs = response.review_payload.risk_briefs
        brief_titles = [brief.title for brief in briefs]
        joined_details = "\n".join(brief.detail for brief in briefs)

        # Neither the legacy "result" title nor the "anomaly" title may appear.
        for forbidden_title in ("附件金额测算结果", "附件金额测算异常"):
            assert forbidden_title not in brief_titles
        # No brief may report a "measurement passed" message either.
        assert "测算通过" not in joined_details
def test_user_agent_review_payload_uses_finance_spreadsheet_hotel_amount_standard() -> None:
@@ -2067,8 +2137,9 @@ def test_user_agent_review_payload_uses_finance_spreadsheet_meal_allowance_stand
assert "直辖市/特区" in combined
assert "公司差旅费报销规则" in combined
assert meal_brief.level == "high"
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算结果")
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算异常")
assert "伙食补助标准 65.00" in measurement.detail
assert "超出标准" in measurement.detail
def test_user_agent_filters_deprecated_review_risk_briefs() -> None: