feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索,支持增量入库和文档去重;
优化本体检测和规则匹配精度;
前端设置页面拆分为 LLM、邮件和 Hermes 员工同步子面板,并重构样式;
新增日志详情组件和知识入库日志模型,补充单元测试覆盖。
This commit is contained in:
caoxiaozhu
2026-05-22 23:47:28 +08:00
parent 88ff04bef8
commit 5b388d08c0
84 changed files with 10170 additions and 2599 deletions

View File

@@ -181,6 +181,49 @@ def test_save_or_submit_persists_claim_only_after_save_draft_action() -> None:
assert _count_claims(db) == before_count + 1
def test_save_draft_persists_user_changed_expense_category() -> None:
    """Saving a draft keeps the expense category chosen in the review form.

    The free-text message mentions a taxi fare, while the review form values
    carry "办公用品费" as the expense type. The test expects the stored claim
    and its first item to both end up with the ``office`` code.
    """
    user_id = "save-draft-category@example.com"
    message = "业务发生时间:2026-03-04打车去客户现场交通费32元请帮我看看怎么报"
    # Values the user confirmed on the review panel; these override the parse.
    reviewed_form = {
        "expense_type": "办公用品费",
        "amount": "32元",
        "occurred_date": "2026-03-04",
        "reason": "右侧核对后改为办公用品费",
    }
    review_context = {
        "name": "分类员工",
        "user_input_text": message,
        "review_action": "save_draft",
        "review_form_values": reviewed_form,
    }

    with build_session() as db:
        db.add(Employee(employee_no="E5102", name="分类员工", email=user_id))
        db.commit()

        parse_request = OntologyParseRequest(query=message, user_id=user_id)
        parsed = SemanticOntologyService(db).parse(parse_request)

        outcome = ExpenseClaimService(db).save_or_submit_from_ontology(
            run_id=parsed.run_id,
            user_id=user_id,
            message=message,
            ontology=parsed,
            context_json=review_context,
        )

        saved_claim = db.get(ExpenseClaim, outcome["claim_id"])
        assert saved_claim is not None
        assert saved_claim.expense_type == "office"
        assert saved_claim.items[0].item_type == "office"
def test_unsaved_conversation_expires_after_retention_but_saved_conversation_stays() -> None:
with build_session() as db:
service = AgentConversationService(db)

View File

@@ -1,6 +1,14 @@
from __future__ import annotations
import json
import threading
from app.services import knowledge_rag as knowledge_rag_module
from app.services.knowledge_ingest_log import (
build_document_graph_summary,
build_ingest_document_summary,
build_ingest_status_summary,
)
from app.services.knowledge_rag import KnowledgeRagService
@@ -86,7 +94,10 @@ def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> No
{
"chunk_id": "body-1",
"file_path": "/tmp/doc-1__费用制度.md",
"content": "附表3支出归口管理部门与归口业务范围\n组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。",
"content": (
"附表3支出归口管理部门与归口业务范围\n"
"组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。"
),
},
],
entities=[],
@@ -100,9 +111,11 @@ def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
monkeypatch.setattr(
knowledge_rag_module.socket,
"getaddrinfo",
lambda hostname, port: [("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
if hostname == "qdrant"
else [],
lambda hostname, port: (
[("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
if hostname == "qdrant"
else []
),
)
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://qdrant:6333"
@@ -117,6 +130,45 @@ def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://127.0.0.1:6333"
def test_runtime_cache_is_isolated_by_thread(monkeypatch) -> None:
    """Runtimes are cached per thread and all finalized on shutdown.

    A fake runtime class records every instance that gets constructed. The
    main thread must receive the same instance on repeated lookups, a worker
    thread must receive a different one even though the runtime signature is
    identical, and ``shutdown_knowledge_rag_runtime`` must finalize both.
    """
    # Start from a clean cache so earlier tests cannot influence the counts.
    knowledge_rag_module.shutdown_knowledge_rag_runtime()
    created_runtimes = []

    class FakeRuntime:
        """Stand-in runtime that only tracks construction and finalization."""

        def __init__(self, **_kwargs):
            self.finalized = False
            created_runtimes.append(self)

        def finalize(self):
            self.finalized = True

    monkeypatch.setattr(knowledge_rag_module, "_LightRagRuntime", FakeRuntime)
    # Force every service instance to report the same signature, so any
    # second runtime can only come from the per-thread split.
    monkeypatch.setattr(
        KnowledgeRagService,
        "_build_runtime_signature",
        lambda self: (("same-config",), {}),
    )

    worker_runtimes = []

    def load_worker_runtime() -> None:
        worker_runtimes.append(KnowledgeRagService()._get_runtime())

    try:
        service = KnowledgeRagService()
        main_runtime = service._get_runtime()
        assert service._get_runtime() is main_runtime

        thread = threading.Thread(target=load_worker_runtime)
        thread.start()
        thread.join(timeout=5)
        # join() returns None even on timeout, so check liveness explicitly
        # instead of failing later with an unrelated IndexError.
        assert not thread.is_alive(), "worker thread did not finish within 5 seconds"
        assert worker_runtimes, "worker thread did not produce a runtime"

        assert len(created_runtimes) == 2
        assert worker_runtimes[0] is not main_runtime
    finally:
        # Always clear the module-level cache so a failure here does not leak
        # fake runtimes into later tests.
        knowledge_rag_module.shutdown_knowledge_rag_runtime()

    assert all(runtime.finalized for runtime in created_runtimes)
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
assert (
KnowledgeRagService.is_query_ready_status(
@@ -141,3 +193,89 @@ def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> N
)
is False
)
def test_build_document_graph_summary_reads_lightrag_storage(tmp_path) -> None:
    """The graph summary is assembled from the LightRAG JSON stores on disk.

    Three key-value store files are written under
    ``<tmp_path>/knowledge/.lightrag/test_workspace``. The entity list has a
    repeated name and the chunks are stored in reverse order; the test expects
    two unique entities, one relation, and chunks listed as chunk-1, chunk-2.
    """
    workspace = tmp_path / "knowledge" / ".lightrag" / "test_workspace"
    workspace.mkdir(parents=True)

    text_chunks = {
        "chunk-2": {
            "_id": "chunk-2",
            "full_doc_id": "doc-1",
            "chunk_order_index": 1,
            "tokens": 45,
            "content": "第二条 支出审批需要结合预算、归口部门和授权标准执行。",
        },
        "chunk-1": {
            "_id": "chunk-1",
            "full_doc_id": "doc-1",
            "chunk_order_index": 0,
            "tokens": 31,
            "content": "第一条 本办法适用于公司支出管理。",
        },
    }
    storage_payloads = {
        "kv_store_full_entities.json": {
            "doc-1": {"entity_names": ["远光软件", "支出管理", "远光软件"]},
        },
        "kv_store_full_relations.json": {
            "doc-1": {"relation_pairs": [["远光软件", "支出管理"]]},
        },
        "kv_store_text_chunks.json": text_chunks,
    }
    for file_name, payload in storage_payloads.items():
        store_path = workspace / file_name
        store_path.write_text(json.dumps(payload), encoding="utf-8")

    summary = build_document_graph_summary(
        tmp_path,
        workspace="test_workspace",
        document_id="doc-1",
    )

    expected_relation = {"source": "远光软件", "target": "支出管理", "type": "关联"}
    chunk_ids = [item["id"] for item in summary["chunks"]]

    assert summary["entity_count"] == 2
    assert summary["entities"] == ["远光软件", "支出管理"]
    assert summary["relation_count"] == 1
    assert summary["relations"] == [expected_relation]
    assert chunk_ids == ["chunk-1", "chunk-2"]
def test_build_ingest_document_summary_extracts_sections() -> None:
    """The document summary exposes the original name and detected sections.

    The indexed text holds a markdown chapter heading and an article heading;
    the test expects two sections, the first titled "第一章 总则".
    """
    library_entry = {
        "original_name": "公司支出管理办法.pdf",
        "folder": "制度文件",
        "extension": "pdf",
        "mime_type": "application/pdf",
    }
    raw_text = "第一章 总则\n本办法用于规范公司支出。"
    indexed_text = "# 第一章 总则\n本办法用于规范公司支出。\n第二条 审批\n审批需按授权执行。"

    summary = build_ingest_document_summary(
        document_id="doc-1",
        entry=library_entry,
        raw_text=raw_text,
        indexed_text=indexed_text,
    )

    first_section = summary["sections"][0]
    assert summary["name"] == "公司支出管理办法.pdf"
    assert summary["section_count"] == 2
    assert first_section["title"] == "第一章 总则"
def test_build_ingest_status_summary_keeps_chunk_status() -> None:
    """Status, readiness and chunk details carry over into the status summary."""
    lightrag_status = {
        "status": "processed",
        "query_ready": True,
        "chunks_count": 2,
        "chunks_list": ["chunk-1", "chunk-2"],
    }
    graph = {
        "entity_count": 1,
        "relation_count": 0,
        "entities": ["预算"],
        "relations": [],
    }

    summary = build_ingest_status_summary(
        status_payload=lightrag_status,
        graph_summary=graph,
    )

    assert summary["lightrag_status"] == "processed"
    assert summary["query_ready"] is True
    assert summary["chunk_count"] == 2
    assert summary["chunk_ids"] == ["chunk-1", "chunk-2"]

View File

@@ -389,10 +389,10 @@ def test_semantic_ontology_service_prefers_expense_for_customer_entertainment_na
assert result.clarification_required is True
assert "customer_name" in result.missing_slots
assert "participants" in result.missing_slots
assert any(
item.type == "expense_type" and item.normalized_value == "entertainment"
for item in result.entities
)
assert any(
item.type == "expense_type" and item.normalized_value == "meal"
for item in result.entities
)
def test_semantic_ontology_service_uses_client_local_date_for_relative_time() -> None:
@@ -556,6 +556,39 @@ def test_semantic_ontology_service_maps_taxi_ticket_reimbursement_to_transport_d
)
@pytest.mark.parametrize(
    "query,expected_type",
    [
        ("报销飞机票和行程单", "travel"),
        ("报销酒店发票和房费", "hotel"),
        ("报销滴滴打车票", "transport"),
        ("报销工作餐餐费", "meal"),
        ("报销会议场地费", "meeting"),
        ("报销客户接待餐", "meal"),
        ("报销打印纸和硒鼓", "office"),
        ("报销培训课程费", "training"),
        ("报销手机话费和流量费", "communication"),
        ("报销员工体检费", "welfare"),
    ],
)
def test_semantic_ontology_service_covers_common_expense_scene_keywords(
    query: str,
    expected_type: str,
) -> None:
    """Each common reimbursement phrase resolves to its expected expense type.

    For every parametrized query the parse result must be an ``expense``
    scenario with ``draft`` intent, and at least one ``expense_type`` entity
    must carry the expected normalized value.
    """
    session_factory = build_session_factory()
    with session_factory() as db:
        parse_request = OntologyParseRequest(query=query, user_id="pytest")
        result = SemanticOntologyService(db).parse(parse_request)

        # Collect the normalized values of every expense_type entity.
        detected_types = [
            entity.normalized_value
            for entity in result.entities
            if entity.type == "expense_type"
        ]

        assert result.scenario == "expense"
        assert result.intent == "draft"
        assert expected_type in detected_types
def test_semantic_ontology_service_uses_model_parse_when_available(monkeypatch) -> None:
session_factory = build_session_factory()
with session_factory() as db:

View File

@@ -540,7 +540,11 @@ def test_user_agent_asks_for_type_when_trip_context_is_ambiguous() -> None:
"交通费",
"住宿费",
"业务招待费",
"办公",
"会务",
"办公用品费",
"培训费",
"通讯费",
"福利费",
"其他费用",
]
assert response.suggested_actions[0].payload["original_message"] == message
@@ -729,6 +733,9 @@ def test_user_agent_keeps_taxi_ticket_for_customer_dropoff_as_transport_expense(
assert "业务招待费" not in response.review_payload.intent_summary
assert "客户名称" not in response.review_payload.missing_slots
assert "参与人员" not in response.review_payload.missing_slots
edit_field_keys = {item.key for item in response.review_payload.edit_fields}
assert "merchant_name" not in edit_field_keys
assert "participants" not in edit_field_keys
def test_user_agent_keeps_travel_range_when_user_adds_receipts_after_text_context() -> None:
@@ -1000,6 +1007,9 @@ def test_user_agent_transport_flow_infers_reason_and_does_not_require_location_o
assert response.review_payload is not None
slot_map = {item.key: item for item in response.review_payload.slot_cards}
document_card = response.review_payload.document_cards[0]
assert document_card.scene_label == "出租车/网约车票据"
assert document_card.suggested_expense_type == "transport"
assert slot_map["reason"].value == "交通出行"
assert slot_map["reason"].status == "inferred"
assert "酒店/商户" not in response.review_payload.missing_slots
@@ -1189,8 +1199,15 @@ def test_user_agent_document_service_normalizes_ocr_fields_and_scene() -> None:
assert fields["列车出发时间"] == "2026-03-04"
assert "商户/酒店" not in fields
assert document_service.extract_amount_text_from_value("滴滴出行 支付金额 1 元,实付 13.4 元,订单号 12345678") == "13.40元"
taxi_classified = document_service.classify_document({"filename": "行程单_的士票.jpg", "summary": "的士 车费 48 元"})
assert taxi_classified["document_type"] == "taxi_receipt"
assert taxi_classified["expense_type"] == "transport"
assert taxi_classified["scene_label"] == "出租车/网约车票据"
ship_classified = document_service.classify_document({"filename": "轮船票.jpg", "summary": "轮船 船票 金额 180 元"})
assert ship_classified["document_type"] == "ship_ticket"
assert ship_classified["scene_label"] == "轮船票"
assert classified["document_type"] == "meal_receipt"
assert classified["expense_type"] == "entertainment"
assert classified["expense_type"] == "meal"
assert document_service.infer_expense_type_from_documents(
[{"filename": "客户餐饮发票.jpg", "summary": "餐饮发票 客户招待 金额 320 元"}],
expense_type_code="entertainment",
@@ -1262,11 +1279,13 @@ def test_user_agent_builds_review_payload_for_multi_document_expense_flow() -> N
assert response.review_payload is not None
assert len(response.review_payload.document_cards) == 2
assert len(response.review_payload.claim_groups) == 2
assert response.review_payload.missing_slots == ["参与人员"]
assert response.review_payload.missing_slots == ["参与人员", "酒店的报销票据待上传(必须)"]
assert [item.action_type for item in response.review_payload.confirmation_actions] == [
"save_draft",
]
assert any(item.scene_label == "业务招待费" for item in response.review_payload.document_cards)
assert any(item.scene_label == "餐饮发票" for item in response.review_payload.document_cards)
assert all(item.scene_label != "业务招待费" for item in response.review_payload.document_cards)
assert any(item.scene_label == "业务招待费" for item in response.review_payload.claim_groups)
assert f"时间:{yesterday}" in response.review_payload.intent_summary
slot_map = {item.key: item for item in response.review_payload.slot_cards}
assert slot_map["time_range"].value == yesterday
@@ -1899,7 +1918,58 @@ def test_user_agent_review_payload_prechecks_taxi_amount_against_rule_standard()
assert "单笔交通金额" in combined
assert "报销场景提交与附件标准" in combined
assert amount_brief.level == "high"
assert any(item.title == "附件金额测算结果" for item in response.review_payload.risk_briefs)
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算异常")
assert measurement.level == "warning"
assert "超出标准" in measurement.detail
def test_user_agent_review_payload_does_not_mark_compliant_taxi_amount_as_low_risk() -> None:
    """A taxi receipt of 59.10 yields no amount-measurement risk brief.

    The review payload must exist, must not contain a brief titled
    "附件金额测算结果" or "附件金额测算异常", and no brief detail may mention
    "测算通过".
    """
    user_id = "pytest-taxi-pass@example.com"
    query = "我上传一张的士票59.10元,帮我生成交通费报销草稿"
    # Single OCR result for the uploaded taxi receipt.
    taxi_document = {
        "filename": "的士1.jpg",
        "document_type": "taxi_receipt",
        "summary": "出租车/网约车票据 支付金额 59.10 元",
        "text": "的士 车费 59.10 元",
        "avg_score": 0.95,
        "document_fields": [
            {"key": "amount", "label": "支付金额", "value": "59.10"},
        ],
        "warnings": [],
    }
    context = {
        "name": "张三",
        "attachment_names": ["的士1.jpg"],
        "attachment_count": 1,
        "ocr_documents": [taxi_document],
    }

    session_factory = build_session_factory()
    with session_factory() as db:
        parse_request = OntologyParseRequest(
            query=query,
            user_id=user_id,
            context_json=context,
        )
        ontology = SemanticOntologyService(db).parse(parse_request)

        agent_request = UserAgentRequest(
            run_id=ontology.run_id,
            user_id=user_id,
            message=query,
            ontology=ontology,
            context_json=context,
            tool_payload={"draft_only": True},
        )
        response = UserAgentService(db).respond(agent_request)

        assert response.review_payload is not None
        briefs = response.review_payload.risk_briefs
        risk_titles = [brief.title for brief in briefs]
        risk_details = "\n".join(brief.detail for brief in briefs)

        for unwanted_title in ("附件金额测算结果", "附件金额测算异常"):
            assert unwanted_title not in risk_titles
        assert "测算通过" not in risk_details
def test_user_agent_review_payload_uses_finance_spreadsheet_hotel_amount_standard() -> None:
@@ -2067,8 +2137,9 @@ def test_user_agent_review_payload_uses_finance_spreadsheet_meal_allowance_stand
assert "直辖市/特区" in combined
assert "公司差旅费报销规则" in combined
assert meal_brief.level == "high"
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算结果")
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算异常")
assert "伙食补助标准 65.00" in measurement.detail
assert "超出标准" in measurement.detail
def test_user_agent_filters_deprecated_review_risk_briefs() -> None: