feat: 增强知识库索引与设置页面模块化拆分
扩展知识库索引任务和 RAG 检索支持增量入库和文档去重,优化本体检测和规则匹配精度,前端设置页面拆分为 LLM、邮件和 Hermes 员工同步子面板并重构样式,新增日志详情组件和知识入库日志模型,补充单元测试覆盖。
This commit is contained in:
@@ -181,6 +181,49 @@ def test_save_or_submit_persists_claim_only_after_save_draft_action() -> None:
|
||||
assert _count_claims(db) == before_count + 1
|
||||
|
||||
|
||||
def test_save_draft_persists_user_changed_expense_category() -> None:
    """Saving a draft keeps the expense category chosen in the review form.

    The free-text message mentions a transport fare, while the review form
    values sent with the ``save_draft`` action carry ``办公用品费``. The stored
    claim and its first item are expected to use the ``office`` code.
    """
    user_id = "save-draft-category@example.com"
    employee_name = "分类员工"
    message = "业务发生时间:2026-03-04,打车去客户现场,交通费32元,请帮我看看怎么报"

    # Values the user confirmed in the review form; the category differs
    # from what the message text talks about.
    reviewed_values = {
        "expense_type": "办公用品费",
        "amount": "32元",
        "occurred_date": "2026-03-04",
        "reason": "右侧核对后改为办公用品费",
    }
    draft_context = {
        "name": employee_name,
        "user_input_text": message,
        "review_action": "save_draft",
        "review_form_values": reviewed_values,
    }

    with build_session() as db:
        db.add(
            Employee(
                employee_no="E5102",
                name=employee_name,
                email=user_id,
            )
        )
        db.commit()

        parse_request = OntologyParseRequest(query=message, user_id=user_id)
        ontology = SemanticOntologyService(db).parse(parse_request)

        outcome = ExpenseClaimService(db).save_or_submit_from_ontology(
            run_id=ontology.run_id,
            user_id=user_id,
            message=message,
            ontology=ontology,
            context_json=draft_context,
        )

        saved_claim = db.get(ExpenseClaim, outcome["claim_id"])
        assert saved_claim is not None
        assert saved_claim.expense_type == "office"
        assert saved_claim.items[0].item_type == "office"
|
||||
|
||||
|
||||
def test_unsaved_conversation_expires_after_retention_but_saved_conversation_stays() -> None:
|
||||
with build_session() as db:
|
||||
service = AgentConversationService(db)
|
||||
|
||||
@@ -1,6 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import threading
|
||||
|
||||
from app.services import knowledge_rag as knowledge_rag_module
|
||||
from app.services.knowledge_ingest_log import (
|
||||
build_document_graph_summary,
|
||||
build_ingest_document_summary,
|
||||
build_ingest_status_summary,
|
||||
)
|
||||
from app.services.knowledge_rag import KnowledgeRagService
|
||||
|
||||
|
||||
@@ -86,7 +94,10 @@ def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> No
|
||||
{
|
||||
"chunk_id": "body-1",
|
||||
"file_path": "/tmp/doc-1__费用制度.md",
|
||||
"content": "附表3:支出归口管理部门与归口业务范围\n组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。",
|
||||
"content": (
|
||||
"附表3:支出归口管理部门与归口业务范围\n"
|
||||
"组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。"
|
||||
),
|
||||
},
|
||||
],
|
||||
entities=[],
|
||||
@@ -100,9 +111,11 @@ def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
knowledge_rag_module.socket,
|
||||
"getaddrinfo",
|
||||
lambda hostname, port: [("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
|
||||
if hostname == "qdrant"
|
||||
else [],
|
||||
lambda hostname, port: (
|
||||
[("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
|
||||
if hostname == "qdrant"
|
||||
else []
|
||||
),
|
||||
)
|
||||
|
||||
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://qdrant:6333"
|
||||
@@ -117,6 +130,45 @@ def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
|
||||
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://127.0.0.1:6333"
|
||||
|
||||
|
||||
def test_runtime_cache_is_isolated_by_thread(monkeypatch) -> None:
    """Each thread gets its own runtime even when the signature is identical.

    ``_LightRagRuntime`` is replaced with a fake that records every instance,
    and ``_build_runtime_signature`` is pinned to one constant value so that
    only the calling thread can explain a second runtime being created.
    """
    # Start from a clean slate so earlier tests cannot contribute runtimes.
    knowledge_rag_module.shutdown_knowledge_rag_runtime()
    created_runtimes = []

    class FakeRuntime:
        """Stand-in runtime that records its creation and finalization."""

        def __init__(self, **_kwargs):
            self.finalized = False
            created_runtimes.append(self)

        def finalize(self):
            self.finalized = True

    monkeypatch.setattr(knowledge_rag_module, "_LightRagRuntime", FakeRuntime)
    monkeypatch.setattr(
        KnowledgeRagService,
        "_build_runtime_signature",
        lambda self: (("same-config",), {}),
    )

    try:
        service = KnowledgeRagService()
        main_runtime = service._get_runtime()
        # A second lookup on the same thread must reuse the first runtime.
        assert service._get_runtime() is main_runtime

        worker_runtimes = []

        def load_worker_runtime() -> None:
            worker_runtimes.append(KnowledgeRagService()._get_runtime())

        # daemon=True keeps a stuck worker from blocking interpreter exit.
        thread = threading.Thread(target=load_worker_runtime, daemon=True)
        thread.start()
        thread.join(timeout=5)

        # join() returns silently on timeout, so check the outcome explicitly
        # instead of failing later with an IndexError on worker_runtimes[0].
        assert not thread.is_alive(), "worker thread did not finish within 5s"
        assert len(worker_runtimes) == 1, "worker thread did not produce a runtime"

        assert len(created_runtimes) == 2
        assert worker_runtimes[0] is not main_runtime
    finally:
        # Run the shutdown even when an assertion above fails, so the fake
        # runtimes are not left behind for later tests (presumably the module
        # keeps them in a cache that outlives this test - confirm).
        knowledge_rag_module.shutdown_knowledge_rag_runtime()

    assert all(runtime.finalized for runtime in created_runtimes)
|
||||
|
||||
|
||||
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
|
||||
assert (
|
||||
KnowledgeRagService.is_query_ready_status(
|
||||
@@ -141,3 +193,89 @@ def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> N
|
||||
)
|
||||
is False
|
||||
)
|
||||
|
||||
|
||||
def test_build_document_graph_summary_reads_lightrag_storage(tmp_path) -> None:
    """The graph summary is built from the JSON stores in the workspace dir.

    Three ``kv_store_*.json`` files are written under
    ``knowledge/.lightrag/test_workspace``. The expectations cover a
    duplicated entity name being counted once, the relation pair being
    reported with type ``关联``, and chunks coming back as chunk-1, chunk-2
    although they are stored in the opposite order.
    """
    workspace = tmp_path / "knowledge" / ".lightrag" / "test_workspace"
    workspace.mkdir(parents=True)

    # chunk-2 is deliberately listed before chunk-1.
    text_chunks = {
        "chunk-2": {
            "_id": "chunk-2",
            "full_doc_id": "doc-1",
            "chunk_order_index": 1,
            "tokens": 45,
            "content": "第二条 支出审批需要结合预算、归口部门和授权标准执行。",
        },
        "chunk-1": {
            "_id": "chunk-1",
            "full_doc_id": "doc-1",
            "chunk_order_index": 0,
            "tokens": 31,
            "content": "第一条 本办法适用于公司支出管理。",
        },
    }
    storage_payloads = {
        "kv_store_full_entities.json": {
            "doc-1": {"entity_names": ["远光软件", "支出管理", "远光软件"]},
        },
        "kv_store_full_relations.json": {
            "doc-1": {"relation_pairs": [["远光软件", "支出管理"]]},
        },
        "kv_store_text_chunks.json": text_chunks,
    }
    for file_name, payload in storage_payloads.items():
        (workspace / file_name).write_text(json.dumps(payload), encoding="utf-8")

    summary = build_document_graph_summary(
        tmp_path,
        workspace="test_workspace",
        document_id="doc-1",
    )

    assert summary["entity_count"] == 2
    assert summary["entities"] == ["远光软件", "支出管理"]
    assert summary["relation_count"] == 1
    assert summary["relations"] == [{"source": "远光软件", "target": "支出管理", "type": "关联"}]
    returned_chunk_ids = [chunk["id"] for chunk in summary["chunks"]]
    assert returned_chunk_ids == ["chunk-1", "chunk-2"]
|
||||
|
||||
|
||||
def test_build_ingest_document_summary_extracts_sections() -> None:
    """The document summary reports the file name and the detected sections.

    The indexed text holds a ``# 第一章 总则`` heading and a ``第二条 审批``
    line. The summary is expected to count two sections, the first one
    titled ``第一章 总则``.
    """
    library_entry = {
        "original_name": "公司支出管理办法.pdf",
        "folder": "制度文件",
        "extension": "pdf",
        "mime_type": "application/pdf",
    }
    raw_text = "第一章 总则\n本办法用于规范公司支出。"
    indexed_text = "# 第一章 总则\n本办法用于规范公司支出。\n第二条 审批\n审批需按授权执行。"

    summary = build_ingest_document_summary(
        document_id="doc-1",
        entry=library_entry,
        raw_text=raw_text,
        indexed_text=indexed_text,
    )

    assert summary["name"] == "公司支出管理办法.pdf"
    assert summary["section_count"] == 2
    first_section = summary["sections"][0]
    assert first_section["title"] == "第一章 总则"
|
||||
|
||||
|
||||
def test_build_ingest_status_summary_keeps_chunk_status() -> None:
    """Status, readiness and chunk details from the payload reach the summary.

    The status payload's ``status``, ``query_ready``, ``chunks_count`` and
    ``chunks_list`` are expected under the summary keys ``lightrag_status``,
    ``query_ready``, ``chunk_count`` and ``chunk_ids``.
    """
    chunk_ids = ["chunk-1", "chunk-2"]
    status_payload = {
        "status": "processed",
        "query_ready": True,
        "chunks_count": 2,
        "chunks_list": chunk_ids,
    }
    graph_summary = {
        "entity_count": 1,
        "relation_count": 0,
        "entities": ["预算"],
        "relations": [],
    }

    summary = build_ingest_status_summary(
        status_payload=status_payload,
        graph_summary=graph_summary,
    )

    assert summary["lightrag_status"] == "processed"
    assert summary["query_ready"] is True
    assert summary["chunk_count"] == 2
    assert summary["chunk_ids"] == ["chunk-1", "chunk-2"]
|
||||
|
||||
@@ -389,10 +389,10 @@ def test_semantic_ontology_service_prefers_expense_for_customer_entertainment_na
|
||||
assert result.clarification_required is True
|
||||
assert "customer_name" in result.missing_slots
|
||||
assert "participants" in result.missing_slots
|
||||
assert any(
|
||||
item.type == "expense_type" and item.normalized_value == "entertainment"
|
||||
for item in result.entities
|
||||
)
|
||||
assert any(
|
||||
item.type == "expense_type" and item.normalized_value == "meal"
|
||||
for item in result.entities
|
||||
)
|
||||
|
||||
|
||||
def test_semantic_ontology_service_uses_client_local_date_for_relative_time() -> None:
|
||||
@@ -556,6 +556,39 @@ def test_semantic_ontology_service_maps_taxi_ticket_reimbursement_to_transport_d
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "query,expected_type",
    [
        ("报销飞机票和行程单", "travel"),
        ("报销酒店发票和房费", "hotel"),
        ("报销滴滴打车票", "transport"),
        ("报销工作餐餐费", "meal"),
        ("报销会议场地费", "meeting"),
        ("报销客户接待餐", "meal"),
        ("报销打印纸和硒鼓", "office"),
        ("报销培训课程费", "training"),
        ("报销手机话费和流量费", "communication"),
        ("报销员工体检费", "welfare"),
    ],
)
def test_semantic_ontology_service_covers_common_expense_scene_keywords(
    query: str,
    expected_type: str,
) -> None:
    """Each reimbursement phrase parses to an expense draft of the expected type.

    For every parametrized query the parse result must have scenario
    ``expense`` and intent ``draft``, and include an ``expense_type`` entity
    whose normalized value equals ``expected_type``.
    """
    open_session = build_session_factory()
    with open_session() as db:
        parse_request = OntologyParseRequest(query=query, user_id="pytest")
        parsed = SemanticOntologyService(db).parse(parse_request)

        assert parsed.scenario == "expense"
        assert parsed.intent == "draft"
        detected_expense_types = [
            entity.normalized_value
            for entity in parsed.entities
            if entity.type == "expense_type"
        ]
        assert expected_type in detected_expense_types
|
||||
|
||||
|
||||
def test_semantic_ontology_service_uses_model_parse_when_available(monkeypatch) -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
|
||||
@@ -540,7 +540,11 @@ def test_user_agent_asks_for_type_when_trip_context_is_ambiguous() -> None:
|
||||
"交通费",
|
||||
"住宿费",
|
||||
"业务招待费",
|
||||
"办公费",
|
||||
"会务费",
|
||||
"办公用品费",
|
||||
"培训费",
|
||||
"通讯费",
|
||||
"福利费",
|
||||
"其他费用",
|
||||
]
|
||||
assert response.suggested_actions[0].payload["original_message"] == message
|
||||
@@ -729,6 +733,9 @@ def test_user_agent_keeps_taxi_ticket_for_customer_dropoff_as_transport_expense(
|
||||
assert "业务招待费" not in response.review_payload.intent_summary
|
||||
assert "客户名称" not in response.review_payload.missing_slots
|
||||
assert "参与人员" not in response.review_payload.missing_slots
|
||||
edit_field_keys = {item.key for item in response.review_payload.edit_fields}
|
||||
assert "merchant_name" not in edit_field_keys
|
||||
assert "participants" not in edit_field_keys
|
||||
|
||||
|
||||
def test_user_agent_keeps_travel_range_when_user_adds_receipts_after_text_context() -> None:
|
||||
@@ -1000,6 +1007,9 @@ def test_user_agent_transport_flow_infers_reason_and_does_not_require_location_o
|
||||
|
||||
assert response.review_payload is not None
|
||||
slot_map = {item.key: item for item in response.review_payload.slot_cards}
|
||||
document_card = response.review_payload.document_cards[0]
|
||||
assert document_card.scene_label == "出租车/网约车票据"
|
||||
assert document_card.suggested_expense_type == "transport"
|
||||
assert slot_map["reason"].value == "交通出行"
|
||||
assert slot_map["reason"].status == "inferred"
|
||||
assert "酒店/商户" not in response.review_payload.missing_slots
|
||||
@@ -1189,8 +1199,15 @@ def test_user_agent_document_service_normalizes_ocr_fields_and_scene() -> None:
|
||||
assert fields["列车出发时间"] == "2026-03-04"
|
||||
assert "商户/酒店" not in fields
|
||||
assert document_service.extract_amount_text_from_value("滴滴出行 支付金额 1 元,实付 13.4 元,订单号 12345678") == "13.40元"
|
||||
taxi_classified = document_service.classify_document({"filename": "行程单_的士票.jpg", "summary": "的士 车费 48 元"})
|
||||
assert taxi_classified["document_type"] == "taxi_receipt"
|
||||
assert taxi_classified["expense_type"] == "transport"
|
||||
assert taxi_classified["scene_label"] == "出租车/网约车票据"
|
||||
ship_classified = document_service.classify_document({"filename": "轮船票.jpg", "summary": "轮船 船票 金额 180 元"})
|
||||
assert ship_classified["document_type"] == "ship_ticket"
|
||||
assert ship_classified["scene_label"] == "轮船票"
|
||||
assert classified["document_type"] == "meal_receipt"
|
||||
assert classified["expense_type"] == "entertainment"
|
||||
assert classified["expense_type"] == "meal"
|
||||
assert document_service.infer_expense_type_from_documents(
|
||||
[{"filename": "客户餐饮发票.jpg", "summary": "餐饮发票 客户招待 金额 320 元"}],
|
||||
expense_type_code="entertainment",
|
||||
@@ -1262,11 +1279,13 @@ def test_user_agent_builds_review_payload_for_multi_document_expense_flow() -> N
|
||||
assert response.review_payload is not None
|
||||
assert len(response.review_payload.document_cards) == 2
|
||||
assert len(response.review_payload.claim_groups) == 2
|
||||
assert response.review_payload.missing_slots == ["参与人员"]
|
||||
assert response.review_payload.missing_slots == ["参与人员", "酒店的报销票据待上传(必须)"]
|
||||
assert [item.action_type for item in response.review_payload.confirmation_actions] == [
|
||||
"save_draft",
|
||||
]
|
||||
assert any(item.scene_label == "业务招待费" for item in response.review_payload.document_cards)
|
||||
assert any(item.scene_label == "餐饮发票" for item in response.review_payload.document_cards)
|
||||
assert all(item.scene_label != "业务招待费" for item in response.review_payload.document_cards)
|
||||
assert any(item.scene_label == "业务招待费" for item in response.review_payload.claim_groups)
|
||||
assert f"时间:{yesterday}" in response.review_payload.intent_summary
|
||||
slot_map = {item.key: item for item in response.review_payload.slot_cards}
|
||||
assert slot_map["time_range"].value == yesterday
|
||||
@@ -1899,7 +1918,58 @@ def test_user_agent_review_payload_prechecks_taxi_amount_against_rule_standard()
|
||||
assert "单笔交通金额" in combined
|
||||
assert "报销场景提交与附件标准" in combined
|
||||
assert amount_brief.level == "high"
|
||||
assert any(item.title == "附件金额测算结果" for item in response.review_payload.risk_briefs)
|
||||
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算异常")
|
||||
assert measurement.level == "warning"
|
||||
assert "超出标准" in measurement.detail
|
||||
|
||||
|
||||
def test_user_agent_review_payload_does_not_mark_compliant_taxi_amount_as_low_risk() -> None:
    """A 59.10 taxi receipt yields no amount-measurement risk brief.

    The review payload must exist, must not contain risk briefs titled
    ``附件金额测算结果`` or ``附件金额测算异常``, and no brief detail may
    contain ``测算通过``.
    """
    user_id = "pytest-taxi-pass@example.com"
    query = "我上传一张的士票59.10元,帮我生成交通费报销草稿"
    taxi_document = {
        "filename": "的士1.jpg",
        "document_type": "taxi_receipt",
        "summary": "出租车/网约车票据 支付金额 59.10 元",
        "text": "的士 车费 59.10 元",
        "avg_score": 0.95,
        "document_fields": [
            {"key": "amount", "label": "支付金额", "value": "59.10"},
        ],
        "warnings": [],
    }
    context = {
        "name": "张三",
        "attachment_names": ["的士1.jpg"],
        "attachment_count": 1,
        "ocr_documents": [taxi_document],
    }

    open_session = build_session_factory()
    with open_session() as db:
        parse_request = OntologyParseRequest(
            query=query,
            user_id=user_id,
            context_json=context,
        )
        ontology = SemanticOntologyService(db).parse(parse_request)

        agent_request = UserAgentRequest(
            run_id=ontology.run_id,
            user_id=user_id,
            message=query,
            ontology=ontology,
            context_json=context,
            tool_payload={"draft_only": True},
        )
        response = UserAgentService(db).respond(agent_request)

        assert response.review_payload is not None
        briefs = response.review_payload.risk_briefs
        risk_titles = [brief.title for brief in briefs]
        risk_details = "\n".join(brief.detail for brief in briefs)
        assert "附件金额测算结果" not in risk_titles
        assert "附件金额测算异常" not in risk_titles
        assert "测算通过" not in risk_details
|
||||
|
||||
|
||||
def test_user_agent_review_payload_uses_finance_spreadsheet_hotel_amount_standard() -> None:
|
||||
@@ -2067,8 +2137,9 @@ def test_user_agent_review_payload_uses_finance_spreadsheet_meal_allowance_stand
|
||||
assert "直辖市/特区" in combined
|
||||
assert "公司差旅费报销规则" in combined
|
||||
assert meal_brief.level == "high"
|
||||
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算结果")
|
||||
measurement = next(item for item in response.review_payload.risk_briefs if item.title == "附件金额测算异常")
|
||||
assert "伙食补助标准 65.00" in measurement.detail
|
||||
assert "超出标准" in measurement.detail
|
||||
|
||||
|
||||
def test_user_agent_filters_deprecated_review_risk_briefs() -> None:
|
||||
|
||||
Reference in New Issue
Block a user