Files
X-Financial/server/tests/test_knowledge_rag_service.py
caoxiaozhu d4d5d40569 feat: 新增预算费控模型与报销审批流引擎
后端新增预算费控服务和报销单审批流模块,引入申请人费用画像
算法,优化知识库 RAG 运行时和同步逻辑,完善报销单工作流常
量和明细同步,更新差旅报销规则电子表格,前端新增预算分析
组件和数字员工模型,完善审批对话框和洞察面板交互,优化侧
边栏和顶栏样式,补充单元测试。
2026-05-27 17:31:27 +08:00

459 lines
17 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import json
import threading
from app.services import knowledge_rag as knowledge_rag_module
from app.services.knowledge_ingest_log import (
build_document_graph_summary,
build_ingest_document_summary,
build_ingest_status_summary,
enrich_knowledge_ingest_route_json,
)
from app.services.knowledge_rag import KnowledgeRagService
from app.services.knowledge_rag_local import query_local_text_chunks
def test_build_hits_prioritizes_structured_table_evidence_for_standard_queries() -> None:
    """Expect the table-bearing chunk to be ranked ahead of the plain-prose chunk.

    Both chunks come from the same file; the input order is prose first, table
    second, and the expected output order is the reverse.
    """
    prose_chunk = {
        "chunk_id": "plain-1",
        "file_path": "/tmp/doc-1__差旅制度.md",
        "content": "住宿费说明文字,提到了出差和报销要求,但没有清晰表格。",
    }
    table_chunk = {
        "chunk_id": "table-1",
        "file_path": "/tmp/doc-1__差旅制度.md",
        "content": "# 结构化表格补充\n\n| 城市 | 住宿费标准 |\n| 北京 | 500 |",
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="住宿费标准是多少?",
        chunks=[prose_chunk, table_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["table-1", "plain-1"]
def test_build_hits_boosts_query_term_matches() -> None:
    """Expect the chunk whose text contains the query wording to be ranked first.

    The travel chunk is supplied first but does not mention the entertainment
    expense terms from the query; the expected order puts "ent-1" on top.
    """
    travel_chunk = {
        "chunk_id": "travel-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "差旅费包含交通费、住宿费和餐补标准。",
    }
    entertainment_chunk = {
        "chunk_id": "ent-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "业务招待费报销标准:应结合客户接待场景、人数和审批要求执行。",
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="招待费报销标准",
        chunks=[travel_chunk, entertainment_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["ent-1", "travel-1"]
def test_build_hits_keeps_long_query_anchor_terms_for_accounting_table() -> None:
    """Expect the accounting-subject table chunk to outrank a generic training chunk.

    The query is a long sentence naming both the handbook and the section; the
    expected ranking places the chunk containing that section's table first.
    """
    accounts_table = (
        "二、常用会计科目\n\n"
        "| 科目类别 | 科目名称 | 说明 |\n"
        "| --- | --- | --- |\n"
        "| 资产类 | 库存现金 | 公司持有的现金 |\n"
        "| 损益类 | 销售费用 | 为销售产品发生的费用 |"
    )
    generic_chunk = {
        "chunk_id": "generic-1",
        "file_path": "/tmp/doc-1__远光软件财务制度培训手册.docx",
        "content": "远光软件股份有限公司财务培训内容,介绍费用报销和财务制度。",
    }
    accounts_chunk = {
        "chunk_id": "accounts-1",
        "file_path": "/tmp/doc-2__远光软件财务基础知识手册.docx",
        "content": accounts_table,
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="远光软件财务基础知识手册里的常用会计科目是什么?",
        chunks=[generic_chunk, accounts_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["accounts-1", "generic-1"]
def test_build_hits_prioritizes_answer_clue_appendix_for_rule_queries() -> None:
    """Expect the "answer clue" appendix chunk to be ranked before the generic chunk.

    The clue chunk lists the reimbursement deadline explicitly, while the plain
    chunk only describes the policy in general terms.
    """
    clue_text = (
        "# 问答线索补充\n\n"
        "- 第二章 报销时限:费用发生后 30 日内提交申请。\n"
        "- 第二章 报销时限:超过 30 日需补充审批说明。"
    )
    plain_chunk = {
        "chunk_id": "plain-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "本制度用于规范报销流程,员工应遵守公司审批要求。",
    }
    clue_chunk = {
        "chunk_id": "clue-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": clue_text,
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="报销时限是多少?",
        chunks=[plain_chunk, clue_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["clue-1", "plain-1"]
def test_query_local_text_chunks_prioritizes_relevant_policy_chunk(tmp_path) -> None:
    """Expect the travel-policy chunk to be the top, confident local hit.

    A two-chunk ``kv_store_text_chunks.json`` is written under a temporary
    LightRAG workspace, then queried with a lodging-overspend question.
    """
    lightrag_root = tmp_path / "knowledge" / ".lightrag"
    workspace_dir = lightrag_root / "x_financial_knowledge"
    workspace_dir.mkdir(parents=True)

    travel_text = (
        "第十三条 差旅费。酒店住宿限额标准如下其他员工直辖市350元、"
        "省会城市300元、其他地区250元。确因紧急公务、特别情形等事项"
        "导致住宿超过规定标准时超标20%以内由部门负责人审批,"
        "超标20%以上需分管领导审批。"
    )
    stored_chunks = {
        "chunk-travel": {
            "_id": "chunk-travel",
            "full_doc_id": "doc-1",
            "chunk_order_index": 1,
            "file_path": "/tmp/doc-1__差旅费管理办法.pdf",
            "content": travel_text,
        },
        "chunk-office": {
            "_id": "chunk-office",
            "full_doc_id": "doc-2",
            "chunk_order_index": 1,
            "file_path": "/tmp/doc-2__办公用品管理办法.pdf",
            "content": "办公用品采购应遵循预算和验收流程。",
        },
    }
    chunk_store = workspace_dir / "kv_store_text_chunks.json"
    chunk_store.write_text(json.dumps(stored_chunks), encoding="utf-8")

    result = query_local_text_chunks(
        lightrag_root=lightrag_root,
        workspace="x_financial_knowledge",
        query="住宿费超过标准审批依据是什么?",
        limit=2,
    )

    assert result.confident is True
    top_hit = result.hits[0]
    assert top_hit["candidate_id"] == "chunk-travel"
    assert "住宿超过规定标准" in top_hit["content"]
def test_query_knowledge_uses_local_chunks_before_lightrag_runtime(tmp_path, monkeypatch) -> None:
    """Expect ``query_knowledge`` to answer from local chunks without touching the runtime.

    ``_get_runtime`` is replaced with a function that raises, so the test fails
    if the service tries to obtain the LightRAG runtime for this query.
    """
    workspace_dir = tmp_path / "knowledge" / ".lightrag" / "x_financial_knowledge"
    workspace_dir.mkdir(parents=True)

    policy_text = (
        "第八条 支出报销申请时限。公司各类支出报销结算申请时限为三个月。"
        "逾期需说明原因,经分管领导审批后方可报销。"
    )
    stored_chunks = {
        "chunk-1": {
            "_id": "chunk-1",
            "full_doc_id": "doc-1",
            "chunk_order_index": 1,
            "file_path": "/tmp/doc-1__公司支出管理办法.pdf",
            "content": policy_text,
        }
    }
    chunk_store = workspace_dir / "kv_store_text_chunks.json"
    chunk_store.write_text(json.dumps(stored_chunks), encoding="utf-8")

    def fail_if_runtime_is_used(_self):
        raise AssertionError("local high-confidence queries should not initialize LightRAG")

    monkeypatch.setattr(KnowledgeRagService, "_get_runtime", fail_if_runtime_is_used)

    service = KnowledgeRagService(storage_root=tmp_path)
    payload = service.query_knowledge(
        "费用发生后多久内必须报销?超过三个月还能不能报?",
        limit=3,
    )

    assert payload["record_count"] == 1
    assert payload["metadata"]["retrieval_strategy"] == "local_text_chunks"
    assert "三个月" in payload["hits"][0]["content"]
def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> None:
    """Expect the body chunk to be ranked ahead of the chapter-navigation chunk.

    The navigation chunk is only a list of chapter headings; the body chunk
    names the department that the query asks about.
    """
    body_text = (
        "附表3支出归口管理部门与归口业务范围\n"
        "组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。"
    )
    navigation_chunk = {
        "chunk_id": "toc-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "# 章节导航\n\n- 第一章 总则\n- 第二章 职责分工\n- 第三章 支出归口",
    }
    body_chunk = {
        "chunk_id": "body-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": body_text,
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="探亲差旅归哪个部门管理?",
        chunks=[navigation_chunk, body_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["body-1", "toc-1"]
def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
    """Expect the ``qdrant`` host URL when the patched lookup resolves that hostname."""

    def fake_getaddrinfo(hostname, port):
        # Only the "qdrant" hostname resolves; every other name yields no addresses.
        if hostname == "qdrant":
            return [("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
        return []

    monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", fake_getaddrinfo)

    resolved_url = knowledge_rag_module._resolve_default_qdrant_url()
    assert resolved_url == "http://qdrant:6333"
def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
    """Expect the loopback URL when the patched address lookup raises ``OSError``."""

    def raise_lookup_error(_hostname, _port):
        raise OSError("lookup failed")

    monkeypatch.setattr(
        knowledge_rag_module.socket,
        "getaddrinfo",
        raise_lookup_error,
    )

    resolved_url = knowledge_rag_module._resolve_default_qdrant_url()
    assert resolved_url == "http://127.0.0.1:6333"
def test_runtime_cache_uses_dedicated_instance_across_calling_threads(monkeypatch) -> None:
    """Expect one runtime instance to be shared by the main thread and a worker thread.

    The real runtime class is swapped for ``FakeRuntime`` and the runtime
    signature is pinned to a constant, so every ``_get_runtime`` call in this
    test asks for the same configuration. The test then checks that only one
    runtime was constructed, that the worker thread received that same object,
    and that shutting the cache down finalizes it.

    The cached runtime is always shut down in a ``finally`` block so a failing
    assertion cannot leave a fake runtime behind for later tests.
    """
    # Start from an empty cache so runtimes created by earlier tests are not reused.
    knowledge_rag_module.shutdown_knowledge_rag_runtime()
    created_runtimes = []

    class FakeRuntime:
        """Stand-in runtime that records its construction and finalization."""

        def __init__(self, **_kwargs):
            self.finalized = False
            created_runtimes.append(self)

        def finalize(self):
            self.finalized = True

    monkeypatch.setattr(knowledge_rag_module, "_LightRagRuntime", FakeRuntime)
    monkeypatch.setattr(
        KnowledgeRagService,
        "_build_runtime_signature",
        lambda self: (("same-config",), {}),
    )

    worker_runtimes = []
    worker_errors = []

    def load_worker_runtime() -> None:
        # Capture failures so they are reported by the test instead of only
        # being printed by the thread's default exception hook.
        try:
            worker_runtimes.append(KnowledgeRagService()._get_runtime())
        except Exception as exc:
            worker_errors.append(exc)

    try:
        service = KnowledgeRagService()
        main_runtime = service._get_runtime()
        assert service._get_runtime() is main_runtime

        # daemon=True keeps a stuck worker from blocking interpreter exit.
        thread = threading.Thread(target=load_worker_runtime, daemon=True)
        thread.start()
        thread.join(timeout=5)

        # join() returns silently on timeout, so liveness must be checked explicitly.
        assert not thread.is_alive(), "worker thread did not finish within 5 seconds"
        assert not worker_errors, f"worker thread raised: {worker_errors!r}"
        assert len(created_runtimes) == 1
        assert len(worker_runtimes) == 1
        assert worker_runtimes[0] is main_runtime
    finally:
        knowledge_rag_module.shutdown_knowledge_rag_runtime()

    assert all(runtime.finalized for runtime in created_runtimes)
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
    """Expect a ``failed`` status to be reported as not query-ready despite having chunks."""
    failed_status = {
        "status": "failed",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    is_ready = KnowledgeRagService.is_query_ready_status(failed_status)

    assert is_ready is False
def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> None:
    """Expect a ``processing`` status to be reported as not query-ready despite having chunks."""
    processing_status = {
        "status": "processing",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    is_ready = KnowledgeRagService.is_query_ready_status(processing_status)

    assert is_ready is False
def test_build_document_graph_summary_reads_lightrag_storage(tmp_path) -> None:
    """Expect the graph summary to be assembled from the on-disk LightRAG files.

    The test writes four JSON key-value stores and one GraphML file into a
    temporary workspace, then checks the summary for ``doc-1``:

    * the duplicated entity name is counted once (two entities expected);
    * entity type/description and relation description/keywords/weight match
      the GraphML data, with ``<SEP>``-joined values split apart;
    * chunks are listed as ``chunk-1`` then ``chunk-2`` although they are
      stored in the opposite order;
    * the unknown ``chunk-missing`` id is absent from ``entity_chunks``.

    It finally checks that ``enrich_knowledge_ingest_route_json`` expands the
    route's entity list to both entities with their types.
    """
    workspace = tmp_path / "knowledge" / ".lightrag" / "test_workspace"
    workspace.mkdir(parents=True)

    def write_json(file_name, payload) -> None:
        # Serialize with the default json.dumps settings and store as UTF-8.
        (workspace / file_name).write_text(json.dumps(payload), encoding="utf-8")

    write_json(
        "kv_store_full_entities.json",
        {"doc-1": {"entity_names": ["远光软件", "支出管理", "远光软件"]}},
    )
    write_json(
        "kv_store_full_relations.json",
        {"doc-1": {"relation_pairs": [["远光软件", "支出管理"]]}},
    )
    write_json(
        "kv_store_text_chunks.json",
        {
            "chunk-2": {
                "_id": "chunk-2",
                "full_doc_id": "doc-1",
                "chunk_order_index": 1,
                "tokens": 45,
                "content": "第二条 支出审批需要结合预算、归口部门和授权标准执行。",
            },
            "chunk-1": {
                "_id": "chunk-1",
                "full_doc_id": "doc-1",
                "chunk_order_index": 0,
                "tokens": 31,
                "content": "第一条 本办法适用于公司支出管理。",
            },
        },
    )
    write_json(
        "kv_store_entity_chunks.json",
        {
            "远光软件": {"chunk_ids": ["chunk-1", "chunk-missing"]},
            "支出管理": {"chunk_ids": ["chunk-2"]},
        },
    )

    graphml_document = """<?xml version="1.0" encoding="UTF-8"?>
<graphml xmlns="http://graphml.graphdrawing.org/xmlns">
<key id="n0" for="node" attr.name="entity_id" attr.type="string" />
<key id="n1" for="node" attr.name="entity_type" attr.type="string" />
<key id="n2" for="node" attr.name="description" attr.type="string" />
<key id="n3" for="node" attr.name="created_at" attr.type="string" />
<key id="e0" for="edge" attr.name="weight" attr.type="double" />
<key id="e1" for="edge" attr.name="description" attr.type="string" />
<key id="e2" for="edge" attr.name="keywords" attr.type="string" />
<graph edgedefault="undirected">
<node id="远光软件">
<data key="n0">远光软件</data>
<data key="n1">ORGANIZATION</data>
<data key="n2">公司主体&lt;SEP&gt;费用制度适用公司</data>
<data key="n3">2026-05-23</data>
</node>
<node id="支出管理">
<data key="n0">支出管理</data>
<data key="n1">TOPIC</data>
<data key="n2">规范费用支出、预算和审批。</data>
</node>
<edge source="远光软件" target="支出管理">
<data key="e0">2.5</data>
<data key="e1">远光软件通过支出管理制度约束费用审批。</data>
<data key="e2">制度&lt;SEP&gt;审批</data>
</edge>
</graph>
</graphml>"""
    graph_file = workspace / "graph_chunk_entity_relation.graphml"
    graph_file.write_text(graphml_document, encoding="utf-8")

    summary = build_document_graph_summary(
        tmp_path,
        workspace="test_workspace",
        document_id="doc-1",
    )

    # Entities: deduplicated and enriched from the GraphML nodes.
    assert summary["entity_count"] == 2
    entity_names = [entity["name"] for entity in summary["entities"]]
    assert entity_names == ["远光软件", "支出管理"]
    first_entity = summary["entities"][0]
    assert first_entity["type"] == "ORGANIZATION"
    assert first_entity["descriptions"][0] == "公司主体"

    # Relations: a single edge carrying description, keywords and weight.
    assert summary["relation_count"] == 1
    relation = summary["relations"][0]
    assert relation["source"] == "远光软件"
    assert relation["target"] == "支出管理"
    assert relation["description"] == "远光软件通过支出管理制度约束费用审批。"
    assert relation["keywords"] == ["制度", "审批"]
    assert relation["weight"] == 2.5

    # Chunks and the entity-to-chunk mapping.
    chunk_ids = [chunk["id"] for chunk in summary["chunks"]]
    assert chunk_ids == ["chunk-1", "chunk-2"]
    assert summary["chunks"][0]["excerpt"].startswith("第一条")
    expected_entity_chunks = [
        {"entity": "远光软件", "chunk_ids": ["chunk-1"]},
        {"entity": "支出管理", "chunk_ids": ["chunk-2"]},
    ]
    assert summary["entity_chunks"] == expected_entity_chunks

    route_json = {
        "lightrag_workspace": "test_workspace",
        "knowledge_ingest": {
            "graph": {
                "entities": ["远光软件"],
                "relations": [
                    {"source": "远光软件", "target": "支出管理", "type": "关联"}
                ],
            }
        },
    }
    enriched_route = enrich_knowledge_ingest_route_json(
        route_json,
        storage_root=tmp_path,
    )

    enriched_entities = enriched_route["knowledge_ingest"]["graph"]["entities"]
    enriched_names = [entity["name"] for entity in enriched_entities]
    assert enriched_names == ["远光软件", "支出管理"]
    assert enriched_entities[1]["type"] == "TOPIC"
def test_build_ingest_document_summary_extracts_sections() -> None:
    """Expect the document summary to report the file name and two sections.

    The first reported section title is expected to be the chapter heading
    without the leading Markdown ``#`` marker present in ``indexed_text``.
    """
    upload_entry = {
        "original_name": "公司支出管理办法.pdf",
        "folder": "制度文件",
        "extension": "pdf",
        "mime_type": "application/pdf",
    }
    raw_text = "第一章 总则\n本办法用于规范公司支出。"
    indexed_text = "# 第一章 总则\n本办法用于规范公司支出。\n第二条 审批\n审批需按授权执行。"

    summary = build_ingest_document_summary(
        document_id="doc-1",
        entry=upload_entry,
        raw_text=raw_text,
        indexed_text=indexed_text,
    )

    assert summary["name"] == "公司支出管理办法.pdf"
    assert summary["section_count"] == 2
    first_section = summary["sections"][0]
    assert first_section["title"] == "第一章 总则"
def test_build_ingest_status_summary_keeps_chunk_status() -> None:
    """Expect the status summary to carry over status, readiness and chunk details."""
    status_payload = {
        "status": "processed",
        "query_ready": True,
        "chunks_count": 2,
        "chunks_list": ["chunk-1", "chunk-2"],
    }
    graph_summary = {
        "entity_count": 1,
        "relation_count": 0,
        "entities": ["预算"],
        "relations": [],
    }

    summary = build_ingest_status_summary(
        status_payload=status_payload,
        graph_summary=graph_summary,
    )

    assert summary["lightrag_status"] == "processed"
    assert summary["query_ready"] is True
    assert summary["chunk_count"] == 2
    assert summary["chunk_ids"] == ["chunk-1", "chunk-2"]