Files
X-Financial/server/tests/test_knowledge_rag_service.py
caoxiaozhu 575f093c74 feat: 新增风险规则生成引擎与知识图谱可视化
后端新增风险规则自动生成和模板执行服务,支持从规则资产
批量生成并持久化风险规则文件;知识库入库日志增强图谱
查询和本地 RAG 回退,前端审计页面增加风险规则模型和流
程图组件,知识入库面板拆分为图谱可视化子组件,报销创
建页面增加引导式流程模型,更新知识库索引数据。
2026-05-23 19:54:42 +08:00

431 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import json
import threading
from app.services import knowledge_rag as knowledge_rag_module
from app.services.knowledge_ingest_log import (
build_document_graph_summary,
build_ingest_document_summary,
build_ingest_status_summary,
enrich_knowledge_ingest_route_json,
)
from app.services.knowledge_rag import KnowledgeRagService
from app.services.knowledge_rag_local import query_local_text_chunks
def test_build_hits_prioritizes_structured_table_evidence_for_standard_queries() -> None:
    """Rank the chunk containing a markdown table ahead of the plain-prose chunk.

    Both chunks come from the same file; the query asks for a "standard"
    amount, and the expected order puts ``table-1`` before ``plain-1`` even
    though the table chunk is supplied second.
    """
    source_path = "/tmp/doc-1__差旅制度.md"
    prose_chunk = {
        "chunk_id": "plain-1",
        "file_path": source_path,
        "content": "住宿费说明文字,提到了出差和报销要求,但没有清晰表格。",
    }
    table_chunk = {
        "chunk_id": "table-1",
        "file_path": source_path,
        "content": "# 结构化表格补充\n\n| 城市 | 住宿费标准 |\n| 北京 | 500 |",
    }

    hits = KnowledgeRagService._build_hits_from_query_data(
        query="住宿费标准是多少?",
        chunks=[prose_chunk, table_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in hits]
    assert ranked_ids == ["table-1", "plain-1"]
def test_build_hits_boosts_query_term_matches() -> None:
    """Rank the chunk that contains the query terms above an unrelated chunk.

    The entertainment-expense chunk is supplied second but is expected first
    in the result because its content contains the query wording.
    """
    source_path = "/tmp/doc-1__费用制度.md"
    travel_chunk = {
        "chunk_id": "travel-1",
        "file_path": source_path,
        "content": "差旅费包含交通费、住宿费和餐补标准。",
    }
    entertainment_chunk = {
        "chunk_id": "ent-1",
        "file_path": source_path,
        "content": "业务招待费报销标准:应结合客户接待场景、人数和审批要求执行。",
    }

    hits = KnowledgeRagService._build_hits_from_query_data(
        query="招待费报销标准",
        chunks=[travel_chunk, entertainment_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in hits]
    assert ranked_ids == ["ent-1", "travel-1"]
def test_build_hits_prioritizes_answer_clue_appendix_for_rule_queries() -> None:
    """Rank the "answer clue" appendix chunk ahead of generic policy prose.

    The clue chunk lists the reimbursement deadline explicitly and is expected
    to come first for a deadline question, although it is supplied second.
    """
    source_path = "/tmp/doc-1__费用制度.md"
    clue_text = (
        "# 问答线索补充\n\n"
        "- 第二章 报销时限:费用发生后 30 日内提交申请。\n"
        "- 第二章 报销时限:超过 30 日需补充审批说明。"
    )
    candidate_chunks = [
        {
            "chunk_id": "plain-1",
            "file_path": source_path,
            "content": "本制度用于规范报销流程,员工应遵守公司审批要求。",
        },
        {
            "chunk_id": "clue-1",
            "file_path": source_path,
            "content": clue_text,
        },
    ]

    hits = KnowledgeRagService._build_hits_from_query_data(
        query="报销时限是多少?",
        chunks=candidate_chunks,
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in hits]
    assert ranked_ids == ["clue-1", "plain-1"]
def test_query_local_text_chunks_prioritizes_relevant_policy_chunk(tmp_path) -> None:
    """Query an on-disk chunk store and expect the lodging-policy chunk first.

    A ``kv_store_text_chunks.json`` file with one travel-policy chunk and one
    office-supplies chunk is written under a temporary workspace; the result
    must be flagged confident and lead with the travel chunk.
    """
    lightrag_root = tmp_path / "knowledge" / ".lightrag"
    workspace_dir = lightrag_root / "x_financial_knowledge"
    workspace_dir.mkdir(parents=True)

    travel_chunk = {
        "_id": "chunk-travel",
        "full_doc_id": "doc-1",
        "chunk_order_index": 1,
        "file_path": "/tmp/doc-1__差旅费管理办法.pdf",
        "content": (
            "第十三条 差旅费。酒店住宿限额标准如下其他员工直辖市350元、"
            "省会城市300元、其他地区250元。确因紧急公务、特别情形等事项"
            "导致住宿超过规定标准时超标20%以内由部门负责人审批,"
            "超标20%以上需分管领导审批。"
        ),
    }
    office_chunk = {
        "_id": "chunk-office",
        "full_doc_id": "doc-2",
        "chunk_order_index": 1,
        "file_path": "/tmp/doc-2__办公用品管理办法.pdf",
        "content": "办公用品采购应遵循预算和验收流程。",
    }
    chunk_store = {"chunk-travel": travel_chunk, "chunk-office": office_chunk}
    store_file = workspace_dir / "kv_store_text_chunks.json"
    store_file.write_text(json.dumps(chunk_store), encoding="utf-8")

    result = query_local_text_chunks(
        lightrag_root=lightrag_root,
        workspace="x_financial_knowledge",
        query="住宿费超过标准审批依据是什么?",
        limit=2,
    )

    assert result.confident is True
    top_hit = result.hits[0]
    assert top_hit["candidate_id"] == "chunk-travel"
    assert "住宿超过规定标准" in top_hit["content"]
def test_query_knowledge_uses_local_chunks_before_lightrag_runtime(tmp_path, monkeypatch) -> None:
    """Answer from the local chunk store without ever requesting the runtime.

    ``KnowledgeRagService._get_runtime`` is replaced with a function that
    raises, so the test fails if ``query_knowledge`` reaches for the runtime.
    The payload must report the ``local_text_chunks`` strategy and one record.
    """
    workspace_dir = tmp_path / "knowledge" / ".lightrag" / "x_financial_knowledge"
    workspace_dir.mkdir(parents=True)

    deadline_chunk = {
        "_id": "chunk-1",
        "full_doc_id": "doc-1",
        "chunk_order_index": 1,
        "file_path": "/tmp/doc-1__公司支出管理办法.pdf",
        "content": (
            "第八条 支出报销申请时限。公司各类支出报销结算申请时限为三个月。"
            "逾期需说明原因,经分管领导审批后方可报销。"
        ),
    }
    store_file = workspace_dir / "kv_store_text_chunks.json"
    store_file.write_text(json.dumps({"chunk-1": deadline_chunk}), encoding="utf-8")

    def fail_if_runtime_is_used(_self):
        # Any call here means the local fast path was bypassed.
        raise AssertionError("local high-confidence queries should not initialize LightRAG")

    monkeypatch.setattr(KnowledgeRagService, "_get_runtime", fail_if_runtime_is_used)

    service = KnowledgeRagService(storage_root=tmp_path)
    payload = service.query_knowledge(
        "费用发生后多久内必须报销?超过三个月还能不能报?",
        limit=3,
    )

    assert payload["record_count"] == 1
    assert payload["metadata"]["retrieval_strategy"] == "local_text_chunks"
    assert "三个月" in payload["hits"][0]["content"]
def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> None:
    """Rank a body chunk that answers the question above a chapter-navigation chunk.

    The navigation chunk is supplied first but is expected last; the body
    chunk naming the responsible department is expected first.
    """
    source_path = "/tmp/doc-1__费用制度.md"
    navigation_chunk = {
        "chunk_id": "toc-1",
        "file_path": source_path,
        "content": "# 章节导航\n\n- 第一章 总则\n- 第二章 职责分工\n- 第三章 支出归口",
    }
    body_chunk = {
        "chunk_id": "body-1",
        "file_path": source_path,
        "content": (
            "附表3支出归口管理部门与归口业务范围\n"
            "组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。"
        ),
    }

    hits = KnowledgeRagService._build_hits_from_query_data(
        query="探亲差旅归哪个部门管理?",
        chunks=[navigation_chunk, body_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in hits]
    assert ranked_ids == ["body-1", "toc-1"]
def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
    """Expect the ``qdrant`` hostname URL when that hostname resolves.

    ``getaddrinfo`` is stubbed so that only the ``qdrant`` hostname yields an
    address record; every other lookup returns an empty list.
    """

    def fake_getaddrinfo(hostname, *_args, **_kwargs):
        # The patch is applied to ``knowledge_rag_module.socket`` -- presumably
        # the shared stdlib ``socket`` module (verify against the module's
        # imports) -- so the stub tolerates the optional family/type/proto/flags
        # arguments instead of raising TypeError for richer call shapes.
        if hostname == "qdrant":
            return [("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
        return []

    monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", fake_getaddrinfo)

    assert knowledge_rag_module._resolve_default_qdrant_url() == "http://qdrant:6333"
def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
    """Expect the loopback URL when every hostname lookup raises ``OSError``."""

    def raise_lookup_error(*_args, **_kwargs):
        # Accept any call shape so the failure seen by the code under test is
        # always the intended OSError, never a TypeError from the stub's own
        # signature.
        raise OSError("lookup failed")

    monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", raise_lookup_error)

    assert knowledge_rag_module._resolve_default_qdrant_url() == "http://127.0.0.1:6333"
def test_runtime_cache_is_isolated_by_thread(monkeypatch) -> None:
    """Each thread gets its own runtime; shutdown finalizes all of them.

    ``_LightRagRuntime`` is replaced by a fake that records every instance,
    and the runtime signature is pinned so both threads request the same
    configuration. The main thread must get the same runtime on repeated
    calls, the worker thread must get a different one, and
    ``shutdown_knowledge_rag_runtime`` must call ``finalize`` on both.
    """
    # Start from an empty cache so earlier tests cannot influence the count.
    knowledge_rag_module.shutdown_knowledge_rag_runtime()
    created_runtimes = []

    class FakeRuntime:
        """Stand-in runtime that registers itself and records finalization."""

        def __init__(self, **_kwargs):
            self.finalized = False
            created_runtimes.append(self)

        def finalize(self):
            self.finalized = True

    monkeypatch.setattr(knowledge_rag_module, "_LightRagRuntime", FakeRuntime)
    monkeypatch.setattr(
        KnowledgeRagService,
        "_build_runtime_signature",
        lambda self: (("same-config",), {}),
    )

    worker_runtimes = []
    worker_errors = []

    def load_worker_runtime() -> None:
        # Capture failures so they surface as an assertion in the main thread
        # instead of being lost with the worker.
        try:
            worker_runtimes.append(KnowledgeRagService()._get_runtime())
        except Exception as exc:
            worker_errors.append(exc)

    try:
        service = KnowledgeRagService()
        main_runtime = service._get_runtime()
        assert service._get_runtime() is main_runtime

        # daemon=True keeps a stuck worker from blocking interpreter exit.
        thread = threading.Thread(target=load_worker_runtime, daemon=True)
        thread.start()
        thread.join(timeout=5)
        # join() returns silently on timeout, so check liveness explicitly.
        assert not thread.is_alive(), "worker thread did not finish within 5 seconds"
        assert not worker_errors, f"worker thread raised: {worker_errors!r}"

        assert len(created_runtimes) == 2
        assert worker_runtimes[0] is not main_runtime
    finally:
        # Always clear the cache, even after a failed assertion, so fake
        # runtimes never remain cached for subsequent tests.
        knowledge_rag_module.shutdown_knowledge_rag_runtime()

    assert all(runtime.finalized for runtime in created_runtimes)
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
    """A ``failed`` status is not query-ready even when chunks are reported."""
    failed_status = {
        "status": "failed",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    ready = KnowledgeRagService.is_query_ready_status(failed_status)

    assert ready is False
def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> None:
    """A ``processing`` status is not query-ready even when chunks are reported."""
    processing_status = {
        "status": "processing",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    ready = KnowledgeRagService.is_query_ready_status(processing_status)

    assert ready is False
def test_build_document_graph_summary_reads_lightrag_storage(tmp_path) -> None:
    """Build a graph summary from on-disk workspace files and check its contents.

    The fixture writes four JSON stores plus a GraphML file into a temporary
    workspace, then asserts on the summary returned for ``doc-1``:
    de-duplicated entities, the single relation with its split keywords and
    weight, chunks ordered ``chunk-1`` then ``chunk-2``, and entity-to-chunk
    links without the unknown ``chunk-missing`` id. Finally it checks that
    ``enrich_knowledge_ingest_route_json`` expands a route's entity list using
    the same storage.
    """
    workspace = tmp_path / "knowledge" / ".lightrag" / "test_workspace"
    workspace.mkdir(parents=True)

    def write_json(file_name, payload) -> None:
        # Serialize one fixture store into the workspace directory.
        (workspace / file_name).write_text(json.dumps(payload), encoding="utf-8")

    # The entity list deliberately repeats one name.
    write_json(
        "kv_store_full_entities.json",
        {"doc-1": {"entity_names": ["远光软件", "支出管理", "远光软件"]}},
    )
    write_json(
        "kv_store_full_relations.json",
        {"doc-1": {"relation_pairs": [["远光软件", "支出管理"]]}},
    )
    # Chunks are stored out of order (index 1 before index 0).
    write_json(
        "kv_store_text_chunks.json",
        {
            "chunk-2": {
                "_id": "chunk-2",
                "full_doc_id": "doc-1",
                "chunk_order_index": 1,
                "tokens": 45,
                "content": "第二条 支出审批需要结合预算、归口部门和授权标准执行。",
            },
            "chunk-1": {
                "_id": "chunk-1",
                "full_doc_id": "doc-1",
                "chunk_order_index": 0,
                "tokens": 31,
                "content": "第一条 本办法适用于公司支出管理。",
            },
        },
    )
    # One entity references a chunk id that has no entry in the chunk store.
    write_json(
        "kv_store_entity_chunks.json",
        {
            "远光软件": {"chunk_ids": ["chunk-1", "chunk-missing"]},
            "支出管理": {"chunk_ids": ["chunk-2"]},
        },
    )

    graphml_document = """<?xml version="1.0" encoding="UTF-8"?>
<graphml xmlns="http://graphml.graphdrawing.org/xmlns">
<key id="n0" for="node" attr.name="entity_id" attr.type="string" />
<key id="n1" for="node" attr.name="entity_type" attr.type="string" />
<key id="n2" for="node" attr.name="description" attr.type="string" />
<key id="n3" for="node" attr.name="created_at" attr.type="string" />
<key id="e0" for="edge" attr.name="weight" attr.type="double" />
<key id="e1" for="edge" attr.name="description" attr.type="string" />
<key id="e2" for="edge" attr.name="keywords" attr.type="string" />
<graph edgedefault="undirected">
<node id="远光软件">
<data key="n0">远光软件</data>
<data key="n1">ORGANIZATION</data>
<data key="n2">公司主体&lt;SEP&gt;费用制度适用公司</data>
<data key="n3">2026-05-23</data>
</node>
<node id="支出管理">
<data key="n0">支出管理</data>
<data key="n1">TOPIC</data>
<data key="n2">规范费用支出、预算和审批。</data>
</node>
<edge source="远光软件" target="支出管理">
<data key="e0">2.5</data>
<data key="e1">远光软件通过支出管理制度约束费用审批。</data>
<data key="e2">制度&lt;SEP&gt;审批</data>
</edge>
</graph>
</graphml>"""
    graphml_file = workspace / "graph_chunk_entity_relation.graphml"
    graphml_file.write_text(graphml_document, encoding="utf-8")

    summary = build_document_graph_summary(
        tmp_path,
        workspace="test_workspace",
        document_id="doc-1",
    )

    # Entities: duplicates collapsed, type and first description taken from GraphML.
    assert summary["entity_count"] == 2
    assert [entity["name"] for entity in summary["entities"]] == ["远光软件", "支出管理"]
    first_entity = summary["entities"][0]
    assert first_entity["type"] == "ORGANIZATION"
    assert first_entity["descriptions"][0] == "公司主体"

    # Relations: a single edge with description, split keywords and weight.
    assert summary["relation_count"] == 1
    relation = summary["relations"][0]
    assert relation["source"] == "远光软件"
    assert relation["target"] == "支出管理"
    assert relation["description"] == "远光软件通过支出管理制度约束费用审批。"
    assert relation["keywords"] == ["制度", "审批"]
    assert relation["weight"] == 2.5

    # Chunks come back as chunk-1 then chunk-2; the unknown chunk id is absent.
    assert [chunk["id"] for chunk in summary["chunks"]] == ["chunk-1", "chunk-2"]
    assert summary["chunks"][0]["excerpt"].startswith("第一条")
    expected_entity_chunks = [
        {"entity": "远光软件", "chunk_ids": ["chunk-1"]},
        {"entity": "支出管理", "chunk_ids": ["chunk-2"]},
    ]
    assert summary["entity_chunks"] == expected_entity_chunks

    route_json = {
        "lightrag_workspace": "test_workspace",
        "knowledge_ingest": {
            "graph": {
                "entities": ["远光软件"],
                "relations": [
                    {"source": "远光软件", "target": "支出管理", "type": "关联"}
                ],
            }
        },
    }
    enriched_route = enrich_knowledge_ingest_route_json(route_json, storage_root=tmp_path)

    enriched_entities = enriched_route["knowledge_ingest"]["graph"]["entities"]
    assert [entity["name"] for entity in enriched_entities] == ["远光软件", "支出管理"]
    assert enriched_entities[1]["type"] == "TOPIC"
def test_build_ingest_document_summary_extracts_sections() -> None:
    """Summarize a document entry and expect two sections from the indexed text.

    The summary name must equal the entry's original file name, and the first
    section title must be the chapter heading without the markdown ``#``.
    """
    document_entry = {
        "original_name": "公司支出管理办法.pdf",
        "folder": "制度文件",
        "extension": "pdf",
        "mime_type": "application/pdf",
    }
    raw_text = "第一章 总则\n本办法用于规范公司支出。"
    indexed_text = "# 第一章 总则\n本办法用于规范公司支出。\n第二条 审批\n审批需按授权执行。"

    summary = build_ingest_document_summary(
        document_id="doc-1",
        entry=document_entry,
        raw_text=raw_text,
        indexed_text=indexed_text,
    )

    assert summary["name"] == "公司支出管理办法.pdf"
    assert summary["section_count"] == 2
    first_section = summary["sections"][0]
    assert first_section["title"] == "第一章 总则"
def test_build_ingest_status_summary_keeps_chunk_status() -> None:
    """Carry status, readiness and chunk details from the status payload into the summary."""
    status_payload = {
        "status": "processed",
        "query_ready": True,
        "chunks_count": 2,
        "chunks_list": ["chunk-1", "chunk-2"],
    }
    graph_summary = {
        "entity_count": 1,
        "relation_count": 0,
        "entities": ["预算"],
        "relations": [],
    }

    summary = build_ingest_status_summary(
        status_payload=status_payload,
        graph_summary=graph_summary,
    )

    assert summary["lightrag_status"] == "processed"
    assert summary["query_ready"] is True
    assert summary["chunk_count"] == 2
    assert summary["chunk_ids"] == ["chunk-1", "chunk-2"]