后端新增风险规则自动生成和模板执行服务,支持从规则资产批量生成并持久化风险规则文件;知识库入库日志增强图谱查询和本地 RAG 回退,前端审计页面增加风险规则模型和流程图组件,知识入库面板拆分为图谱可视化子组件,报销创建页面增加引导式流程模型,更新知识库索引数据。
431 lines
16 KiB
Python
431 lines
16 KiB
Python
from __future__ import annotations
|
||
|
||
import json
|
||
import threading
|
||
|
||
from app.services import knowledge_rag as knowledge_rag_module
|
||
from app.services.knowledge_ingest_log import (
|
||
build_document_graph_summary,
|
||
build_ingest_document_summary,
|
||
build_ingest_status_summary,
|
||
enrich_knowledge_ingest_route_json,
|
||
)
|
||
from app.services.knowledge_rag import KnowledgeRagService
|
||
from app.services.knowledge_rag_local import query_local_text_chunks
|
||
|
||
|
||
def test_build_hits_prioritizes_structured_table_evidence_for_standard_queries() -> None:
    """Expect the chunk containing a Markdown table to rank ahead of a prose-only chunk.

    Both chunks come from the same file path; the prose chunk is supplied first,
    so the expected result order is the reverse of the input order.
    """
    prose_chunk = {
        "chunk_id": "plain-1",
        "file_path": "/tmp/doc-1__差旅制度.md",
        "content": "住宿费说明文字,提到了出差和报销要求,但没有清晰表格。",
    }
    table_chunk = {
        "chunk_id": "table-1",
        "file_path": "/tmp/doc-1__差旅制度.md",
        "content": "# 结构化表格补充\n\n| 城市 | 住宿费标准 |\n| 北京 | 500 |",
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="住宿费标准是多少?",
        chunks=[prose_chunk, table_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["table-1", "plain-1"]
|
||
|
||
|
||
def test_build_hits_boosts_query_term_matches() -> None:
    """Expect the chunk whose text contains the query wording to be ranked first.

    The travel-expense chunk is supplied first and the entertainment-expense
    chunk second; the assertion requires the opposite order in the result.
    """
    travel_chunk = {
        "chunk_id": "travel-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "差旅费包含交通费、住宿费和餐补标准。",
    }
    entertainment_chunk = {
        "chunk_id": "ent-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "业务招待费报销标准:应结合客户接待场景、人数和审批要求执行。",
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="招待费报销标准",
        chunks=[travel_chunk, entertainment_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["ent-1", "travel-1"]
|
||
|
||
|
||
def test_build_hits_prioritizes_answer_clue_appendix_for_rule_queries() -> None:
    """Expect the "answer clue" appendix chunk to rank ahead of a generic prose chunk.

    The appendix chunk is supplied second but must come back first for a query
    asking about the reimbursement deadline.
    """
    # Heading line followed by two bullet lines that both mention the deadline.
    clue_text = (
        "# 问答线索补充\n\n"
        "- 第二章 报销时限:费用发生后 30 日内提交申请。\n"
        "- 第二章 报销时限:超过 30 日需补充审批说明。"
    )
    prose_chunk = {
        "chunk_id": "plain-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "本制度用于规范报销流程,员工应遵守公司审批要求。",
    }
    clue_chunk = {
        "chunk_id": "clue-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": clue_text,
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="报销时限是多少?",
        chunks=[prose_chunk, clue_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["clue-1", "plain-1"]
|
||
|
||
|
||
def test_query_local_text_chunks_prioritizes_relevant_policy_chunk(tmp_path) -> None:
    """Write a two-chunk text store to disk and check the lodging-policy chunk wins.

    The store is written as ``kv_store_text_chunks.json`` inside the
    ``x_financial_knowledge`` workspace under ``<tmp_path>/knowledge/.lightrag``.
    The query asks about approval when lodging exceeds the standard; the result
    must be flagged confident and its top hit must be the travel chunk.
    """
    lightrag_root = tmp_path / "knowledge" / ".lightrag"
    workspace_dir = lightrag_root / "x_financial_knowledge"
    workspace_dir.mkdir(parents=True)

    travel_content = (
        "第十三条 差旅费。酒店住宿限额标准如下:其他员工直辖市350元、"
        "省会城市300元、其他地区250元。确因紧急公务、特别情形等事项"
        "导致住宿超过规定标准时,超标20%以内由部门负责人审批,"
        "超标20%以上需分管领导审批。"
    )
    chunk_store = {
        "chunk-travel": {
            "_id": "chunk-travel",
            "full_doc_id": "doc-1",
            "chunk_order_index": 1,
            "file_path": "/tmp/doc-1__差旅费管理办法.pdf",
            "content": travel_content,
        },
        "chunk-office": {
            "_id": "chunk-office",
            "full_doc_id": "doc-2",
            "chunk_order_index": 1,
            "file_path": "/tmp/doc-2__办公用品管理办法.pdf",
            "content": "办公用品采购应遵循预算和验收流程。",
        },
    }
    store_file = workspace_dir / "kv_store_text_chunks.json"
    store_file.write_text(json.dumps(chunk_store), encoding="utf-8")

    result = query_local_text_chunks(
        lightrag_root=lightrag_root,
        workspace="x_financial_knowledge",
        query="住宿费超过标准审批依据是什么?",
        limit=2,
    )

    assert result.confident is True
    top_hit = result.hits[0]
    assert top_hit["candidate_id"] == "chunk-travel"
    assert "住宿超过规定标准" in top_hit["content"]
|
||
|
||
|
||
def test_query_knowledge_uses_local_chunks_before_lightrag_runtime(tmp_path, monkeypatch) -> None:
    """Check that ``query_knowledge`` answers from the local chunk store without a runtime.

    ``KnowledgeRagService._get_runtime`` is replaced with a function that raises,
    so the test fails if the service tries to obtain a runtime. The payload must
    report one record, the ``local_text_chunks`` retrieval strategy, and a top hit
    whose content mentions the three-month limit.
    """
    workspace_dir = tmp_path / "knowledge" / ".lightrag" / "x_financial_knowledge"
    workspace_dir.mkdir(parents=True)

    deadline_content = (
        "第八条 支出报销申请时限。公司各类支出报销结算申请时限为三个月。"
        "逾期需说明原因,经分管领导审批后方可报销。"
    )
    chunk_store = {
        "chunk-1": {
            "_id": "chunk-1",
            "full_doc_id": "doc-1",
            "chunk_order_index": 1,
            "file_path": "/tmp/doc-1__公司支出管理办法.pdf",
            "content": deadline_content,
        }
    }
    store_file = workspace_dir / "kv_store_text_chunks.json"
    store_file.write_text(json.dumps(chunk_store), encoding="utf-8")

    def forbid_runtime(_self):
        # Any attempt to obtain the runtime turns into an immediate test failure.
        raise AssertionError("local high-confidence queries should not initialize LightRAG")

    monkeypatch.setattr(KnowledgeRagService, "_get_runtime", forbid_runtime)

    service = KnowledgeRagService(storage_root=tmp_path)
    payload = service.query_knowledge(
        "费用发生后多久内必须报销?超过三个月还能不能报?",
        limit=3,
    )

    assert payload["record_count"] == 1
    assert payload["metadata"]["retrieval_strategy"] == "local_text_chunks"
    assert "三个月" in payload["hits"][0]["content"]
|
||
|
||
|
||
def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> None:
    """Expect a chapter-navigation chunk to rank below the body chunk with the answer.

    The navigation chunk (a list of chapter headings) is supplied first; the
    body chunk naming the responsible department must still come back first.
    """
    navigation_chunk = {
        "chunk_id": "toc-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "# 章节导航\n\n- 第一章 总则\n- 第二章 职责分工\n- 第三章 支出归口",
    }
    body_text = (
        "附表3:支出归口管理部门与归口业务范围\n"
        "组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。"
    )
    body_chunk = {
        "chunk_id": "body-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": body_text,
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="探亲差旅归哪个部门管理?",
        chunks=[navigation_chunk, body_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["body-1", "toc-1"]
|
||
|
||
|
||
def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
    """Expect ``http://qdrant:6333`` when the ``qdrant`` hostname resolves.

    ``getaddrinfo`` on the ``socket`` object referenced by the module under test
    is replaced with a fake that returns one address record for ``"qdrant"`` and
    an empty list for any other hostname.
    """

    def fake_getaddrinfo(hostname, port):
        if hostname == "qdrant":
            return [("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
        return []

    monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", fake_getaddrinfo)

    resolved_url = knowledge_rag_module._resolve_default_qdrant_url()
    assert resolved_url == "http://qdrant:6333"
|
||
|
||
|
||
def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
    """Expect ``http://127.0.0.1:6333`` when every hostname lookup raises ``OSError``."""

    def failing_getaddrinfo(_hostname, _port):
        raise OSError("lookup failed")

    monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", failing_getaddrinfo)

    resolved_url = knowledge_rag_module._resolve_default_qdrant_url()
    assert resolved_url == "http://127.0.0.1:6333"
|
||
|
||
|
||
def test_runtime_cache_is_isolated_by_thread(monkeypatch) -> None:
    """Check that a second thread gets its own runtime and that shutdown finalizes all of them.

    ``_LightRagRuntime`` is replaced with a fake that records every instance, and
    ``_build_runtime_signature`` is pinned to a constant so both threads ask for
    the same configuration. The main thread must get the same runtime on repeated
    calls, while the worker thread must get a different instance.

    Shutdown runs in a ``finally`` block so a failing assertion does not leave
    fake runtimes behind for later tests.
    """
    # Presumably resets any runtime cached by earlier tests so the count below
    # only reflects this test -- confirm against shutdown_knowledge_rag_runtime.
    knowledge_rag_module.shutdown_knowledge_rag_runtime()
    created_runtimes = []

    class FakeRuntime:
        """Stand-in runtime that records its construction and finalization."""

        def __init__(self, **_kwargs):
            self.finalized = False
            created_runtimes.append(self)

        def finalize(self):
            self.finalized = True

    monkeypatch.setattr(knowledge_rag_module, "_LightRagRuntime", FakeRuntime)
    monkeypatch.setattr(
        KnowledgeRagService,
        "_build_runtime_signature",
        lambda self: (("same-config",), {}),
    )

    worker_runtimes = []

    def load_worker_runtime() -> None:
        worker_runtimes.append(KnowledgeRagService()._get_runtime())

    try:
        service = KnowledgeRagService()
        main_runtime = service._get_runtime()
        assert service._get_runtime() is main_runtime

        thread = threading.Thread(target=load_worker_runtime)
        thread.start()
        thread.join(timeout=5)
        # join() returns None whether or not the timeout expired, so the thread
        # state has to be checked explicitly to detect a hung worker.
        assert not thread.is_alive(), "worker thread did not finish within 5 seconds"
        # An exception inside the worker leaves the list empty; fail with a clear
        # message instead of an IndexError further down.
        assert worker_runtimes, "worker thread did not produce a runtime"

        assert len(created_runtimes) == 2
        assert worker_runtimes[0] is not main_runtime
    finally:
        knowledge_rag_module.shutdown_knowledge_rag_runtime()

    assert all(runtime.finalized for runtime in created_runtimes)
|
||
|
||
|
||
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
    """A ``failed`` status must not count as query-ready even when chunk data is present."""
    failed_status = {
        "status": "failed",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    ready = KnowledgeRagService.is_query_ready_status(failed_status)

    assert ready is False
|
||
|
||
|
||
def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> None:
    """A ``processing`` status must not count as query-ready even when chunk data is present."""
    processing_status = {
        "status": "processing",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    ready = KnowledgeRagService.is_query_ready_status(processing_status)

    assert ready is False
|
||
|
||
|
||
def test_build_document_graph_summary_reads_lightrag_storage(tmp_path) -> None:
    """Build a workspace on disk and check the graph summary produced from it.

    Four JSON stores and one GraphML file are written under
    ``<tmp_path>/knowledge/.lightrag/test_workspace``. The fixture deliberately
    contains a duplicated entity name, chunks stored in reverse order, and an
    entity-to-chunk link pointing at a chunk id that is not in the chunk store.
    The assertions require two entities, chunks ordered ``chunk-1`` then
    ``chunk-2``, and the unknown chunk id absent from ``entity_chunks``.

    The second half feeds a route payload that lists only one entity through
    ``enrich_knowledge_ingest_route_json`` and expects both entities back, with
    the second one typed ``TOPIC``.
    """
    workspace_dir = tmp_path / "knowledge" / ".lightrag" / "test_workspace"
    workspace_dir.mkdir(parents=True)

    def write_json(filename: str, payload: dict) -> None:
        # Same serialization for every JSON store: default json.dumps, UTF-8 file.
        (workspace_dir / filename).write_text(json.dumps(payload), encoding="utf-8")

    write_json(
        "kv_store_full_entities.json",
        {"doc-1": {"entity_names": ["远光软件", "支出管理", "远光软件"]}},
    )
    write_json(
        "kv_store_full_relations.json",
        {"doc-1": {"relation_pairs": [["远光软件", "支出管理"]]}},
    )
    # chunk-2 is stored before chunk-1 although its chunk_order_index is higher.
    write_json(
        "kv_store_text_chunks.json",
        {
            "chunk-2": {
                "_id": "chunk-2",
                "full_doc_id": "doc-1",
                "chunk_order_index": 1,
                "tokens": 45,
                "content": "第二条 支出审批需要结合预算、归口部门和授权标准执行。",
            },
            "chunk-1": {
                "_id": "chunk-1",
                "full_doc_id": "doc-1",
                "chunk_order_index": 0,
                "tokens": 31,
                "content": "第一条 本办法适用于公司支出管理。",
            },
        },
    )
    # "chunk-missing" has no entry in the chunk store written above.
    write_json(
        "kv_store_entity_chunks.json",
        {
            "远光软件": {"chunk_ids": ["chunk-1", "chunk-missing"]},
            "支出管理": {"chunk_ids": ["chunk-2"]},
        },
    )

    # NOTE(review): "<SEP>" appears unescaped inside the <data> text below. A strict
    # XML parser would reject that; confirm whether the original fixture used
    # "&lt;SEP&gt;" and the markup was unescaped when this file was extracted.
    graphml_text = """<?xml version="1.0" encoding="UTF-8"?>
<graphml xmlns="http://graphml.graphdrawing.org/xmlns">
<key id="n0" for="node" attr.name="entity_id" attr.type="string" />
<key id="n1" for="node" attr.name="entity_type" attr.type="string" />
<key id="n2" for="node" attr.name="description" attr.type="string" />
<key id="n3" for="node" attr.name="created_at" attr.type="string" />
<key id="e0" for="edge" attr.name="weight" attr.type="double" />
<key id="e1" for="edge" attr.name="description" attr.type="string" />
<key id="e2" for="edge" attr.name="keywords" attr.type="string" />
<graph edgedefault="undirected">
<node id="远光软件">
<data key="n0">远光软件</data>
<data key="n1">ORGANIZATION</data>
<data key="n2">公司主体<SEP>费用制度适用公司</data>
<data key="n3">2026-05-23</data>
</node>
<node id="支出管理">
<data key="n0">支出管理</data>
<data key="n1">TOPIC</data>
<data key="n2">规范费用支出、预算和审批。</data>
</node>
<edge source="远光软件" target="支出管理">
<data key="e0">2.5</data>
<data key="e1">远光软件通过支出管理制度约束费用审批。</data>
<data key="e2">制度<SEP>审批</data>
</edge>
</graph>
</graphml>"""
    (workspace_dir / "graph_chunk_entity_relation.graphml").write_text(
        graphml_text,
        encoding="utf-8",
    )

    summary = build_document_graph_summary(
        tmp_path,
        workspace="test_workspace",
        document_id="doc-1",
    )

    # Entities: the duplicated name counts once, and node attributes are attached.
    assert summary["entity_count"] == 2
    entity_names = [entity["name"] for entity in summary["entities"]]
    assert entity_names == ["远光软件", "支出管理"]
    first_entity = summary["entities"][0]
    assert first_entity["type"] == "ORGANIZATION"
    assert first_entity["descriptions"][0] == "公司主体"

    # Relations: the single edge with its description, keywords and weight.
    assert summary["relation_count"] == 1
    relation = summary["relations"][0]
    assert relation["source"] == "远光软件"
    assert relation["target"] == "支出管理"
    assert relation["description"] == "远光软件通过支出管理制度约束费用审批。"
    assert relation["keywords"] == ["制度", "审批"]
    assert relation["weight"] == 2.5

    # Chunks: returned in chunk-1, chunk-2 order; the unknown chunk id is absent.
    chunk_ids = [chunk["id"] for chunk in summary["chunks"]]
    assert chunk_ids == ["chunk-1", "chunk-2"]
    assert summary["chunks"][0]["excerpt"].startswith("第一条")
    expected_entity_chunks = [
        {"entity": "远光软件", "chunk_ids": ["chunk-1"]},
        {"entity": "支出管理", "chunk_ids": ["chunk-2"]},
    ]
    assert summary["entity_chunks"] == expected_entity_chunks

    route_payload = {
        "lightrag_workspace": "test_workspace",
        "knowledge_ingest": {
            "graph": {
                "entities": ["远光软件"],
                "relations": [
                    {"source": "远光软件", "target": "支出管理", "type": "关联"}
                ],
            }
        },
    }
    enriched_route = enrich_knowledge_ingest_route_json(
        route_payload,
        storage_root=tmp_path,
    )

    enriched_entities = enriched_route["knowledge_ingest"]["graph"]["entities"]
    enriched_names = [entity["name"] for entity in enriched_entities]
    assert enriched_names == ["远光软件", "支出管理"]
    assert enriched_entities[1]["type"] == "TOPIC"
|
||
|
||
|
||
def test_build_ingest_document_summary_extracts_sections() -> None:
    """Check the document summary's name, section count and first section title.

    The indexed text holds a Markdown heading line and a later line starting
    with an article marker; the summary is expected to report two sections,
    the first titled with the heading text.
    """
    document_entry = {
        "original_name": "公司支出管理办法.pdf",
        "folder": "制度文件",
        "extension": "pdf",
        "mime_type": "application/pdf",
    }

    summary = build_ingest_document_summary(
        document_id="doc-1",
        entry=document_entry,
        raw_text="第一章 总则\n本办法用于规范公司支出。",
        indexed_text="# 第一章 总则\n本办法用于规范公司支出。\n第二条 审批\n审批需按授权执行。",
    )

    assert summary["name"] == "公司支出管理办法.pdf"
    assert summary["section_count"] == 2
    first_section = summary["sections"][0]
    assert first_section["title"] == "第一章 总则"
|
||
|
||
|
||
def test_build_ingest_status_summary_keeps_chunk_status() -> None:
    """Check that status, readiness flag and chunk details carry over into the summary.

    The input keys ``status``, ``chunks_count`` and ``chunks_list`` are expected
    to surface as ``lightrag_status``, ``chunk_count`` and ``chunk_ids``.
    """
    status_payload = {
        "status": "processed",
        "query_ready": True,
        "chunks_count": 2,
        "chunks_list": ["chunk-1", "chunk-2"],
    }
    graph_summary = {
        "entity_count": 1,
        "relation_count": 0,
        "entities": ["预算"],
        "relations": [],
    }

    summary = build_ingest_status_summary(
        status_payload=status_payload,
        graph_summary=graph_summary,
    )

    assert summary["lightrag_status"] == "processed"
    assert summary["query_ready"] is True
    assert summary["chunk_count"] == 2
    assert summary["chunk_ids"] == ["chunk-1", "chunk-2"]
|