Files
X-Financial/server/tests/test_knowledge_rag_service.py

282 lines
9.4 KiB
Python
Raw Normal View History

from __future__ import annotations
import json
import threading
from app.services import knowledge_rag as knowledge_rag_module
from app.services.knowledge_ingest_log import (
build_document_graph_summary,
build_ingest_document_summary,
build_ingest_status_summary,
)
from app.services.knowledge_rag import KnowledgeRagService
def test_build_hits_prioritizes_structured_table_evidence_for_standard_queries() -> None:
    """A chunk holding a structured table must be ranked ahead of plain prose.

    The prose chunk is supplied first, so the expected result is the reverse of
    the input order.
    """
    prose_chunk = {
        "chunk_id": "plain-1",
        "file_path": "/tmp/doc-1__差旅制度.md",
        "content": "住宿费说明文字,提到了出差和报销要求,但没有清晰表格。",
    }
    table_chunk = {
        "chunk_id": "table-1",
        "file_path": "/tmp/doc-1__差旅制度.md",
        "content": "# 结构化表格补充\n\n| 城市 | 住宿费标准 |\n| 北京 | 500 |",
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="住宿费标准是多少?",
        chunks=[prose_chunk, table_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["table-1", "plain-1"]
def test_build_hits_boosts_query_term_matches() -> None:
    """The chunk whose text contains the query wording must be ranked first.

    The unrelated travel chunk is supplied first, so the expected result is the
    reverse of the input order.
    """
    travel_chunk = {
        "chunk_id": "travel-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "差旅费包含交通费、住宿费和餐补标准。",
    }
    entertainment_chunk = {
        "chunk_id": "ent-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "业务招待费报销标准:应结合客户接待场景、人数和审批要求执行。",
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="招待费报销标准",
        chunks=[travel_chunk, entertainment_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["ent-1", "travel-1"]
def test_build_hits_prioritizes_answer_clue_appendix_for_rule_queries() -> None:
    """An answer-clue appendix chunk must outrank a generic policy chunk.

    The generic chunk is supplied first, so the expected result is the reverse
    of the input order.
    """
    generic_chunk = {
        "chunk_id": "plain-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "本制度用于规范报销流程,员工应遵守公司审批要求。",
    }
    clue_text = (
        "# 问答线索补充\n\n"
        "- 第二章 报销时限:费用发生后 30 日内提交申请。\n"
        "- 第二章 报销时限:超过 30 日需补充审批说明。"
    )
    clue_chunk = {
        "chunk_id": "clue-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": clue_text,
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="报销时限是多少?",
        chunks=[generic_chunk, clue_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["clue-1", "plain-1"]
def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> None:
    """A chapter-navigation chunk must rank below the chunk with the actual rule.

    The navigation chunk is supplied first, so the expected result is the
    reverse of the input order.
    """
    navigation_chunk = {
        "chunk_id": "toc-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "# 章节导航\n\n- 第一章 总则\n- 第二章 职责分工\n- 第三章 支出归口",
    }
    body_text = (
        "附表3支出归口管理部门与归口业务范围\n"
        "组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。"
    )
    body_chunk = {
        "chunk_id": "body-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": body_text,
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="探亲差旅归哪个部门管理?",
        chunks=[navigation_chunk, body_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["body-1", "toc-1"]
def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
    """When the ``qdrant`` hostname resolves, the default URL must point at it."""

    def fake_getaddrinfo(hostname, port):
        # Only the "qdrant" host resolves; every other lookup returns no records.
        if hostname == "qdrant":
            return [("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
        return []

    monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", fake_getaddrinfo)

    resolved_url = knowledge_rag_module._resolve_default_qdrant_url()
    assert resolved_url == "http://qdrant:6333"
def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
    """When hostname lookup raises ``OSError``, the loopback URL must be used."""

    def failing_getaddrinfo(_hostname, _port):
        # Simulate a DNS failure for every lookup.
        raise OSError("lookup failed")

    monkeypatch.setattr(
        knowledge_rag_module.socket,
        "getaddrinfo",
        failing_getaddrinfo,
    )

    resolved_url = knowledge_rag_module._resolve_default_qdrant_url()
    assert resolved_url == "http://127.0.0.1:6333"
def test_runtime_cache_is_isolated_by_thread(monkeypatch) -> None:
    """Runtimes are cached per thread, and shutdown finalizes every one of them.

    The real ``_LightRagRuntime`` is replaced by a recording fake and the
    runtime signature is pinned to a constant, so any difference between the
    runtimes seen by the main thread and the worker thread comes from the
    calling thread rather than from configuration.
    """
    # Start from a clean cache so runtimes left by earlier tests are not reused.
    knowledge_rag_module.shutdown_knowledge_rag_runtime()
    created_runtimes = []

    class FakeRuntime:
        """Stand-in runtime that records its creation and finalization."""

        def __init__(self, **_kwargs):
            self.finalized = False
            created_runtimes.append(self)

        def finalize(self):
            self.finalized = True

    monkeypatch.setattr(knowledge_rag_module, "_LightRagRuntime", FakeRuntime)
    monkeypatch.setattr(
        KnowledgeRagService,
        "_build_runtime_signature",
        lambda self: (("same-config",), {}),
    )

    worker_runtimes = []

    def load_worker_runtime() -> None:
        worker_runtimes.append(KnowledgeRagService()._get_runtime())

    try:
        service = KnowledgeRagService()
        main_runtime = service._get_runtime()
        # A second lookup on the same thread must hit the cache.
        assert service._get_runtime() is main_runtime

        # daemon=True keeps a hung worker from blocking interpreter exit.
        thread = threading.Thread(target=load_worker_runtime, daemon=True)
        thread.start()
        thread.join(timeout=5)
        # join() returns None even on timeout, so liveness is checked explicitly.
        assert not thread.is_alive(), "worker thread did not finish within 5s"
        assert len(worker_runtimes) == 1, "worker thread failed to load a runtime"

        assert len(created_runtimes) == 2
        assert worker_runtimes[0] is not main_runtime
    finally:
        # Always drop the fake runtimes from the cache, even when an assertion
        # above fails, so they cannot leak into later tests.
        knowledge_rag_module.shutdown_knowledge_rag_runtime()

    assert all(runtime.finalized for runtime in created_runtimes)
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
    """A ``failed`` status is not query-ready even when chunks are reported."""
    failed_status = {
        "status": "failed",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    ready = KnowledgeRagService.is_query_ready_status(failed_status)

    assert ready is False
def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> None:
    """A ``processing`` status is not query-ready even when chunks are reported."""
    processing_status = {
        "status": "processing",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    ready = KnowledgeRagService.is_query_ready_status(processing_status)

    assert ready is False
def test_build_document_graph_summary_reads_lightrag_storage(tmp_path) -> None:
    """The graph summary is assembled from the workspace's JSON store files.

    Three store files are written under
    ``<tmp_path>/knowledge/.lightrag/test_workspace``. The fixture lists one
    entity name twice and stores the chunks in reverse order; the summary is
    expected to report two entities and to return ``chunk-1`` before
    ``chunk-2``.
    """
    storage_dir = tmp_path / "knowledge" / ".lightrag" / "test_workspace"
    storage_dir.mkdir(parents=True)

    entity_store = {"doc-1": {"entity_names": ["远光软件", "支出管理", "远光软件"]}}
    relation_store = {"doc-1": {"relation_pairs": [["远光软件", "支出管理"]]}}
    # Deliberately stored with chunk-2 ahead of chunk-1.
    chunk_store = {
        "chunk-2": {
            "_id": "chunk-2",
            "full_doc_id": "doc-1",
            "chunk_order_index": 1,
            "tokens": 45,
            "content": "第二条 支出审批需要结合预算、归口部门和授权标准执行。",
        },
        "chunk-1": {
            "_id": "chunk-1",
            "full_doc_id": "doc-1",
            "chunk_order_index": 0,
            "tokens": 31,
            "content": "第一条 本办法适用于公司支出管理。",
        },
    }
    store_files = (
        ("kv_store_full_entities.json", entity_store),
        ("kv_store_full_relations.json", relation_store),
        ("kv_store_text_chunks.json", chunk_store),
    )
    for file_name, payload in store_files:
        (storage_dir / file_name).write_text(json.dumps(payload), encoding="utf-8")

    graph = build_document_graph_summary(
        tmp_path,
        workspace="test_workspace",
        document_id="doc-1",
    )

    assert graph["entity_count"] == 2
    assert graph["entities"] == ["远光软件", "支出管理"]
    assert graph["relation_count"] == 1
    assert graph["relations"] == [{"source": "远光软件", "target": "支出管理", "type": "关联"}]
    chunk_ids = [chunk["id"] for chunk in graph["chunks"]]
    assert chunk_ids == ["chunk-1", "chunk-2"]
def test_build_ingest_document_summary_extracts_sections() -> None:
    """The document summary carries the original name and the detected sections.

    Two sections are expected for the supplied ``indexed_text``, the first one
    titled "第一章 总则".
    """
    document_entry = {
        "original_name": "公司支出管理办法.pdf",
        "folder": "制度文件",
        "extension": "pdf",
        "mime_type": "application/pdf",
    }

    document_summary = build_ingest_document_summary(
        document_id="doc-1",
        entry=document_entry,
        raw_text="第一章 总则\n本办法用于规范公司支出。",
        indexed_text="# 第一章 总则\n本办法用于规范公司支出。\n第二条 审批\n审批需按授权执行。",
    )

    assert document_summary["name"] == "公司支出管理办法.pdf"
    assert document_summary["section_count"] == 2
    first_section = document_summary["sections"][0]
    assert first_section["title"] == "第一章 总则"
def test_build_ingest_status_summary_keeps_chunk_status() -> None:
    """Status, readiness and chunk details from the payload reach the summary."""
    status_payload = {
        "status": "processed",
        "query_ready": True,
        "chunks_count": 2,
        "chunks_list": ["chunk-1", "chunk-2"],
    }
    graph_summary = {
        "entity_count": 1,
        "relation_count": 0,
        "entities": ["预算"],
        "relations": [],
    }

    status_summary = build_ingest_status_summary(
        status_payload=status_payload,
        graph_summary=graph_summary,
    )

    assert status_summary["lightrag_status"] == "processed"
    assert status_summary["query_ready"] is True
    assert status_summary["chunk_count"] == 2
    assert status_summary["chunk_ids"] == ["chunk-1", "chunk-2"]