"""Tests for KnowledgeRagService hit ranking, Qdrant URL resolution, the
per-thread runtime cache, query-readiness checks and the ingest-log summary
builders."""

from __future__ import annotations

import json
import threading

from app.services import knowledge_rag as knowledge_rag_module
from app.services.knowledge_ingest_log import (
    build_document_graph_summary,
    build_ingest_document_summary,
    build_ingest_status_summary,
)
from app.services.knowledge_rag import KnowledgeRagService


def test_build_hits_prioritizes_structured_table_evidence_for_standard_queries() -> None:
    """Expect the chunk carrying a structured table to rank ahead of plain prose."""
    hits = KnowledgeRagService._build_hits_from_query_data(
        query="住宿费标准是多少?",
        chunks=[
            {
                "chunk_id": "plain-1",
                "file_path": "/tmp/doc-1__差旅制度.md",
                "content": "住宿费说明文字,提到了出差和报销要求,但没有清晰表格。",
            },
            {
                "chunk_id": "table-1",
                "file_path": "/tmp/doc-1__差旅制度.md",
                "content": "# 结构化表格补充\n\n| 城市 | 住宿费标准 |\n| 北京 | 500 |",
            },
        ],
        entities=[],
        limit=2,
    )

    assert [item["candidate_id"] for item in hits] == ["table-1", "plain-1"]


def test_build_hits_boosts_query_term_matches() -> None:
    """Expect the chunk whose text contains the query terms to rank first."""
    hits = KnowledgeRagService._build_hits_from_query_data(
        query="招待费报销标准",
        chunks=[
            {
                "chunk_id": "travel-1",
                "file_path": "/tmp/doc-1__费用制度.md",
                "content": "差旅费包含交通费、住宿费和餐补标准。",
            },
            {
                "chunk_id": "ent-1",
                "file_path": "/tmp/doc-1__费用制度.md",
                "content": "业务招待费报销标准:应结合客户接待场景、人数和审批要求执行。",
            },
        ],
        entities=[],
        limit=2,
    )

    assert [item["candidate_id"] for item in hits] == ["ent-1", "travel-1"]


def test_build_hits_prioritizes_answer_clue_appendix_for_rule_queries() -> None:
    """Expect the answer-clue appendix chunk to rank ahead of generic prose."""
    hits = KnowledgeRagService._build_hits_from_query_data(
        query="报销时限是多少?",
        chunks=[
            {
                "chunk_id": "plain-1",
                "file_path": "/tmp/doc-1__费用制度.md",
                "content": "本制度用于规范报销流程,员工应遵守公司审批要求。",
            },
            {
                "chunk_id": "clue-1",
                "file_path": "/tmp/doc-1__费用制度.md",
                "content": (
                    "# 问答线索补充\n\n"
                    "- 第二章 报销时限:费用发生后 30 日内提交申请。\n"
                    "- 第二章 报销时限:超过 30 日需补充审批说明。"
                ),
            },
        ],
        entities=[],
        limit=2,
    )

    assert [item["candidate_id"] for item in hits] == ["clue-1", "plain-1"]


def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> None:
    """Expect a chapter-navigation chunk to rank below the body text that answers the query."""
    hits = KnowledgeRagService._build_hits_from_query_data(
        query="探亲差旅归哪个部门管理?",
        chunks=[
            {
                "chunk_id": "toc-1",
                "file_path": "/tmp/doc-1__费用制度.md",
                "content": "# 章节导航\n\n- 第一章 总则\n- 第二章 职责分工\n- 第三章 支出归口",
            },
            {
                "chunk_id": "body-1",
                "file_path": "/tmp/doc-1__费用制度.md",
                "content": (
                    "附表3:支出归口管理部门与归口业务范围\n"
                    "组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。"
                ),
            },
        ],
        entities=[],
        limit=2,
    )

    assert [item["candidate_id"] for item in hits] == ["body-1", "toc-1"]


def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
    """Expect the ``qdrant`` hostname to be used when the stubbed lookup resolves it."""
    monkeypatch.setattr(
        knowledge_rag_module.socket,
        "getaddrinfo",
        # Only the "qdrant" hostname resolves; every other lookup returns nothing.
        lambda hostname, port: (
            [("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
            if hostname == "qdrant"
            else []
        ),
    )

    assert knowledge_rag_module._resolve_default_qdrant_url() == "http://qdrant:6333"


def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
    """Expect the loopback URL when the stubbed lookup raises ``OSError``."""

    def raise_lookup_error(_hostname, _port):
        raise OSError("lookup failed")

    monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", raise_lookup_error)

    assert knowledge_rag_module._resolve_default_qdrant_url() == "http://127.0.0.1:6333"


def test_runtime_cache_is_isolated_by_thread(monkeypatch) -> None:
    """Expect one cached runtime per thread, all finalized on shutdown.

    The main thread must get the same runtime on repeated calls, a worker
    thread must get a different one, and ``shutdown_knowledge_rag_runtime``
    must finalize every runtime that was created.
    """
    # Start from an empty cache so runtimes left by earlier tests are not counted.
    knowledge_rag_module.shutdown_knowledge_rag_runtime()
    created_runtimes = []

    class FakeRuntime:
        """Stand-in runtime that records its creation and finalization."""

        def __init__(self, **_kwargs):
            self.finalized = False
            created_runtimes.append(self)

        def finalize(self):
            self.finalized = True

    monkeypatch.setattr(knowledge_rag_module, "_LightRagRuntime", FakeRuntime)
    # Same signature for every service instance, so any second runtime can only
    # come from the thread dimension of the cache.
    monkeypatch.setattr(
        KnowledgeRagService,
        "_build_runtime_signature",
        lambda self: (("same-config",), {}),
    )

    worker_runtimes = []

    def load_worker_runtime() -> None:
        worker_runtimes.append(KnowledgeRagService()._get_runtime())

    try:
        service = KnowledgeRagService()
        main_runtime = service._get_runtime()
        assert service._get_runtime() is main_runtime

        # Daemon thread: a hung worker must not keep the interpreter alive
        # after the join timeout has expired.
        thread = threading.Thread(target=load_worker_runtime, daemon=True)
        thread.start()
        thread.join(timeout=5)
        # join() returns silently on timeout, so check liveness explicitly.
        assert not thread.is_alive(), "worker thread did not finish within 5 seconds"

        assert len(created_runtimes) == 2
        # Guard the index below: an exception in the worker leaves this list empty.
        assert len(worker_runtimes) == 1
        assert worker_runtimes[0] is not main_runtime
    finally:
        # Always drop the fake runtimes from the module-level cache, even when
        # an assertion above fails, so they cannot leak into other tests.
        knowledge_rag_module.shutdown_knowledge_rag_runtime()

    assert all(runtime.finalized for runtime in created_runtimes)


def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
    """Expect a ``failed`` status to be reported as not query-ready despite having chunks."""
    assert (
        KnowledgeRagService.is_query_ready_status(
            {
                "status": "failed",
                "chunks_count": 11,
                "chunks_list": ["chunk-1"],
            }
        )
        is False
    )


def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> None:
    """Expect a ``processing`` status to be reported as not query-ready despite having chunks."""
    assert (
        KnowledgeRagService.is_query_ready_status(
            {
                "status": "processing",
                "chunks_count": 11,
                "chunks_list": ["chunk-1"],
            }
        )
        is False
    )


def test_build_document_graph_summary_reads_lightrag_storage(tmp_path) -> None:
    """Expect the graph summary to be built from the JSON stores in the workspace.

    The fixture lists one entity name twice and stores the two chunks in
    reverse order; the summary is expected to report two entities, one
    relation, and the chunks as ``chunk-1`` then ``chunk-2``.
    """
    workspace = tmp_path / "knowledge" / ".lightrag" / "test_workspace"
    workspace.mkdir(parents=True)
    (workspace / "kv_store_full_entities.json").write_text(
        json.dumps({"doc-1": {"entity_names": ["远光软件", "支出管理", "远光软件"]}}),
        encoding="utf-8",
    )
    (workspace / "kv_store_full_relations.json").write_text(
        json.dumps({"doc-1": {"relation_pairs": [["远光软件", "支出管理"]]}}),
        encoding="utf-8",
    )
    (workspace / "kv_store_text_chunks.json").write_text(
        json.dumps(
            {
                "chunk-2": {
                    "_id": "chunk-2",
                    "full_doc_id": "doc-1",
                    "chunk_order_index": 1,
                    "tokens": 45,
                    "content": "第二条 支出审批需要结合预算、归口部门和授权标准执行。",
                },
                "chunk-1": {
                    "_id": "chunk-1",
                    "full_doc_id": "doc-1",
                    "chunk_order_index": 0,
                    "tokens": 31,
                    "content": "第一条 本办法适用于公司支出管理。",
                },
            }
        ),
        encoding="utf-8",
    )

    summary = build_document_graph_summary(
        tmp_path,
        workspace="test_workspace",
        document_id="doc-1",
    )

    assert summary["entity_count"] == 2
    assert summary["entities"] == ["远光软件", "支出管理"]
    assert summary["relation_count"] == 1
    assert summary["relations"] == [{"source": "远光软件", "target": "支出管理", "type": "关联"}]
    assert [item["id"] for item in summary["chunks"]] == ["chunk-1", "chunk-2"]


def test_build_ingest_document_summary_extracts_sections() -> None:
    """Expect the summary to carry the original file name and two sections."""
    summary = build_ingest_document_summary(
        document_id="doc-1",
        entry={
            "original_name": "公司支出管理办法.pdf",
            "folder": "制度文件",
            "extension": "pdf",
            "mime_type": "application/pdf",
        },
        raw_text="第一章 总则\n本办法用于规范公司支出。",
        indexed_text="# 第一章 总则\n本办法用于规范公司支出。\n第二条 审批\n审批需按授权执行。",
    )

    assert summary["name"] == "公司支出管理办法.pdf"
    assert summary["section_count"] == 2
    assert summary["sections"][0]["title"] == "第一章 总则"


def test_build_ingest_status_summary_keeps_chunk_status() -> None:
    """Expect status, readiness and chunk details to be carried into the summary."""
    summary = build_ingest_status_summary(
        status_payload={
            "status": "processed",
            "query_ready": True,
            "chunks_count": 2,
            "chunks_list": ["chunk-1", "chunk-2"],
        },
        graph_summary={
            "entity_count": 1,
            "relation_count": 0,
            "entities": ["预算"],
            "relations": [],
        },
    )

    assert summary["lightrag_status"] == "processed"
    assert summary["query_ready"] is True
    assert summary["chunk_count"] == 2
    assert summary["chunk_ids"] == ["chunk-1", "chunk-2"]