"""Tests for hit ranking, Qdrant URL resolution and query-readiness checks."""

from __future__ import annotations

from app.services import knowledge_rag as knowledge_rag_module
from app.services.knowledge_rag import KnowledgeRagService


def _candidate_ids(hits):
    """Return the ``candidate_id`` of every hit, keeping the ranked order."""
    return [item["candidate_id"] for item in hits]


def test_build_hits_prioritizes_structured_table_evidence_for_standard_queries() -> None:
    """A chunk carrying a structured table is expected ahead of plain prose."""
    prose_chunk = {
        "chunk_id": "plain-1",
        "file_path": "/tmp/doc-1__差旅制度.md",
        "content": "住宿费说明文字,提到了出差和报销要求,但没有清晰表格。",
    }
    table_chunk = {
        "chunk_id": "table-1",
        "file_path": "/tmp/doc-1__差旅制度.md",
        "content": "# 结构化表格补充\n\n| 城市 | 住宿费标准 |\n| 北京 | 500 |",
    }

    hits = KnowledgeRagService._build_hits_from_query_data(
        query="住宿费标准是多少?",
        chunks=[prose_chunk, table_chunk],
        entities=[],
        limit=2,
    )

    assert _candidate_ids(hits) == ["table-1", "plain-1"]


def test_build_hits_boosts_query_term_matches() -> None:
    """The chunk that contains the query terms is expected to rank first."""
    travel_chunk = {
        "chunk_id": "travel-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "差旅费包含交通费、住宿费和餐补标准。",
    }
    entertainment_chunk = {
        "chunk_id": "ent-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "业务招待费报销标准:应结合客户接待场景、人数和审批要求执行。",
    }

    hits = KnowledgeRagService._build_hits_from_query_data(
        query="招待费报销标准",
        chunks=[travel_chunk, entertainment_chunk],
        entities=[],
        limit=2,
    )

    assert _candidate_ids(hits) == ["ent-1", "travel-1"]


def test_build_hits_prioritizes_answer_clue_appendix_for_rule_queries() -> None:
    """An answer-clue appendix chunk is expected ahead of generic policy text."""
    generic_chunk = {
        "chunk_id": "plain-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "本制度用于规范报销流程,员工应遵守公司审批要求。",
    }
    clue_chunk = {
        "chunk_id": "clue-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": (
            "# 问答线索补充\n\n"
            "- 第二章 报销时限:费用发生后 30 日内提交申请。\n"
            "- 第二章 报销时限:超过 30 日需补充审批说明。"
        ),
    }

    hits = KnowledgeRagService._build_hits_from_query_data(
        query="报销时限是多少?",
        chunks=[generic_chunk, clue_chunk],
        entities=[],
        limit=2,
    )

    assert _candidate_ids(hits) == ["clue-1", "plain-1"]


def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> None:
    """A chapter-navigation chunk is expected below the body chunk with the answer."""
    navigation_chunk = {
        "chunk_id": "toc-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "# 章节导航\n\n- 第一章 总则\n- 第二章 职责分工\n- 第三章 支出归口",
    }
    body_chunk = {
        "chunk_id": "body-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "附表3:支出归口管理部门与归口业务范围\n组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。",
    }

    hits = KnowledgeRagService._build_hits_from_query_data(
        query="探亲差旅归哪个部门管理?",
        chunks=[navigation_chunk, body_chunk],
        entities=[],
        limit=2,
    )

    assert _candidate_ids(hits) == ["body-1", "toc-1"]


def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
    """When the ``qdrant`` hostname resolves, the container URL is returned."""

    def fake_getaddrinfo(hostname, port):
        # Only the "qdrant" host resolves; every other lookup yields no records.
        if hostname == "qdrant":
            return [("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
        return []

    monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", fake_getaddrinfo)

    resolved_url = knowledge_rag_module._resolve_default_qdrant_url()

    assert resolved_url == "http://qdrant:6333"


def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
    """When the address lookup raises ``OSError``, the loopback URL is returned."""

    def raise_lookup_error(_hostname, _port):
        raise OSError("lookup failed")

    monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", raise_lookup_error)

    resolved_url = knowledge_rag_module._resolve_default_qdrant_url()

    assert resolved_url == "http://127.0.0.1:6333"


def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
    """A ``failed`` status is not query-ready even though chunks are listed."""
    failed_status = {
        "status": "failed",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    verdict = KnowledgeRagService.is_query_ready_status(failed_status)

    assert verdict is False


def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> None:
    """A ``processing`` status is not query-ready even though chunks are listed."""
    processing_status = {
        "status": "processing",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    verdict = KnowledgeRagService.is_query_ready_status(processing_status)

    assert verdict is False