feat: 增强知识库索引与设置页面模块化拆分
扩展知识库索引任务和 RAG 检索支持增量入库和文档去重,优化本体检测和规则匹配精度,前端设置页面拆分为 LLM、邮件和 Hermes 员工同步子面板并重构样式,新增日志详情组件和知识入库日志模型,补充单元测试覆盖。
This commit is contained in:
@@ -1,6 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import threading
|
||||
|
||||
from app.services import knowledge_rag as knowledge_rag_module
|
||||
from app.services.knowledge_ingest_log import (
|
||||
build_document_graph_summary,
|
||||
build_ingest_document_summary,
|
||||
build_ingest_status_summary,
|
||||
)
|
||||
from app.services.knowledge_rag import KnowledgeRagService
|
||||
|
||||
|
||||
@@ -86,7 +94,10 @@ def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> No
|
||||
{
|
||||
"chunk_id": "body-1",
|
||||
"file_path": "/tmp/doc-1__费用制度.md",
|
||||
"content": "附表3:支出归口管理部门与归口业务范围\n组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。",
|
||||
"content": (
|
||||
"附表3:支出归口管理部门与归口业务范围\n"
|
||||
"组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。"
|
||||
),
|
||||
},
|
||||
],
|
||||
entities=[],
|
||||
@@ -100,9 +111,11 @@ def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
knowledge_rag_module.socket,
|
||||
"getaddrinfo",
|
||||
lambda hostname, port: [("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
|
||||
if hostname == "qdrant"
|
||||
else [],
|
||||
lambda hostname, port: (
|
||||
[("family", "type", "proto", "canonname", ("172.21.0.2", 0))]
|
||||
if hostname == "qdrant"
|
||||
else []
|
||||
),
|
||||
)
|
||||
|
||||
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://qdrant:6333"
|
||||
@@ -117,6 +130,45 @@ def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
|
||||
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://127.0.0.1:6333"
|
||||
|
||||
|
||||
def test_runtime_cache_is_isolated_by_thread(monkeypatch) -> None:
    """Check that a worker thread does not reuse the main thread's runtime.

    ``_LightRagRuntime`` is replaced with a fake that records every instance,
    and ``_build_runtime_signature`` is patched to return one constant value so
    both threads ask for a runtime with the same signature. The test expects
    the same thread to get the same runtime back on a repeated call, a second
    thread to get a different one, and every created runtime to be finalized
    once ``shutdown_knowledge_rag_runtime()`` has run.
    """
    # Start from a clean slate in case an earlier test left runtimes behind.
    knowledge_rag_module.shutdown_knowledge_rag_runtime()
    created_runtimes = []

    class FakeRuntime:
        """Stand-in runtime that records its construction and finalization."""

        def __init__(self, **_kwargs):
            self.finalized = False
            created_runtimes.append(self)

        def finalize(self):
            self.finalized = True

    monkeypatch.setattr(knowledge_rag_module, "_LightRagRuntime", FakeRuntime)
    monkeypatch.setattr(
        KnowledgeRagService,
        "_build_runtime_signature",
        lambda self: (("same-config",), {}),
    )

    try:
        service = KnowledgeRagService()
        main_runtime = service._get_runtime()
        assert service._get_runtime() is main_runtime

        worker_runtimes = []

        def load_worker_runtime() -> None:
            worker_runtimes.append(KnowledgeRagService()._get_runtime())

        # daemon=True so a hung worker cannot block interpreter exit.
        thread = threading.Thread(target=load_worker_runtime, daemon=True)
        thread.start()
        thread.join(timeout=5)

        # join() returns silently on timeout, and an exception raised inside
        # the worker is not propagated here, so check both outcomes explicitly.
        assert not thread.is_alive(), "worker thread did not finish within 5 seconds"
        assert len(worker_runtimes) == 1, "worker thread did not produce a runtime"

        assert len(created_runtimes) == 2
        assert worker_runtimes[0] is not main_runtime
    finally:
        # Always release the fake runtimes, even when an assertion above
        # fails, so they do not leak into later tests.
        knowledge_rag_module.shutdown_knowledge_rag_runtime()

    assert all(runtime.finalized for runtime in created_runtimes)
|
||||
|
||||
|
||||
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
|
||||
assert (
|
||||
KnowledgeRagService.is_query_ready_status(
|
||||
@@ -141,3 +193,89 @@ def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> N
|
||||
)
|
||||
is False
|
||||
)
|
||||
|
||||
|
||||
def test_build_document_graph_summary_reads_lightrag_storage(tmp_path) -> None:
    """Check the graph summary built from on-disk LightRAG JSON stores.

    Three JSON files are written under
    ``<tmp_path>/knowledge/.lightrag/test_workspace``. The entity list
    contains a duplicate name and the chunks are stored as chunk-2 then
    chunk-1; the summary is expected to report two entities, one relation,
    and the chunks as chunk-1 then chunk-2.
    """
    storage_dir = tmp_path / "knowledge" / ".lightrag" / "test_workspace"
    storage_dir.mkdir(parents=True)

    second_chunk = {
        "_id": "chunk-2",
        "full_doc_id": "doc-1",
        "chunk_order_index": 1,
        "tokens": 45,
        "content": "第二条 支出审批需要结合预算、归口部门和授权标准执行。",
    }
    first_chunk = {
        "_id": "chunk-1",
        "full_doc_id": "doc-1",
        "chunk_order_index": 0,
        "tokens": 31,
        "content": "第一条 本办法适用于公司支出管理。",
    }

    # File name -> payload, written in this order. The chunk store
    # deliberately lists chunk-2 before chunk-1.
    fixtures = {
        "kv_store_full_entities.json": {
            "doc-1": {"entity_names": ["远光软件", "支出管理", "远光软件"]},
        },
        "kv_store_full_relations.json": {
            "doc-1": {"relation_pairs": [["远光软件", "支出管理"]]},
        },
        "kv_store_text_chunks.json": {
            "chunk-2": second_chunk,
            "chunk-1": first_chunk,
        },
    }
    for file_name, payload in fixtures.items():
        (storage_dir / file_name).write_text(json.dumps(payload), encoding="utf-8")

    summary = build_document_graph_summary(
        tmp_path,
        workspace="test_workspace",
        document_id="doc-1",
    )

    expected_relations = [
        {"source": "远光软件", "target": "支出管理", "type": "关联"},
    ]
    assert summary["entity_count"] == 2
    assert summary["entities"] == ["远光软件", "支出管理"]
    assert summary["relation_count"] == 1
    assert summary["relations"] == expected_relations
    chunk_ids = [item["id"] for item in summary["chunks"]]
    assert chunk_ids == ["chunk-1", "chunk-2"]
|
||||
|
||||
|
||||
def test_build_ingest_document_summary_extracts_sections() -> None:
    """Check the name and section fields of an ingest document summary.

    Given a PDF entry plus raw and indexed text, the summary is expected to
    carry the original file name, report two sections, and have the first
    section titled "第一章 总则".
    """
    document_entry = {
        "original_name": "公司支出管理办法.pdf",
        "folder": "制度文件",
        "extension": "pdf",
        "mime_type": "application/pdf",
    }
    raw_text = "第一章 总则\n本办法用于规范公司支出。"
    indexed_text = "# 第一章 总则\n本办法用于规范公司支出。\n第二条 审批\n审批需按授权执行。"

    summary = build_ingest_document_summary(
        document_id="doc-1",
        entry=document_entry,
        raw_text=raw_text,
        indexed_text=indexed_text,
    )

    assert summary["name"] == "公司支出管理办法.pdf"
    assert summary["section_count"] == 2
    first_section = summary["sections"][0]
    assert first_section["title"] == "第一章 总则"
|
||||
|
||||
|
||||
def test_build_ingest_status_summary_keeps_chunk_status() -> None:
    """Check that status and chunk fields carry over into the ingest summary.

    The status payload's ``status``, ``query_ready``, ``chunks_count`` and
    ``chunks_list`` values are expected to appear in the summary as
    ``lightrag_status``, ``query_ready``, ``chunk_count`` and ``chunk_ids``.
    """
    expected_chunk_ids = ["chunk-1", "chunk-2"]
    status_payload = {
        "status": "processed",
        "query_ready": True,
        "chunks_count": 2,
        "chunks_list": ["chunk-1", "chunk-2"],
    }
    graph_summary = {
        "entity_count": 1,
        "relation_count": 0,
        "entities": ["预算"],
        "relations": [],
    }

    summary = build_ingest_status_summary(
        status_payload=status_payload,
        graph_summary=graph_summary,
    )

    assert summary["lightrag_status"] == "processed"
    assert summary["query_ready"] is True
    assert summary["chunk_count"] == 2
    assert summary["chunk_ids"] == expected_chunk_ids
|
||||
|
||||
Reference in New Issue
Block a user