feat: 增强知识库功能,优化索引和RAG检索

This commit is contained in:
caoxiaozhu
2026-05-18 02:49:39 +00:00
parent 55e0591a5e
commit 4414ffb34c
18 changed files with 5656 additions and 4659 deletions

View File

@@ -44,9 +44,9 @@ def test_knowledge_normalizer_appends_structured_table(monkeypatch) -> None:
enriched = service.build_enriched_text(raw_text)
assert enriched.startswith("# 结构化表格补充")
assert enriched.startswith(raw_text.strip())
assert "| 餐补 | 75 | 65 | 55 | 140 |" in enriched
assert enriched.endswith(raw_text.strip())
assert enriched.endswith("| 合计 | 110 | 100 | 90 | 175 |")
def test_knowledge_normalizer_keeps_only_markdown_table_body() -> None:
@@ -79,12 +79,12 @@ def test_knowledge_normalizer_builds_section_navigation_without_table() -> None:
service = KnowledgeNormalizationService(db)
enriched = service.build_enriched_text(raw_text)
assert enriched.startswith("# 章节导航")
assert enriched.startswith(raw_text.strip())
assert "- 第一章 总则" in enriched
assert "## 第二章 住宿费标准" in enriched
assert "# 问答线索补充" in enriched
assert "- 第二章 住宿费标准:住宿费按照出差城市档位和职级标准执行" in enriched
assert enriched.endswith(raw_text.strip())
assert "# 章节导航" in enriched
def test_knowledge_normalizer_builds_answer_clues_from_lists_and_kv_lines() -> None:

View File

@@ -74,6 +74,28 @@ def test_build_hits_prioritizes_answer_clue_appendix_for_rule_queries() -> None:
assert [item["candidate_id"] for item in hits] == ["clue-1", "plain-1"]
def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> None:
    """The chunk that answers the question must rank above the chapter-navigation chunk.

    The navigation chunk is supplied first, so the expected order
    (body before navigation) only holds if the hit builder reorders them.
    """
    document_path = "/tmp/doc-1__费用制度.md"
    navigation_chunk = {
        "chunk_id": "toc-1",
        "file_path": document_path,
        "content": "# 章节导航\n\n- 第一章 总则\n- 第二章 职责分工\n- 第三章 支出归口",
    }
    body_chunk = {
        "chunk_id": "body-1",
        "file_path": document_path,
        "content": "附表3支出归口管理部门与归口业务范围\n组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。",
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="探亲差旅归哪个部门管理?",
        chunks=[navigation_chunk, body_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in ranked]
    assert ranked_ids == ["body-1", "toc-1"]
def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
monkeypatch.setattr(
knowledge_rag_module.socket,
@@ -93,3 +115,29 @@ def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", raise_lookup_error)
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://127.0.0.1:6333"
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
    """A "failed" status payload is not query-ready even when it reports chunks."""
    failed_payload = {
        "status": "failed",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    outcome = KnowledgeRagService.is_query_ready_status(failed_payload)

    # Identity check on purpose: the result must be the boolean False,
    # not merely a falsy value.
    assert outcome is False
def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> None:
    """A "processing" status payload is not query-ready even when it reports chunks."""
    processing_payload = {
        "status": "processing",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    outcome = KnowledgeRagService.is_query_ready_status(processing_payload)

    # Identity check on purpose: the result must be the boolean False,
    # not merely a falsy value.
    assert outcome is False

View File

@@ -0,0 +1,81 @@
from __future__ import annotations
from datetime import UTC, datetime
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import StaticPool
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentName, AgentRunSource, AgentRunStatus
from app.db.base import Base
from app.services.agent_runs import AgentRunService
from app.services.knowledge import (
KNOWLEDGE_INGEST_STATUS_FAILED,
KNOWLEDGE_INGEST_STATUS_SYNCING,
KnowledgeService,
)
def build_session() -> Session:
    """Return a new session bound to a fresh in-memory SQLite engine.

    The tables registered on ``Base.metadata`` are created on the engine
    before the session is handed back.
    """
    memory_engine = create_engine(
        "sqlite+pysqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )
    Base.metadata.create_all(bind=memory_engine)

    make_session = sessionmaker(
        autocommit=False,
        autoflush=False,
        bind=memory_engine,
    )
    return make_session()
def test_reconcile_document_ingest_status_keeps_failed_when_linked_run_failed(
    tmp_path,
    monkeypatch,
) -> None:
    """Reconciling must mark a document failed when its linked agent run failed.

    Setup: a document is uploaded and flagged as syncing under an agent run
    created with FAILED status, while the patched RAG status map still
    reports the document as "processing" and not query-ready.
    """
    with build_session() as db:
        service = KnowledgeService(storage_root=tmp_path, db=db)

        admin_user = CurrentUserContext(
            username="admin",
            name="管理员",
            role_codes=["manager"],
            is_admin=True,
        )
        uploaded = service.upload_document("报销制度", "demo.txt", b"hello", admin_user)

        failed_run = AgentRunService(db).create_run(
            agent=AgentName.HERMES.value,
            source=AgentRunSource.USER_MESSAGE.value,
            status=AgentRunStatus.FAILED.value,
            route_json={"job_type": "knowledge_index_sync"},
        )
        service.set_document_ingest_statuses(
            [uploaded.id],
            KNOWLEDGE_INGEST_STATUS_SYNCING,
            agent_run_id=failed_run.run_id,
        )

        def fake_status_map(self, _document_ids):
            # Stand-in for the RAG lookup: the document is reported as still
            # processing, with the timestamp taken at call time.
            return {
                uploaded.id: {
                    "status": "processing",
                    "query_ready": False,
                    "updated_at": datetime.now(UTC).isoformat(),
                }
            }

        monkeypatch.setattr(
            "app.services.knowledge_rag.KnowledgeRagService.get_document_status_map",
            fake_status_map,
        )

        index = service._load_index()
        changed = service._reconcile_document_ingest_statuses(
            index,
            document_ids=[uploaded.id],
            preserve_syncing=False,
        )

        entry = next(doc for doc in index["documents"] if doc["id"] == uploaded.id)
        assert changed is True
        assert entry["ingest_status"] == KNOWLEDGE_INGEST_STATUS_FAILED