feat: 增强知识库功能,优化索引和RAG检索
This commit is contained in:
@@ -44,9 +44,9 @@ def test_knowledge_normalizer_appends_structured_table(monkeypatch) -> None:
|
||||
|
||||
enriched = service.build_enriched_text(raw_text)
|
||||
|
||||
assert enriched.startswith("# 结构化表格补充")
|
||||
assert enriched.startswith(raw_text.strip())
|
||||
assert "| 餐补 | 75 | 65 | 55 | 140 |" in enriched
|
||||
assert enriched.endswith(raw_text.strip())
|
||||
assert enriched.endswith("| 合计 | 110 | 100 | 90 | 175 |")
|
||||
|
||||
|
||||
def test_knowledge_normalizer_keeps_only_markdown_table_body() -> None:
|
||||
@@ -79,12 +79,12 @@ def test_knowledge_normalizer_builds_section_navigation_without_table() -> None:
|
||||
service = KnowledgeNormalizationService(db)
|
||||
enriched = service.build_enriched_text(raw_text)
|
||||
|
||||
assert enriched.startswith("# 章节导航")
|
||||
assert enriched.startswith(raw_text.strip())
|
||||
assert "- 第一章 总则" in enriched
|
||||
assert "## 第二章 住宿费标准" in enriched
|
||||
assert "# 问答线索补充" in enriched
|
||||
assert "- 第二章 住宿费标准:住宿费按照出差城市档位和职级标准执行" in enriched
|
||||
assert enriched.endswith(raw_text.strip())
|
||||
assert "# 章节导航" in enriched
|
||||
|
||||
|
||||
def test_knowledge_normalizer_builds_answer_clues_from_lists_and_kv_lines() -> None:
|
||||
|
||||
@@ -74,6 +74,28 @@ def test_build_hits_prioritizes_answer_clue_appendix_for_rule_queries() -> None:
|
||||
assert [item["candidate_id"] for item in hits] == ["clue-1", "plain-1"]
|
||||
|
||||
|
||||
def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> None:
    """A chapter-navigation chunk must rank after the body chunk for a specific rule query.

    The navigation chunk is passed first and the body chunk second; the
    expected hit order is the reverse of that input order.
    """
    source_path = "/tmp/doc-1__费用制度.md"
    navigation_chunk = {
        "chunk_id": "toc-1",
        "file_path": source_path,
        "content": "# 章节导航\n\n- 第一章 总则\n- 第二章 职责分工\n- 第三章 支出归口",
    }
    body_chunk = {
        "chunk_id": "body-1",
        "file_path": source_path,
        "content": "附表3:支出归口管理部门与归口业务范围\n组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。",
    }

    hits = KnowledgeRagService._build_hits_from_query_data(
        query="探亲差旅归哪个部门管理?",
        chunks=[navigation_chunk, body_chunk],
        entities=[],
        limit=2,
    )

    ranked_ids = [hit["candidate_id"] for hit in hits]
    assert ranked_ids == ["body-1", "toc-1"]
|
||||
|
||||
|
||||
def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
knowledge_rag_module.socket,
|
||||
@@ -93,3 +115,29 @@ def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
|
||||
monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", raise_lookup_error)
|
||||
|
||||
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://127.0.0.1:6333"
|
||||
|
||||
|
||||
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
    """A "failed" status is not query-ready even though chunk data is present."""
    failed_status = {
        "status": "failed",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    is_ready = KnowledgeRagService.is_query_ready_status(failed_status)

    assert is_ready is False
|
||||
|
||||
|
||||
def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> None:
    """A "processing" status is not query-ready even though chunk data is present."""
    processing_status = {
        "status": "processing",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    is_ready = KnowledgeRagService.is_query_ready_status(processing_status)

    assert is_ready is False
|
||||
|
||||
81
server/tests/test_knowledge_service.py
Normal file
81
server/tests/test_knowledge_service.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from app.core.agent_enums import AgentName, AgentRunSource, AgentRunStatus
|
||||
from app.db.base import Base
|
||||
from app.services.agent_runs import AgentRunService
|
||||
from app.services.knowledge import (
|
||||
KNOWLEDGE_INGEST_STATUS_FAILED,
|
||||
KNOWLEDGE_INGEST_STATUS_SYNCING,
|
||||
KnowledgeService,
|
||||
)
|
||||
|
||||
|
||||
def build_session() -> Session:
    """Return a new Session bound to a fresh in-memory SQLite database.

    The engine uses ``StaticPool`` with ``check_same_thread`` disabled, and
    every table registered on ``Base.metadata`` is created before the
    session is handed out.
    """
    memory_engine = create_engine(
        "sqlite+pysqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(bind=memory_engine)
    make_session = sessionmaker(
        bind=memory_engine,
        autoflush=False,
        autocommit=False,
    )
    return make_session()
|
||||
|
||||
|
||||
def test_reconcile_document_ingest_status_keeps_failed_when_linked_run_failed(
    tmp_path,
    monkeypatch,
) -> None:
    """Reconciliation marks a document FAILED when its linked agent run failed.

    The document is put into the syncing state and linked to a run created
    with FAILED status, while the patched RAG status map reports
    "processing" and not query-ready. Reconciling with
    ``preserve_syncing=False`` must report a change and leave the index
    entry with the FAILED ingest status.
    """
    with build_session() as db:
        service = KnowledgeService(storage_root=tmp_path, db=db)
        admin_user = CurrentUserContext(
            username="admin",
            name="管理员",
            role_codes=["manager"],
            is_admin=True,
        )
        uploaded = service.upload_document("报销制度", "demo.txt", b"hello", admin_user)

        failed_run = AgentRunService(db).create_run(
            agent=AgentName.HERMES.value,
            source=AgentRunSource.USER_MESSAGE.value,
            status=AgentRunStatus.FAILED.value,
            route_json={"job_type": "knowledge_index_sync"},
        )
        service.set_document_ingest_statuses(
            [uploaded.id],
            KNOWLEDGE_INGEST_STATUS_SYNCING,
            agent_run_id=failed_run.run_id,
        )

        def fake_status_map(self, _document_ids):
            # Stand-in for the RAG status lookup: the document is still
            # processing and cannot be queried yet.
            return {
                uploaded.id: {
                    "status": "processing",
                    "query_ready": False,
                    "updated_at": datetime.now(UTC).isoformat(),
                }
            }

        monkeypatch.setattr(
            "app.services.knowledge_rag.KnowledgeRagService.get_document_status_map",
            fake_status_map,
        )

        index = service._load_index()
        changed = service._reconcile_document_ingest_statuses(
            index,
            document_ids=[uploaded.id],
            preserve_syncing=False,
        )

        entry = next(doc for doc in index["documents"] if doc["id"] == uploaded.id)
        assert changed is True
        assert entry["ingest_status"] == KNOWLEDGE_INGEST_STATUS_FAILED
|
||||
Reference in New Issue
Block a user