feat(server): 重构知识库服务和路由配置,优化LLM维基知识管理接口,增强知识检索能力
This commit is contained in:
@@ -16,7 +16,7 @@ from app.core.agent_enums import AgentReviewStatus, AgentRunSource, AgentRunStat
|
||||
from app.db.base import Base
|
||||
from app.main import create_app
|
||||
from app.schemas.agent_asset import AgentAssetReviewCreate
|
||||
from app.schemas.knowledge import LlmWikiSummaryUpdateWrite, LlmWikiSyncRead
|
||||
from app.schemas.knowledge import LlmWikiSummaryUpdateWrite
|
||||
from app.services.agent_assets import AgentAssetService
|
||||
from app.services.agent_runs import AgentRunService
|
||||
from app.services.knowledge import (
|
||||
@@ -25,7 +25,7 @@ from app.services.knowledge import (
|
||||
KNOWLEDGE_INGEST_STATUS_PUBLISHED,
|
||||
KnowledgeService,
|
||||
)
|
||||
from app.services.llm_wiki import LlmWikiService
|
||||
from app.services.llm_wiki import CandidateModelAttempt, LlmWikiService
|
||||
|
||||
|
||||
def build_session() -> Session:
|
||||
@@ -86,6 +86,36 @@ def upload_policy_document(storage_root: Path, *, filename: str = "公司差旅
|
||||
return document.id
|
||||
|
||||
|
||||
def upload_multipage_policy_document(storage_root: Path, *, filename: str = "公司支出管理办法.txt") -> str:
    """Upload a four-page expense-policy text fixture and return its document id.

    The pages are separated by form-feed characters and consist of a cover
    notice, a table of contents, articles one to three, and article four.
    The text is UTF-8 encoded and stored in the "报销制度" folder on behalf of
    the admin user.
    """
    # Each entry is one page; the form feed is inserted between pages below.
    pages = (
        # Page 1: cover / issuing notice.
        "商密【中】\n"
        "关于颁布《公司支出管理办法》的通知\n"
        "特此通知。\n",
        # Page 2: table of contents.
        "目录\n"
        "第一章 总则................................4\n"
        "第二章 报销审批................................7\n",
        # Page 3: first three articles.
        "第一条 报销申请\n"
        "员工提交报销申请时,应附发票、行程单和审批说明。\n"
        "第二条 报销审批\n"
        "住宿费超过制度标准时,必须升级至总经理审批。\n"
        "第三条 附件补充\n"
        "缺少附件时不得提交报销。\n",
        # Page 4: final article.
        "第四条 财务复核\n"
        "财务复核时应校验预算、发票真伪和审批链完整性。\n",
    )
    raw_bytes = "\f".join(pages).encode("utf-8")

    knowledge_service = KnowledgeService(storage_root=storage_root)
    knowledge_service.ensure_library_ready()
    uploaded = knowledge_service.upload_document(
        folder="报销制度",
        filename=filename,
        content=raw_bytes,
        current_user=build_admin_user(),
    )
    return uploaded.id
|
||||
|
||||
|
||||
def build_candidate_payload(chunk_id: str, *, summary: str = "住宿费超过标准时必须升级审批。") -> dict[str, object]:
|
||||
return {
|
||||
"knowledge_candidates": [
|
||||
@@ -222,10 +252,14 @@ def test_llm_wiki_sync_creates_artifacts_and_draft_rule(tmp_path, monkeypatch) -
|
||||
|
||||
document_payload = json.loads((document_dir / "document.json").read_text(encoding="utf-8"))
|
||||
assert document_payload["sync_reason"] == "initial_build"
|
||||
assert document_payload["quality_status"] == "formal"
|
||||
assert document_payload["formal_knowledge_candidate_count"] == 1
|
||||
assert document_payload["fallback_knowledge_candidate_count"] == 0
|
||||
|
||||
detail = service.get_document_detail(document_id)
|
||||
assert "公司差旅报销制度.txt 知识总结" in detail.knowledge_summary_markdown
|
||||
assert "住宿费升级审批要求" in detail.knowledge_summary_markdown
|
||||
assert detail.quality_status == "formal"
|
||||
|
||||
asset = AgentAssetService(db).get_asset(result.generated_rule_asset_ids[0])
|
||||
assert asset is not None
|
||||
@@ -386,9 +420,91 @@ def test_llm_wiki_sync_uses_fallback_candidates_when_system_hermes_times_out(
|
||||
assert result.knowledge_candidate_count >= 1
|
||||
assert runtime_called["count"] == 0
|
||||
|
||||
detail = KnowledgeService(storage_root=tmp_path).get_document_detail(document_id)
|
||||
assert detail.stateCode == KNOWLEDGE_INGEST_STATUS_INGESTED
|
||||
assert detail.state == "已归纳"
|
||||
knowledge_service = KnowledgeService(storage_root=tmp_path)
|
||||
detail = knowledge_service.get_document_detail(document_id)
|
||||
assert detail.stateCode == KNOWLEDGE_INGEST_STATUS_FAILED
|
||||
assert detail.state == "归纳失败"
|
||||
assert detail.llmWikiAvailable is True
|
||||
assert detail.llmWikiQualityStatus == "fallback_only"
|
||||
|
||||
document_payload = json.loads(
|
||||
(
|
||||
tmp_path
|
||||
/ "knowledge"
|
||||
/ ".llm_wiki"
|
||||
/ "documents"
|
||||
/ document_id
|
||||
/ "document.json"
|
||||
).read_text(encoding="utf-8")
|
||||
)
|
||||
assert document_payload["quality_status"] == "fallback_only"
|
||||
assert document_payload["formal_knowledge_candidate_count"] == 0
|
||||
assert document_payload["fallback_knowledge_candidate_count"] == 1
|
||||
|
||||
candidates_payload = json.loads(
|
||||
(
|
||||
tmp_path
|
||||
/ "knowledge"
|
||||
/ ".llm_wiki"
|
||||
/ "documents"
|
||||
/ document_id
|
||||
/ "knowledge_candidates.json"
|
||||
).read_text(encoding="utf-8")
|
||||
)
|
||||
assert candidates_payload[0]["extraction_mode"] == "fallback"
|
||||
assert "fallback_only" in candidates_payload[0]["quality_flags"]
|
||||
|
||||
|
||||
def test_llm_wiki_sync_continues_after_single_group_failure(tmp_path, monkeypatch) -> None:
    """A failed first candidate-model call must not abort the document sync.

    The stubbed model call fails once and then succeeds. The test expects the
    document to be reported as "partial_degraded" with at least one failed
    group, one successful group and one formal knowledge candidate, while the
    knowledge library still lists it as ingested.
    """
    document_id = upload_multipage_policy_document(tmp_path, filename="多页支出制度.txt")
    calls_seen = 0

    def fake_call_candidate_model(self, *, entry, chunk_group):
        nonlocal calls_seen
        calls_seen += 1
        if calls_seen != 1:
            # NOTE(review): success returns a bare payload dict while the failure
            # path returns a CandidateModelAttempt - confirm the service accepts both.
            return build_candidate_payload(chunk_group[0]["chunk_id"])
        # Only the very first invocation simulates a model timeout.
        return CandidateModelAttempt(
            payload={},
            source="hermes",
            ok=False,
            failure_reason="simulated_timeout",
        )

    monkeypatch.setattr(LlmWikiService, "_call_candidate_model", fake_call_candidate_model)

    with build_session() as db:
        wiki_service = LlmWikiService(db, storage_root=tmp_path)
        result = wiki_service.sync_folder(
            folder="报销制度",
            current_user=build_admin_user(),
            document_ids=[document_id],
        )
        detail = wiki_service.get_document_detail(document_id)

        assert result.document_count == 1
        assert calls_seen >= 2
        assert detail.quality_status == "partial_degraded"
        assert detail.successful_group_count >= 1
        assert detail.failed_group_count >= 1
        assert detail.formal_knowledge_candidate_count >= 1

    # The knowledge-library view is read through a fresh service instance.
    library_view = KnowledgeService(storage_root=tmp_path)
    knowledge_detail = library_view.get_document_detail(document_id)
    assert knowledge_detail.stateCode == KNOWLEDGE_INGEST_STATUS_INGESTED
    assert knowledge_detail.llmWikiQualityStatus == "partial_degraded"
|
||||
|
||||
|
||||
def test_llm_wiki_filters_cover_and_catalog_chunks_before_candidate_extraction(tmp_path) -> None:
    """Candidate selection must drop chunks that come from the first two pages.

    The fixture's first two pages are a cover notice and a table of contents,
    so every chunk kept for candidate extraction is expected to report a
    ``source_page`` of 3 or higher.
    """
    document_id = upload_multipage_policy_document(tmp_path, filename="封面目录过滤测试.txt")

    with build_session() as db:
        wiki_service = LlmWikiService(db, storage_root=tmp_path)
        chunks = wiki_service._build_chunks(
            document_id=document_id,
            text=wiki_service.knowledge_service.extract_document_text(document_id),
        )
        candidate_chunks = wiki_service._select_candidate_chunks(chunks)

        # Filtering must remove something, but not everything.
        assert len(chunks) > len(candidate_chunks)
        assert candidate_chunks
        # A missing or empty source_page counts as page 0 and therefore fails.
        assert not any(
            int(chunk.get("source_page") or 0) < 3 for chunk in candidate_chunks
        )
|
||||
|
||||
|
||||
def test_llm_wiki_sync_skips_unchanged_and_rebuilds_on_updated_at_change(tmp_path, monkeypatch) -> None:
|
||||
@@ -475,22 +591,46 @@ def test_llm_wiki_sync_does_not_overwrite_active_rule(tmp_path, monkeypatch) ->
|
||||
|
||||
|
||||
def test_llm_wiki_sync_endpoint_records_agent_run(monkeypatch) -> None:
|
||||
def fake_sync_folder(self, *, folder="报销制度", current_user, document_ids=None, force=False):
|
||||
return LlmWikiSyncRead(
|
||||
ok=True,
|
||||
run_id="wiki_test_sync",
|
||||
folder=folder,
|
||||
document_count=1,
|
||||
knowledge_candidate_count=2,
|
||||
rule_candidate_count=1,
|
||||
generated_rule_count=1,
|
||||
generated_rule_asset_ids=["asset-rule-1"],
|
||||
summary="已完成 Hermes LLM Wiki 同步。",
|
||||
)
|
||||
|
||||
monkeypatch.setattr(LlmWikiService, "sync_folder", fake_sync_folder)
|
||||
|
||||
client, session_factory = build_client()
|
||||
|
||||
def fake_submit_sync(*, agent_run_id, folder, current_user, document_ids=None, force=False):
|
||||
with session_factory() as db:
|
||||
service = AgentRunService(db)
|
||||
service.record_tool_call(
|
||||
run_id=agent_run_id,
|
||||
tool_type="llm",
|
||||
tool_name="system_hermes_llm_wiki_sync",
|
||||
request_json={
|
||||
"folder": folder,
|
||||
"document_ids": list(document_ids or []),
|
||||
"force": force,
|
||||
},
|
||||
response_json={"run_id": "wiki_test_sync"},
|
||||
status="succeeded",
|
||||
duration_ms=0,
|
||||
)
|
||||
service.merge_route_json(
|
||||
agent_run_id,
|
||||
{
|
||||
"phase": "succeeded",
|
||||
"sync_run_id": "wiki_test_sync",
|
||||
"progress": {
|
||||
"total_documents": len(document_ids or []),
|
||||
"completed_documents": len(document_ids or []),
|
||||
"failed_documents": 0,
|
||||
"skipped_documents": 0,
|
||||
"percent": 100,
|
||||
},
|
||||
},
|
||||
status=AgentRunStatus.SUCCEEDED.value,
|
||||
result_summary="已完成 Hermes LLM Wiki 同步。",
|
||||
)
|
||||
|
||||
monkeypatch.setattr(
|
||||
"app.services.llm_wiki_tasks.llm_wiki_task_manager.submit_sync",
|
||||
fake_submit_sync,
|
||||
)
|
||||
|
||||
with session_factory() as db:
|
||||
before_count = len(AgentRunService(db).list_runs(limit=100))
|
||||
|
||||
@@ -506,8 +646,8 @@ def test_llm_wiki_sync_endpoint_records_agent_run(monkeypatch) -> None:
|
||||
|
||||
assert response.status_code == 200
|
||||
payload = response.json()
|
||||
assert payload["run_id"] == "wiki_test_sync"
|
||||
assert payload["generated_rule_count"] == 1
|
||||
assert payload["agent_run_id"].startswith("run_")
|
||||
assert payload["status"] == AgentRunStatus.RUNNING.value
|
||||
|
||||
with session_factory() as db:
|
||||
service = AgentRunService(db)
|
||||
@@ -521,4 +661,4 @@ def test_llm_wiki_sync_endpoint_records_agent_run(monkeypatch) -> None:
|
||||
assert latest_run.tool_calls
|
||||
assert latest_run.tool_calls[0].tool_name == "system_hermes_llm_wiki_sync"
|
||||
assert latest_run.tool_calls[0].status == "succeeded"
|
||||
assert latest_run.tool_calls[0].response_json["run_id"] == "wiki_test_sync"
|
||||
assert latest_run.route_json["sync_run_id"] == "wiki_test_sync"
|
||||
|
||||
Reference in New Issue
Block a user