feat: 增强知识库索引与设置页面模块化拆分
扩展知识库索引任务和 RAG 检索,支持增量入库和文档去重,优化本体检测和规则匹配精度;前端设置页面拆分为 LLM、邮件和 Hermes 员工同步子面板并重构样式,新增日志详情组件和知识入库日志模型,补充单元测试覆盖。
This commit is contained in:
@@ -63,6 +63,7 @@ class KnowledgeIndexTaskManager:
|
||||
heartbeat_stop = threading.Event()
|
||||
heartbeat_thread: threading.Thread | None = None
|
||||
tool_call_id = ""
|
||||
knowledge_ingest: dict[str, Any] | None = None
|
||||
tool_request_json = {
|
||||
"agent": AgentName.HERMES.value,
|
||||
"folder": folder,
|
||||
@@ -74,6 +75,10 @@ class KnowledgeIndexTaskManager:
|
||||
run_service = AgentRunService(db)
|
||||
knowledge_service = KnowledgeService(db=db)
|
||||
rag_service = KnowledgeRagService(db=db)
|
||||
knowledge_ingest = _build_initial_knowledge_ingest_state(
|
||||
knowledge_service,
|
||||
document_ids=document_ids,
|
||||
)
|
||||
|
||||
run_service.merge_route_json(
|
||||
agent_run_id,
|
||||
@@ -93,7 +98,18 @@ class KnowledgeIndexTaskManager:
|
||||
"skipped_documents": 0,
|
||||
"percent": 10 if document_ids else 100,
|
||||
},
|
||||
"knowledge_ingest": knowledge_ingest,
|
||||
},
|
||||
result_summary=_build_ingest_running_summary(
|
||||
knowledge_ingest,
|
||||
{
|
||||
"total_documents": len(document_ids),
|
||||
"completed_documents": 0,
|
||||
"failed_documents": 0,
|
||||
"skipped_documents": 0,
|
||||
"percent": 10 if document_ids else 100,
|
||||
},
|
||||
),
|
||||
)
|
||||
tool_call = run_service.record_tool_call(
|
||||
run_id=agent_run_id,
|
||||
@@ -134,44 +150,159 @@ class KnowledgeIndexTaskManager:
|
||||
)
|
||||
heartbeat_thread.start()
|
||||
|
||||
response = rag_service.index_documents(document_ids=document_ids, force=force)
|
||||
succeeded_document_ids = [
|
||||
str(item).strip()
|
||||
for item in list(response.get("succeeded_document_ids") or [])
|
||||
if str(item).strip()
|
||||
]
|
||||
failed_documents = [
|
||||
item
|
||||
for item in list(response.get("failed_documents") or [])
|
||||
if isinstance(item, dict)
|
||||
]
|
||||
responses: list[dict[str, Any]] = []
|
||||
succeeded_document_ids: list[str] = []
|
||||
failed_documents: list[dict[str, str]] = []
|
||||
total_documents = len(document_ids)
|
||||
|
||||
for index, document_id in enumerate(document_ids, start=1):
|
||||
_patch_ingest_document(
|
||||
knowledge_ingest,
|
||||
document_id,
|
||||
{
|
||||
"status": "running",
|
||||
"phase": "indexing",
|
||||
"started_at": datetime.now(UTC).isoformat(),
|
||||
},
|
||||
event=f"开始处理第 {index}/{total_documents} 个文件,正在写入 LightRAG。",
|
||||
)
|
||||
knowledge_ingest["current_document_id"] = document_id
|
||||
_sync_ingest_route_json(
|
||||
run_service,
|
||||
agent_run_id,
|
||||
knowledge_ingest,
|
||||
progress=_build_ingest_progress(knowledge_ingest, total_documents),
|
||||
)
|
||||
|
||||
try:
|
||||
response = rag_service.index_documents(document_ids=[document_id], force=force)
|
||||
except Exception as exc:
|
||||
logger.exception(
|
||||
"Knowledge document index failed run_id=%s doc_id=%s",
|
||||
agent_run_id,
|
||||
document_id,
|
||||
)
|
||||
failed_documents.append(
|
||||
{
|
||||
"document_id": document_id,
|
||||
"status": "exception",
|
||||
"error": str(exc),
|
||||
}
|
||||
)
|
||||
_patch_ingest_document(
|
||||
knowledge_ingest,
|
||||
document_id,
|
||||
{
|
||||
"status": "failed",
|
||||
"phase": "failed",
|
||||
"finished_at": datetime.now(UTC).isoformat(),
|
||||
"error": str(exc),
|
||||
},
|
||||
event=f"归集失败:{exc}",
|
||||
level="error",
|
||||
)
|
||||
knowledge_service.set_document_ingest_statuses(
|
||||
[document_id],
|
||||
KNOWLEDGE_INGEST_STATUS_FAILED,
|
||||
agent_run_id=agent_run_id,
|
||||
)
|
||||
_refresh_ingest_graph(knowledge_ingest)
|
||||
_sync_ingest_route_json(
|
||||
run_service,
|
||||
agent_run_id,
|
||||
knowledge_ingest,
|
||||
progress=_build_ingest_progress(knowledge_ingest, total_documents),
|
||||
)
|
||||
continue
|
||||
|
||||
responses.append(response)
|
||||
response_failed_documents = _extract_failed_documents(response, document_id)
|
||||
document_summary = _extract_document_summary(response, document_id)
|
||||
if response_failed_documents:
|
||||
failed_documents.extend(response_failed_documents)
|
||||
error_text = (
|
||||
response_failed_documents[0].get("error") or "LightRAG 未返回可查询状态"
|
||||
)
|
||||
_patch_ingest_document(
|
||||
knowledge_ingest,
|
||||
document_id,
|
||||
{
|
||||
**document_summary,
|
||||
"status": "failed",
|
||||
"phase": "failed",
|
||||
"finished_at": datetime.now(UTC).isoformat(),
|
||||
"error": error_text,
|
||||
"track_id": str(response.get("track_id") or "").strip(),
|
||||
},
|
||||
event=f"LightRAG 索引失败:{error_text}",
|
||||
level="error",
|
||||
)
|
||||
knowledge_service.set_document_ingest_statuses(
|
||||
[document_id],
|
||||
KNOWLEDGE_INGEST_STATUS_FAILED,
|
||||
agent_run_id=agent_run_id,
|
||||
)
|
||||
else:
|
||||
succeeded_document_ids.append(document_id)
|
||||
chunk_count = int(document_summary.get("chunk_count") or 0)
|
||||
entity_count = int(document_summary.get("entity_count") or 0)
|
||||
relation_count = int(document_summary.get("relation_count") or 0)
|
||||
_patch_ingest_document(
|
||||
knowledge_ingest,
|
||||
document_id,
|
||||
{
|
||||
**document_summary,
|
||||
"status": "succeeded",
|
||||
"phase": "indexed",
|
||||
"finished_at": datetime.now(UTC).isoformat(),
|
||||
"track_id": str(response.get("track_id") or "").strip(),
|
||||
},
|
||||
event=(
|
||||
"LightRAG 索引完成:"
|
||||
f"{chunk_count} 个 chunk,{entity_count} 个实体,"
|
||||
f"{relation_count} 条关系。"
|
||||
),
|
||||
)
|
||||
knowledge_service.set_document_ingest_statuses(
|
||||
[document_id],
|
||||
KNOWLEDGE_INGEST_STATUS_INGESTED,
|
||||
agent_run_id=agent_run_id,
|
||||
)
|
||||
_refresh_ingest_graph(knowledge_ingest)
|
||||
_sync_ingest_route_json(
|
||||
run_service,
|
||||
agent_run_id,
|
||||
knowledge_ingest,
|
||||
progress=_build_ingest_progress(knowledge_ingest, total_documents),
|
||||
)
|
||||
|
||||
failed_document_ids = [
|
||||
str(item.get("document_id") or "").strip()
|
||||
for item in failed_documents
|
||||
if str(item.get("document_id") or "").strip()
|
||||
]
|
||||
|
||||
if succeeded_document_ids:
|
||||
knowledge_service.set_document_ingest_statuses(
|
||||
succeeded_document_ids,
|
||||
KNOWLEDGE_INGEST_STATUS_INGESTED,
|
||||
agent_run_id=agent_run_id,
|
||||
)
|
||||
if failed_document_ids:
|
||||
knowledge_service.set_document_ingest_statuses(
|
||||
failed_document_ids,
|
||||
KNOWLEDGE_INGEST_STATUS_FAILED,
|
||||
agent_run_id=agent_run_id,
|
||||
)
|
||||
|
||||
duration_ms = int((perf_counter() - started) * 1000)
|
||||
tool_status = "succeeded" if not failed_document_ids else "failed"
|
||||
latest_track_id = _resolve_latest_track_id(responses)
|
||||
knowledge_ingest["current_document_id"] = ""
|
||||
knowledge_ingest["status"] = tool_status
|
||||
knowledge_ingest["phase"] = "completed"
|
||||
knowledge_ingest["finished_at"] = datetime.now(UTC).isoformat()
|
||||
knowledge_ingest["graph"] = _build_ingest_graph(knowledge_ingest)
|
||||
heartbeat_stop.set()
|
||||
if heartbeat_thread is not None:
|
||||
heartbeat_thread.join(timeout=1)
|
||||
run_service.update_tool_call(
|
||||
tool_call_id,
|
||||
response_json=response,
|
||||
response_json={
|
||||
"track_id": latest_track_id,
|
||||
"requested_document_ids": document_ids,
|
||||
"succeeded_document_ids": succeeded_document_ids,
|
||||
"failed_documents": failed_documents,
|
||||
"documents": knowledge_ingest.get("documents", []),
|
||||
"responses": responses,
|
||||
},
|
||||
status=tool_status,
|
||||
duration_ms=duration_ms,
|
||||
error_message=None if tool_status == "succeeded" else "部分文档索引失败。",
|
||||
@@ -183,14 +314,17 @@ class KnowledgeIndexTaskManager:
|
||||
summary = (
|
||||
f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引。"
|
||||
if failed_count == 0
|
||||
else f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引,失败 {failed_count} 个。"
|
||||
else (
|
||||
f"LightRAG 已完成 {completed_documents}/{total_documents} 个知识文档索引,"
|
||||
f"失败 {failed_count} 个。"
|
||||
)
|
||||
)
|
||||
run_service.merge_route_json(
|
||||
agent_run_id,
|
||||
{
|
||||
"job_type": "knowledge_index_sync",
|
||||
"phase": "completed",
|
||||
"track_id": str(response.get("track_id") or "").strip(),
|
||||
"track_id": latest_track_id,
|
||||
"heartbeat_at": datetime.now(UTC).isoformat(),
|
||||
"progress": {
|
||||
"total_documents": total_documents,
|
||||
@@ -199,6 +333,7 @@ class KnowledgeIndexTaskManager:
|
||||
"skipped_documents": 0,
|
||||
"percent": 100,
|
||||
},
|
||||
"knowledge_ingest": knowledge_ingest,
|
||||
},
|
||||
status=(
|
||||
AgentRunStatus.SUCCEEDED.value
|
||||
@@ -234,24 +369,50 @@ class KnowledgeIndexTaskManager:
|
||||
error_message=str(exc),
|
||||
)
|
||||
KnowledgeService(db=db).set_document_ingest_statuses(
|
||||
document_ids,
|
||||
_resolve_failed_ingest_document_ids(knowledge_ingest, document_ids),
|
||||
KNOWLEDGE_INGEST_STATUS_FAILED,
|
||||
agent_run_id=agent_run_id,
|
||||
)
|
||||
if knowledge_ingest is not None:
|
||||
for document_id in document_ids:
|
||||
document = _find_ingest_document(knowledge_ingest, document_id)
|
||||
if document is None or document.get("status") in {"succeeded", "failed"}:
|
||||
continue
|
||||
_patch_ingest_document(
|
||||
knowledge_ingest,
|
||||
document_id,
|
||||
{
|
||||
"status": "failed",
|
||||
"phase": "failed",
|
||||
"finished_at": datetime.now(UTC).isoformat(),
|
||||
"error": str(exc),
|
||||
},
|
||||
event=f"归集任务中断:{exc}",
|
||||
level="error",
|
||||
)
|
||||
knowledge_ingest["status"] = "failed"
|
||||
knowledge_ingest["phase"] = "failed"
|
||||
knowledge_ingest["current_document_id"] = ""
|
||||
knowledge_ingest["finished_at"] = datetime.now(UTC).isoformat()
|
||||
knowledge_ingest["graph"] = _build_ingest_graph(knowledge_ingest)
|
||||
|
||||
route_payload: dict[str, Any] = {
|
||||
"job_type": "knowledge_index_sync",
|
||||
"phase": "failed",
|
||||
"heartbeat_at": datetime.now(UTC).isoformat(),
|
||||
"progress": {
|
||||
"total_documents": len(document_ids),
|
||||
"completed_documents": 0,
|
||||
"failed_documents": len(document_ids),
|
||||
"skipped_documents": 0,
|
||||
"percent": 100,
|
||||
},
|
||||
}
|
||||
if knowledge_ingest is not None:
|
||||
route_payload["knowledge_ingest"] = knowledge_ingest
|
||||
AgentRunService(db).merge_route_json(
|
||||
agent_run_id,
|
||||
{
|
||||
"job_type": "knowledge_index_sync",
|
||||
"phase": "failed",
|
||||
"heartbeat_at": datetime.now(UTC).isoformat(),
|
||||
"progress": {
|
||||
"total_documents": len(document_ids),
|
||||
"completed_documents": 0,
|
||||
"failed_documents": len(document_ids),
|
||||
"skipped_documents": 0,
|
||||
"percent": 100,
|
||||
},
|
||||
},
|
||||
route_payload,
|
||||
status=AgentRunStatus.FAILED.value,
|
||||
result_summary=str(exc),
|
||||
error_message=str(exc),
|
||||
@@ -267,4 +428,312 @@ class KnowledgeIndexTaskManager:
|
||||
db.close()
|
||||
|
||||
|
||||
def _build_initial_knowledge_ingest_state(
    knowledge_service: KnowledgeService,
    *,
    document_ids: list[str],
) -> dict[str, Any]:
    """Build the initial ingest-state payload for a batch of documents.

    One queued document entry is created per id in ``document_ids`` (all
    sharing a single timestamp), the first entry's id is recorded as the
    current document, and the aggregate graph is computed from the new
    entries.

    Args:
        knowledge_service: Service passed through to the per-document builder.
        document_ids: Ids of the documents to be ingested, in processing order.

    Returns:
        The state dict with ``schema_version``, ``status``, ``phase``,
        timestamps, ``current_document_id``, ``documents`` and ``graph``.
    """
    timestamp = datetime.now(UTC).isoformat()
    queued_documents: list[dict[str, Any]] = []
    for document_id in document_ids:
        queued_documents.append(
            _build_initial_knowledge_ingest_document(
                knowledge_service,
                document_id,
                now=timestamp,
            )
        )

    # An empty batch has no current document.
    first_document_id = queued_documents[0]["document_id"] if queued_documents else ""

    state: dict[str, Any] = {
        "schema_version": 1,
        "status": "running",
        "phase": "queued",
        "started_at": timestamp,
        "finished_at": None,
        "current_document_id": first_document_id,
        "documents": queued_documents,
        "graph": _build_ingest_graph({"documents": queued_documents}),
    }
    return state
|
||||
|
||||
|
||||
def _build_initial_knowledge_ingest_document(
|
||||
knowledge_service: KnowledgeService,
|
||||
document_id: str,
|
||||
*,
|
||||
now: str,
|
||||
) -> dict[str, Any]:
|
||||
try:
|
||||
entry = knowledge_service.get_document_entry(document_id)
|
||||
except Exception:
|
||||
entry = {}
|
||||
return {
|
||||
"document_id": document_id,
|
||||
"name": str(entry.get("original_name") or document_id).strip(),
|
||||
"folder": str(entry.get("folder") or "").strip(),
|
||||
"extension": str(entry.get("extension") or "").strip(),
|
||||
"mime_type": str(entry.get("mime_type") or "").strip(),
|
||||
"status": "queued",
|
||||
"phase": "queued",
|
||||
"started_at": None,
|
||||
"finished_at": None,
|
||||
"text_chars": 0,
|
||||
"indexed_text_chars": 0,
|
||||
"section_count": 0,
|
||||
"sections": [],
|
||||
"chunk_count": 0,
|
||||
"chunk_ids": [],
|
||||
"chunks": [],
|
||||
"entity_count": 0,
|
||||
"relation_count": 0,
|
||||
"entities": [],
|
||||
"relations": [],
|
||||
"events": [
|
||||
{
|
||||
"at": now,
|
||||
"level": "info",
|
||||
"message": "已进入知识归集队列,等待 LightRAG 处理。",
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def _patch_ingest_document(
    knowledge_ingest: dict[str, Any],
    document_id: str,
    updates: dict[str, Any],
    *,
    event: str = "",
    level: str = "info",
) -> None:
    """Apply ``updates`` to one document entry and optionally log an event.

    Does nothing when no entry with ``document_id`` exists in the state.

    Args:
        knowledge_ingest: Ingest state holding the ``documents`` list.
        document_id: Id of the entry to modify.
        updates: Key/value pairs merged into the entry in place.
        event: Message appended to the entry's events; skipped when empty.
        level: Level recorded on the appended event.
    """
    target = _find_ingest_document(knowledge_ingest, document_id)
    if target is not None:
        target.update(updates)
        if event:
            _append_ingest_event(target, event, level=level)
|
||||
|
||||
|
||||
def _append_ingest_event(document: dict[str, Any], message: str, *, level: str) -> None:
|
||||
events = document.get("events")
|
||||
if not isinstance(events, list):
|
||||
events = []
|
||||
events.append(
|
||||
{
|
||||
"at": datetime.now(UTC).isoformat(),
|
||||
"level": level,
|
||||
"message": message,
|
||||
}
|
||||
)
|
||||
document["events"] = events[-30:]
|
||||
|
||||
|
||||
def _find_ingest_document(
|
||||
knowledge_ingest: dict[str, Any],
|
||||
document_id: str,
|
||||
) -> dict[str, Any] | None:
|
||||
for document in list(knowledge_ingest.get("documents") or []):
|
||||
if not isinstance(document, dict):
|
||||
continue
|
||||
if str(document.get("document_id") or "").strip() == document_id:
|
||||
return document
|
||||
return None
|
||||
|
||||
|
||||
def _sync_ingest_route_json(
    run_service: AgentRunService,
    agent_run_id: str,
    knowledge_ingest: dict[str, Any],
    *,
    progress: dict[str, int],
) -> None:
    """Push the current ingest state and progress to the run's route JSON.

    Calls ``run_service.merge_route_json`` with an ``indexing``-phase payload
    carrying a fresh heartbeat timestamp, and sets the result summary to the
    running-summary text built from the same state and progress.

    Args:
        run_service: Service whose ``merge_route_json`` receives the payload.
        agent_run_id: Id of the run being updated.
        knowledge_ingest: Current ingest state embedded in the payload.
        progress: Progress counters embedded in the payload.
    """
    route_payload = {
        "job_type": "knowledge_index_sync",
        "phase": "indexing",
        "heartbeat_at": datetime.now(UTC).isoformat(),
        "progress": progress,
        "knowledge_ingest": knowledge_ingest,
    }
    running_summary = _build_ingest_running_summary(knowledge_ingest, progress)
    run_service.merge_route_json(
        agent_run_id,
        route_payload,
        result_summary=running_summary,
    )
|
||||
|
||||
|
||||
def _build_ingest_running_summary(
|
||||
knowledge_ingest: dict[str, Any],
|
||||
progress: dict[str, int],
|
||||
) -> str:
|
||||
total_documents = int(progress.get("total_documents") or 0)
|
||||
completed_documents = int(progress.get("completed_documents") or 0)
|
||||
failed_documents = int(progress.get("failed_documents") or 0)
|
||||
current_document_id = str(knowledge_ingest.get("current_document_id") or "").strip()
|
||||
current_document = (
|
||||
_find_ingest_document(knowledge_ingest, current_document_id)
|
||||
if current_document_id
|
||||
else None
|
||||
)
|
||||
if current_document is not None:
|
||||
name = str(current_document.get("name") or current_document_id).strip()
|
||||
current_index = _resolve_ingest_document_index(knowledge_ingest, current_document_id)
|
||||
return (
|
||||
f"知识归纳正在处理 {current_index}/{total_documents}:{name}。"
|
||||
f"已完成 {completed_documents} 个,失败 {failed_documents} 个。"
|
||||
)
|
||||
return (
|
||||
f"知识归纳正在运行,已完成 {completed_documents}/{total_documents} 个文档,"
|
||||
f"失败 {failed_documents} 个。"
|
||||
)
|
||||
|
||||
|
||||
def _resolve_ingest_document_index(
|
||||
knowledge_ingest: dict[str, Any],
|
||||
document_id: str,
|
||||
) -> int:
|
||||
documents = [
|
||||
item for item in list(knowledge_ingest.get("documents") or []) if isinstance(item, dict)
|
||||
]
|
||||
for index, document in enumerate(documents, start=1):
|
||||
if str(document.get("document_id") or "").strip() == document_id:
|
||||
return index
|
||||
return 0
|
||||
|
||||
|
||||
def _build_ingest_progress(
|
||||
knowledge_ingest: dict[str, Any],
|
||||
total_documents: int,
|
||||
) -> dict[str, int]:
|
||||
documents = [
|
||||
item for item in list(knowledge_ingest.get("documents") or []) if isinstance(item, dict)
|
||||
]
|
||||
completed_documents = sum(1 for item in documents if item.get("status") == "succeeded")
|
||||
failed_documents = sum(1 for item in documents if item.get("status") == "failed")
|
||||
skipped_documents = sum(1 for item in documents if item.get("status") == "skipped")
|
||||
done_documents = completed_documents + failed_documents + skipped_documents
|
||||
if total_documents <= 0:
|
||||
percent = 100
|
||||
else:
|
||||
percent = min(95, max(10, 10 + int(done_documents * 85 / total_documents)))
|
||||
return {
|
||||
"total_documents": total_documents,
|
||||
"completed_documents": completed_documents,
|
||||
"failed_documents": failed_documents,
|
||||
"skipped_documents": skipped_documents,
|
||||
"percent": percent,
|
||||
}
|
||||
|
||||
|
||||
def _extract_document_summary(response: dict[str, Any], document_id: str) -> dict[str, Any]:
|
||||
for item in list(response.get("document_summaries") or []):
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
if str(item.get("document_id") or "").strip() == document_id:
|
||||
return dict(item)
|
||||
return {}
|
||||
|
||||
|
||||
def _extract_failed_documents(
|
||||
response: dict[str, Any],
|
||||
document_id: str,
|
||||
) -> list[dict[str, str]]:
|
||||
failed_documents: list[dict[str, str]] = []
|
||||
for item in list(response.get("failed_documents") or []):
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
item_document_id = str(item.get("document_id") or "").strip()
|
||||
if item_document_id and item_document_id != document_id:
|
||||
continue
|
||||
failed_documents.append(
|
||||
{
|
||||
"document_id": item_document_id or document_id,
|
||||
"status": str(item.get("status") or "failed").strip(),
|
||||
"error": str(item.get("error") or "LightRAG 索引失败").strip(),
|
||||
}
|
||||
)
|
||||
return failed_documents
|
||||
|
||||
|
||||
def _resolve_failed_ingest_document_ids(
|
||||
knowledge_ingest: dict[str, Any] | None,
|
||||
document_ids: list[str],
|
||||
) -> list[str]:
|
||||
if knowledge_ingest is None:
|
||||
return document_ids
|
||||
failed_document_ids: list[str] = []
|
||||
seen_document_ids: set[str] = set()
|
||||
for document in list(knowledge_ingest.get("documents") or []):
|
||||
if not isinstance(document, dict):
|
||||
continue
|
||||
document_id = str(document.get("document_id") or "").strip()
|
||||
if not document_id:
|
||||
continue
|
||||
seen_document_ids.add(document_id)
|
||||
if document.get("status") != "succeeded":
|
||||
failed_document_ids.append(document_id)
|
||||
failed_document_ids.extend(
|
||||
document_id for document_id in document_ids if document_id not in seen_document_ids
|
||||
)
|
||||
return failed_document_ids
|
||||
|
||||
|
||||
def _refresh_ingest_graph(knowledge_ingest: dict[str, Any]) -> None:
    """Recompute the aggregate graph and store it under the ``graph`` key."""
    rebuilt_graph = _build_ingest_graph(knowledge_ingest)
    knowledge_ingest["graph"] = rebuilt_graph
|
||||
|
||||
|
||||
def _build_ingest_graph(knowledge_ingest: dict[str, Any]) -> dict[str, Any]:
    """Aggregate per-document graph data into a single summary.

    Counts are summed across all dict entries in ``documents``; entities and
    relations are de-duplicated across documents and capped at 60 items each.

    Returns:
        Dict with ``chunk_count``, ``entity_count``, ``relation_count``,
        ``entities`` and ``relations``.
    """
    entries = [
        entry for entry in list(knowledge_ingest.get("documents") or []) if isinstance(entry, dict)
    ]

    unique_entities = _dedupe_text_items(
        name for entry in entries for name in list(entry.get("entities") or [])
    )
    unique_relations = _dedupe_relations(
        link for entry in entries for link in list(entry.get("relations") or [])
    )

    graph: dict[str, Any] = {}
    for counter_key in ("chunk_count", "entity_count", "relation_count"):
        graph[counter_key] = sum(_to_int(entry.get(counter_key)) for entry in entries)
    graph["entities"] = unique_entities[:60]
    graph["relations"] = unique_relations[:60]
    return graph
|
||||
|
||||
|
||||
def _dedupe_text_items(items: Any) -> list[str]:
|
||||
deduped: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for item in items:
|
||||
text = str(item or "").strip()
|
||||
if not text or text in seen:
|
||||
continue
|
||||
seen.add(text)
|
||||
deduped.append(text)
|
||||
return deduped
|
||||
|
||||
|
||||
def _dedupe_relations(items: Any) -> list[dict[str, str]]:
|
||||
deduped: list[dict[str, str]] = []
|
||||
seen: set[tuple[str, str, str]] = set()
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
source = str(item.get("source") or "").strip()
|
||||
target = str(item.get("target") or "").strip()
|
||||
relation_type = str(item.get("type") or "关联").strip()
|
||||
key = (source, target, relation_type)
|
||||
if not source or not target or key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
deduped.append({"source": source, "target": target, "type": relation_type})
|
||||
return deduped
|
||||
|
||||
|
||||
def _resolve_latest_track_id(responses: list[dict[str, Any]]) -> str:
|
||||
for response in reversed(responses):
|
||||
track_id = str(response.get("track_id") or "").strip()
|
||||
if track_id:
|
||||
return track_id
|
||||
return ""
|
||||
|
||||
|
||||
def _to_int(value: Any) -> int:
|
||||
try:
|
||||
return int(value or 0)
|
||||
except (TypeError, ValueError):
|
||||
return 0
|
||||
|
||||
|
||||
# Module-level KnowledgeIndexTaskManager instance, created when this module is loaded.
knowledge_index_task_manager = KnowledgeIndexTaskManager()
|
||||
|
||||
Reference in New Issue
Block a user