feat: 增强知识库功能,优化索引和RAG检索

This commit is contained in:
caoxiaozhu
2026-05-18 02:49:39 +00:00
parent 55e0591a5e
commit 4414ffb34c
18 changed files with 5656 additions and 4659 deletions

View File

@@ -856,7 +856,13 @@ class KnowledgeService:
status_payload = status_map.get(document_id) or {} status_payload = status_map.get(document_id) or {}
rag_status = str(status_payload.get("status") or "").strip().lower() rag_status = str(status_payload.get("status") or "").strip().lower()
if bool(status_payload.get("query_ready")): linked_run_status = self._resolve_linked_ingest_run_status(entry)
if (
linked_run_status == AgentRunStatus.FAILED.value
and rag_status in {"pending", "processing", "preprocessed"}
):
desired_status = KNOWLEDGE_INGEST_STATUS_FAILED
elif bool(status_payload.get("query_ready")):
desired_status = KNOWLEDGE_INGEST_STATUS_INGESTED desired_status = KNOWLEDGE_INGEST_STATUS_INGESTED
elif rag_status in {"pending", "processing", "preprocessed"}: elif rag_status in {"pending", "processing", "preprocessed"}:
desired_status = KNOWLEDGE_INGEST_STATUS_SYNCING desired_status = KNOWLEDGE_INGEST_STATUS_SYNCING
@@ -1007,12 +1013,22 @@ class KnowledgeService:
probe_entry = {"ingest_status_updated_at": heartbeat_at} probe_entry = {"ingest_status_updated_at": heartbeat_at}
return not self._is_syncing_status_stale(probe_entry) return not self._is_syncing_status_stale(probe_entry)
return not self._is_syncing_status_stale(entry) return not self._is_syncing_status_stale(entry)
def _require_entry(self, index: dict[str, Any], document_id: str) -> dict[str, Any]: def _resolve_linked_ingest_run_status(self, entry: dict[str, Any]) -> str:
for entry in index["documents"]: agent_run_id = str(entry.get("ingest_agent_run_id") or "").strip()
if entry["id"] == document_id: if not agent_run_id or self.db is None:
return entry return ""
run = self.db.scalar(select(AgentRun).where(AgentRun.run_id == agent_run_id))
if run is None:
return ""
return str(run.status or "").strip()
def _require_entry(self, index: dict[str, Any], document_id: str) -> dict[str, Any]:
for entry in index["documents"]:
if entry["id"] == document_id:
return entry
raise FileNotFoundError(document_id) raise FileNotFoundError(document_id)
def _resolve_document_path(self, entry: dict[str, Any]) -> Path: def _resolve_document_path(self, entry: dict[str, Any]) -> Path:

View File

@@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import threading
from concurrent.futures import Future, ThreadPoolExecutor from concurrent.futures import Future, ThreadPoolExecutor
from datetime import UTC, datetime from datetime import UTC, datetime
from time import perf_counter from time import perf_counter
@@ -18,6 +19,7 @@ from app.services.knowledge import (
from app.services.knowledge_rag import KnowledgeRagService from app.services.knowledge_rag import KnowledgeRagService
logger = get_logger("app.services.knowledge_index_tasks") logger = get_logger("app.services.knowledge_index_tasks")
HEARTBEAT_INTERVAL_SECONDS = 10
class KnowledgeIndexTaskManager: class KnowledgeIndexTaskManager:
@@ -58,6 +60,15 @@ class KnowledgeIndexTaskManager:
session_factory = get_session_factory() session_factory = get_session_factory()
db = session_factory() db = session_factory()
started = perf_counter() started = perf_counter()
heartbeat_stop = threading.Event()
heartbeat_thread: threading.Thread | None = None
tool_call_id = ""
tool_request_json = {
"agent": AgentName.HERMES.value,
"folder": folder,
"document_ids": document_ids,
"force": force,
}
try: try:
run_service = AgentRunService(db) run_service = AgentRunService(db)
@@ -84,6 +95,44 @@ class KnowledgeIndexTaskManager:
}, },
}, },
) )
tool_call = run_service.record_tool_call(
run_id=agent_run_id,
tool_type=AgentToolType.LLM.value,
tool_name="lightrag.index_documents",
request_json=tool_request_json,
response_json={"phase": "indexing"},
status="running",
duration_ms=0,
error_message=None,
)
tool_call_id = tool_call.id
def heartbeat_worker() -> None:
while not heartbeat_stop.wait(HEARTBEAT_INTERVAL_SECONDS):
heartbeat_db = session_factory()
try:
AgentRunService(heartbeat_db).merge_route_json(
agent_run_id,
{
"job_type": "knowledge_index_sync",
"phase": "indexing",
"heartbeat_at": datetime.now(UTC).isoformat(),
},
)
except Exception:
logger.exception(
"Knowledge index heartbeat update failed run_id=%s",
agent_run_id,
)
finally:
heartbeat_db.close()
heartbeat_thread = threading.Thread(
target=heartbeat_worker,
name=f"knowledge-index-heartbeat-{agent_run_id}",
daemon=True,
)
heartbeat_thread.start()
response = rag_service.index_documents(document_ids=document_ids, force=force) response = rag_service.index_documents(document_ids=document_ids, force=force)
succeeded_document_ids = [ succeeded_document_ids = [
@@ -117,16 +166,11 @@ class KnowledgeIndexTaskManager:
duration_ms = int((perf_counter() - started) * 1000) duration_ms = int((perf_counter() - started) * 1000)
tool_status = "succeeded" if not failed_document_ids else "failed" tool_status = "succeeded" if not failed_document_ids else "failed"
run_service.record_tool_call( heartbeat_stop.set()
run_id=agent_run_id, if heartbeat_thread is not None:
tool_type=AgentToolType.LLM.value, heartbeat_thread.join(timeout=1)
tool_name="lightrag.index_documents", run_service.update_tool_call(
request_json={ tool_call_id,
"agent": AgentName.HERMES.value,
"folder": folder,
"document_ids": document_ids,
"force": force,
},
response_json=response, response_json=response,
status=tool_status, status=tool_status,
duration_ms=duration_ms, duration_ms=duration_ms,
@@ -166,22 +210,29 @@ class KnowledgeIndexTaskManager:
finished_at=datetime.now(UTC), finished_at=datetime.now(UTC),
) )
except Exception as exc: except Exception as exc:
heartbeat_stop.set()
if heartbeat_thread is not None:
heartbeat_thread.join(timeout=1)
try: try:
AgentRunService(db).record_tool_call( if tool_call_id:
run_id=agent_run_id, AgentRunService(db).update_tool_call(
tool_type=AgentToolType.LLM.value, tool_call_id,
tool_name="lightrag.index_documents", response_json={"error": str(exc)},
request_json={ status="failed",
"agent": AgentName.HERMES.value, duration_ms=int((perf_counter() - started) * 1000),
"folder": folder, error_message=str(exc),
"document_ids": document_ids, )
"force": force, else:
}, AgentRunService(db).record_tool_call(
response_json={"error": str(exc)}, run_id=agent_run_id,
status="failed", tool_type=AgentToolType.LLM.value,
duration_ms=int((perf_counter() - started) * 1000), tool_name="lightrag.index_documents",
error_message=str(exc), request_json=tool_request_json,
) response_json={"error": str(exc)},
status="failed",
duration_ms=int((perf_counter() - started) * 1000),
error_message=str(exc),
)
KnowledgeService(db=db).set_document_ingest_statuses( KnowledgeService(db=db).set_document_ingest_statuses(
document_ids, document_ids,
KNOWLEDGE_INGEST_STATUS_FAILED, KNOWLEDGE_INGEST_STATUS_FAILED,
@@ -210,6 +261,9 @@ class KnowledgeIndexTaskManager:
logger.exception("Knowledge index task finalization failed run_id=%s", agent_run_id) logger.exception("Knowledge index task finalization failed run_id=%s", agent_run_id)
logger.exception("Knowledge index task failed run_id=%s", agent_run_id) logger.exception("Knowledge index task failed run_id=%s", agent_run_id)
finally: finally:
heartbeat_stop.set()
if heartbeat_thread is not None and heartbeat_thread.is_alive():
heartbeat_thread.join(timeout=1)
db.close() db.close()

View File

@@ -83,24 +83,23 @@ class KnowledgeNormalizationService:
if rendered: if rendered:
normalized_tables.append(f"## {candidate.title}\n\n{rendered}") normalized_tables.append(f"## {candidate.title}\n\n{rendered}")
parts: list[str] = [] appendix_parts: list[str] = []
if section_appendix: if section_appendix:
parts.append(section_appendix) appendix_parts.append(section_appendix)
if answer_clue_appendix: if answer_clue_appendix:
parts.append(answer_clue_appendix) appendix_parts.append(answer_clue_appendix)
if normalized_tables: if normalized_tables:
appendix = "\n\n".join(normalized_tables) appendix = "\n\n".join(normalized_tables)
parts.append( appendix_parts.append(
"# 结构化表格补充\n\n" "# 结构化表格补充\n\n"
"以下表格由知识归纳阶段依据原文重新整理,供问答检索时优先理解行列关系。\n\n" "以下表格由知识归纳阶段依据原文重新整理,供问答检索时优先理解行列关系。\n\n"
f"{appendix}" f"{appendix}"
) )
if not parts: if not appendix_parts:
return normalized_text return normalized_text
parts.append(f"# 原文\n\n{normalized_text}") return "\n\n".join([normalized_text, *appendix_parts])
return "\n\n".join(parts)
@staticmethod @staticmethod
def _extract_table_candidates(text: str) -> list[TableCandidate]: def _extract_table_candidates(text: str) -> list[TableCandidate]:

View File

@@ -33,6 +33,7 @@ DEFAULT_LIGHTRAG_QUERY_MODE = "naive"
DEFAULT_LLM_TIMEOUT_SECONDS = 180 DEFAULT_LLM_TIMEOUT_SECONDS = 180
DEFAULT_EMBEDDING_TIMEOUT_SECONDS = 120 DEFAULT_EMBEDDING_TIMEOUT_SECONDS = 120
MAX_KNOWLEDGE_HIT_CONTENT_LENGTH = 2200 MAX_KNOWLEDGE_HIT_CONTENT_LENGTH = 2200
MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH = 220
MAX_QUERY_TERMS = 12 MAX_QUERY_TERMS = 12
QUERY_TERM_STOPWORDS = { QUERY_TERM_STOPWORDS = {
"什么", "什么",
@@ -62,6 +63,13 @@ TABLE_OR_STANDARD_QUERY_HINTS = (
"档位", "档位",
"额度", "额度",
) )
STRUCTURED_APPENDIX_LEADING_MARKERS = (
"# 章节导航",
"# 重点章节摘录",
"# 问答线索补充",
"# 结构化表格补充",
)
STRUCTURED_APPENDIX_LEADING_WINDOW = 220
_runtime_lock = threading.RLock() _runtime_lock = threading.RLock()
_runtime_instance: _LightRagRuntime | None = None _runtime_instance: _LightRagRuntime | None = None
@@ -830,7 +838,11 @@ class KnowledgeRagService:
document_id, document_name = _parse_document_identity(file_path) document_id, document_name = _parse_document_identity(file_path)
normalized_chunk_id = chunk_id or f"path-{rank}" normalized_chunk_id = chunk_id or f"path-{rank}"
normalized_content = _truncate_text(content, max_length=MAX_KNOWLEDGE_HIT_CONTENT_LENGTH) normalized_content = _truncate_text(content, max_length=MAX_KNOWLEDGE_HIT_CONTENT_LENGTH)
excerpt = _build_excerpt(normalized_content, max_length=220) excerpt = _build_query_focused_excerpt(
normalized_content,
query_terms=query_terms,
max_length=MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH,
)
candidates.append( candidates.append(
{ {
"code": f"knowledge.{document_id or 'unknown'}.{normalized_chunk_id}", "code": f"knowledge.{document_id or 'unknown'}.{normalized_chunk_id}",
@@ -907,8 +919,12 @@ class KnowledgeRagService:
@staticmethod @staticmethod
def is_query_ready_status(status_obj: Any) -> bool: def is_query_ready_status(status_obj: Any) -> bool:
status_text = KnowledgeRagService._status_value(status_obj) status_text = KnowledgeRagService._status_value(status_obj)
if status_text in {"failed", "error", "aborted"}:
return False
if status_text == "processed": if status_text == "processed":
return True return True
if status_text in {"pending", "processing", "preprocessed"}:
return False
chunks_count = getattr(status_obj, "chunks_count", None) chunks_count = getattr(status_obj, "chunks_count", None)
if chunks_count is None and isinstance(status_obj, dict): if chunks_count is None and isinstance(status_obj, dict):
@@ -1168,6 +1184,35 @@ def _build_excerpt(text: str, *, max_length: int = 180) -> str:
return f"{normalized[: max_length - 3].rstrip()}..." return f"{normalized[: max_length - 3].rstrip()}..."
def _build_query_focused_excerpt(
text: str,
*,
query_terms: list[str],
max_length: int = 180,
) -> str:
normalized = " ".join(str(text or "").split()).strip()
if not normalized:
return ""
lowered = normalized.lower()
match_positions = [
lowered.find(term)
for term in query_terms
if term and lowered.find(term) >= 0
]
if not match_positions:
return _build_excerpt(normalized, max_length=max_length)
start = max(0, min(match_positions) - max_length // 3)
end = min(len(normalized), start + max_length)
snippet = normalized[start:end].strip()
if start > 0:
snippet = f"...{snippet.lstrip()}"
if end < len(normalized):
snippet = f"{snippet.rstrip()}..."
return snippet
def _truncate_text(text: str, *, max_length: int) -> str: def _truncate_text(text: str, *, max_length: int) -> str:
normalized = str(text or "").strip() normalized = str(text or "").strip()
if len(normalized) <= max_length: if len(normalized) <= max_length:
@@ -1243,19 +1288,43 @@ def _score_knowledge_hit(
score += len(matched_terms) * 8 score += len(matched_terms) * 8
score += sum(1 for term in matched_terms if term in title) * 6 score += sum(1 for term in matched_terms if term in title) * 6
if "结构化表格补充" in content: leading_appendix_marker = _leading_structured_appendix_marker(content)
score += 18 if leading_appendix_marker == "# 章节导航":
if "问答线索补充" in content: score -= 24
score += 16 if not prefers_tabular_evidence else 8 elif leading_appendix_marker == "# 重点章节摘录":
if "重点章节摘录" in content: score += 4 if matched_terms else -12
elif leading_appendix_marker == "# 问答线索补充":
score += 8 if matched_terms and not prefers_tabular_evidence else 2 if matched_terms else -20
elif leading_appendix_marker == "# 结构化表格补充":
if prefers_tabular_evidence and matched_terms:
score += 16
elif matched_terms:
score += 6
else:
score -= 18
if prefers_tabular_evidence and matched_terms and ("|" in content or "" in content):
score += 10 score += 10
if "章节导航" in content: if matched_terms and any(marker in content for marker in ("", ":")):
score += 10
if matched_terms and "\n" in content:
score += 4 score += 4
if prefers_tabular_evidence and ("|" in content or "" in content or "结构化表格补充" in content): if matched_terms and any(marker in content for marker in ("", "", "")):
score += 12 score += 4
if not prefers_tabular_evidence and any(marker in content for marker in ("", "", "", "-", "")): if not prefers_tabular_evidence and matched_terms and any(marker in content for marker in ("", "", "", "-", "")):
score += 4 score += 4
if title and any(term in title for term in query_terms): if title and any(term in title for term in query_terms):
score += 6 score += 6
if re.search(r"没有.{0,8}(信息|规定|说明|依据)", content):
score -= 12
return score return score
def _leading_structured_appendix_marker(content: str) -> str:
normalized = str(content or "").lstrip()
for marker in STRUCTURED_APPENDIX_LEADING_MARKERS:
index = normalized.find(marker)
if 0 <= index <= STRUCTURED_APPENDIX_LEADING_WINDOW:
return marker
return ""

View File

@@ -2,25 +2,46 @@
"version": 1, "version": 1,
"documents": [ "documents": [
{ {
"id": "bf761bd8eccf402bb676423d64401a56", "id": "2c1cb358f08d44ceb0e4d287133206ec",
"folder": "报销制度", "folder": "报销制度",
"original_name": "远光《公司支出管理办法2024》.pdf", "original_name": "远光《公司支出管理办法2024》.pdf",
"stored_name": "bf761bd8eccf402bb676423d64401a56__远光《公司支出管理办法2024》.pdf", "stored_name": "2c1cb358f08d44ceb0e4d287133206ec__远光《公司支出管理办法2024》.pdf",
"mime_type": "application/pdf", "mime_type": "application/pdf",
"extension": "pdf", "extension": "pdf",
"size_bytes": 621401, "size_bytes": 621401,
"sha256": "67a74538bce0dec71ccbb947256cc2c9c0e672d148de49406b967ae1379dbece", "sha256": "67a74538bce0dec71ccbb947256cc2c9c0e672d148de49406b967ae1379dbece",
"created_at": "2026-05-09T08:39:53.788042+00:00", "created_at": "2026-05-17T09:28:28.999515+00:00",
"updated_at": "2026-05-09T08:39:53.788042+00:00", "updated_at": "2026-05-17T09:28:28.999515+00:00",
"uploaded_by": "admin", "uploaded_by": "admin",
"version_number": 1, "version_number": 1,
"ingest_status": 3, "ingest_status": 3,
"ingest_status_updated_at": "2026-05-16T15:37:12.723203+00:00", "ingest_status_updated_at": "2026-05-17T10:01:33.272539+00:00",
"ingest_agent_run_id": "run_94562b13f7a54341", "ingest_completed_at": "2026-05-17T10:01:33.272539+00:00",
"ingest_completed_at": "2026-05-16T15:37:12.723203+00:00",
"ingest_document_name": "远光《公司支出管理办法2024》.pdf", "ingest_document_name": "远光《公司支出管理办法2024》.pdf",
"ingest_document_updated_at": "2026-05-09T08:39:53.788042+00:00", "ingest_document_updated_at": "2026-05-17T09:28:28.999515+00:00",
"ingest_document_sha256": "67a74538bce0dec71ccbb947256cc2c9c0e672d148de49406b967ae1379dbece" "ingest_document_sha256": "67a74538bce0dec71ccbb947256cc2c9c0e672d148de49406b967ae1379dbece",
"ingest_agent_run_id": "run_8b0ead1e3c734a53"
},
{
"id": "a8f8465df08e455ebe133351721d49f8",
"folder": "报销制度",
"original_name": "无单需求文档0506.docx",
"stored_name": "a8f8465df08e455ebe133351721d49f8__无单需求文档0506.docx",
"mime_type": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"extension": "docx",
"size_bytes": 454307,
"sha256": "00985ec85a8163be9c9ffc5eb522df18ed52d4b131ceed12102c2d75e4df85a9",
"created_at": "2026-05-17T13:00:09.485818+00:00",
"updated_at": "2026-05-17T13:00:09.485818+00:00",
"uploaded_by": "admin",
"version_number": 1,
"ingest_status": 1,
"ingest_status_updated_at": "2026-05-17T13:00:09.485818+00:00",
"ingest_completed_at": "",
"ingest_document_name": "",
"ingest_document_updated_at": "",
"ingest_document_sha256": "",
"ingest_agent_run_id": ""
} }
] ]
} }

View File

@@ -1,29 +1,28 @@
{ {
"bf761bd8eccf402bb676423d64401a56": { "2c1cb358f08d44ceb0e4d287133206ec": {
"status": "processed", "status": "processed",
"chunks_count": 11, "chunks_count": 10,
"chunks_list": [ "chunks_list": [
"chunk-a28dc5c0a449bfa3ec07f3ea70720339", "chunk-dd87aa5bc62cc9587ecb4c26d35a5263",
"chunk-0e8b903e5d2a7deeadd9ec0ca70d964c", "chunk-74c01decac4a10cd40a491786743b0ee",
"chunk-16edf05e3f89da28ca60c9b8e3101d26", "chunk-061324cc36078214691a6fc1cd0aaeea",
"chunk-60066b4c758ad553106e2343a99c890e", "chunk-613d6dfd4c5e9c807229a3147f96b584",
"chunk-30373ec763ee53fb2c91741699128f30", "chunk-d26b288ed4001dc5c504dce0eb841362",
"chunk-2d84cd4e27b2bcd246988dabe93d2062", "chunk-e9438f69c9e221d9f0f00a05ad84eac6",
"chunk-090b225cc6d57e9bf0cf7e0f34b4760c", "chunk-9841d66d8fb8548aab40220663a51693",
"chunk-8881e68061e1b668defe35b1cd9d8a83", "chunk-afc57a0e9548d1f484da6df6c182676b",
"chunk-cca4d7b1d51b1e831b80471cd168fef0", "chunk-18d968b78afe916b419c1b5973421ebe",
"chunk-78998358de8a8cc3c018264c9a553b4d", "chunk-aa5435156b829944c173fa1d2d7a93d4"
"chunk-37889c882c89c19f96b9b2ca93685014"
], ],
"content_summary": "# 章节导航\n\n以下内容由入库阶段从制度原文中提取供检索时优先理解制度层级、条目和标准所在章节。\n\n- 第一章 总则.............................................................. 4\n- 第二章 职责分工 .......................................................... 4\n- 第三章 支出报销申请与审批 ................................", "content_summary": "商密【中】\n\n 远光软件股份有限公司文件\n\n 远光制度202414 号\n\n关于颁布《公司支出管理办法2024》的\n 通知\n\n公司各部门、分支机构、子公司\n 为适应公司业务发展需要,优化、完善支出报销标准,规\n范支出业务审批和报销过程防范经营风险依据国家有关法\n律法规参照国家电网公司和国网数科公司有关管理规定结\n合市场经营环境和公司实际情况在广泛征求意见的基础上\n公司对《公司支出管理办法》进行了修订现予颁布。本办法自\n颁布之日起...",
"content_length": 25627, "content_length": 25301,
"created_at": "2026-05-16T15:30:53.520431+00:00", "created_at": "2026-05-17T09:57:22.410485+00:00",
"updated_at": "2026-05-16T15:37:12.723203+00:00", "updated_at": "2026-05-17T10:01:33.272539+00:00",
"file_path": "/app/server/storage/knowledge/报销制度/bf761bd8eccf402bb676423d64401a56__远光《公司支出管理办法2024》.pdf", "file_path": "/app/server/storage/knowledge/报销制度/2c1cb358f08d44ceb0e4d287133206ec__远光《公司支出管理办法2024》.pdf",
"track_id": "insert_20260516_153053_5bdb18b7", "track_id": "insert_20260517_095722_e223c7de",
"metadata": { "metadata": {
"processing_start_time": 1778945453, "processing_start_time": 1779011842,
"processing_end_time": 1778945832 "processing_end_time": 1779012093
} }
} }
} }

File diff suppressed because one or more lines are too long

View File

@@ -1,225 +1,268 @@
{ {
"bf761bd8eccf402bb676423d64401a56": { "2c1cb358f08d44ceb0e4d287133206ec": {
"entity_names": [ "entity_names": [
"公对公结算方式",
"业务招待",
"营销中心",
"预算外支出",
"第十四条业务招待费",
"材料采购",
"对外捐赠支出",
"第五章附则",
"事业部总经理",
"第四章重点支出管理规定",
"Home Visit Travel Expenses Management Policy",
"组织人事部",
"三流一致原则",
"预算先行原则",
"第五条计划财务部主要职责",
"业务招待费",
"Long-Term Business Accommodation",
"无形资产",
"各级管理人员",
"第二十三条",
"第四条归口管理部门主要职责",
"附表3",
"各委员会主任",
"信息管理部",
"经办人",
"品牌及市场运营中心",
"计划财务部",
"Department Head",
"业务原始凭据",
"经办部门",
"系统单据",
"保证金",
"邮递费",
"第十一条",
"第二十一条",
"分级授权原则",
"后勤服务部",
"Company Business Travel System",
"国家电网公司",
"Meeting Expenses",
"工会委员会", "工会委员会",
"特殊事项", "Business Original Documents",
"前款不清、后款不借", "First Approver",
"Advertising and Promotion Expenses",
"广告宣传费",
"异地挂职锻炼",
"Grassroots Manager P4",
"Other Employees",
"税控系统明细清单",
"商旅订票",
"分支机构",
"低值易耗品",
"产品规划设计部",
"培训费",
"第一条规定义",
"经济舱6折及以下",
"三个月",
"托运费",
"Commercial Insurance",
"工会经费管理办法",
"增值税专用发票",
"第七条各级管理人员主要职责",
"Compensation and Benefits Expenses",
"外聘专家",
"第十二条市内交通费",
"分类控制原则",
"Value Reimbursement System",
"第八条支出报销申请",
"会议费",
"轮船三等舱",
"第四条归口管理",
"终审岗",
"总经理",
"中国外汇交易中心",
"Middle And Grassroots Manager P4-P6",
"办公用品",
"办公室(党委办公室)",
"控股子公司",
"支出审批权限表",
"公司领导",
"证券与法律事务部",
"支出审批流转程序",
"第二条适用范围",
"出差补贴标准",
"附表2",
"高铁/动车二等座",
"P8", "P8",
"报销资料规范", "一级部门总经理",
"Staff P1-P3", "组织人事部",
"2024年4月17日", "业务原始凭据",
"董事长", "营销中心",
"通信费", "保证金",
"财务审核时限",
"一万元",
"工会支出",
"交通工具等级标准",
"第二十条",
"影像扫描",
"异地调动邮寄费",
"第二十二条",
"基层经理",
"第二十四条附件",
"公对私结算方式",
"第二十三条本办法的归口与实施",
"异地挂职锻炼补贴标准",
"公司酒店住宿限额标准",
"经办部门(个人)",
"投标保证金", "投标保证金",
"远光制度202414号", "餐补",
"Business Travel", "第十四条业务招待费",
"Communication Expenses", "Chief Engineer",
"交通费", "业务招待",
"远光软件股份有限公司", "Employee Welfare",
"第三条管理原则",
"全资子公司",
"第十三条差旅费",
"薪酬福利支出",
"Relocation Expenses",
"住宿费",
"公司支出管理办法(2024)",
"中层经理",
"第六条经办部门主要职责",
"Business Trip",
"批办分离原则",
"备用金借款",
"岗位支出业务",
"公司支出管理办法",
"第二十四条",
"报销业务",
"第七条管理人员",
"外包分包业务",
"归口管理部门",
"第十条",
"财务审核",
"备用金",
"预付款项",
"支出报销申请",
"公司团建管理办法",
"总工程师",
"商旅系统",
"Training Expenses",
"固定资产",
"DAP研发中心",
"第五条计划财务部",
"第一条目的",
"全列软席列车二等座",
"涉外业务汇率标准",
"客服及商务",
"其他支出",
"快递费",
"需求计划",
"党委办公室",
"财务信息化系统",
"P5及以上",
"因公借款",
"效益优先原则",
"市内交通",
"Business Entertainment Expenses",
"第二章职责分工",
"出差补贴",
"人力资源服务部",
"P4及以下",
"因公用车补贴",
"Company Leader P8 And Above",
"国网数科公司",
"Mailing and Courier Expenses",
"季度清理",
"附表1",
"正式员工",
"审批流转程序",
"后续审批人",
"商密【中】",
"High-Level Manager P7",
"Home Visit Travel Expenses",
"支出成本中心归属",
"供应商",
"High-Speed Rail And Bullet Train",
"资产采购",
"附表1员工支出报销审批权限表",
"第九条支出报销审批",
"审批权限",
"经济舱5折及以下",
"产业投资部",
"第六条经办部门",
"第十条支出成本中心归属",
"第九条",
"邮件费",
"第四章",
"第十三条",
"Travel Allowance",
"第十一条备用金借款",
"财务部门",
"公司",
"中国银行",
"Business Travel Ticket Booking",
"市内交通费",
"发票",
"第十二条",
"支出报销审批",
"经济舱", "经济舱",
"第一审批人", "2024年4月17日",
"品牌", "三等舱",
"火车硬席", "财务信息化系统",
"审批时限", "分管领导",
"重点支出管理规定",
"备用金借款",
"Financial Review",
"第五章附则",
"Company Leadership",
"第十九条",
"经办人",
"预算内支出", "预算内支出",
"President", "Current Account Payment",
"Business Entertainment",
"Tax Control System Details",
"第二十一条",
"成本中心归属",
"岗位支出报销审批权限表",
"工会经费管理办法",
"商旅系统",
"Special Subsidy",
"中国银行外汇折算价",
"因公借款",
"资产采购",
"广告费",
"First-Level Department General Manager",
"正式员工",
"一万元",
"公司员工教育培训管理办法",
"责任原则",
"第二章职责分工",
"预算先行",
"Planning and Finance Department",
"Accommodation Cost Reimbursement",
"Official Vehicle Subsidy",
"第四条归口管理部门主要职责",
"Personal Service Compensation",
"邮递费",
"附表3支出归口管理部门与归口业务范围",
"员工",
"第二条目的",
"Director",
"支出归口管理部门与归口业务范围",
"其他支出(员工)",
"报销标准",
"5000000 Yuan Approval Limit",
"第十一条备用金借款",
"会议费",
"第十七条",
"第七条各级管理人员主要职责",
"50000 Yuan Approval Limit",
"全资子公司",
"涉外业务汇率标准",
"总监",
"第十三条差旅费",
"审批权限表",
"商旅订票规范",
"Final Approval Position",
"报销资格",
"新增报销规定",
"公司支出管理办法",
"Institution General Manager",
"房屋租金",
"Staff Activities",
"分包外包(内部单位)",
"报销申请时限",
"Financial Information System",
"Expenditure Authorization Approval Scope",
"直辖市",
"培训费",
"第十二条市内交通费",
"第十五条",
"终审岗",
"Remote Work Housing",
"Centralized Management department",
"第二十条",
"办公室(党委办公室)",
"Three Flows Consistency Principle",
"审批权限",
"VAT Special Invoice",
"后勤服务部",
"员工支出报销审批权限表",
"公司总裁",
"出差补贴",
"Basic Level Managers",
"预付款项",
"附表1员工支出报销审批权限表",
"经办部门",
"信息管理部",
"通信费",
"第十六条",
"增值税发票",
"财务入账条件",
"Hotel Accommodation Standards",
"审批流转程序",
"Self-Driving Travel Provisions",
"交通费",
"第九条支出报销审批",
"薪酬福利支出分配计划",
"产品规划设计部",
"因公用车补贴",
"Committee Chairpersons",
"Business Division General Manager",
"组织安排",
"1 Yuan Per Person Per Kilometer Reimbursement",
"Separation of Approval and Processing Principle",
"第五条计划财务部主要职责",
"200000 Yuan Approval Limit",
"公司各部门",
"第十四条",
"Other Areas",
"分支机构",
"Departments And Units",
"计划财务部",
"Other Employees",
"第二十三条",
"公司团建管理办法",
"火车硬席",
"税控系统明细清单",
"Trade Union Fund",
"报销标准变化情况",
"薪酬福利支出",
"Hong Kong, Macau, And Taiwan Region",
"对外捐赠支出",
"Multi-Level Approval Rule",
"Three Working Days Deadline",
"Employee Remuneration",
"销售退款",
"股权投资、兼并收购",
"控股子公司",
"取消报销规定",
"Procurement Management Regulations",
"Middle Managers",
"差旅费", "差旅费",
"高层经理", "批办分离",
"厉行节约原则", "住宿费",
"第三章支出报销申请与审批", "Travel Allowance Standards",
"第二十三条本办法的归口与实施",
"Senior Vice President",
"供应商",
"人事归口管理部门",
"Management Personnel At All Levels",
"效益优先",
"Operating Department Individual",
"Remote Work Housing Rental Expenses",
"取消报销规定内容",
"Company",
"修订说明",
"国网数科公司",
"Vice President",
"分级授权",
"Expenditure Reimbursement Application",
"第二十四条附件",
"第二十二条",
"出租车",
"Night High-Speed Rail Provision",
"各级管理人员",
"受益原则",
"公司员工因公通讯费用实施细则",
"公司支出管理办法(2024)",
"出差补贴标准",
"Bid Security Deposit Approval Limits Table",
"第二条范围", "第二条范围",
"基建工程", "Company Property Rental Management",
"调动工作",
"远光软件股份有限公司",
"市内交通费",
"交通工具等级标准",
"Operator",
"第八条支出报销申请",
"Directly-Controlled Municipalities And Special Administrative Regions",
"出差规定",
"业务招待费",
"Senior Managers",
"逐级审批规则", "逐级审批规则",
"招标采购规定", "Company Business Travel System",
"第一章总则" "广告宣传费",
"Transportation Cost Reimbursement",
"财务",
"第一章总则",
"材料采购",
"人力资源服务部",
"证券与法律事务部",
"Transportation Level Standards",
"归口管理部门",
"商旅客服",
"第四章重点支出管理规定",
"出差审批程序",
"Business Trip Approval",
"西藏",
"附表2岗位支出报销审批权限表",
"第十八条",
"第二十四条",
"Company Hotel Accommodation Limit Standards",
"办法",
"DAP研发中心",
"新增规定内容",
"基本补助",
"Travel Allowance",
"异地挂职锻炼补贴标准",
"部门负责人",
"Provincial Capitals",
"特区",
"Transportation Tickets",
"第三章支出报销申请与审批",
"品牌及市场运营中心",
"分包外包(外部单位)",
"探亲路费",
"President",
"凭据报销",
"基本出差补贴",
"Taxi Usage Regulations",
"Government Fees",
"Commercial Travel System",
"远光制度202414号",
"审批权限变化情况",
"基建工程",
"支出报销申请与审批",
"中国外汇交易中心参考汇率",
"Department Manager",
"支出报销审批",
"预算调整决策程序",
"公司1号文",
"External Conference Accommodation",
"厉行节约",
"Commercial Insurance",
"公司",
"第三条管理原则",
"捐赠申请",
"分类控制",
"业务宣传费",
"产业投资部",
"公司员工探亲管理办法",
"Subsequent Approver",
"100000 Yuan Approval Limit",
"Tax Authority Recognized Invoice",
"国家电网公司",
"业务佐证材料",
"第六条经办部门(个人)主要职责",
"结算起点",
"第十条支出成本中心归属",
"母公司"
], ],
"count": 215, "count": 258,
"create_time": 1778945832, "create_time": 1779012093,
"update_time": 1778945832, "update_time": 1779012093,
"_id": "bf761bd8eccf402bb676423d64401a56" "_id": "2c1cb358f08d44ceb0e4d287133206ec"
} }
} }

View File

@@ -1,182 +1,166 @@
{ {
"bf761bd8eccf402bb676423d64401a56": { "2c1cb358f08d44ceb0e4d287133206ec": {
"relation_pairs": [ "relation_pairs": [
[ [
"公司支出管理办法", "Departments And Units",
"审批流转程序" "Taxi Usage Regulations"
], ],
[ [
"供应商", "取消报销规定内容",
"公司" "报销标准变化情况"
], ],
[ [
"全资子公司", "业务招待费",
"远光软件股份有限公司" "第十四条"
], ],
[ [
"发票", "控股子公司",
"报销业务"
],
[
"出差补贴",
"组织人事部"
],
[
"第十三条差旅费",
"第四章重点支出管理规定"
],
[
"公司支出管理办法",
"差旅费"
],
[
"第一章总则",
"远光软件股份有限公司"
],
[
"计划财务部",
"远光软件股份有限公司"
],
[
"分支机构",
"远光软件股份有限公司"
],
[
"经办部门",
"需求计划"
],
[
"增值税专用发票",
"税控系统明细清单"
],
[
"第三章支出报销申请与审批",
"财务信息化系统"
],
[
"业务原始凭据",
"经办人"
],
[
"第十四条业务招待费",
"第四章重点支出管理规定"
],
[
"系统单据",
"经办人"
],
[
"第二十三条本办法的归口与实施",
"第五章附则"
],
[
"第二十四条",
"附表2"
],
[
"第二十三条",
"计划财务部" "计划财务部"
], ],
[
"归口管理部门",
"报销业务"
],
[ [
"公司支出管理办法", "公司支出管理办法",
"投标保证金" "工会委员会"
], ],
[ [
"对外捐赠支出", "第一章总则",
"第二十一条" "第三条管理原则"
], ],
[ [
"第二十条", "广告宣传费",
"薪酬福利支出" "第十六条"
], ],
[ [
"国网数科公司", "Tax Control System Details",
"VAT Special Invoice"
],
[
"Expenditure Reimbursement Application",
"Tax Authority Recognized Invoice"
],
[
"远光制度202414号",
"远光软件股份有限公司" "远光软件股份有限公司"
], ],
[ [
"事业部总经理", "Financial Review",
"逐级审批规则" "Operator"
], ],
[ [
"特殊事项", "Operating Department Individual",
"终审岗" "Procurement Management Regulations"
], ],
[ [
"发票", "会议费",
"经办人" "第十五条"
],
[
"Company",
"Management Personnel At All Levels"
],
[
"公司",
"第十七条"
],
[
"公司",
"第十八条"
],
[
"Operator",
"Three Working Days Deadline"
], ],
[ [
"第十一条备用金借款", "第十一条备用金借款",
"第四章重点支出管理规定" "第四章重点支出管理规定"
], ],
[ [
"归口管理部门", "Expenditure Reimbursement Application",
"Operator"
],
[
"业务招待费",
"差旅费"
],
[
"公司",
"第二十一条"
],
[
"公司支出管理办法(2024)",
"远光软件股份有限公司"
],
[
"第四条归口管理部门主要职责",
"计划财务部" "计划财务部"
], ],
[ [
"工会委员会", "会议费",
"工会支出" "差旅费"
], ],
[ [
"第二十四条附件", "Company",
"第五章附则" "Operating Department Individual"
], ],
[ [
"涉外业务汇率标准", "商旅系统",
"第二十二条" "差旅费"
], ],
[ [
"各级管理人员", "会议费",
"支出报销审批" "公司总裁"
], ],
[ [
"第十二条市内交通费", "计划财务部",
"第四章重点支出管理规定"
],
[
"支出审批流转程序",
"逐级审批规则"
],
[
"支出审批流转程序",
"终审岗"
],
[
"各级管理人员",
"报销业务"
],
[
"归口管理部门",
"远光软件股份有限公司" "远光软件股份有限公司"
], ],
[ [
"控股子公司", "公司",
"第十九条"
],
[
"公司",
"第二十条"
],
[
"Company",
"Planning and Finance Department"
],
[
"公司支出管理办法",
"营销中心"
],
[
"Business Original Documents",
"Operator"
],
[
"公司支出管理办法",
"办公室(党委办公室)"
],
[
"Departments And Units",
"Night High-Speed Rail Provision"
],
[
"Centralized Management department",
"Company"
],
[
"组织人事部",
"调动工作"
],
[
"报销标准变化情况",
"远光软件股份有限公司" "远光软件股份有限公司"
], ],
[ [
"报销业务", "第一章总则",
"财务部门"
],
[
"报销业务",
"经办部门"
],
[
"国家电网公司",
"远光软件股份有限公司" "远光软件股份有限公司"
],
[
"第二十四条",
"附表1"
] ]
], ],
"count": 43, "count": 39,
"create_time": 1778945832, "create_time": 1779012093,
"update_time": 1778945832, "update_time": 1779012093,
"_id": "bf761bd8eccf402bb676423d64401a56" "_id": "2c1cb358f08d44ceb0e4d287133206ec"
} }
} }

View File

@@ -1,389 +1,353 @@
{ {
"第一章总则<SEP>远光软件股份有限公司": { "第一章总则<SEP>远光软件股份有限公司": {
"chunk_ids": [ "chunk_ids": [
"chunk-a28dc5c0a449bfa3ec07f3ea70720339" "chunk-aa5435156b829944c173fa1d2d7a93d4"
], ],
"count": 1, "count": 1,
"create_time": 1778945805, "create_time": 1779012088,
"update_time": 1778945805, "update_time": 1779012088,
"_id": "第一章总则<SEP>远光软件股份有限公司" "_id": "第一章总则<SEP>远光软件股份有限公司"
}, },
"第三章支出报销申请与审批<SEP>财务信息化系统": {
"chunk_ids": [
"chunk-a28dc5c0a449bfa3ec07f3ea70720339"
],
"count": 1,
"create_time": 1778945805,
"update_time": 1778945805,
"_id": "第三章支出报销申请与审批<SEP>财务信息化系统"
},
"第十一条备用金借款<SEP>第四章重点支出管理规定": { "第十一条备用金借款<SEP>第四章重点支出管理规定": {
"chunk_ids": [ "chunk_ids": [
"chunk-a28dc5c0a449bfa3ec07f3ea70720339" "chunk-aa5435156b829944c173fa1d2d7a93d4"
], ],
"count": 1, "count": 1,
"create_time": 1778945805, "create_time": 1779012088,
"update_time": 1778945805, "update_time": 1779012088,
"_id": "第十一条备用金借款<SEP>第四章重点支出管理规定" "_id": "第十一条备用金借款<SEP>第四章重点支出管理规定"
}, },
"第二十三条本办法的归口与实施<SEP>第五章附则": { "公司支出管理办法<SEP>办公室(党委办公室)": {
"chunk_ids": [ "chunk_ids": [
"chunk-a28dc5c0a449bfa3ec07f3ea70720339" "chunk-afc57a0e9548d1f484da6df6c182676b"
], ],
"count": 1, "count": 1,
"create_time": 1778945805, "create_time": 1779012088,
"update_time": 1778945805, "update_time": 1779012088,
"_id": "第二十三条本办法的归口与实施<SEP>第五章附则" "_id": "公司支出管理办法<SEP>办公室(党委办公室)"
},
"第十二条市内交通费<SEP>第四章重点支出管理规定": {
"chunk_ids": [
"chunk-a28dc5c0a449bfa3ec07f3ea70720339"
],
"count": 1,
"create_time": 1778945808,
"update_time": 1778945808,
"_id": "第十二条市内交通费<SEP>第四章重点支出管理规定"
},
"归口管理部门<SEP>报销业务": {
"chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e"
],
"count": 1,
"create_time": 1778945808,
"update_time": 1778945808,
"_id": "归口管理部门<SEP>报销业务"
},
"第十三条差旅费<SEP>第四章重点支出管理规定": {
"chunk_ids": [
"chunk-a28dc5c0a449bfa3ec07f3ea70720339"
],
"count": 1,
"create_time": 1778945808,
"update_time": 1778945808,
"_id": "第十三条差旅费<SEP>第四章重点支出管理规定"
},
"第二十四条附件<SEP>第五章附则": {
"chunk_ids": [
"chunk-a28dc5c0a449bfa3ec07f3ea70720339"
],
"count": 1,
"create_time": 1778945809,
"update_time": 1778945809,
"_id": "第二十四条附件<SEP>第五章附则"
},
"业务原始凭据<SEP>经办人": {
"chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e"
],
"count": 1,
"create_time": 1778945809,
"update_time": 1778945809,
"_id": "业务原始凭据<SEP>经办人"
},
"报销业务<SEP>财务部门": {
"chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e"
],
"count": 1,
"create_time": 1778945812,
"update_time": 1778945812,
"_id": "报销业务<SEP>财务部门"
},
"增值税专用发票<SEP>税控系统明细清单": {
"chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e"
],
"count": 1,
"create_time": 1778945812,
"update_time": 1778945812,
"_id": "增值税专用发票<SEP>税控系统明细清单"
},
"第十四条业务招待费<SEP>第四章重点支出管理规定": {
"chunk_ids": [
"chunk-a28dc5c0a449bfa3ec07f3ea70720339"
],
"count": 1,
"create_time": 1778945812,
"update_time": 1778945812,
"_id": "第十四条业务招待费<SEP>第四章重点支出管理规定"
},
"经办部门<SEP>需求计划": {
"chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e"
],
"count": 1,
"create_time": 1778945812,
"update_time": 1778945812,
"_id": "经办部门<SEP>需求计划"
},
"系统单据<SEP>经办人": {
"chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e"
],
"count": 1,
"create_time": 1778945812,
"update_time": 1778945812,
"_id": "系统单据<SEP>经办人"
},
"报销业务<SEP>经办部门": {
"chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e"
],
"count": 1,
"create_time": 1778945812,
"update_time": 1778945812,
"_id": "报销业务<SEP>经办部门"
},
"供应商<SEP>公司": {
"chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e"
],
"count": 1,
"create_time": 1778945812,
"update_time": 1778945812,
"_id": "供应商<SEP>公司"
},
"工会委员会<SEP>工会支出": {
"chunk_ids": [
"chunk-78998358de8a8cc3c018264c9a553b4d"
],
"count": 1,
"create_time": 1778945812,
"update_time": 1778945812,
"_id": "工会委员会<SEP>工会支出"
},
"出差补贴<SEP>组织人事部": {
"chunk_ids": [
"chunk-78998358de8a8cc3c018264c9a553b4d"
],
"count": 1,
"create_time": 1778945812,
"update_time": 1778945812,
"_id": "出差补贴<SEP>组织人事部"
},
"公司支出管理办法<SEP>差旅费": {
"chunk_ids": [
"chunk-78998358de8a8cc3c018264c9a553b4d"
],
"count": 1,
"create_time": 1778945812,
"update_time": 1778945812,
"_id": "公司支出管理办法<SEP>差旅费"
},
"各级管理人员<SEP>报销业务": {
"chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e"
],
"count": 1,
"create_time": 1778945813,
"update_time": 1778945813,
"_id": "各级管理人员<SEP>报销业务"
},
"公司支出管理办法<SEP>投标保证金": {
"chunk_ids": [
"chunk-78998358de8a8cc3c018264c9a553b4d"
],
"count": 1,
"create_time": 1778945815,
"update_time": 1778945815,
"_id": "公司支出管理办法<SEP>投标保证金"
},
"第二十条<SEP>薪酬福利支出": {
"chunk_ids": [
"chunk-8881e68061e1b668defe35b1cd9d8a83"
],
"count": 1,
"create_time": 1778945815,
"update_time": 1778945815,
"_id": "第二十条<SEP>薪酬福利支出"
},
"对外捐赠支出<SEP>第二十一条": {
"chunk_ids": [
"chunk-8881e68061e1b668defe35b1cd9d8a83"
],
"count": 1,
"create_time": 1778945815,
"update_time": 1778945815,
"_id": "对外捐赠支出<SEP>第二十一条"
},
"涉外业务汇率标准<SEP>第二十二条": {
"chunk_ids": [
"chunk-8881e68061e1b668defe35b1cd9d8a83"
],
"count": 1,
"create_time": 1778945827,
"update_time": 1778945827,
"_id": "涉外业务汇率标准<SEP>第二十二条"
},
"第二十三条<SEP>计划财务部": {
"chunk_ids": [
"chunk-8881e68061e1b668defe35b1cd9d8a83"
],
"count": 1,
"create_time": 1778945815,
"update_time": 1778945815,
"_id": "第二十三条<SEP>计划财务部"
},
"第二十四条<SEP>附表1": {
"chunk_ids": [
"chunk-8881e68061e1b668defe35b1cd9d8a83"
],
"count": 1,
"create_time": 1778945816,
"update_time": 1778945816,
"_id": "第二十四条<SEP>附表1"
},
"第二十四条<SEP>附表2": {
"chunk_ids": [
"chunk-8881e68061e1b668defe35b1cd9d8a83"
],
"count": 1,
"create_time": 1778945816,
"update_time": 1778945816,
"_id": "第二十四条<SEP>附表2"
},
"发票<SEP>报销业务": {
"chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e"
],
"count": 1,
"create_time": 1778945816,
"update_time": 1778945816,
"_id": "发票<SEP>报销业务"
},
"支出审批流转程序<SEP>逐级审批规则": {
"chunk_ids": [
"chunk-37889c882c89c19f96b9b2ca93685014"
],
"count": 1,
"create_time": 1778945816,
"update_time": 1778945816,
"_id": "支出审批流转程序<SEP>逐级审批规则"
},
"公司支出管理办法<SEP>审批流转程序": {
"chunk_ids": [
"chunk-78998358de8a8cc3c018264c9a553b4d"
],
"count": 1,
"create_time": 1778945820,
"update_time": 1778945820,
"_id": "公司支出管理办法<SEP>审批流转程序"
},
"特殊事项<SEP>终审岗": {
"chunk_ids": [
"chunk-37889c882c89c19f96b9b2ca93685014"
],
"count": 1,
"create_time": 1778945820,
"update_time": 1778945820,
"_id": "特殊事项<SEP>终审岗"
},
"事业部总经理<SEP>逐级审批规则": {
"chunk_ids": [
"chunk-37889c882c89c19f96b9b2ca93685014"
],
"count": 1,
"create_time": 1778945821,
"update_time": 1778945821,
"_id": "事业部总经理<SEP>逐级审批规则"
}, },
"计划财务部<SEP>远光软件股份有限公司": { "计划财务部<SEP>远光软件股份有限公司": {
"chunk_ids": [ "chunk_ids": [
"chunk-16edf05e3f89da28ca60c9b8e3101d26" "chunk-aa5435156b829944c173fa1d2d7a93d4"
], ],
"count": 1, "count": 1,
"create_time": 1778945821, "create_time": 1779012076,
"update_time": 1778945821, "update_time": 1779012076,
"_id": "计划财务部<SEP>远光软件股份有限公司" "_id": "计划财务部<SEP>远光软件股份有限公司"
}, },
"发票<SEP>经办人": { "第一章总则<SEP>第三条管理原则": {
"chunk_ids": [ "chunk_ids": [
"chunk-60066b4c758ad553106e2343a99c890e" "chunk-aa5435156b829944c173fa1d2d7a93d4"
], ],
"count": 1, "count": 1,
"create_time": 1778945822, "create_time": 1779012076,
"update_time": 1778945822, "update_time": 1779012076,
"_id": "发票<SEP>经办人" "_id": "第一章总则<SEP>第三条管理原则"
}, },
"支出审批流转程序<SEP>终审岗": { "Company<SEP>Management Personnel At All Levels": {
"chunk_ids": [ "chunk_ids": [
"chunk-37889c882c89c19f96b9b2ca93685014" "chunk-74c01decac4a10cd40a491786743b0ee"
], ],
"count": 1, "count": 1,
"create_time": 1778945823, "create_time": 1779012076,
"update_time": 1778945823, "update_time": 1779012076,
"_id": "支出审批流转程序<SEP>终审岗" "_id": "Company<SEP>Management Personnel At All Levels"
}, },
"归口管理部门<SEP>远光软件股份有限公司": { "Centralized Management department<SEP>Company": {
"chunk_ids": [ "chunk_ids": [
"chunk-16edf05e3f89da28ca60c9b8e3101d26" "chunk-74c01decac4a10cd40a491786743b0ee"
], ],
"count": 1, "count": 1,
"create_time": 1778945824, "create_time": 1779012077,
"update_time": 1778945824, "update_time": 1779012077,
"_id": "归口管理部门<SEP>远光软件股份有限公司" "_id": "Centralized Management department<SEP>Company"
}, },
"国家电网公司<SEP>远光软件股份有限公司": { "Company<SEP>Planning and Finance Department": {
"chunk_ids": [ "chunk_ids": [
"chunk-16edf05e3f89da28ca60c9b8e3101d26" "chunk-74c01decac4a10cd40a491786743b0ee"
], ],
"count": 1, "count": 1,
"create_time": 1778945824, "create_time": 1779012077,
"update_time": 1778945824, "update_time": 1779012077,
"_id": "国家电网公司<SEP>远光软件股份有限公司" "_id": "Company<SEP>Planning and Finance Department"
}, },
"归口管理部门<SEP>计划财务部": { "Company<SEP>Operating Department Individual": {
"chunk_ids": [ "chunk_ids": [
"chunk-16edf05e3f89da28ca60c9b8e3101d26" "chunk-74c01decac4a10cd40a491786743b0ee"
], ],
"count": 1, "count": 1,
"create_time": 1778945824, "create_time": 1779012078,
"update_time": 1778945824, "update_time": 1779012078,
"_id": "归口管理部门<SEP>计划财务部" "_id": "Company<SEP>Operating Department Individual"
}, },
"各级管理人员<SEP>支出报销审批": { "公司支出管理办法<SEP>工会委员会": {
"chunk_ids": [ "chunk_ids": [
"chunk-16edf05e3f89da28ca60c9b8e3101d26" "chunk-afc57a0e9548d1f484da6df6c182676b"
], ],
"count": 1, "count": 1,
"create_time": 1778945824, "create_time": 1779012079,
"update_time": 1778945824, "update_time": 1779012079,
"_id": "各级管理人员<SEP>支出报销审批" "_id": "公司支出管理办法<SEP>工会委员会"
}, },
"国网数科公司<SEP>远光软件股份有限公司": { "Expenditure Reimbursement Application<SEP>Operator": {
"chunk_ids": [ "chunk_ids": [
"chunk-16edf05e3f89da28ca60c9b8e3101d26" "chunk-74c01decac4a10cd40a491786743b0ee"
], ],
"count": 1, "count": 1,
"create_time": 1778945828, "create_time": 1779012079,
"update_time": 1778945828, "update_time": 1779012079,
"_id": "国网数科公司<SEP>远光软件股份有限公司" "_id": "Expenditure Reimbursement Application<SEP>Operator"
}, },
"分支机构<SEP>远光软件股份有限公司": { "公司支出管理办法<SEP>营销中心": {
"chunk_ids": [ "chunk_ids": [
"chunk-16edf05e3f89da28ca60c9b8e3101d26" "chunk-afc57a0e9548d1f484da6df6c182676b"
], ],
"count": 1, "count": 1,
"create_time": 1778945831, "create_time": 1779012079,
"update_time": 1778945831, "update_time": 1779012079,
"_id": "分支机构<SEP>远光软件股份有限公司" "_id": "公司支出管理办法<SEP>营销中心"
}, },
"全资子公司<SEP>远光软件股份有限公司": { "第四条归口管理部门主要职责<SEP>计划财务部": {
"chunk_ids": [ "chunk_ids": [
"chunk-16edf05e3f89da28ca60c9b8e3101d26" "chunk-aa5435156b829944c173fa1d2d7a93d4"
], ],
"count": 1, "count": 1,
"create_time": 1778945832, "create_time": 1779012079,
"update_time": 1778945832, "update_time": 1779012079,
"_id": "全资子公司<SEP>远光软件股份有限公司" "_id": "第四条归口管理部门主要职责<SEP>计划财务部"
}, },
"控股子公司<SEP>远光软件股份有限公司": { "Tax Control System Details<SEP>VAT Special Invoice": {
"chunk_ids": [ "chunk_ids": [
"chunk-16edf05e3f89da28ca60c9b8e3101d26" "chunk-74c01decac4a10cd40a491786743b0ee"
], ],
"count": 1, "count": 1,
"create_time": 1778945832, "create_time": 1779012079,
"update_time": 1778945832, "update_time": 1779012079,
"_id": "控股子公司<SEP>远光软件股份有限公司" "_id": "Tax Control System Details<SEP>VAT Special Invoice"
},
"Operating Department Individual<SEP>Procurement Management Regulations": {
"chunk_ids": [
"chunk-74c01decac4a10cd40a491786743b0ee"
],
"count": 1,
"create_time": 1779012081,
"update_time": 1779012081,
"_id": "Operating Department Individual<SEP>Procurement Management Regulations"
},
"Business Original Documents<SEP>Operator": {
"chunk_ids": [
"chunk-74c01decac4a10cd40a491786743b0ee"
],
"count": 1,
"create_time": 1779012094,
"update_time": 1779012094,
"_id": "Business Original Documents<SEP>Operator"
},
"Expenditure Reimbursement Application<SEP>Tax Authority Recognized Invoice": {
"chunk_ids": [
"chunk-74c01decac4a10cd40a491786743b0ee"
],
"count": 1,
"create_time": 1779012094,
"update_time": 1779012094,
"_id": "Expenditure Reimbursement Application<SEP>Tax Authority Recognized Invoice"
},
"公司<SEP>第十七条": {
"chunk_ids": [
"chunk-e9438f69c9e221d9f0f00a05ad84eac6"
],
"count": 1,
"create_time": 1779012094,
"update_time": 1779012094,
"_id": "公司<SEP>第十七条"
},
"Operator<SEP>Three Working Days Deadline": {
"chunk_ids": [
"chunk-74c01decac4a10cd40a491786743b0ee"
],
"count": 1,
"create_time": 1779012083,
"update_time": 1779012083,
"_id": "Operator<SEP>Three Working Days Deadline"
},
"Departments And Units<SEP>Night High-Speed Rail Provision": {
"chunk_ids": [
"chunk-613d6dfd4c5e9c807229a3147f96b584"
],
"count": 1,
"create_time": 1779012084,
"update_time": 1779012084,
"_id": "Departments And Units<SEP>Night High-Speed Rail Provision"
},
"公司<SEP>第十八条": {
"chunk_ids": [
"chunk-e9438f69c9e221d9f0f00a05ad84eac6"
],
"count": 1,
"create_time": 1779012084,
"update_time": 1779012084,
"_id": "公司<SEP>第十八条"
},
"公司<SEP>第十九条": {
"chunk_ids": [
"chunk-e9438f69c9e221d9f0f00a05ad84eac6"
],
"count": 1,
"create_time": 1779012084,
"update_time": 1779012084,
"_id": "公司<SEP>第十九条"
},
"报销标准变化情况<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-18d968b78afe916b419c1b5973421ebe"
],
"count": 1,
"create_time": 1779012084,
"update_time": 1779012084,
"_id": "报销标准变化情况<SEP>远光软件股份有限公司"
},
"取消报销规定内容<SEP>报销标准变化情况": {
"chunk_ids": [
"chunk-18d968b78afe916b419c1b5973421ebe"
],
"count": 1,
"create_time": 1779012085,
"update_time": 1779012085,
"_id": "取消报销规定内容<SEP>报销标准变化情况"
},
"Financial Review<SEP>Operator": {
"chunk_ids": [
"chunk-74c01decac4a10cd40a491786743b0ee"
],
"count": 1,
"create_time": 1779012085,
"update_time": 1779012085,
"_id": "Financial Review<SEP>Operator"
},
"公司支出管理办法(2024)<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-dd87aa5bc62cc9587ecb4c26d35a5263"
],
"count": 1,
"create_time": 1779012085,
"update_time": 1779012085,
"_id": "公司支出管理办法(2024)<SEP>远光软件股份有限公司"
},
"远光制度202414号<SEP>远光软件股份有限公司": {
"chunk_ids": [
"chunk-dd87aa5bc62cc9587ecb4c26d35a5263"
],
"count": 1,
"create_time": 1779012086,
"update_time": 1779012086,
"_id": "远光制度202414号<SEP>远光软件股份有限公司"
},
"Departments And Units<SEP>Taxi Usage Regulations": {
"chunk_ids": [
"chunk-613d6dfd4c5e9c807229a3147f96b584"
],
"count": 1,
"create_time": 1779012099,
"update_time": 1779012099,
"_id": "Departments And Units<SEP>Taxi Usage Regulations"
},
"控股子公司<SEP>计划财务部": {
"chunk_ids": [
"chunk-dd87aa5bc62cc9587ecb4c26d35a5263"
],
"count": 1,
"create_time": 1779012099,
"update_time": 1779012099,
"_id": "控股子公司<SEP>计划财务部"
},
"公司<SEP>第二十条": {
"chunk_ids": [
"chunk-e9438f69c9e221d9f0f00a05ad84eac6"
],
"count": 1,
"create_time": 1779012086,
"update_time": 1779012086,
"_id": "公司<SEP>第二十条"
},
"商旅系统<SEP>差旅费": {
"chunk_ids": [
"chunk-d26b288ed4001dc5c504dce0eb841362"
],
"count": 1,
"create_time": 1779012086,
"update_time": 1779012086,
"_id": "商旅系统<SEP>差旅费"
},
"业务招待费<SEP>差旅费": {
"chunk_ids": [
"chunk-d26b288ed4001dc5c504dce0eb841362"
],
"count": 1,
"create_time": 1779012089,
"update_time": 1779012089,
"_id": "业务招待费<SEP>差旅费"
},
"公司<SEP>第二十一条": {
"chunk_ids": [
"chunk-e9438f69c9e221d9f0f00a05ad84eac6"
],
"count": 1,
"create_time": 1779012089,
"update_time": 1779012089,
"_id": "公司<SEP>第二十一条"
},
"广告宣传费<SEP>第十六条": {
"chunk_ids": [
"chunk-d26b288ed4001dc5c504dce0eb841362"
],
"count": 1,
"create_time": 1779012089,
"update_time": 1779012089,
"_id": "广告宣传费<SEP>第十六条"
},
"组织人事部<SEP>调动工作": {
"chunk_ids": [
"chunk-d26b288ed4001dc5c504dce0eb841362"
],
"count": 1,
"create_time": 1779012090,
"update_time": 1779012090,
"_id": "组织人事部<SEP>调动工作"
},
"会议费<SEP>差旅费": {
"chunk_ids": [
"chunk-d26b288ed4001dc5c504dce0eb841362"
],
"count": 1,
"create_time": 1779012092,
"update_time": 1779012092,
"_id": "会议费<SEP>差旅费"
},
"业务招待费<SEP>第十四条": {
"chunk_ids": [
"chunk-d26b288ed4001dc5c504dce0eb841362"
],
"count": 1,
"create_time": 1779012092,
"update_time": 1779012092,
"_id": "业务招待费<SEP>第十四条"
},
"会议费<SEP>第十五条": {
"chunk_ids": [
"chunk-d26b288ed4001dc5c504dce0eb841362"
],
"count": 1,
"create_time": 1779012092,
"update_time": 1779012092,
"_id": "会议费<SEP>第十五条"
},
"会议费<SEP>公司总裁": {
"chunk_ids": [
"chunk-d26b288ed4001dc5c504dce0eb841362"
],
"count": 1,
"create_time": 1779012093,
"update_time": 1779012093,
"_id": "会议费<SEP>公司总裁"
} }
} }

File diff suppressed because one or more lines are too long

View File

@@ -44,9 +44,9 @@ def test_knowledge_normalizer_appends_structured_table(monkeypatch) -> None:
enriched = service.build_enriched_text(raw_text) enriched = service.build_enriched_text(raw_text)
assert enriched.startswith("# 结构化表格补充") assert enriched.startswith(raw_text.strip())
assert "| 餐补 | 75 | 65 | 55 | 140 |" in enriched assert "| 餐补 | 75 | 65 | 55 | 140 |" in enriched
assert enriched.endswith(raw_text.strip()) assert enriched.endswith("| 合计 | 110 | 100 | 90 | 175 |")
def test_knowledge_normalizer_keeps_only_markdown_table_body() -> None: def test_knowledge_normalizer_keeps_only_markdown_table_body() -> None:
@@ -79,12 +79,12 @@ def test_knowledge_normalizer_builds_section_navigation_without_table() -> None:
service = KnowledgeNormalizationService(db) service = KnowledgeNormalizationService(db)
enriched = service.build_enriched_text(raw_text) enriched = service.build_enriched_text(raw_text)
assert enriched.startswith("# 章节导航") assert enriched.startswith(raw_text.strip())
assert "- 第一章 总则" in enriched assert "- 第一章 总则" in enriched
assert "## 第二章 住宿费标准" in enriched assert "## 第二章 住宿费标准" in enriched
assert "# 问答线索补充" in enriched assert "# 问答线索补充" in enriched
assert "- 第二章 住宿费标准:住宿费按照出差城市档位和职级标准执行" in enriched assert "- 第二章 住宿费标准:住宿费按照出差城市档位和职级标准执行" in enriched
assert enriched.endswith(raw_text.strip()) assert "# 章节导航" in enriched
def test_knowledge_normalizer_builds_answer_clues_from_lists_and_kv_lines() -> None: def test_knowledge_normalizer_builds_answer_clues_from_lists_and_kv_lines() -> None:

View File

@@ -74,6 +74,28 @@ def test_build_hits_prioritizes_answer_clue_appendix_for_rule_queries() -> None:
assert [item["candidate_id"] for item in hits] == ["clue-1", "plain-1"] assert [item["candidate_id"] for item in hits] == ["clue-1", "plain-1"]
def test_build_hits_demotes_chapter_navigation_for_specific_rule_queries() -> None:
    """A chapter-navigation chunk must rank below the chunk that answers the query."""
    # Heading-only chunk; it is supplied FIRST so the assertion proves a re-rank.
    navigation_chunk = {
        "chunk_id": "toc-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "# 章节导航\n\n- 第一章 总则\n- 第二章 职责分工\n- 第三章 支出归口",
    }
    # Body chunk from the same file that names the department the query asks about.
    body_chunk = {
        "chunk_id": "body-1",
        "file_path": "/tmp/doc-1__费用制度.md",
        "content": "附表3支出归口管理部门与归口业务范围\n组织人事部:探亲差旅、条件艰苦及安全风险较高区域补助等支出。",
    }

    ranked = KnowledgeRagService._build_hits_from_query_data(
        query="探亲差旅归哪个部门管理?",
        chunks=[navigation_chunk, body_chunk],
        entities=[],
        limit=2,
    )

    candidate_order = [hit["candidate_id"] for hit in ranked]
    assert candidate_order == ["body-1", "toc-1"]
def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None: def test_resolve_default_qdrant_url_prefers_container_host(monkeypatch) -> None:
monkeypatch.setattr( monkeypatch.setattr(
knowledge_rag_module.socket, knowledge_rag_module.socket,
@@ -93,3 +115,29 @@ def test_resolve_default_qdrant_url_falls_back_to_loopback(monkeypatch) -> None:
monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", raise_lookup_error) monkeypatch.setattr(knowledge_rag_module.socket, "getaddrinfo", raise_lookup_error)
assert knowledge_rag_module._resolve_default_qdrant_url() == "http://127.0.0.1:6333" assert knowledge_rag_module._resolve_default_qdrant_url() == "http://127.0.0.1:6333"
def test_is_query_ready_status_rejects_failed_status_even_with_chunks() -> None:
    """A ``failed`` status is not query-ready even when chunk data is present."""
    status_payload = {
        "status": "failed",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    query_ready = KnowledgeRagService.is_query_ready_status(status_payload)

    # Identity check on purpose: the result must be the literal ``False``.
    assert query_ready is False
def test_is_query_ready_status_rejects_processing_status_even_with_chunks() -> None:
    """A ``processing`` status is not query-ready even when chunk data is present."""
    status_payload = {
        "status": "processing",
        "chunks_count": 11,
        "chunks_list": ["chunk-1"],
    }

    query_ready = KnowledgeRagService.is_query_ready_status(status_payload)

    # Identity check on purpose: the result must be the literal ``False``.
    assert query_ready is False

View File

@@ -0,0 +1,81 @@
from __future__ import annotations
from datetime import UTC, datetime
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import StaticPool
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentName, AgentRunSource, AgentRunStatus
from app.db.base import Base
from app.services.agent_runs import AgentRunService
from app.services.knowledge import (
KNOWLEDGE_INGEST_STATUS_FAILED,
KNOWLEDGE_INGEST_STATUS_SYNCING,
KnowledgeService,
)
def build_session() -> Session:
    """Create the ORM schema on an in-memory SQLite engine and return a new session.

    Returns:
        A ``Session`` bound to the freshly created engine, with autoflush and
        autocommit both disabled.
    """
    in_memory_engine = create_engine(
        "sqlite+pysqlite:///:memory:",
        poolclass=StaticPool,
        connect_args={"check_same_thread": False},
    )
    # Every table registered on Base.metadata is created before any session exists.
    Base.metadata.create_all(bind=in_memory_engine)
    make_session = sessionmaker(
        bind=in_memory_engine,
        autoflush=False,
        autocommit=False,
    )
    return make_session()
def test_reconcile_document_ingest_status_keeps_failed_when_linked_run_failed(
    tmp_path,
    monkeypatch,
) -> None:
    """Reconciliation reports FAILED when the linked agent run has failed.

    The document is put into SYNCING with a link to a FAILED agent run while the
    stubbed RAG status map still reports ``processing`` / not query-ready. After
    reconciling, the index entry must carry the failed ingest status.
    """
    with build_session() as db:
        service = KnowledgeService(storage_root=tmp_path, db=db)
        admin_user = CurrentUserContext(
            username="admin",
            name="管理员",
            role_codes=["manager"],
            is_admin=True,
        )
        uploaded = service.upload_document("报销制度", "demo.txt", b"hello", admin_user)

        # The linked run is created already in the FAILED state.
        failed_run = AgentRunService(db).create_run(
            agent=AgentName.HERMES.value,
            source=AgentRunSource.USER_MESSAGE.value,
            status=AgentRunStatus.FAILED.value,
            route_json={"job_type": "knowledge_index_sync"},
        )
        service.set_document_ingest_statuses(
            [uploaded.id],
            KNOWLEDGE_INGEST_STATUS_SYNCING,
            agent_run_id=failed_run.run_id,
        )

        def fake_status_map(self, _document_ids):
            # RAG side still claims the document is being processed.
            return {
                uploaded.id: {
                    "status": "processing",
                    "query_ready": False,
                    "updated_at": datetime.now(UTC).isoformat(),
                }
            }

        monkeypatch.setattr(
            "app.services.knowledge_rag.KnowledgeRagService.get_document_status_map",
            fake_status_map,
        )

        index = service._load_index()
        changed = service._reconcile_document_ingest_statuses(
            index,
            document_ids=[uploaded.id],
            preserve_syncing=False,
        )

        matching_entries = (
            item for item in index["documents"] if item["id"] == uploaded.id
        )
        entry = next(matching_entries)
        assert changed is True
        assert entry["ingest_status"] == KNOWLEDGE_INGEST_STATUS_FAILED