feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口,优化风险
规则生成模板执行和员工数据模型字段,知识库 RAG 增强本
地回退和文档提取能力,清理旧风险规则文件统一由生成引擎
管理,前端审计页面增加运行时调试面板和规则资产编辑交互,
补充单元测试覆盖。
This commit is contained in:
caoxiaozhu
2026-05-24 21:44:17 +08:00
parent 575f093c74
commit 50b1c3f9a9
113 changed files with 13896 additions and 5044 deletions

View File

@@ -50,6 +50,12 @@ QUERY_TERM_STOPWORDS = {
"哪些人",
}
TABLE_OR_STANDARD_QUERY_HINTS = (
"",
"表格",
"清单",
"明细",
"目录",
"科目",
"标准",
"金额",
"限额",
@@ -61,6 +67,20 @@ TABLE_OR_STANDARD_QUERY_HINTS = (
"档位",
"额度",
)
# Phrases that the visible part of `_extract_query_terms` always keeps as query
# terms when they occur inside a run of Chinese characters in the query. The
# same function also slides 4- to 8-character windows over the last 14
# characters of such a run and keeps every window that contains one of these
# phrases.
QUERY_ANCHOR_TERMS = (
"财务基础知识手册",
"基础知识手册",
"会计科目",
"常用会计科目",
"财务报表",
"主要税种",
"税种",
"标准",
"清单",
"明细",
"流程",
)
# Terms that the visible part of `_score_knowledge_hit` excludes from its extra
# title bonus, i.e. the `(len(term) - 3) * 12` added for matched terms of four
# or more characters that appear in the title.
GENERIC_TITLE_TERMS = {"远光软件", "股份有限", "有限公司"}
STRUCTURED_APPENDIX_LEADING_MARKERS = (
"# 章节导航",
"# 重点章节摘录",
@@ -96,6 +116,10 @@ class KnowledgeRagService:
"message": "请先输入要检索的知识库问题。",
}
rewritten_query = normalized_query
if conversation_history:
rewritten_query = self._rewrite_query(normalized_query, conversation_history)
workspace = (
os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
or DEFAULT_LIGHTRAG_WORKSPACE
@@ -103,81 +127,102 @@ class KnowledgeRagService:
local_result = query_local_text_chunks(
lightrag_root=(self.storage_root / "knowledge" / ".lightrag").resolve(),
workspace=workspace,
query=normalized_query,
query=rewritten_query,
limit=limit,
)
if local_result.confident:
return {
"result_type": "knowledge_search",
"query": normalized_query,
"record_count": len(local_result.hits),
"hits": local_result.hits,
"references": [
str(item.get("code") or "").strip()
for item in local_result.hits
if str(item.get("code") or "").strip()
],
"raw_references": [],
"metadata": {
"retrieval_strategy": "local_text_chunks",
"elapsed_seconds": round(local_result.elapsed_seconds, 4),
"total_chunks": local_result.total_chunks,
"best_score": local_result.best_score,
},
"message": f"已从本地知识块中检索到 {len(local_result.hits)} 条相关内容。",
}
runtime_hits: list[dict[str, Any]] = []
runtime_references: list[str] = []
try:
runtime = self._get_runtime()
raw = runtime.query_data(normalized_query, conversation_history=conversation_history)
raw = runtime.query_data(rewritten_query, conversation_history=conversation_history)
data = raw.get("data") if isinstance(raw, dict) else {}
chunks = list(data.get("chunks") or []) if isinstance(data, dict) else []
entities = list(data.get("entities") or []) if isinstance(data, dict) else []
runtime_references = list(data.get("references") or []) if isinstance(data, dict) else []
runtime_hits = self._build_hits_from_query_data(
query=rewritten_query,
chunks=chunks,
entities=entities,
limit=limit,
)
except Exception as exc:
logger.warning("Knowledge query failed: %s", exc)
all_hits: dict[str, dict[str, Any]] = {}
for hit in local_result.hits:
hit["score"] = int(hit.get("score") or 0)
all_hits[hit["code"]] = hit
for hit in runtime_hits:
code = hit["code"]
if code in all_hits:
all_hits[code]["score"] = max(all_hits[code]["score"], int(hit.get("score") or 0) + 20)
if not all_hits[code].get("tags") and hit.get("tags"):
all_hits[code]["tags"] = hit["tags"]
else:
hit["score"] = int(hit.get("score") or 0)
all_hits[code] = hit
merged_hits = sorted(all_hits.values(), key=lambda x: int(x.get("score") or 0), reverse=True)[:max(1, limit)]
if not merged_hits:
return {
"result_type": "knowledge_search",
"query": normalized_query,
"query": rewritten_query,
"record_count": 0,
"hits": [],
"references": [],
"message": f"知识库检索暂不可用:{exc}",
}
data = raw.get("data") if isinstance(raw, dict) else {}
chunks = list(data.get("chunks") or []) if isinstance(data, dict) else []
entities = list(data.get("entities") or []) if isinstance(data, dict) else []
references = list(data.get("references") or []) if isinstance(data, dict) else []
hits = self._build_hits_from_query_data(
query=normalized_query,
chunks=chunks,
entities=entities,
limit=limit,
)
if not hits:
return {
"result_type": "knowledge_search",
"query": normalized_query,
"record_count": 0,
"hits": [],
"references": [],
"raw_references": references,
"raw_references": runtime_references,
"message": "当前知识库中没有检索到与本次问题直接匹配的内容。",
}
return {
"result_type": "knowledge_search",
"query": normalized_query,
"record_count": len(hits),
"hits": hits,
"query": rewritten_query,
"record_count": len(merged_hits),
"hits": merged_hits,
"references": [
str(item.get("code") or "").strip()
for item in hits
for item in merged_hits
if str(item.get("code") or "").strip()
],
"raw_references": references,
"metadata": raw.get("metadata") if isinstance(raw, dict) else {},
"message": f"已从知识库中检索到 {len(hits)} 条相关内容。",
"raw_references": runtime_references,
"metadata": {
"retrieval_strategy": "fusion",
"local_total_chunks": local_result.total_chunks,
"local_best_score": local_result.best_score,
},
"message": f"已从知识库中联合检索到 {len(merged_hits)} 条相关内容。",
}
def _rewrite_query(self, query: str, conversation_history: list[dict[str, str]]) -> str:
    """Rewrite a follow-up question into a standalone retrieval query.

    The last six conversation turns plus the current question are sent to
    ``RuntimeChatService.complete`` and the reply is used in place of
    ``query``. Rewriting is strictly best-effort: whenever it cannot be
    performed, or the reply is unusable, the original ``query`` is returned
    unchanged so retrieval can still proceed.

    Args:
        query: The current user question.
        conversation_history: Earlier turns as dicts with ``role`` and
            ``content`` keys.

    Returns:
        The rewritten query with surrounding whitespace removed, or ``query``
        itself when there is no database handle, no history, the rewrite
        call fails, or the reply is not a string of 3 to 79 characters.
    """
    # Without a DB handle the chat service cannot be built, and without
    # history there is nothing to resolve the question against.
    if not self.db or not conversation_history:
        return query

    system_prompt = (
        "你是一个查询重写助手。"
        "你的任务是根据用户的多轮对话历史,将用户的最后一次提问重写为一句独立、完整的查询语句,"
        "以便于在知识库中进行向量检索。"
        "只输出重写后的句子,不要任何解释。"
    )

    try:
        # Imported lazily and inside the try block so that an import problem
        # degrades to "no rewrite" instead of aborting the whole search.
        from app.services.runtime_chat import RuntimeChatService

        chat_service = RuntimeChatService(self.db)
        messages: list[dict[str, str]] = [{"role": "system", "content": system_prompt}]
        # Only the most recent six turns are forwarded; null or missing
        # fields fall back to safe defaults rather than being sent as None.
        messages.extend(
            {"role": msg.get("role") or "user", "content": msg.get("content") or ""}
            for msg in conversation_history[-6:]
        )
        messages.append({"role": "user", "content": f"当前提问:{query}\n\n请重写当前提问。"})
        rewritten = chat_service.complete(
            messages,
            max_tokens=60,
            temperature=0.1,
            timeout_seconds=10,
        )
    except Exception as exc:
        # Top-level best-effort boundary: any failure keeps the original query.
        logger.warning("Query rewrite failed: %s", exc)
        return query

    # Model replies often carry stray whitespace or newlines; trim before the
    # length gate so padding neither passes nor fails the check by accident.
    candidate = rewritten.strip() if isinstance(rewritten, str) else ""
    if 2 < len(candidate) < 80:
        logger.info("Query rewritten: '%s' -> '%s'", query, candidate)
        return candidate
    return query
def index_documents(
self,
*,
@@ -686,6 +731,24 @@ def _extract_query_terms(query: str) -> list[str]:
remember(item)
for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_query):
for marker in ("标准", "金额", "限额", "额度"):
marker_index = block.find(marker)
if marker_index <= 0:
continue
subject = block[:marker_index]
for width in (6, 4, 3, 2):
remember(subject[-width:])
for anchor in QUERY_ANCHOR_TERMS:
if anchor in block:
remember(anchor)
tail = block[-14:]
for size in (8, 7, 6, 5, 4):
for start in range(0, len(tail) - size + 1):
piece = tail[start : start + size]
if any(anchor in piece for anchor in QUERY_ANCHOR_TERMS):
remember(piece)
if len(terms) >= MAX_QUERY_TERMS:
return terms
if len(block) <= 4:
remember(block)
continue
@@ -715,6 +778,11 @@ def _score_knowledge_hit(
matched_terms = [term for term in query_terms if term in haystack]
score += len(matched_terms) * 8
score += sum(1 for term in matched_terms if term in title) * 6
score += sum(
(len(term) - 3) * 12
for term in matched_terms
if len(term) >= 4 and term in title and term not in GENERIC_TITLE_TERMS
)
leading_appendix_marker = _leading_structured_appendix_marker(content)
if leading_appendix_marker == "# 章节导航":