feat: 增强规则资产管理与审计页面运行时调试
后端新增规则资产版本管理和规则文件 CRUD 接口,优化风险规则生成模板执行和员工数据模型字段,知识库 RAG 增强本地回退和文档提取能力,清理旧风险规则文件统一由生成引擎管理,前端审计页面增加运行时调试面板和规则资产编辑交互,补充单元测试覆盖。
This commit is contained in:
@@ -50,6 +50,12 @@ QUERY_TERM_STOPWORDS = {
|
||||
"哪些人",
|
||||
}
|
||||
TABLE_OR_STANDARD_QUERY_HINTS = (
|
||||
"表",
|
||||
"表格",
|
||||
"清单",
|
||||
"明细",
|
||||
"目录",
|
||||
"科目",
|
||||
"标准",
|
||||
"金额",
|
||||
"限额",
|
||||
@@ -61,6 +67,20 @@ TABLE_OR_STANDARD_QUERY_HINTS = (
|
||||
"档位",
|
||||
"额度",
|
||||
)
|
||||
# Anchor phrases used by _extract_query_terms: any anchor contained in a CJK
# block of the query is remembered as a query term, and sliding windows over
# the block tail are kept only when they contain one of these anchors.
# Order is preserved because terms are collected in iteration order.
QUERY_ANCHOR_TERMS = (
    # Handbook titles (long form first, then the shorter suffix).
    "财务基础知识手册",
    "基础知识手册",
    # Accounting subjects and statements.
    "会计科目",
    "常用会计科目",
    "财务报表",
    # Tax categories.
    "主要税种",
    "税种",
    # Generic document-structure words.
    "标准",
    "清单",
    "明细",
    "流程",
)
|
||||
# Generic company-name fragments. _score_knowledge_hit excludes these terms
# from the extra bonus it gives to long (>= 4 chars) query terms found in a title.
GENERIC_TITLE_TERMS = {"远光软件", "股份有限", "有限公司"}
|
||||
STRUCTURED_APPENDIX_LEADING_MARKERS = (
|
||||
"# 章节导航",
|
||||
"# 重点章节摘录",
|
||||
@@ -96,6 +116,10 @@ class KnowledgeRagService:
|
||||
"message": "请先输入要检索的知识库问题。",
|
||||
}
|
||||
|
||||
rewritten_query = normalized_query
|
||||
if conversation_history:
|
||||
rewritten_query = self._rewrite_query(normalized_query, conversation_history)
|
||||
|
||||
workspace = (
|
||||
os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip()
|
||||
or DEFAULT_LIGHTRAG_WORKSPACE
|
||||
@@ -103,81 +127,102 @@ class KnowledgeRagService:
|
||||
local_result = query_local_text_chunks(
|
||||
lightrag_root=(self.storage_root / "knowledge" / ".lightrag").resolve(),
|
||||
workspace=workspace,
|
||||
query=normalized_query,
|
||||
query=rewritten_query,
|
||||
limit=limit,
|
||||
)
|
||||
if local_result.confident:
|
||||
return {
|
||||
"result_type": "knowledge_search",
|
||||
"query": normalized_query,
|
||||
"record_count": len(local_result.hits),
|
||||
"hits": local_result.hits,
|
||||
"references": [
|
||||
str(item.get("code") or "").strip()
|
||||
for item in local_result.hits
|
||||
if str(item.get("code") or "").strip()
|
||||
],
|
||||
"raw_references": [],
|
||||
"metadata": {
|
||||
"retrieval_strategy": "local_text_chunks",
|
||||
"elapsed_seconds": round(local_result.elapsed_seconds, 4),
|
||||
"total_chunks": local_result.total_chunks,
|
||||
"best_score": local_result.best_score,
|
||||
},
|
||||
"message": f"已从本地知识块中检索到 {len(local_result.hits)} 条相关内容。",
|
||||
}
|
||||
|
||||
runtime_hits: list[dict[str, Any]] = []
|
||||
runtime_references: list[str] = []
|
||||
try:
|
||||
runtime = self._get_runtime()
|
||||
raw = runtime.query_data(normalized_query, conversation_history=conversation_history)
|
||||
raw = runtime.query_data(rewritten_query, conversation_history=conversation_history)
|
||||
data = raw.get("data") if isinstance(raw, dict) else {}
|
||||
chunks = list(data.get("chunks") or []) if isinstance(data, dict) else []
|
||||
entities = list(data.get("entities") or []) if isinstance(data, dict) else []
|
||||
runtime_references = list(data.get("references") or []) if isinstance(data, dict) else []
|
||||
runtime_hits = self._build_hits_from_query_data(
|
||||
query=rewritten_query,
|
||||
chunks=chunks,
|
||||
entities=entities,
|
||||
limit=limit,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Knowledge query failed: %s", exc)
|
||||
|
||||
all_hits: dict[str, dict[str, Any]] = {}
|
||||
for hit in local_result.hits:
|
||||
hit["score"] = int(hit.get("score") or 0)
|
||||
all_hits[hit["code"]] = hit
|
||||
|
||||
for hit in runtime_hits:
|
||||
code = hit["code"]
|
||||
if code in all_hits:
|
||||
all_hits[code]["score"] = max(all_hits[code]["score"], int(hit.get("score") or 0) + 20)
|
||||
if not all_hits[code].get("tags") and hit.get("tags"):
|
||||
all_hits[code]["tags"] = hit["tags"]
|
||||
else:
|
||||
hit["score"] = int(hit.get("score") or 0)
|
||||
all_hits[code] = hit
|
||||
|
||||
merged_hits = sorted(all_hits.values(), key=lambda x: int(x.get("score") or 0), reverse=True)[:max(1, limit)]
|
||||
|
||||
if not merged_hits:
|
||||
return {
|
||||
"result_type": "knowledge_search",
|
||||
"query": normalized_query,
|
||||
"query": rewritten_query,
|
||||
"record_count": 0,
|
||||
"hits": [],
|
||||
"references": [],
|
||||
"message": f"知识库检索暂不可用:{exc}",
|
||||
}
|
||||
|
||||
data = raw.get("data") if isinstance(raw, dict) else {}
|
||||
chunks = list(data.get("chunks") or []) if isinstance(data, dict) else []
|
||||
entities = list(data.get("entities") or []) if isinstance(data, dict) else []
|
||||
references = list(data.get("references") or []) if isinstance(data, dict) else []
|
||||
hits = self._build_hits_from_query_data(
|
||||
query=normalized_query,
|
||||
chunks=chunks,
|
||||
entities=entities,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
if not hits:
|
||||
return {
|
||||
"result_type": "knowledge_search",
|
||||
"query": normalized_query,
|
||||
"record_count": 0,
|
||||
"hits": [],
|
||||
"references": [],
|
||||
"raw_references": references,
|
||||
"raw_references": runtime_references,
|
||||
"message": "当前知识库中没有检索到与本次问题直接匹配的内容。",
|
||||
}
|
||||
|
||||
return {
|
||||
"result_type": "knowledge_search",
|
||||
"query": normalized_query,
|
||||
"record_count": len(hits),
|
||||
"hits": hits,
|
||||
"query": rewritten_query,
|
||||
"record_count": len(merged_hits),
|
||||
"hits": merged_hits,
|
||||
"references": [
|
||||
str(item.get("code") or "").strip()
|
||||
for item in hits
|
||||
for item in merged_hits
|
||||
if str(item.get("code") or "").strip()
|
||||
],
|
||||
"raw_references": references,
|
||||
"metadata": raw.get("metadata") if isinstance(raw, dict) else {},
|
||||
"message": f"已从知识库中检索到 {len(hits)} 条相关内容。",
|
||||
"raw_references": runtime_references,
|
||||
"metadata": {
|
||||
"retrieval_strategy": "fusion",
|
||||
"local_total_chunks": local_result.total_chunks,
|
||||
"local_best_score": local_result.best_score,
|
||||
},
|
||||
"message": f"已从知识库中联合检索到 {len(merged_hits)} 条相关内容。",
|
||||
}
|
||||
|
||||
def _rewrite_query(self, query: str, conversation_history: list[dict[str, str]]) -> str:
|
||||
if not self.db:
|
||||
return query
|
||||
|
||||
from app.services.runtime_chat import RuntimeChatService
|
||||
try:
|
||||
chat_service = RuntimeChatService(self.db)
|
||||
messages: list[dict[str, Any]] = [{"role": "system", "content": "你是一个查询重写助手。你的任务是根据用户的多轮对话历史,将用户的最后一次提问重写为一句独立、完整的查询语句,以便于在知识库中进行向量检索。只输出重写后的句子,不要任何解释。"}]
|
||||
for msg in conversation_history[-6:]:
|
||||
messages.append({"role": msg.get("role", "user"), "content": msg.get("content", "")})
|
||||
messages.append({"role": "user", "content": f"当前提问:{query}\n\n请重写当前提问。"})
|
||||
|
||||
rewritten = chat_service.complete(
|
||||
messages,
|
||||
max_tokens=60,
|
||||
temperature=0.1,
|
||||
timeout_seconds=10,
|
||||
)
|
||||
|
||||
if rewritten and len(rewritten) > 2 and len(rewritten) < 80:
|
||||
logger.info("Query rewritten: '%s' -> '%s'", query, rewritten)
|
||||
return rewritten
|
||||
except Exception as exc:
|
||||
logger.warning("Query rewrite failed: %s", exc)
|
||||
|
||||
return query
|
||||
|
||||
def index_documents(
|
||||
self,
|
||||
*,
|
||||
@@ -686,6 +731,24 @@ def _extract_query_terms(query: str) -> list[str]:
|
||||
remember(item)
|
||||
|
||||
for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_query):
|
||||
for marker in ("标准", "金额", "限额", "额度"):
|
||||
marker_index = block.find(marker)
|
||||
if marker_index <= 0:
|
||||
continue
|
||||
subject = block[:marker_index]
|
||||
for width in (6, 4, 3, 2):
|
||||
remember(subject[-width:])
|
||||
for anchor in QUERY_ANCHOR_TERMS:
|
||||
if anchor in block:
|
||||
remember(anchor)
|
||||
tail = block[-14:]
|
||||
for size in (8, 7, 6, 5, 4):
|
||||
for start in range(0, len(tail) - size + 1):
|
||||
piece = tail[start : start + size]
|
||||
if any(anchor in piece for anchor in QUERY_ANCHOR_TERMS):
|
||||
remember(piece)
|
||||
if len(terms) >= MAX_QUERY_TERMS:
|
||||
return terms
|
||||
if len(block) <= 4:
|
||||
remember(block)
|
||||
continue
|
||||
@@ -715,6 +778,11 @@ def _score_knowledge_hit(
|
||||
matched_terms = [term for term in query_terms if term in haystack]
|
||||
score += len(matched_terms) * 8
|
||||
score += sum(1 for term in matched_terms if term in title) * 6
|
||||
score += sum(
|
||||
(len(term) - 3) * 12
|
||||
for term in matched_terms
|
||||
if len(term) >= 4 and term in title and term not in GENERIC_TITLE_TERMS
|
||||
)
|
||||
|
||||
leading_appendix_marker = _leading_structured_appendix_marker(content)
|
||||
if leading_appendix_marker == "# 章节导航":
|
||||
|
||||
Reference in New Issue
Block a user