feat: 增强规则资产管理与审计页面运行时调试

后端新增规则资产版本管理和规则文件 CRUD 接口,优化风险
规则生成模板执行和员工数据模型字段,知识库 RAG 增强本
地回退和文档提取能力,清理旧风险规则文件统一由生成引擎
管理,前端审计页面增加运行时调试面板和规则资产编辑交互,
补充单元测试覆盖。
This commit is contained in:
caoxiaozhu
2026-05-24 21:44:17 +08:00
parent 575f093c74
commit 50b1c3f9a9
113 changed files with 13896 additions and 5044 deletions

View File

@@ -15,6 +15,20 @@ from app.services.user_agent_knowledge_constants import (
class UserAgentKnowledgeHelpersMixin:
# Title terms considered too generic to count as a distinctive title match:
# the hit scorer skips these when awarding its extra title-match bonus.
GENERIC_KNOWLEDGE_TITLE_TERMS = {"远光软件", "股份有限", "有限公司"}
# Anchor phrases that query-term extraction records as search terms whenever
# they occur inside a run of Chinese text taken from the user's question.
# Tail substrings of that run are also recorded when they contain an anchor.
KNOWLEDGE_QUERY_ANCHOR_TERMS = (
"财务基础知识手册",
"基础知识手册",
"会计科目",
"常用会计科目",
"财务报表",
"主要税种",
"税种",
"标准",
"清单",
"明细",
"流程",
)
@staticmethod
def _select_knowledge_model_hits(
@@ -26,7 +40,7 @@ class UserAgentKnowledgeHelpersMixin:
item
for item in list(tool_payload.get("hits") or [])
if isinstance(item, dict)
][: max(MAX_KNOWLEDGE_MODEL_HITS + 1, 6)]
][: max(MAX_KNOWLEDGE_MODEL_HITS + 3, 8)]
if not raw_hits:
return []
@@ -64,7 +78,16 @@ class UserAgentKnowledgeHelpersMixin:
matched_terms = [term for term in query_terms if term in haystack]
score = max(1, 48 - rank_index * 4)
score += len(matched_terms) * 10
score += sum(max(0, len(term) - 4) * 8 for term in matched_terms)
score += sum(1 for term in matched_terms if term in title) * 8
score += sum(max(0, len(term) - 4) * 6 for term in matched_terms if term in title)
score += sum(
(len(term) - 3) * 10
for term in matched_terms
if len(term) >= 4
and term in title
and term not in UserAgentKnowledgeHelpersMixin.GENERIC_KNOWLEDGE_TITLE_TERMS
)
leading_marker = UserAgentKnowledgeHelpersMixin._leading_knowledge_appendix_marker(content)
if leading_marker == "# 章节导航":
@@ -149,6 +172,40 @@ class UserAgentKnowledgeHelpersMixin:
return ""
@staticmethod
def _knowledge_list_marker_sort_key(content: str) -> int:
normalized = str(content or "").strip()
match = re.match(r"^[(]([一二三四五六七八九十百零0-9]+)[)]", normalized)
if not match:
return 999
marker = match.group(1)
if marker.isdigit():
return int(marker)
values = {
"": 0,
"": 1,
"": 2,
"": 3,
"": 4,
"": 5,
"": 6,
"": 7,
"": 8,
"": 9,
"": 10,
}
if marker in values:
return values[marker]
if marker.startswith("") and len(marker) == 2:
return 10 + values.get(marker[1], 0)
if marker.endswith("") and len(marker) == 2:
return values.get(marker[0], 0) * 10
if "" in marker:
left, right = marker.split("", 1)
return values.get(left, 1) * 10 + values.get(right, 0)
return 999
@staticmethod
def _format_knowledge_heading_label(heading: str) -> str:
@@ -156,6 +213,169 @@ class UserAgentKnowledgeHelpersMixin:
return " / ".join(parts)
@staticmethod
def _has_inline_numbered_knowledge_items(content: str) -> bool:
return len(
re.findall(
r"[(][一二三四五六七八九十百零0-9]+[)]",
str(content or ""),
)
) >= 2
@staticmethod
def _split_inline_numbered_knowledge_items(content: str) -> list[str]:
normalized = str(content or "").strip()
if not UserAgentKnowledgeHelpersMixin._has_inline_numbered_knowledge_items(normalized):
return [normalized] if normalized else []
marker_pattern = r"[(][一二三四五六七八九十百零0-9]+[)]"
first_marker = re.search(marker_pattern, normalized)
if first_marker is None:
return [normalized] if normalized else []
prefix = normalized[: first_marker.start()].strip(" :")
tail = normalized[first_marker.start() :].strip()
item_pattern = (
r"([(][一二三四五六七八九十百零0-9]+[)]\s*.*?"
r"(?=\s*[(][一二三四五六七八九十百零0-9]+[)]|\s*$))"
)
items = [item.strip() for item in re.findall(item_pattern, tail) if item.strip()]
if prefix:
return [prefix, *items]
return items or [normalized]
@staticmethod
def _focus_knowledge_segment_content(content: str, query_terms: list[str]) -> str:
normalized = re.sub(r"\s+", " ", str(content or "").strip())
if not normalized:
return ""
anchor_terms = sorted(
{
str(term or "").strip()
for term in query_terms
if len(str(term or "").strip()) >= 3
},
key=len,
reverse=True,
)
anchor_index = -1
for term in anchor_terms:
anchor_index = normalized.lower().find(term.lower())
if anchor_index >= 0:
break
if anchor_index < 0:
return normalized
prefix_window = normalized[max(0, anchor_index - 40) : anchor_index]
marker_match = None
for match in re.finditer(
r"(?:第[一二三四五六七八九十百零0-9]+[部分章节条]|[一二三四五六七八九十]+、|[(][一二三四五六七八九十百零0-9]+[)])",
prefix_window,
):
marker_match = match
start = anchor_index
if marker_match is not None:
start = max(0, anchor_index - len(prefix_window) + marker_match.start())
return normalized[start : start + 700].strip()
@staticmethod
def _split_markdown_table_cells(line: str) -> list[str]:
stripped = str(line or "").strip()
if stripped.startswith("|"):
stripped = stripped[1:]
if stripped.endswith("|"):
stripped = stripped[:-1]
return [
re.sub(r"\s+", " ", cell.replace("**", "").strip())
for cell in stripped.split("|")
]
@classmethod
def _summarize_knowledge_table_preview(cls, preview: str) -> str:
# Produce a one-sentence summary for a Markdown table preview.
# Returns a generic sentence when no usable header + data rows are found,
# a "<subject>的标准为:..." sentence when exactly one data row matches the
# header width, and another generic sentence in every other case.
rows: list[list[str]] = []
for line in str(preview or "").splitlines():
# Lines with fewer than two pipes are not treated as table rows.
if line.count("|") < 2:
continue
cells = cls._split_markdown_table_cells(line)
# Skip divider rows, i.e. rows whose every cell is dashes with optional colons.
if not cells or all(re.fullmatch(r":?-{2,}:?", cell.replace(" ", "")) for cell in cells):
continue
rows.append(cells)
# A header plus at least one data row is required for a specific summary.
if len(rows) < 2:
return "可直接参考的标准表如下。"
header = rows[0]
# Only rows with the same number of cells as the header count as data rows.
data_rows = [row for row in rows[1:] if len(row) == len(header)]
if len(data_rows) == 1 and len(header) >= 2:
row = data_rows[0]
# The first cell names the subject; fall back to a generic noun when it is empty.
subject = row[0] or "该项目"
# NOTE(review): label and value are concatenated with no separator, the pairs
# are joined with an empty string, and the skip-set holds "" although the
# `value` truthiness test already excludes it. Characters may have been lost
# from these literals; confirm against the repository copy before relying on
# the exact output format.
pairs = [
f"{label}{value}"
for label, value in zip(header[1:], row[1:])
if label and value and value not in {"-", ""}
]
if pairs:
return f"{subject}的标准为:{''.join(pairs)}"
return "相关标准项如下,请按表头和行内容对应使用。"
def _summarize_knowledge_lines_conclusion(
self,
lines: list[str],
*,
heading: str = "",
) -> str:
clean_lines = [
self._clean_knowledge_segment_text(line)
for line in lines
if self._clean_knowledge_segment_text(line)
]
if not clean_lines:
return ""
clean_heading = str(heading or "").strip()
if not clean_heading and clean_lines and "" not in clean_lines[0] and ":" not in clean_lines[0]:
clean_heading = clean_lines[0]
clean_heading = re.sub(
r"^[一二三四五六七八九十百零0-9]+、\s*",
"",
clean_heading,
)
item_labels: list[str] = []
for line in clean_lines:
if "" not in line and ":" not in line:
continue
label = re.split(r"[:]", line, maxsplit=1)[0].strip()
if 1 <= len(label) <= 24:
item_labels.append(label)
if clean_heading and len(item_labels) >= 2:
return f"{clean_heading}包括:{''.join(item_labels[:6])}"
if item_labels:
return f"{item_labels[0]}{clean_lines[0].split('', 1)[-1].strip()}"
return clean_lines[0]
@staticmethod
def _knowledge_lines_have_multiple_labeled_items(lines: list[str]) -> bool:
labeled_count = 0
for line in lines:
normalized = str(line or "").strip()
if "" not in normalized and ":" not in normalized:
continue
label = re.split(r"[:]", normalized, maxsplit=1)[0].strip()
if 1 <= len(label) <= 24:
labeled_count += 1
return labeled_count >= 2
def _score_knowledge_evidence_candidate(
self,
@@ -169,10 +389,14 @@ class UserAgentKnowledgeHelpersMixin:
matched_terms = [term for term in query_terms if term in haystack]
score = len(matched_terms) * 10
score += sum(max(0, len(term) - 4) * 8 for term in matched_terms)
score += sum(1 for term in matched_terms if term in heading) * 6
score += sum(max(0, len(term) - 4) * 6 for term in matched_terms if term in heading)
if kind == "table":
score += 10
if content.count("\n") < 2:
score -= 24
elif kind in {"kv", "clause", "list"}:
score += 8
elif kind == "paragraph":
@@ -220,6 +444,30 @@ class UserAgentKnowledgeHelpersMixin:
remember(item)
for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_question):
remember(block)
if len(terms) >= MAX_KNOWLEDGE_QUERY_TERMS:
return terms
for marker in ("标准", "金额", "限额", "额度"):
marker_index = block.find(marker)
if marker_index <= 0:
continue
subject = block[:marker_index]
for width in (6, 4, 3, 2):
remember(subject[-width:])
for anchor in UserAgentKnowledgeHelpersMixin.KNOWLEDGE_QUERY_ANCHOR_TERMS:
if anchor in block:
remember(anchor)
tail = block[-14:]
for size in (8, 7, 6, 5, 4):
for start in range(0, len(tail) - size + 1):
piece = tail[start : start + size]
if any(
anchor in piece
for anchor in UserAgentKnowledgeHelpersMixin.KNOWLEDGE_QUERY_ANCHOR_TERMS
):
remember(piece)
if len(terms) >= MAX_KNOWLEDGE_QUERY_TERMS:
return terms
if len(block) <= 4:
remember(block)
continue
@@ -276,7 +524,14 @@ class UserAgentKnowledgeHelpersMixin:
@staticmethod
def _extract_relevant_table_preview(content: str, query_terms: list[str]) -> str:
def _extract_relevant_table_preview(
content: str,
query_terms: list[str],
*,
preferred_terms: list[str] | None = None,
max_rows: int = 3,
fallback_rows: int = 2,
) -> str:
lines = [line.strip() for line in str(content or "").splitlines() if line.strip()]
if len(lines) <= 3:
return "\n".join(lines)
@@ -285,12 +540,39 @@ class UserAgentKnowledgeHelpersMixin:
divider = lines[1] if len(lines) > 1 else ""
body = lines[2:] if divider.count("|") >= 2 else lines[1:]
preferred = [
str(term or "").strip().lower()
for term in list(preferred_terms or [])
if str(term or "").strip()
]
base_terms = preferred + [
str(term or "").strip().lower()
for term in query_terms
if str(term or "").strip().lower() not in preferred
]
derived_terms: list[str] = []
for term in base_terms:
for marker in ("标准", "金额", "限额", "额度", "是多少"):
marker_index = term.find(marker)
if marker_index <= 0:
continue
subject = term[:marker_index].strip()
if len(subject) < 2:
continue
for width in (6, 4, 3, 2):
derived_terms.append(subject[-width:])
search_terms: list[str] = []
for term in [*preferred, *derived_terms, *base_terms]:
if term and term not in search_terms:
search_terms.append(term)
matched_rows = [
row
for row in body
if any(term in row.lower() for term in query_terms)
if any(term in row.lower() for term in search_terms)
]
selected_rows = matched_rows[:3] or body[:2]
selected_rows = matched_rows[:max_rows] or body[:fallback_rows]
preview_lines = [header]
if divider:
preview_lines.append(divider)
@@ -298,6 +580,18 @@ class UserAgentKnowledgeHelpersMixin:
return "\n".join(preview_lines).strip()
@staticmethod
def _question_requests_broad_knowledge_table(question: str) -> bool:
normalized = str(question or "").strip()
if not normalized:
return False
broad_hints = ("有哪些", "是什么", "介绍", "说明", "列表", "清单", "全部", "完整")
table_subject_hints = ("科目", "目录", "清单", "列表", "", "明细")
return any(hint in normalized for hint in broad_hints) and any(
hint in normalized for hint in table_subject_hints
)
@staticmethod
def _question_requires_explicit_condition(question: str) -> bool: