feat: 增强知识库索引与设置页面模块化拆分
扩展知识库索引任务和 RAG 检索,支持增量入库和文档去重,优化本体检测和规则匹配精度,前端设置页面拆分为 LLM、邮件和 Hermes 员工同步子面板并重构样式,新增日志详情组件和知识入库日志模型,补充单元测试覆盖。
This commit is contained in:
@@ -37,6 +37,39 @@ from app.services.ontology_rules import (
|
||||
logger = get_logger("app.services.ontology")
|
||||
|
||||
|
||||
# Substrings looked up in the compact query by
# `_should_transport_override_entertainment`: at least one of them must occur
# before a detected "transport" expense type is allowed to displace a detected
# "entertainment" expense type.
TRANSPORT_EXPENSE_OVERRIDE_KEYWORDS = (
    # Taxi / ride-hailing wording, including receipt variants.
    "打车", "网约车", "出租车票", "出租车", "的士票", "的士", "滴滴",
    # Generic local-travel wording.
    "市内交通", "乘车", "乘车费", "用车", "叫车",
    # Fare wording.
    "车费", "车资",
    # Airport.
    "机场",
)
|
||||
# Substrings looked up in the compact query by
# `_should_transport_override_entertainment`: if any of them occurs, the query
# is treated as explicitly about entertainment and the transport override is
# not applied.
EXPLICIT_ENTERTAINMENT_KEYWORDS = (
    # Hospitality / entertainment-expense wording.
    "业务招待", "招待费", "招待", "宴请", "请客",
    # Meals with a customer.
    "请客户吃饭", "客户吃饭", "客户用餐", "客户餐",
    # Business reception wording.
    "商务接待", "商务宴请", "接待餐",
)
|
||||
|
||||
|
||||
class OntologyDetectionMixin:
|
||||
def _detect_scenario(self, compact_query: str) -> tuple[str, float]:
|
||||
scores = {key: 0.0 for key in SCENARIO_KEYWORDS}
|
||||
@@ -337,6 +370,9 @@ class OntologyDetectionMixin:
|
||||
"出现“客户”不等于应收,出现“供应商”不等于应付,必须结合动作词和业务目标判断。"
|
||||
"只有明确查询、统计、列出、多少、明细、对比时才优先使用 query 或 compare。"
|
||||
"附件名称和 OCR 摘要只作为辅助证据,不能编造未出现的事实。"
|
||||
"如果用户明确提到打车、的士票、出租车票、网约车、乘车费、车费等交通票据,"
|
||||
"即使句子里出现“客户”,也必须优先识别为 transport,不要推断为 entertainment。"
|
||||
"不要输出用户原文未出现、且与规则候选冲突的费用类型。"
|
||||
"信息不足时 clarification_required=true,并给出一句简短中文追问。"
|
||||
"missing_slots 使用简短 snake_case,例如 expense_type, amount, "
|
||||
"customer_name, participants, attachments。"
|
||||
@@ -351,12 +387,12 @@ class OntologyDetectionMixin:
|
||||
' "intent": "draft",\n'
|
||||
' "confidence": 0.88,\n'
|
||||
' "clarification_required": true,\n'
|
||||
' "clarification_question": "请补充客户单位、参与人员和票据附件。",\n'
|
||||
' "missing_slots": ["customer_name", "participants", "attachments"],\n'
|
||||
' "clarification_question": "请补充发生时间、金额和票据附件。",\n'
|
||||
' "missing_slots": ["time_range", "amount", "attachments"],\n'
|
||||
' "ambiguity": [],\n'
|
||||
' "entity_hints": [\n'
|
||||
' {"type": "expense_type", "value": "招待", '
|
||||
'"normalized_value": "entertainment", "role": "filter", '
|
||||
' {"type": "expense_type", "value": "交通费", '
|
||||
'"normalized_value": "transport", "role": "filter", '
|
||||
'"confidence": 0.86}\n'
|
||||
" ]\n"
|
||||
"}"
|
||||
@@ -432,6 +468,7 @@ class OntologyDetectionMixin:
|
||||
def _merge_entities(
|
||||
base_entities: list[OntologyEntity],
|
||||
entity_hints: list[LlmOntologyEntityHint],
|
||||
compact_query: str = "",
|
||||
) -> list[OntologyEntity]:
|
||||
merged: dict[tuple[str, str], OntologyEntity] = {
|
||||
(item.type, item.normalized_value): item for item in base_entities
|
||||
@@ -454,7 +491,36 @@ class OntologyDetectionMixin:
|
||||
if existing is None or existing.confidence < candidate.confidence:
|
||||
merged[key] = candidate
|
||||
|
||||
return list(merged.values())
|
||||
items = list(merged.values())
|
||||
if OntologyDetectionMixin._should_transport_override_entertainment(
|
||||
compact_query,
|
||||
items,
|
||||
):
|
||||
items = [
|
||||
item
|
||||
for item in items
|
||||
if not (
|
||||
item.type == "expense_type"
|
||||
and item.normalized_value == "entertainment"
|
||||
)
|
||||
]
|
||||
return items
|
||||
|
||||
@staticmethod
def _should_transport_override_entertainment(
    compact_query: str,
    entities: list[OntologyEntity],
) -> bool:
    """Decide whether a transport expense type should displace entertainment.

    Args:
        compact_query: Query text that the keyword tuples are matched
            against by plain substring search.
        entities: Candidate entities; only those whose ``type`` is
            ``"expense_type"`` are inspected.

    Returns:
        ``True`` only when all of the following hold: both ``"transport"``
        and ``"entertainment"`` appear among the expense-type entities, the
        query contains at least one ``TRANSPORT_EXPENSE_OVERRIDE_KEYWORDS``
        entry, and it contains no ``EXPLICIT_ENTERTAINMENT_KEYWORDS`` entry.
    """
    # Collect the label of every expense-type entity, preferring the
    # normalized value and falling back to the raw value.
    detected = set()
    for entity in entities:
        if entity.type != "expense_type":
            continue
        label = entity.normalized_value or entity.value or ""
        detected.add(str(label).strip())

    # There is only a conflict to resolve when both types were detected.
    if "transport" not in detected or "entertainment" not in detected:
        return False

    def _mentions(keywords) -> bool:
        # True when any keyword occurs as a substring of the query.
        return any(word in compact_query for word in keywords)

    # Transport wording must be present, and explicit entertainment wording
    # vetoes the override.
    return _mentions(TRANSPORT_EXPENSE_OVERRIDE_KEYWORDS) and not _mentions(
        EXPLICIT_ENTERTAINMENT_KEYWORDS
    )
|
||||
|
||||
@staticmethod
|
||||
def _normalize_short_text_list(values: list[str]) -> list[str]:
|
||||
|
||||
Reference in New Issue
Block a user