feat: 增强知识库索引与设置页面模块化拆分

扩展知识库索引任务和 RAG 检索支持增量入库和文档去重,优
化本体检测和规则匹配精度,前端设置页面拆分为 LLM、邮件
和 Hermes 员工同步子面板并重构样式,新增日志详情组件和
知识入库日志模型,补充单元测试覆盖。
This commit is contained in:
caoxiaozhu
2026-05-22 23:47:28 +08:00
parent 88ff04bef8
commit 5b388d08c0
84 changed files with 10170 additions and 2599 deletions

View File

@@ -9,16 +9,32 @@ from app.schemas.user_agent import UserAgentRequest, UserAgentReviewDocumentCard
# Default display labels (Chinese) keyed by expense group code.
# Keys must stay unique: a repeated key in a dict display silently keeps only
# the last value, which hides whichever label was written first.
DEFAULT_GROUP_SCENE_LABELS = {
    "travel": "差旅费",
    "entertainment": "业务招待费",
    "meal": "业务招待",
    "transport": "交通费",
    "hotel": "住宿费",
    "office": "办公用品",
    "training": "培训费",
    "communication": "通讯费",
    "welfare": "福利费",
    "other": "其他费用",
}
# Display labels (Chinese) keyed by document type code; looked up when a
# classified document needs its "scene_label".
DOCUMENT_SCENE_LABELS = {
    # Long-distance travel tickets.
    "flight_itinerary": "机票/航班行程单",
    "train_ticket": "火车/高铁票",
    "ship_ticket": "轮船票",
    "travel_ticket": "交通出行票据",
    # Lodging.
    "hotel_invoice": "酒店住宿票据",
    # Local transport, parking and tolls.
    "taxi_receipt": "出租车/网约车票据",
    "transport_receipt": "乘车票据",
    "parking_toll_receipt": "停车/通行费票据",
    # Meals, office supplies, meetings and training.
    "meal_receipt": "餐饮发票",
    "office_invoice": "文具/办公用品发票",
    "meeting_invoice": "会议/会务票据",
    "training_invoice": "培训票据",
    # Fallback label.
    "other": "其他票据",
}
# Matches a calendar date written with 年/月/日, "/" or "-" separators,
# optionally followed by an HH:MM time (hours 0-23) introduced by whitespace
# and/or a "T". Group 1 captures the whole date(+time) text.
DOCUMENT_DATE_TEXT_PATTERN = re.compile(
    r"("
    r"\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?"  # year, month, day with optional 日
    r"(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[:][0-5]\d)?"  # optional time of day
    r")"
)
@@ -48,55 +64,55 @@ class UserAgentDocumentService:
provided_type = str(item.get("document_type") or "").strip().lower()
normalized_expense_type = str(expense_type_code or "").strip().lower()
if provided_type:
if provided_type in {"flight_itinerary", "train_ticket"}:
if provided_type in {"flight_itinerary", "train_ticket", "ship_ticket"}:
return {
"document_type": provided_type,
"expense_type": "travel",
"group_code": "travel",
"scene_label": "差旅票据",
"scene_label": DOCUMENT_SCENE_LABELS.get(provided_type, "交通出行票据"),
}
if provided_type == "hotel_invoice":
return {
"document_type": provided_type,
"expense_type": "hotel",
"group_code": "travel",
"scene_label": "住宿票据",
"scene_label": DOCUMENT_SCENE_LABELS["hotel_invoice"],
}
if provided_type in {"taxi_receipt", "parking_toll_receipt"}:
if provided_type in {"taxi_receipt", "transport_receipt", "parking_toll_receipt"}:
return {
"document_type": provided_type,
"expense_type": "transport",
"group_code": "travel",
"scene_label": "交通票据",
"scene_label": DOCUMENT_SCENE_LABELS.get(provided_type, "乘车票据"),
}
if provided_type == "meal_receipt":
group_code = "entertainment" if normalized_expense_type == "entertainment" or has_customer else "meal"
group_code = "meal"
return {
"document_type": provided_type,
"expense_type": group_code,
"group_code": group_code,
"scene_label": "餐饮票据",
"scene_label": DOCUMENT_SCENE_LABELS["meal_receipt"],
}
if provided_type == "office_invoice":
return {
"document_type": provided_type,
"expense_type": "office",
"group_code": "office",
"scene_label": "办公用品票据",
"scene_label": DOCUMENT_SCENE_LABELS["office_invoice"],
}
if provided_type == "meeting_invoice":
return {
"document_type": provided_type,
"expense_type": "meeting",
"group_code": "meeting",
"scene_label": "会务票据",
"scene_label": DOCUMENT_SCENE_LABELS["meeting_invoice"],
}
if provided_type == "training_invoice":
return {
"document_type": provided_type,
"expense_type": "training",
"group_code": "training",
"scene_label": "培训票据",
"scene_label": DOCUMENT_SCENE_LABELS["training_invoice"],
}
text = " ".join(
@@ -108,41 +124,69 @@ class UserAgentDocumentService:
).lower()
compact = text.replace(" ", "")
if any(keyword in compact for keyword in ("机票", "航班", "", "", "行程单")):
if any(keyword in compact for keyword in ("火车", "高铁", "", "", "车次")):
return {
"document_type": "travel_ticket",
"document_type": "train_ticket",
"expense_type": "travel",
"group_code": "travel",
"scene_label": "差旅票据",
"scene_label": DOCUMENT_SCENE_LABELS["train_ticket"],
}
if any(keyword in compact for keyword in ("过路费", "停车", "通行费", "收费站")):
return {
"document_type": "parking_toll_receipt",
"expense_type": "transport",
"group_code": "travel",
"scene_label": DOCUMENT_SCENE_LABELS["parking_toll_receipt"],
}
if any(keyword in compact for keyword in ("打车", "出租车", "滴滴", "网约车", "叫车", "车费", "车资", "的士")):
return {
"document_type": "taxi_receipt",
"expense_type": "transport",
"group_code": "travel",
"scene_label": DOCUMENT_SCENE_LABELS["taxi_receipt"],
}
if any(keyword in compact for keyword in ("乘车", "用车")):
return {
"document_type": "transport_receipt",
"expense_type": "transport",
"group_code": "travel",
"scene_label": DOCUMENT_SCENE_LABELS["transport_receipt"],
}
if any(keyword in compact for keyword in ("机票", "航班", "登机", "航空", "客票")):
return {
"document_type": "flight_itinerary",
"expense_type": "travel",
"group_code": "travel",
"scene_label": DOCUMENT_SCENE_LABELS["flight_itinerary"],
}
if any(keyword in compact for keyword in ("轮船", "船票", "客轮", "渡轮", "航运")):
return {
"document_type": "ship_ticket",
"expense_type": "travel",
"group_code": "travel",
"scene_label": DOCUMENT_SCENE_LABELS["ship_ticket"],
}
if any(keyword in compact for keyword in ("酒店", "住宿", "宾馆")):
return {
"document_type": "hotel_invoice",
"expense_type": "hotel",
"group_code": "travel",
"scene_label": "住宿票据",
}
if any(keyword in compact for keyword in ("打车", "出租车", "滴滴", "网约车", "乘车", "用车", "叫车", "车费", "车资", "的士", "过路费", "停车")):
return {
"document_type": "transport_receipt",
"expense_type": "transport",
"group_code": "travel",
"scene_label": "交通票据",
"scene_label": DOCUMENT_SCENE_LABELS["hotel_invoice"],
}
if any(keyword in compact for keyword in ("", "饭店", "酒楼", "酒家", "餐饮", "meal")):
group_code = "entertainment" if normalized_expense_type == "entertainment" or has_customer else "meal"
group_code = "meal"
return {
"document_type": "meal_receipt",
"expense_type": group_code,
"group_code": group_code,
"scene_label": "餐饮票据",
"scene_label": DOCUMENT_SCENE_LABELS["meal_receipt"],
}
if any(keyword in compact for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "键盘", "鼠标", "白板", "墨盒", "硒鼓")):
return {
"document_type": "other",
"document_type": "office_invoice",
"expense_type": "office",
"group_code": "office",
"scene_label": "办公用品票据",
"scene_label": DOCUMENT_SCENE_LABELS["office_invoice"],
}
return {
"document_type": "other",