feat: 增强知识库索引与设置页面模块化拆分
扩展知识库索引任务和 RAG 检索支持增量入库和文档去重,优化本体检测和规则匹配精度,前端设置页面拆分为 LLM、邮件和 Hermes 员工同步子面板并重构样式,新增日志详情组件和知识入库日志模型,补充单元测试覆盖。
This commit is contained in:
@@ -9,16 +9,32 @@ from app.schemas.user_agent import UserAgentRequest, UserAgentReviewDocumentCard
|
||||
DEFAULT_GROUP_SCENE_LABELS = {
|
||||
"travel": "差旅费",
|
||||
"entertainment": "业务招待费",
|
||||
"meal": "伙食费",
|
||||
"meal": "业务招待费",
|
||||
"transport": "交通费",
|
||||
"hotel": "住宿费",
|
||||
"office": "办公费",
|
||||
"office": "办公用品费",
|
||||
"training": "培训费",
|
||||
"communication": "通讯费",
|
||||
"welfare": "福利费",
|
||||
"other": "其他费用",
|
||||
}
|
||||
|
||||
DOCUMENT_SCENE_LABELS = {
|
||||
"flight_itinerary": "机票/航班行程单",
|
||||
"train_ticket": "火车/高铁票",
|
||||
"ship_ticket": "轮船票",
|
||||
"travel_ticket": "交通出行票据",
|
||||
"hotel_invoice": "酒店住宿票据",
|
||||
"taxi_receipt": "出租车/网约车票据",
|
||||
"transport_receipt": "乘车票据",
|
||||
"parking_toll_receipt": "停车/通行费票据",
|
||||
"meal_receipt": "餐饮发票",
|
||||
"office_invoice": "文具/办公用品发票",
|
||||
"meeting_invoice": "会议/会务票据",
|
||||
"training_invoice": "培训票据",
|
||||
"other": "其他票据",
|
||||
}
|
||||
|
||||
DOCUMENT_DATE_TEXT_PATTERN = re.compile(
|
||||
r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[::][0-5]\d)?)"
|
||||
)
|
||||
@@ -48,55 +64,55 @@ class UserAgentDocumentService:
|
||||
provided_type = str(item.get("document_type") or "").strip().lower()
|
||||
normalized_expense_type = str(expense_type_code or "").strip().lower()
|
||||
if provided_type:
|
||||
if provided_type in {"flight_itinerary", "train_ticket"}:
|
||||
if provided_type in {"flight_itinerary", "train_ticket", "ship_ticket"}:
|
||||
return {
|
||||
"document_type": provided_type,
|
||||
"expense_type": "travel",
|
||||
"group_code": "travel",
|
||||
"scene_label": "差旅票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS.get(provided_type, "交通出行票据"),
|
||||
}
|
||||
if provided_type == "hotel_invoice":
|
||||
return {
|
||||
"document_type": provided_type,
|
||||
"expense_type": "hotel",
|
||||
"group_code": "travel",
|
||||
"scene_label": "住宿票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["hotel_invoice"],
|
||||
}
|
||||
if provided_type in {"taxi_receipt", "parking_toll_receipt"}:
|
||||
if provided_type in {"taxi_receipt", "transport_receipt", "parking_toll_receipt"}:
|
||||
return {
|
||||
"document_type": provided_type,
|
||||
"expense_type": "transport",
|
||||
"group_code": "travel",
|
||||
"scene_label": "交通票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS.get(provided_type, "乘车票据"),
|
||||
}
|
||||
if provided_type == "meal_receipt":
|
||||
group_code = "entertainment" if normalized_expense_type == "entertainment" or has_customer else "meal"
|
||||
group_code = "meal"
|
||||
return {
|
||||
"document_type": provided_type,
|
||||
"expense_type": group_code,
|
||||
"group_code": group_code,
|
||||
"scene_label": "餐饮票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["meal_receipt"],
|
||||
}
|
||||
if provided_type == "office_invoice":
|
||||
return {
|
||||
"document_type": provided_type,
|
||||
"expense_type": "office",
|
||||
"group_code": "office",
|
||||
"scene_label": "办公用品票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["office_invoice"],
|
||||
}
|
||||
if provided_type == "meeting_invoice":
|
||||
return {
|
||||
"document_type": provided_type,
|
||||
"expense_type": "meeting",
|
||||
"group_code": "meeting",
|
||||
"scene_label": "会务票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["meeting_invoice"],
|
||||
}
|
||||
if provided_type == "training_invoice":
|
||||
return {
|
||||
"document_type": provided_type,
|
||||
"expense_type": "training",
|
||||
"group_code": "training",
|
||||
"scene_label": "培训票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["training_invoice"],
|
||||
}
|
||||
|
||||
text = " ".join(
|
||||
@@ -108,41 +124,69 @@ class UserAgentDocumentService:
|
||||
).lower()
|
||||
compact = text.replace(" ", "")
|
||||
|
||||
if any(keyword in compact for keyword in ("机票", "航班", "火车", "高铁", "行程单")):
|
||||
if any(keyword in compact for keyword in ("火车", "高铁", "动车", "铁路", "车次")):
|
||||
return {
|
||||
"document_type": "travel_ticket",
|
||||
"document_type": "train_ticket",
|
||||
"expense_type": "travel",
|
||||
"group_code": "travel",
|
||||
"scene_label": "差旅票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["train_ticket"],
|
||||
}
|
||||
if any(keyword in compact for keyword in ("过路费", "停车", "通行费", "收费站")):
|
||||
return {
|
||||
"document_type": "parking_toll_receipt",
|
||||
"expense_type": "transport",
|
||||
"group_code": "travel",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["parking_toll_receipt"],
|
||||
}
|
||||
if any(keyword in compact for keyword in ("打车", "出租车", "滴滴", "网约车", "叫车", "车费", "车资", "的士")):
|
||||
return {
|
||||
"document_type": "taxi_receipt",
|
||||
"expense_type": "transport",
|
||||
"group_code": "travel",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["taxi_receipt"],
|
||||
}
|
||||
if any(keyword in compact for keyword in ("乘车", "用车")):
|
||||
return {
|
||||
"document_type": "transport_receipt",
|
||||
"expense_type": "transport",
|
||||
"group_code": "travel",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["transport_receipt"],
|
||||
}
|
||||
if any(keyword in compact for keyword in ("机票", "航班", "登机", "航空", "客票")):
|
||||
return {
|
||||
"document_type": "flight_itinerary",
|
||||
"expense_type": "travel",
|
||||
"group_code": "travel",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["flight_itinerary"],
|
||||
}
|
||||
if any(keyword in compact for keyword in ("轮船", "船票", "客轮", "渡轮", "航运")):
|
||||
return {
|
||||
"document_type": "ship_ticket",
|
||||
"expense_type": "travel",
|
||||
"group_code": "travel",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["ship_ticket"],
|
||||
}
|
||||
if any(keyword in compact for keyword in ("酒店", "住宿", "宾馆")):
|
||||
return {
|
||||
"document_type": "hotel_invoice",
|
||||
"expense_type": "hotel",
|
||||
"group_code": "travel",
|
||||
"scene_label": "住宿票据",
|
||||
}
|
||||
if any(keyword in compact for keyword in ("打车", "出租车", "滴滴", "网约车", "乘车", "用车", "叫车", "车费", "车资", "的士", "过路费", "停车")):
|
||||
return {
|
||||
"document_type": "transport_receipt",
|
||||
"expense_type": "transport",
|
||||
"group_code": "travel",
|
||||
"scene_label": "交通票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["hotel_invoice"],
|
||||
}
|
||||
if any(keyword in compact for keyword in ("餐", "饭店", "酒楼", "酒家", "餐饮", "meal")):
|
||||
group_code = "entertainment" if normalized_expense_type == "entertainment" or has_customer else "meal"
|
||||
group_code = "meal"
|
||||
return {
|
||||
"document_type": "meal_receipt",
|
||||
"expense_type": group_code,
|
||||
"group_code": group_code,
|
||||
"scene_label": "餐饮票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["meal_receipt"],
|
||||
}
|
||||
if any(keyword in compact for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "键盘", "鼠标", "白板", "墨盒", "硒鼓")):
|
||||
return {
|
||||
"document_type": "other",
|
||||
"document_type": "office_invoice",
|
||||
"expense_type": "office",
|
||||
"group_code": "office",
|
||||
"scene_label": "办公用品票据",
|
||||
"scene_label": DOCUMENT_SCENE_LABELS["office_invoice"],
|
||||
}
|
||||
return {
|
||||
"document_type": "other",
|
||||
|
||||
Reference in New Issue
Block a user