from __future__ import annotations

import re

# Scenario key -> Chinese label. "unknown" carries the generic label.
SCENARIO_LABELS = {
    "expense": "报销",
    "accounts_receivable": "应收",
    "accounts_payable": "应付",
    "knowledge": "知识",
    "unknown": "通用",
}

# Risk code -> Chinese explanation sentence for that risk.
RISK_REASON_MAP = {
    "duplicate_expense": "检测到同员工、同金额或近似单据存在重复提交迹象。",
    "location_mismatch": "申报出差地点与票据识别地点可能不一致,需要核对行程或补充说明。",
    "amount_over_limit": "金额超过当前制度或预算阈值,需要补充例外说明。",
    "invoice_anomaly": "票据或附件完整性不满足当前规则要求,需要补件或人工复核。",
    "ar_overdue": "应收账款已出现逾期,存在回款延迟风险。",
    "ap_overdue": "应付付款已出现逾期,可能影响供应商履约或合作关系。",
}

# Short "I want to file an expense" phrases that carry no further detail.
# Kept as a set so membership checks are exact-match lookups.
GENERIC_EXPENSE_PROMPTS = {
    "报销",
    "我要报销",
    "我想报销",
    "帮我报销",
    "我要申请报销",
    "发起报销",
    "提交报销",
}

# Keywords (generate / draft / create / initiate / prepare) collected as the
# "explicit draft" vocabulary.
EXPLICIT_DRAFT_KEYWORDS = (
    "生成",
    "草稿",
    "起草",
    "创建",
    "发起",
    "准备",
)

# Expense type key -> Chinese label.
# "meal" and "entertainment" deliberately share the same label string.
EXPENSE_TYPE_LABELS = {
    "travel": "差旅费",
    "hotel": "住宿费",
    "transport": "交通费",
    "meal": "业务招待费",
    "meeting": "会务费",
    "entertainment": "业务招待费",
    "marketing": "市场推广费",
    "office": "办公用品费",
    "training": "培训费",
    "software": "软件服务费",
    "communication": "通讯费",
    "welfare": "福利费",
    "other": "其他费用",
}

# Scene key -> Chinese label used for grouped scenes.
# NOTE(review): unlike EXPENSE_TYPE_LABELS there is no "meeting" key here;
# confirm that omission is intentional.
GROUP_SCENE_LABELS = {
    "travel": "差旅费",
    "entertainment": "业务招待费",
    "meal": "业务招待费",
    "marketing": "市场推广费",
    "transport": "交通费",
    "hotel": "住宿费",
    "office": "办公用品费",
    "training": "培训费",
    "software": "软件服务费",
    "communication": "通讯费",
    "welfare": "福利费",
    "other": "其他费用",
}

# Ordered scene choices as (key, Chinese label, Chinese description) triples.
EXPENSE_SCENE_SELECTION_OPTIONS = (
    ("travel", "差旅费", "出差、长途交通、住宿、差旅补贴等场景。"),
    ("transport", "交通费", "市内打车、停车、过路费等日常交通场景。"),
    ("hotel", "住宿费", "单独住宿、酒店发票等场景。"),
    ("meal", "业务招待费", "客户接待、工作餐、加班餐、餐饮票据等场景。"),
    ("meeting", "会务费", "会议、论坛、会场、参会等场景。"),
    ("marketing", "市场推广费", "广告投放、品牌宣传、营销物料等推广场景。"),
    ("office", "办公用品费", "办公用品、耗材、办公设备等采购场景。"),
    ("training", "培训费", "培训课程、讲师费、教材、认证等场景。"),
    ("software", "软件服务费", "软件订阅、云资源、平台服务等技术服务场景。"),
    ("communication", "通讯费", "话费、流量、宽带、网络等场景。"),
    ("welfare", "福利费", "团建、体检、慰问、节日福利等场景。"),
    ("other", "其他费用", "暂不属于以上分类的报销场景。"),
)

# Knowledge-model timeouts, in seconds (per the constant names).
KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS = 20
KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS = 30
# The unqualified name is bound to the backup value.
# NOTE(review): presumably kept for existing callers -- confirm.
KNOWLEDGE_MODEL_TIMEOUT_SECONDS = KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS

# Expense status key -> Chinese label.
EXPENSE_STATUS_LABELS = {
    "draft": "草稿",
    "submitted": "已提交",
    "review": "审核中",
    "approved": "已通过",
    "rejected": "已驳回",
    "paid": "已付款",
}

# Status group key -> Chinese label.
EXPENSE_STATUS_GROUP_LABELS = {
    "draft": "草稿",
    "in_progress": "审批中",
    "completed": "审批完成",
    "other": "其他状态",
}

# Slot key -> Chinese label.
SLOT_LABELS = {
    "expense_type": "报销类型",
    "customer_name": "客户名称",
    "time_range": "发生时间",
    "location": "地点",
    "merchant_name": "酒店/商户",
    "amount": "金额",
    "reason": "事由说明",
    "participants": "参与人员",
    "attachments": "票据附件",
}

# Matches a four-digit-year date (separators: 年/月/日, "/" or "-") with an
# optional trailing HH:MM time; the whole date(+time) text is group 1.
# The hour/minute separator accepts the ASCII colon and the full-width colon
# (U+FF1A); the original class listed ":" twice, which left the full-width
# form unmatched.
DATE_TEXT_PATTERN = re.compile(
    r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?"
    r"(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[:\uff1a][0-5]\d)?)"
)

# Matches a decimal number (group 1) followed by a currency unit, including
# the look-alike unit spellings also listed in AMOUNT_UNIT_ALIASES.
# Regex alternation is ordered, so longer units must come before their
# prefixes: "万元整" previously sat after "万元" and could never match,
# leaving a stray "整" outside the match.
AMOUNT_TEXT_PATTERN = re.compile(
    r"(\d+(?:\.\d+)?)\s*"
    r"(?:万元整|万元|万员|万圆|万园|万块|元整|块钱|块|元|员|圆|园|万)"
)

# Matches an integer count (group 1) followed by "晚" or "间夜".
TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)")

# Matches "<origin> <separator> <destination>", where each side is 2-12 CJK
# characters (groups 1 and 2) and the separator is 至, →, ->, - or —.
# "->" is listed before "-" so the arrow is consumed as a whole.
TRAVEL_ROUTE_PATTERN = re.compile(
    r"([\u4e00-\u9fa5]{2,12})\s*(?:至|→|->|-|—)\s*"
    r"([\u4e00-\u9fa5]{2,12})"
)

# Source key -> Chinese label.
SOURCE_LABELS = {
    "user_text": "用户描述",
    "user_form": "用户修改",
    "ocr": "票据识别",
    "upload": "上传附件",
    "detail_context": "关联单据",
    "system_context": "系统上下文",
    "inferred": "语义推断",
    "system": "系统判断",
}

# Title keywords for review-risk entries that are marked deprecated (per the
# constant name).
DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS = (
    "历史报销画像",
    "用户画像",
    "制度注意事项",
    "制度注意",
)

# Scene key -> set of slot keys listed as required for that scene.
# Scenes that are absent from this mapping have no entry at all.
SCENE_REQUIRED_SLOT_KEYS = {
    "hotel": {"merchant_name"},
    "meeting": {"location"},
    "entertainment": {"location", "customer_name", "participants"},
}

# Expense type key -> short Chinese reason phrase.
INFERRED_REASON_LABELS = {
    "travel": "出差行程",
    "hotel": "住宿报销",
    "transport": "交通出行",
    "meal": "业务招待",
    "meeting": "会务活动",
    "entertainment": "客户接待",
    "marketing": "市场推广",
    "office": "办公用品采购",
    "training": "培训学习",
    "software": "软件服务",
    "communication": "通讯使用",
    "welfare": "员工福利",
    "other": "其他费用",
}

# Leading phrases of system-generated messages (per the constant name).
# A tuple, so it can be passed directly to str.startswith.
SYSTEM_GENERATED_REASON_PREFIXES = (
    "我上传了",
    "请按当前已识别信息",
    "请把当前上传的票据",
    "请基于当前上传的多张票据",
    "我已核对右侧识别结果",
    "请同步修正逐票据识别结果",
    "我已校正核对信息",
    "查看报销草稿",
    "请解释一下当前这笔报销的合规风险和待补充项",
)

# Patterns anchored at the start of a text that match a leading date or date
# range (years 19xx/20xx), plus the punctuation that follows it.
#
# The original character classes and alternations repeated the ASCII forms
# (":" twice, "~" twice, "," and ";" twice). The duplicates are replaced with
# the full-width counterparts, written as \u escapes: U+FF1A colon,
# U+FF5E tilde, U+FF0C comma, U+FF1B semicolon.
LEADING_REASON_TIME_PATTERNS = (
    # Form 1: optional "识别事项(有):" lead-in, a time/date label, then the
    # date or range; the trailing punctuation is optional.
    re.compile(
        r"^\s*(?:识别事项(?:有)?[:\uff1a]\s*)?"
        r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[:\uff1a]?\s*"
        r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
        r"(?:\s*(?:至|到|~|\uff5e|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
        r"\s*[,\uff0c。;\uff1b、]?\s*"
    ),
    # Form 2: a bare date or range with no label; here the trailing
    # punctuation mark is mandatory.
    re.compile(
        r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
        r"(?:\s*(?:至|到|~|\uff5e|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
        r"\s*[,\uff0c。;\uff1b、]\s*"
    ),
)

# Variant or look-alike amount units -> canonical unit ("元" or "万元").
AMOUNT_UNIT_ALIASES = {
    "员": "元",
    "圆": "元",
    "园": "元",
    "块": "元",
    "块钱": "元",
    "元整": "元",
    "万员": "万元",
    "万圆": "万元",
    "万园": "万元",
    "万块": "万元",
    "万元整": "万元",
}