2026-05-22 10:42:31 +08:00
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
|
from dataclasses import dataclass
|
|
|
|
|
|
|
|
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
|
|
|
|
|
|
|
|
from app.schemas.ontology import OntologyIntent, OntologyScenario
|
|
|
|
|
|
|
|
|
|
|
|
# --- Pre-compiled regular expressions for dates, amounts and "top N" --------

# Fully qualified date range such as "2024-01-01 到 2024-01-31".
# The separator may be 到, 至, "~" or "-"; month/day accept one or two digits.
DATE_RANGE_PATTERN = re.compile(
    r"(?P<start>\d{4}-\d{1,2}-\d{1,2})\s*(?:到|至|~|-)\s*(?P<end>\d{4}-\d{1,2}-\d{1,2})"
)
# Year and month written with Chinese markers, e.g. "2024年3月".
EXPLICIT_MONTH_PATTERN = re.compile(r"(?P<year>\d{4})年(?P<month>\d{1,2})月")
# Full date using 年/月 markers or "/" / "-" separators; a trailing 日 is optional.
EXPLICIT_DATE_PATTERN = re.compile(
    r"(?P<year>\d{4})[年/-](?P<month>\d{1,2})[月/-](?P<day>\d{1,2})日?"
)
# Range between two year-less dates, e.g. "3月1日至3月15日".
MONTH_DAY_RANGE_PATTERN = re.compile(
    r"(?P<start_month>\d{1,2})月(?P<start_day>\d{1,2})日?\s*(?:到|至|~|-)\s*"
    r"(?P<end_month>\d{1,2})月(?P<end_day>\d{1,2})日?"
)
# Single year-less date, e.g. "3月5日" (日 optional).
MONTH_DAY_PATTERN = re.compile(r"(?P<month>\d{1,2})月(?P<day>\d{1,2})日?")
# Numeric amount with an optional comparison prefix and an optional unit.
# Two-character operators ("<=", ">=") are listed before "<" / ">" so the
# longer form wins; likewise 万元 is listed before 万.
# NOTE(review): the previous alternation listed "=" twice; the redundant copy
# was dropped (no behaviour change). If a full-width variant was intended,
# add it explicitly.
AMOUNT_PATTERN = re.compile(
    r"(?P<prefix>超过|大于|高于|不少于|不低于|小于|低于|少于|至多|不超过|<=|>=|<|>|=)?\s*"
    r"(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>万元|万|元)?"
)
# "Top N" requests: "top 5", "前10", "最高的3", "最低3" ...
# Case-insensitive so mixed-case spellings such as "Top 5" are recognised in
# addition to the "top" / "TOP" forms that were matched before.
TOP_N_PATTERN = re.compile(r"(?:top|前|最高的?|最低的?)\s*(?P<top>\d+)", re.IGNORECASE)
|
|
|
|
|
|
|
|
|
|
|
|
# Scenario name -> tuple of (keyword, weight) pairs.
# Each weight is a float attached to its keyword; how the weights are combined
# is decided by the consuming code, which is not part of this section.
# NOTE(review): several keywords are substrings of others within the same
# scenario (e.g. "报销" / "报销单", "招待" / "招待费") — confirm the consumer
# handles such overlaps the way it is meant to.
SCENARIO_KEYWORDS = {
    "expense": (
        ("报销", 0.20),
        ("报销单", 0.20),
        ("单据报销", 0.18),
        ("报账", 0.20),
        ("差旅", 0.20),
        ("费用", 0.14),
        ("发票", 0.14),
        ("票据", 0.12),
        ("借款", 0.12),
        ("住宿", 0.10),
        ("餐费", 0.10),
        ("招待", 0.18),
        ("招待费", 0.18),
        ("花销", 0.16),
        ("花了", 0.14),
        ("支出", 0.14),
        ("垫付", 0.14),
    ),
    "accounts_receivable": (
        ("应收", 0.22),
        ("回款", 0.20),
        ("收款", 0.18),
        ("账龄", 0.18),
        ("客户欠款", 0.22),
    ),
    "accounts_payable": (
        ("应付", 0.22),
        ("付款", 0.20),
        ("请款", 0.18),
        ("供应商", 0.20),
        ("待付", 0.16),
        ("打款", 0.18),
    ),
    "knowledge": (
        ("制度", 0.20),
        ("规则", 0.20),
        ("办法", 0.18),
        ("依据", 0.18),
        ("政策", 0.16),
        ("知识库", 0.18),
    ),
}
|
|
|
|
|
|
|
|
|
|
|
|
# Keyword tuples grouped by the kind of request they name (query / explain /
# compare / risk / draft). How they are matched against user text is defined
# by the consuming code, which is not visible in this section.
QUERY_KEYWORDS = (
    "查",
    "查询",
    "查看",
    "列出",
    "统计",
    "汇总",
    "多少",
    "几笔",
    "金额",
    "明细",
)
EXPLAIN_KEYWORDS = ("为什么", "依据", "原因", "怎么处理", "是否可以", "能不能", "按什么规则")
COMPARE_KEYWORDS = ("对比", "比较", "相比", "差异", "变化")
RISK_KEYWORDS = ("风险", "异常", "重复", "超标", "超预算", "逾期", "验真", "巡检")
DRAFT_KEYWORDS = ("生成", "草稿", "起草", "拟一份", "创建", "发起", "准备")
|
|
|
|
|
|
# Phrases listed as follow-ups to an existing draft: continue / next step,
# verify, supplement, modify, confirm, submit, save, and "<field> 是" style
# value assignments (customer, location, amount, date, time).
DRAFT_FOLLOW_UP_KEYWORDS = (
    "继续",
    "下一步",
    "核对",
    "补充",
    "补一下",
    "修改",
    "改成",
    "改为",
    "换成",
    "更新",
    "确认",
    "提交",
    "保存",
    "客户是",
    "地点是",
    "金额是",
    "日期是",
    "时间是",
)
|
|
|
|
|
|
# Set of action identifiers for the expense review flow.
EXPENSE_REVIEW_ACTIONS = {
    "save_draft",
    "next_step",
    "edit_review",
    "link_to_existing_draft",
    "create_new_claim_from_documents",
}
# Phrases that ask for a state-changing operation: paying, approving,
# rejecting, publishing, activating, disabling or deleting.
OPERATE_KEYWORDS = (
    "直接付款",
    "帮我付款",
    "安排付款",
    "发起付款",
    "直接审批",
    "审批通过",
    "帮我审批",
    "驳回",
    "上线",
    "激活",
    "停用",
    "删除",
)
|
|
|
|
|
|
|
|
|
|
|
|
# Chinese expense keyword -> expense type code.
# Insertion order is kept exactly as before; some keys are substrings of
# others (e.g. "招待费" / "招待", "出租车" / "出租车票"), so a consumer that
# scans the mapping in order may depend on it — confirm before reordering.
EXPENSE_TYPE_KEYWORDS = {
    # travel
    "差旅": "travel",
    "出差": "travel",
    # hotel
    "住宿": "hotel",
    "酒店": "hotel",
    # transport
    "交通": "transport",
    "打车": "transport",
    "网约车": "transport",
    "出租车": "transport",
    "出租车票": "transport",
    "乘车": "transport",
    "乘车费": "transport",
    "用车": "transport",
    "叫车": "transport",
    "车资": "transport",
    "的士": "transport",
    "的士票": "transport",
    "停车费": "transport",
    # meal
    "餐费": "meal",
    "用餐": "meal",
    # meeting
    "会务": "meeting",
    # entertainment
    "招待费": "entertainment",
    "招待": "entertainment",
    "宴请": "entertainment",
    # office
    "办公费": "office",
    "办公用品": "office",
    "文具": "office",
    "耗材": "office",
    "办公耗材": "office",
    "打印纸": "office",
    "办公设备": "office",
    # training
    "培训费": "training",
    "培训": "training",
    # communication
    "通讯费": "communication",
    "话费": "communication",
    # welfare
    "福利费": "welfare",
    "团建": "welfare",
}
|
|
|
|
|
|
|
|
|
|
|
|
# Words that appear in free-text descriptions of an expense: reimbursement
# verbs, spending verbs, transport / meal / lodging terms, invoices, and
# on-site customer visits.
EXPENSE_NARRATIVE_KEYWORDS = (
    "报销",
    "报账",
    "招待",
    "招待费",
    "花销",
    "花了",
    "支出",
    "垫付",
    "打车",
    "车费",
    "乘车",
    "乘车费",
    "用车",
    "叫车",
    "车资",
    "的士",
    "的士票",
    "出租车票",
    "餐费",
    "吃饭",
    "用餐",
    "宴请",
    "请客",
    "住宿",
    "发票",
    "票据",
    "差旅",
    "客户现场",
)
|
|
|
|
|
|
|
|
|
|
|
|
# Core accounts-receivable / accounts-payable terms.
AR_CORE_KEYWORDS = ("应收", "回款", "收款", "账龄", "欠款", "未回款")
AP_CORE_KEYWORDS = ("应付", "付款", "请款", "待付", "打款", "未付款")
# Bare "I want to file an expense" style prompts that carry no detail.
# Stored as a set of complete phrases.
GENERIC_EXPENSE_PROMPTS = {
    "报销",
    "我要报销",
    "我想报销",
    "帮我报销",
    "我要申请报销",
    "发起报销",
    "提交报销",
}
# Slot identifier -> Chinese display label.
MISSING_SLOT_LABELS = {
    "expense_type": "费用类型",
    "amount": "金额",
    "customer_name": "客户单位",
    "vendor_name": "供应商",
    "participants": "参与人员",
    "attachments": "票据附件",
    "time_range": "发生时间",
    "reason": "事由说明",
    "document_id": "单据号",
}
|
|
|
|
|
|
|
|
|
|
|
|
# Chinese status wording -> status code.
# Insertion order is kept exactly as before; several keys are substrings of
# others (e.g. "退回" / "已退回", "待审" / "待审批", "入账" / "已入账").
# NOTE(review): "入账" maps to "archived" while "已入账" maps to "paid".
# The two mappings are kept as they were; confirm this difference is
# intentional, since any text containing "已入账" also contains "入账".
STATUS_KEYWORDS = {
    "草稿": "draft",
    "待提交": "draft",
    "待补充": "supplement",
    "退回": "returned",
    "已退回": "returned",
    "进行中": "review",
    "审批中": "review",
    "审核中": "review",
    "流转中": "review",
    "已提交": "submitted",
    "逾期": "overdue",
    "待审批": "pending",
    "待审": "pending",
    "已审批": "approved",
    "已通过": "approved",
    "已审核": "approved",
    "归档": "archived",
    "已归档": "archived",
    "入账": "archived",
    "已入账": "paid",
    "已付款": "paid",
    "未付款": "unpaid",
    "未回款": "unreceived",
}
|
|
|
|
|
|
|
|
|
|
|
|
# Fixed list of Chinese city names recognised as locations.
LOCATION_KEYWORDS = (
    "北京",
    "上海",
    "广州",
    "深圳",
    "杭州",
    "南京",
    "苏州",
    "成都",
    "重庆",
    "天津",
    "武汉",
    "西安",
    "郑州",
    "长沙",
    "青岛",
    "厦门",
    "宁波",
    "合肥",
    "济南",
    "福州",
)
|
|
|
|
|
|
|
|
|
|
|
|
# Role codes grouped under the "privileged" label.
PRIVILEGED_ROLE_CODES = {"manager", "finance", "approver", "executive"}
# Scenario names grouped as "contextual"; these are the same four names used
# as keys of SCENARIO_KEYWORDS.
CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "knowledge"}
# Intent names grouped under the "knowledge" label.
KNOWLEDGE_INTENTS = {"query", "explain", "compare"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(slots=True)
|
|
|
|
|
|
class ReferenceCatalog:
|
|
|
|
|
|
employees: list[str]
|
|
|
|
|
|
departments: list[str]
|
|
|
|
|
|
customers: list[str]
|
|
|
|
|
|
vendors: list[str]
|
|
|
|
|
|
projects: list[str]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LlmOntologyEntityHint(BaseModel):
    """A single entity hint, presumably produced by an LLM parse step.

    Unknown keys in the input are dropped rather than rejected
    (``extra="ignore"``).
    """

    model_config = ConfigDict(extra="ignore")

    # Entity kind and the raw value; both are required.
    type: str
    value: str
    # Optional canonical form of ``value``; None when not supplied.
    normalized_value: str | None = None
    # Defaults to "target" when the input does not provide a role.
    role: str = "target"
    # Must lie within [0.0, 1.0]; defaults to 0.72 when omitted.
    confidence: float = Field(default=0.72, ge=0.0, le=1.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LlmOntologyParseResult(BaseModel):
|
|
|
|
|
|
model_config = ConfigDict(extra="ignore")
|
|
|
|
|
|
|
|
|
|
|
|
scenario: OntologyScenario = Field(default="unknown")
|
|
|
|
|
|
intent: OntologyIntent = Field(default="query")
|
|
|
|
|
|
confidence: float = Field(default=0.0, ge=0.0, le=1.0)
|
|
|
|
|
|
clarification_required: bool = False
|
|
|
|
|
|
clarification_question: str | None = None
|
|
|
|
|
|
missing_slots: list[str] = Field(default_factory=list)
|
|
|
|
|
|
ambiguity: list[str] = Field(default_factory=list)
|
|
|
|
|
|
entity_hints: list[LlmOntologyEntityHint] = Field(default_factory=list)
|