"""Static vocabulary and schemas for ontology parsing.

This module defines, in order:

* compiled regular expressions for dates, date ranges, amounts and
  "top N" phrases in Chinese text;
* keyword tables grouped by scenario, intent, expense type and status;
* ``ReferenceCatalog``, a plain container of reference string lists;
* pydantic models describing an LLM-produced ontology parse result.
"""

from __future__ import annotations

import re
from dataclasses import dataclass

from pydantic import BaseModel, ConfigDict, Field

from app.schemas.ontology import OntologyIntent, OntologyScenario

# ---------------------------------------------------------------------------
# Regular expressions
# ---------------------------------------------------------------------------
# NOTE(review): the named groups below had lost their ``<name>`` part, leaving
# ``(?P\d...)``, which ``re.compile`` rejects with ``re.error`` at import time.
# The names used here are reconstructed from what each group captures; confirm
# them against every ``match.group("...")`` / ``groupdict()`` call site.

# Full ISO-like date range, e.g. "2024-01-01 到 2024-01-31".
DATE_RANGE_PATTERN = re.compile(
    r"(?P<start>\d{4}-\d{1,2}-\d{1,2})\s*(?:到|至|~|-)\s*(?P<end>\d{4}-\d{1,2}-\d{1,2})"
)

# Year + month, e.g. "2024年3月".
EXPLICIT_MONTH_PATTERN = re.compile(r"(?P<year>\d{4})年(?P<month>\d{1,2})月")

# Year + month + day with 年/月, "/" or "-" separators; trailing 日 is optional.
EXPLICIT_DATE_PATTERN = re.compile(
    r"(?P<year>\d{4})[年/-](?P<month>\d{1,2})[月/-](?P<day>\d{1,2})日?"
)

# Month/day range without a year, e.g. "3月1日到3月15日".
MONTH_DAY_RANGE_PATTERN = re.compile(
    r"(?P<start_month>\d{1,2})月(?P<start_day>\d{1,2})日?\s*(?:到|至|~|-)\s*"
    r"(?P<end_month>\d{1,2})月(?P<end_day>\d{1,2})日?"
)

# Single month/day without a year, e.g. "3月1日".
MONTH_DAY_PATTERN = re.compile(r"(?P<month>\d{1,2})月(?P<day>\d{1,2})日?")

# Optional comparison operator, a decimal number, and an optional unit.
# Two-character symbols ("<=", ">=") precede "<" / ">" so they win the
# alternation.
# NOTE(review): "=" appeared twice in the operator alternation; the duplicate
# was dropped (no behavior change). One of them may originally have been a
# full-width "＝" — confirm against the upstream file.
AMOUNT_PATTERN = re.compile(
    r"(?P<operator>超过|大于|高于|不少于|不低于|小于|低于|少于|至多|不超过|<=|>=|<|>|=)?\s*"
    r"(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>万元|万|元)?"
)

# "top 5", "前10", "最高的3" style phrases; captures the integer.
TOP_N_PATTERN = re.compile(r"(?:top|TOP|前|最高的?|最低的?)\s*(?P<value>\d+)")

# ---------------------------------------------------------------------------
# Scenario / intent keyword tables
# ---------------------------------------------------------------------------
# Scenario name -> tuple of (keyword, float) pairs. The float is presumably a
# per-keyword scoring weight — confirm at the call site.
SCENARIO_KEYWORDS = {
    "expense": (
        ("报销", 0.20),
        ("报销单", 0.20),
        ("单据报销", 0.18),
        ("报账", 0.20),
        ("差旅", 0.20),
        ("费用", 0.14),
        ("发票", 0.14),
        ("票据", 0.12),
        ("借款", 0.12),
        ("住宿", 0.10),
        ("餐费", 0.10),
        ("招待", 0.18),
        ("招待费", 0.18),
        ("花销", 0.16),
        ("花了", 0.14),
        ("支出", 0.14),
        ("垫付", 0.14),
    ),
    "accounts_receivable": (
        ("应收", 0.22),
        ("回款", 0.20),
        ("收款", 0.18),
        ("账龄", 0.18),
        ("客户欠款", 0.22),
    ),
    "accounts_payable": (
        ("应付", 0.22),
        ("付款", 0.20),
        ("请款", 0.18),
        ("供应商", 0.20),
        ("待付", 0.16),
        ("打款", 0.18),
    ),
    "knowledge": (
        ("制度", 0.20),
        ("规则", 0.20),
        ("办法", 0.18),
        ("依据", 0.18),
        ("政策", 0.16),
        ("知识库", 0.18),
    ),
}

QUERY_KEYWORDS = (
    "查",
    "查询",
    "查看",
    "列出",
    "统计",
    "汇总",
    "多少",
    "几笔",
    "金额",
    "明细",
)
EXPLAIN_KEYWORDS = ("为什么", "依据", "原因", "怎么处理", "是否可以", "能不能", "按什么规则")
COMPARE_KEYWORDS = ("对比", "比较", "相比", "差异", "变化")
RISK_KEYWORDS = ("风险", "异常", "重复", "超标", "超预算", "逾期", "验真", "巡检")
DRAFT_KEYWORDS = ("生成", "草稿", "起草", "拟一份", "创建", "发起", "准备")
DRAFT_FOLLOW_UP_KEYWORDS = (
    "继续",
    "下一步",
    "核对",
    "补充",
    "补一下",
    "修改",
    "改成",
    "改为",
    "换成",
    "更新",
    "确认",
    "提交",
    "保存",
    "客户是",
    "地点是",
    "金额是",
    "日期是",
    "时间是",
)
EXPENSE_REVIEW_ACTIONS = {
    "save_draft",
    "next_step",
    "edit_review",
    "link_to_existing_draft",
    "create_new_claim_from_documents",
}
OPERATE_KEYWORDS = (
    "直接付款",
    "帮我付款",
    "安排付款",
    "发起付款",
    "直接审批",
    "审批通过",
    "帮我审批",
    "驳回",
    "上线",
    "激活",
    "停用",
    "删除",
)

# ---------------------------------------------------------------------------
# Expense vocabulary
# ---------------------------------------------------------------------------
# Chinese keyword -> expense type code. Insertion order is kept exactly as in
# the original in case lookups depend on first-match order.
EXPENSE_TYPE_KEYWORDS = {
    "差旅": "travel",
    "出差": "travel",
    "住宿": "hotel",
    "酒店": "hotel",
    "交通": "transport",
    "打车": "transport",
    "网约车": "transport",
    "出租车": "transport",
    "出租车票": "transport",
    "乘车": "transport",
    "乘车费": "transport",
    "用车": "transport",
    "叫车": "transport",
    "车资": "transport",
    "的士": "transport",
    "的士票": "transport",
    "停车费": "transport",
    "餐费": "meal",
    "用餐": "meal",
    "会务": "meeting",
    "招待费": "entertainment",
    "招待": "entertainment",
    "宴请": "entertainment",
    "办公费": "office",
    "办公用品": "office",
    "文具": "office",
    "耗材": "office",
    "办公耗材": "office",
    "打印纸": "office",
    "办公设备": "office",
    "培训费": "training",
    "培训": "training",
    "通讯费": "communication",
    "话费": "communication",
    "福利费": "welfare",
    "团建": "welfare",
}
EXPENSE_NARRATIVE_KEYWORDS = (
    "报销",
    "报账",
    "招待",
    "招待费",
    "花销",
    "花了",
    "支出",
    "垫付",
    "打车",
    "车费",
    "乘车",
    "乘车费",
    "用车",
    "叫车",
    "车资",
    "的士",
    "的士票",
    "出租车票",
    "餐费",
    "吃饭",
    "用餐",
    "宴请",
    "请客",
    "住宿",
    "发票",
    "票据",
    "差旅",
    "客户现场",
)
AR_CORE_KEYWORDS = ("应收", "回款", "收款", "账龄", "欠款", "未回款")
AP_CORE_KEYWORDS = ("应付", "付款", "请款", "待付", "打款", "未付款")
GENERIC_EXPENSE_PROMPTS = {
    "报销",
    "我要报销",
    "我想报销",
    "帮我报销",
    "我要申请报销",
    "发起报销",
    "提交报销",
}

# Slot identifier -> Chinese display label.
MISSING_SLOT_LABELS = {
    "expense_type": "费用类型",
    "amount": "金额",
    "customer_name": "客户单位",
    "vendor_name": "供应商",
    "participants": "参与人员",
    "attachments": "票据附件",
    "time_range": "发生时间",
    "reason": "事由说明",
    "document_id": "单据号",
}

# Chinese status phrase -> status code.
# NOTE(review): "入账" maps to "archived" while "已入账" maps to "paid";
# kept as-is, but confirm the asymmetry is intentional.
STATUS_KEYWORDS = {
    "草稿": "draft",
    "待提交": "draft",
    "待补充": "supplement",
    "退回": "returned",
    "已退回": "returned",
    "进行中": "review",
    "审批中": "review",
    "审核中": "review",
    "流转中": "review",
    "已提交": "submitted",
    "逾期": "overdue",
    "待审批": "pending",
    "待审": "pending",
    "已审批": "approved",
    "已通过": "approved",
    "已审核": "approved",
    "归档": "archived",
    "已归档": "archived",
    "入账": "archived",
    "已入账": "paid",
    "已付款": "paid",
    "未付款": "unpaid",
    "未回款": "unreceived",
}
LOCATION_KEYWORDS = (
    "北京",
    "上海",
    "广州",
    "深圳",
    "杭州",
    "南京",
    "苏州",
    "成都",
    "重庆",
    "天津",
    "武汉",
    "西安",
    "郑州",
    "长沙",
    "青岛",
    "厦门",
    "宁波",
    "合肥",
    "济南",
    "福州",
)

# ---------------------------------------------------------------------------
# Role / scenario / intent code sets
# ---------------------------------------------------------------------------
PRIVILEGED_ROLE_CODES = {"manager", "finance", "approver", "executive"}
CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "knowledge"}
KNOWLEDGE_INTENTS = {"query", "explain", "compare"}


@dataclass(slots=True)
class ReferenceCatalog:
    """Container of reference string lists, one list per entity category.

    Presumably holds the known names used to recognise entities in user
    text — confirm against where instances are built.
    """

    employees: list[str]
    departments: list[str]
    customers: list[str]
    vendors: list[str]
    projects: list[str]


class LlmOntologyEntityHint(BaseModel):
    """A single entity hint inside an LLM ontology parse result.

    Unknown input fields are ignored (``extra="ignore"``).
    """

    model_config = ConfigDict(extra="ignore")

    type: str
    value: str
    normalized_value: str | None = None
    role: str = "target"
    # Validated to lie within [0.0, 1.0].
    confidence: float = Field(default=0.72, ge=0.0, le=1.0)


class LlmOntologyParseResult(BaseModel):
    """Structured ontology parse result as produced by an LLM.

    Unknown input fields are ignored (``extra="ignore"``); every field has a
    default, so an empty payload validates.
    """

    model_config = ConfigDict(extra="ignore")

    scenario: OntologyScenario = Field(default="unknown")
    intent: OntologyIntent = Field(default="query")
    # Validated to lie within [0.0, 1.0].
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)
    clarification_required: bool = False
    clarification_question: str | None = None
    # default_factory gives each instance its own fresh list.
    missing_slots: list[str] = Field(default_factory=list)
    ambiguity: list[str] = Field(default_factory=list)
    entity_hints: list[LlmOntologyEntityHint] = Field(default_factory=list)