# File: X-Financial/server/src/app/services/ontology_rules.py
# (Captured from a repository web view that reported: 299 lines, 7.2 KiB, Python.)

from __future__ import annotations
import re
from dataclasses import dataclass
from pydantic import BaseModel, ConfigDict, Field
from app.schemas.ontology import OntologyIntent, OntologyScenario
# ---------------------------------------------------------------------------
# Compiled regular expressions for dates, amounts and "top N" phrases.
# ---------------------------------------------------------------------------

# Two ISO-like dates (YYYY-M-D) joined by one of: 到, 至, "~" or "-".
# Named groups: start, end.
DATE_RANGE_PATTERN = re.compile(
    r"(?P<start>\d{4}-\d{1,2}-\d{1,2})"
    r"\s*(?:到|至|~|-)\s*"
    r"(?P<end>\d{4}-\d{1,2}-\d{1,2})"
)

# A year and month written with Chinese markers, e.g. "2024年3月".
# Named groups: year, month.
EXPLICIT_MONTH_PATTERN = re.compile(
    r"(?P<year>\d{4})年"
    r"(?P<month>\d{1,2})月"
)

# A full date whose parts are separated by 年/月, "/" or "-"; a trailing 日
# is optional. Named groups: year, month, day.
EXPLICIT_DATE_PATTERN = re.compile(
    r"(?P<year>\d{4})[年/-]"
    r"(?P<month>\d{1,2})[月/-]"
    r"(?P<day>\d{1,2})日?"
)

# A year-less range such as "3月1日到3月15日".
# Named groups: start_month, start_day, end_month, end_day.
MONTH_DAY_RANGE_PATTERN = re.compile(
    r"(?P<start_month>\d{1,2})月(?P<start_day>\d{1,2})日?"
    r"\s*(?:到|至|~|-)\s*"
    r"(?P<end_month>\d{1,2})月(?P<end_day>\d{1,2})日?"
)

# A single year-less date such as "5月20日". Named groups: month, day.
MONTH_DAY_PATTERN = re.compile(
    r"(?P<month>\d{1,2})月"
    r"(?P<day>\d{1,2})日?"
)

# An optional comparator, a (possibly decimal) number, and an optional unit
# (万元 / 万 / 元). Named groups: prefix, value, unit.
# Only `value` is mandatory, so any bare run of digits matches this pattern.
# NOTE(review): the comparator alternation contains an empty alternative
# (between ">" and "="). Because of it, `prefix` is "" rather than None when
# no comparator is present. A character may have been lost here -- confirm
# against the upstream file before changing it.
AMOUNT_PATTERN = re.compile(
    r"(?P<prefix>超过|大于|高于|不少于|不低于|小于|低于|少于|至多|不超过|<=|>=|<|>||=)?\s*"
    r"(?P<value>\d+(?:\.\d+)?)\s*"
    r"(?P<unit>万元|万|元)?"
)

# "top"/"TOP"/前/最高(的)/最低(的) followed by an integer. Named group: top.
# Only the two spellings "top" and "TOP" are accepted; mixed case is not.
TOP_N_PATTERN = re.compile(
    r"(?:top|TOP|前|最高的?|最低的?)"
    r"\s*(?P<top>\d+)"
)
# Per-scenario keyword tables. Each scenario key maps to a tuple of
# (keyword, number) pairs; the numbers range from 0.10 to 0.22.
# NOTE(review): the numbers are presumably score contributions added when the
# keyword is found -- confirm against the consumer. Some keywords contain
# others (e.g. "报销" is inside "报销单" and "单据报销"; "招待" is inside
# "招待费"), so a substring-based scorer would count such inputs more than once.
SCENARIO_KEYWORDS = {
    "expense": (
        # reimbursement wording
        ("报销", 0.20),
        ("报销单", 0.20),
        ("单据报销", 0.18),
        ("报账", 0.20),
        # travel, fees and receipts
        ("差旅", 0.20),
        ("费用", 0.14),
        ("发票", 0.14),
        ("票据", 0.12),
        ("借款", 0.12),
        ("住宿", 0.10),
        ("餐费", 0.10),
        # entertainment and colloquial spending
        ("招待", 0.18),
        ("招待费", 0.18),
        ("花销", 0.16),
        ("花了", 0.14),
        ("支出", 0.14),
        ("垫付", 0.14),
    ),
    "accounts_receivable": (
        ("应收", 0.22),
        ("回款", 0.20),
        ("收款", 0.18),
        ("账龄", 0.18),
        ("客户欠款", 0.22),
    ),
    "accounts_payable": (
        ("应付", 0.22),
        ("付款", 0.20),
        ("请款", 0.18),
        ("供应商", 0.20),
        ("待付", 0.16),
        ("打款", 0.18),
    ),
    "knowledge": (
        ("制度", 0.20),
        ("规则", 0.20),
        ("办法", 0.18),
        ("依据", 0.18),
        ("政策", 0.16),
        ("知识库", 0.18),
    ),
}
# Keywords associated with lookup / listing / aggregation requests
# (query, view, list, count, summarise, "how much", "how many", amount, detail).
#
# An empty-string entry that used to lead this tuple has been removed: "" is a
# substring of every string, so any `keyword in text` test over this tuple
# would succeed for every input, including empty text.
# NOTE(review): the empty entry may have been a character lost from the
# upstream file -- if so, restore the intended keyword rather than "".
QUERY_KEYWORDS = (
    "查询",
    "查看",
    "列出",
    "统计",
    "汇总",
    "多少",
    "几笔",
    "金额",
    "明细",
)
# Keyword tuples named after intents. How they are matched is not visible in
# this section; presumably by substring -- confirm against the caller.

# "Why", "basis", "reason", "how to handle", "is it allowed", "can I",
# "according to which rule".
EXPLAIN_KEYWORDS = (
    "为什么",
    "依据",
    "原因",
    "怎么处理",
    "是否可以",
    "能不能",
    "按什么规则",
)

# Comparison wording: contrast, compare, relative to, difference, change.
COMPARE_KEYWORDS = (
    "对比",
    "比较",
    "相比",
    "差异",
    "变化",
)

# Risk wording: risk, anomaly, duplicate, over-limit, over-budget, overdue,
# authenticity check, inspection.
RISK_KEYWORDS = (
    "风险",
    "异常",
    "重复",
    "超标",
    "超预算",
    "逾期",
    "验真",
    "巡检",
)

# Drafting wording: generate, draft, compose, create, initiate, prepare.
DRAFT_KEYWORDS = (
    "生成",
    "草稿",
    "起草",
    "拟一份",
    "创建",
    "发起",
    "准备",
)
# Phrases grouped under the name "draft follow-up": continuing, amending,
# confirming or submitting, plus "<field> is ..." lead-ins.
# NOTE(review): presumably used to recognise a follow-up message about an
# existing draft -- confirm against the caller.
DRAFT_FOLLOW_UP_KEYWORDS = (
    # continue / proceed
    "继续",
    "下一步",
    # check and amend
    "核对",
    "补充",
    "补一下",
    "修改",
    "改成",
    "改为",
    "换成",
    "更新",
    # finalise
    "确认",
    "提交",
    "保存",
    # field assignments: customer / place / amount / date / time "is ..."
    "客户是",
    "地点是",
    "金额是",
    "日期是",
    "时间是",
)
# Set of action identifiers for the expense review step.
# NOTE(review): the producer and consumer of these identifiers are outside
# this section -- confirm the list stays in sync with them.
EXPENSE_REVIEW_ACTIONS = {
    "save_draft",
    "next_step",
    "edit_review",
    "link_to_existing_draft",
    "create_new_claim_from_documents",
}
# Phrases that ask for an action to be carried out rather than for
# information: paying, approving, rejecting, and lifecycle changes.
OPERATE_KEYWORDS = (
    # payment requests
    "直接付款",
    "帮我付款",
    "安排付款",
    "发起付款",
    # approval decisions
    "直接审批",
    "审批通过",
    "帮我审批",
    "驳回",
    # lifecycle changes: go live, activate, deactivate, delete
    "上线",
    "激活",
    "停用",
    "删除",
)
# Maps a Chinese expense phrase to an expense-type code. Several phrases share
# one code. Insertion order is kept exactly as before, in case a consumer
# iterates the mapping and stops at the first hit.
EXPENSE_TYPE_KEYWORDS = {
    # travel
    "差旅": "travel",
    "出差": "travel",
    # lodging
    "住宿": "hotel",
    "酒店": "hotel",
    # transport
    "交通": "transport",
    "打车": "transport",
    "网约车": "transport",
    "出租车": "transport",
    "乘车": "transport",
    "乘车费": "transport",
    "用车": "transport",
    "叫车": "transport",
    "车资": "transport",
    "停车费": "transport",
    # meals
    "餐费": "meal",
    "用餐": "meal",
    # meetings
    "会务": "meeting",
    # client entertainment
    "招待费": "entertainment",
    "招待": "entertainment",
    "宴请": "entertainment",
    # office supplies and equipment
    "办公费": "office",
    "办公用品": "office",
    "文具": "office",
    "耗材": "office",
    "办公耗材": "office",
    "打印纸": "office",
    "办公设备": "office",
    # training
    "培训费": "training",
    "培训": "training",
    # communication
    "通讯费": "communication",
    "话费": "communication",
    # staff welfare
    "福利费": "welfare",
    "团建": "welfare",
}
# Words grouped under the name "expense narrative": reimbursement, spending,
# rides, meals, lodging, receipts and travel.
# NOTE(review): presumably used to spot free-text descriptions of an expense
# -- confirm against the caller.
EXPENSE_NARRATIVE_KEYWORDS = (
    # reimbursement and spending
    "报销",
    "报账",
    "招待",
    "招待费",
    "花销",
    "花了",
    "支出",
    "垫付",
    # rides
    "打车",
    "车费",
    "乘车",
    "乘车费",
    "用车",
    "叫车",
    "车资",
    # meals and hosting
    "餐费",
    "吃饭",
    "用餐",
    "宴请",
    "请客",
    # lodging, receipts, travel, on-site visits
    "住宿",
    "发票",
    "票据",
    "差旅",
    "客户现场",
)
# Core receivable terms: receivable, collection, receipt, aging, debt owed,
# not yet collected.
AR_CORE_KEYWORDS = (
    "应收",
    "回款",
    "收款",
    "账龄",
    "欠款",
    "未回款",
)

# Core payable terms: payable, payment, payment request, pending payment,
# remittance, not yet paid.
AP_CORE_KEYWORDS = (
    "应付",
    "付款",
    "请款",
    "待付",
    "打款",
    "未付款",
)
# Set of short reimbursement requests that name no detail at all
# ("reimburse", "I want to reimburse", "help me reimburse", ...).
# NOTE(review): as a set this only supports whole-string membership tests --
# presumably compared against the full, normalised user message; confirm.
GENERIC_EXPENSE_PROMPTS = {
    "报销",
    "我要报销",
    "我想报销",
    "帮我报销",
    "我要申请报销",
    "发起报销",
    "提交报销",
}
# Chinese display label for each slot identifier.
MISSING_SLOT_LABELS = {
    "expense_type": "费用类型",  # expense type
    "amount": "金额",  # amount
    "customer_name": "客户单位",  # customer organisation
    "vendor_name": "供应商",  # vendor
    "participants": "参与人员",  # participants
    "attachments": "票据附件",  # receipt attachments
    "time_range": "发生时间",  # time of occurrence
    "reason": "事由说明",  # reason / justification
    "document_id": "单据号",  # document number
}
# Maps a Chinese status phrase to a status code. Several phrases share one
# code, and some phrases contain others ("退回" is inside "已退回", "待审" is
# inside "待审批"); in both cases the two phrases map to the same code.
# Insertion order is unchanged.
STATUS_KEYWORDS = {
    # not yet submitted
    "草稿": "draft",
    "待提交": "draft",
    "待补充": "supplement",
    # sent back
    "退回": "returned",
    "已退回": "returned",
    # in progress
    "进行中": "review",
    "审批中": "review",
    "审核中": "review",
    "流转中": "review",
    "已提交": "submitted",
    "逾期": "overdue",
    # awaiting approval
    "待审批": "pending",
    "待审": "pending",
    # approved
    "已审批": "approved",
    "已通过": "approved",
    "已审核": "approved",
    # settlement
    "已入账": "paid",
    "已付款": "paid",
    "未付款": "unpaid",
    "未回款": "unreceived",
}
# Twenty Chinese city names recognised as locations. This is a fixed list;
# a city not listed here is not covered by this constant.
LOCATION_KEYWORDS = (
    "北京",  # Beijing
    "上海",  # Shanghai
    "广州",  # Guangzhou
    "深圳",  # Shenzhen
    "杭州",  # Hangzhou
    "南京",  # Nanjing
    "苏州",  # Suzhou
    "成都",  # Chengdu
    "重庆",  # Chongqing
    "天津",  # Tianjin
    "武汉",  # Wuhan
    "西安",  # Xi'an
    "郑州",  # Zhengzhou
    "长沙",  # Changsha
    "青岛",  # Qingdao
    "厦门",  # Xiamen
    "宁波",  # Ningbo
    "合肥",  # Hefei
    "济南",  # Jinan
    "福州",  # Fuzhou
)
# Role codes collected under the name "privileged".
# NOTE(review): what the privilege grants is decided by code outside this
# section -- confirm before adding or removing a role.
PRIVILEGED_ROLE_CODES = {
    "manager",
    "finance",
    "approver",
    "executive",
}

# Scenario names collected under the name "contextual"; these are the same
# four names used as keys of the scenario keyword table in this module.
CONTEXTUAL_SCENARIOS = {
    "expense",
    "accounts_receivable",
    "accounts_payable",
    "knowledge",
}

# Intent names associated with the knowledge scenario by this constant's name.
KNOWLEDGE_INTENTS = {
    "query",
    "explain",
    "compare",
}
@dataclass(slots=True)
class ReferenceCatalog:
employees: list[str]
departments: list[str]
customers: list[str]
vendors: list[str]
projects: list[str]
class LlmOntologyEntityHint(BaseModel):
    """One entity hint, as a pydantic model.

    ``type`` and ``value`` are required. ``normalized_value`` defaults to
    ``None``, ``role`` to ``"target"`` and ``confidence`` to ``0.72``.
    ``confidence`` must lie in the closed range 0.0 to 1.0.

    NOTE(review): judging by the class name, instances are parsed from LLM
    output -- confirm against the caller.
    """

    # Keys not declared below are dropped during validation, not rejected.
    model_config = ConfigDict(extra="ignore")

    type: str
    value: str
    normalized_value: str | None = None
    role: str = "target"
    confidence: float = Field(default=0.72, ge=0.0, le=1.0)
class LlmOntologyParseResult(BaseModel):
    """A parse result, as a pydantic model in which every field has a default.

    Defaults: ``scenario`` is ``"unknown"``, ``intent`` is ``"query"``,
    ``confidence`` is ``0.0`` (constrained to 0.0 to 1.0),
    ``clarification_required`` is ``False``, ``clarification_question`` is
    ``None``, and the three list fields are new empty lists per instance.

    NOTE(review): judging by the class name, instances are parsed from LLM
    output -- confirm against the caller.
    """

    # Keys not declared below are dropped during validation, not rejected.
    model_config = ConfigDict(extra="ignore")

    # Classification
    scenario: OntologyScenario = Field(default="unknown")
    intent: OntologyIntent = Field(default="query")
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)

    # Clarification
    clarification_required: bool = False
    clarification_question: str | None = None
    missing_slots: list[str] = Field(default_factory=list)
    ambiguity: list[str] = Field(default_factory=list)

    # Extracted entities
    entity_hints: list[LlmOntologyEntityHint] = Field(default_factory=list)