Files
X-Financial/server/src/app/services/ontology_rules.py
caoxiaozhu 88ff04bef8 feat: 新增归档中心页面并完善知识库与报销查询能力
新增前端归档中心视图及相关工具函数,扩充知识库文档分类和
提取器支持多种格式,增强编排器报销查询的多维度检索,优
化本体规则和用户代理审核消息,前端完善报销创建和审批详
情交互细节,补充单元测试覆盖。
2026-05-22 16:00:19 +08:00

308 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import re
from dataclasses import dataclass
from pydantic import BaseModel, ConfigDict, Field
from app.schemas.ontology import OntologyIntent, OntologyScenario
# Regular expressions for date expressions found in free text. Each pattern is
# compiled once at import time and exposes its parts through named groups.

# Two "YYYY-M-D" dates joined by 到, 至, ~ or -, with optional whitespace
# around the separator. Groups: start, end.
DATE_RANGE_PATTERN = re.compile(
    r"(?P<start>\d{4}-\d{1,2}-\d{1,2})"
    r"\s*(?:到|至|~|-)\s*"
    r"(?P<end>\d{4}-\d{1,2}-\d{1,2})"
)

# Four-digit year followed by a month, e.g. "2026年5月". Groups: year, month.
EXPLICIT_MONTH_PATTERN = re.compile(
    r"(?P<year>\d{4})年"
    r"(?P<month>\d{1,2})月"
)

# Full date whose parts are separated by 年/月, "/" or "-" (the separators may
# be mixed), with an optional trailing 日. Groups: year, month, day.
EXPLICIT_DATE_PATTERN = re.compile(
    r"(?P<year>\d{4})[年/-]"
    r"(?P<month>\d{1,2})[月/-]"
    r"(?P<day>\d{1,2})日?"
)

# Year-less range such as "5月1日至5月22日"; 日 is optional on both sides.
# Groups: start_month, start_day, end_month, end_day.
MONTH_DAY_RANGE_PATTERN = re.compile(
    r"(?P<start_month>\d{1,2})月(?P<start_day>\d{1,2})日?"
    r"\s*(?:到|至|~|-)\s*"
    r"(?P<end_month>\d{1,2})月(?P<end_day>\d{1,2})日?"
)

# Single year-less date such as "5月22日". Groups: month, day.
MONTH_DAY_PATTERN = re.compile(
    r"(?P<month>\d{1,2})月"
    r"(?P<day>\d{1,2})日?"
)
# Monetary amount: optional comparison prefix, a decimal number, optional unit
# (万元, 万 or 元). Groups: prefix, value, unit.
#
# The prefix alternation contains an empty branch (between ">" and "="), so
# when no comparison word precedes the number the "prefix" group is the empty
# string rather than None; "=" is still reachable through backtracking.
# NOTE(review): the empty branch may be a character that was lost somewhere
# along the way — confirm against the repository copy before changing it.
AMOUNT_PATTERN = re.compile(
    r"(?P<prefix>超过|大于|高于|不少于|不低于|小于|低于|少于|至多|不超过|<=|>=|<|>||=)?"
    r"\s*(?P<value>\d+(?:\.\d+)?)"
    r"\s*(?P<unit>万元|万|元)?"
)

# "Top N" style request: one of top / TOP / 前 / 最高(的) / 最低(的), optional
# whitespace, then the count. Group: top.
TOP_N_PATTERN = re.compile(
    r"(?:top|TOP|前|最高的?|最低的?)"
    r"\s*(?P<top>\d+)"
)
# Scenario name -> tuple of (keyword, number) pairs.
# NOTE(review): the number looks like a per-keyword score contribution used
# when ranking scenarios — confirm against the code that consumes this table.
# Some keywords contain others (e.g. 报销 / 报销单 / 单据报销), so a substring
# scan can hit several entries for the same piece of text.
SCENARIO_KEYWORDS = {
    # Expense claims and reimbursement.
    "expense": (
        ("报销", 0.20),
        ("报销单", 0.20),
        ("单据报销", 0.18),
        ("报账", 0.20),
        ("差旅", 0.20),
        ("费用", 0.14),
        ("发票", 0.14),
        ("票据", 0.12),
        ("借款", 0.12),
        ("住宿", 0.10),
        ("餐费", 0.10),
        ("招待", 0.18),
        ("招待费", 0.18),
        ("花销", 0.16),
        ("花了", 0.14),
        ("支出", 0.14),
        ("垫付", 0.14),
    ),
    # Accounts receivable.
    "accounts_receivable": (
        ("应收", 0.22),
        ("回款", 0.20),
        ("收款", 0.18),
        ("账龄", 0.18),
        ("客户欠款", 0.22),
    ),
    # Accounts payable.
    "accounts_payable": (
        ("应付", 0.22),
        ("付款", 0.20),
        ("请款", 0.18),
        ("供应商", 0.20),
        ("待付", 0.16),
        ("打款", 0.18),
    ),
    # Policy / knowledge-base lookups.
    "knowledge": (
        ("制度", 0.20),
        ("规则", 0.20),
        ("办法", 0.18),
        ("依据", 0.18),
        ("政策", 0.16),
        ("知识库", 0.18),
    ),
}
# Keyword tuples, one per kind of request (query, explain, compare, risk,
# draft). NOTE(review): presumably matched against the user's message to pick
# an intent — confirm against the caller.

# NOTE(review): the first entry is an empty string. "" is a substring of every
# string, so a plain `keyword in text` test would always succeed; this may be
# a character that was lost somewhere along the way — confirm against the
# repository copy before relying on or removing it.
QUERY_KEYWORDS = (
    "",
    "查询", "查看", "列出",
    "统计", "汇总",
    "多少", "几笔", "金额", "明细",
)

# Phrases asking for a reason, basis, or whether something is allowed.
EXPLAIN_KEYWORDS = (
    "为什么",
    "依据",
    "原因",
    "怎么处理",
    "是否可以",
    "能不能",
    "按什么规则",
)

# Phrases asking for a comparison or a change between two things.
COMPARE_KEYWORDS = (
    "对比",
    "比较",
    "相比",
    "差异",
    "变化",
)

# Risk, anomaly and compliance-check vocabulary.
RISK_KEYWORDS = (
    "风险",
    "异常",
    "重复",
    "超标",
    "超预算",
    "逾期",
    "验真",
    "巡检",
)

# Verbs that ask for something to be generated, drafted or started.
DRAFT_KEYWORDS = (
    "生成",
    "草稿",
    "起草",
    "拟一份",
    "创建",
    "发起",
    "准备",
)
# Words and phrases for continuing, correcting or confirming something
# already in progress, plus "<field>是" lead-ins that supply a value.
# NOTE(review): presumably used to detect a follow-up turn on an existing
# draft — confirm against the caller.
DRAFT_FOLLOW_UP_KEYWORDS = (
    # continue / check / supplement
    "继续", "下一步", "核对", "补充", "补一下",
    # change a value
    "修改", "改成", "改为", "换成", "更新",
    # confirm / submit / save
    "确认", "提交", "保存",
    # "<field> is ..." lead-ins
    "客户是", "地点是", "金额是", "日期是", "时间是",
)

# Action identifiers (a mutable set of strings).
# NOTE(review): these look like action codes for the expense review step —
# confirm where they are produced.
EXPENSE_REVIEW_ACTIONS = {
    "save_draft",
    "next_step",
    "edit_review",
    "link_to_existing_draft",
    "create_new_claim_from_documents",
}

# Phrases requesting a direct operation: paying, approving, rejecting,
# activating, deactivating or deleting.
OPERATE_KEYWORDS = (
    # payment
    "直接付款", "帮我付款", "安排付款", "发起付款",
    # approval / rejection
    "直接审批", "审批通过", "帮我审批", "驳回",
    # lifecycle changes
    "上线", "激活", "停用", "删除",
)
# Keyword -> expense type code. The table is written as one group per type
# code and flattened into a plain dict; groups and keywords are listed in the
# order the resulting dict iterates in. Some keywords contain others
# (e.g. 招待 / 招待费, 出租车 / 出租车票), so order can matter to a caller that
# stops at the first substring hit.
EXPENSE_TYPE_KEYWORDS = {
    keyword: type_code
    for type_code, keywords in (
        ("travel", ("差旅", "出差")),
        ("hotel", ("住宿", "酒店")),
        (
            "transport",
            (
                "交通", "打车", "网约车", "出租车", "出租车票", "乘车", "乘车费",
                "用车", "叫车", "车资", "的士", "的士票", "停车费",
            ),
        ),
        ("meal", ("餐费", "用餐")),
        ("meeting", ("会务",)),
        ("entertainment", ("招待费", "招待", "宴请")),
        (
            "office",
            ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备"),
        ),
        ("training", ("培训费", "培训")),
        ("communication", ("通讯费", "话费")),
        ("welfare", ("福利费", "团建")),
    )
    for keyword in keywords
}
# Everyday wording about spending money: reimbursement terms, rides, meals,
# lodging, invoices and travel.
# NOTE(review): presumably used to recognise an expense story told in plain
# language — confirm against the caller.
EXPENSE_NARRATIVE_KEYWORDS = (
    # reimbursement / spending
    "报销", "报账", "招待", "招待费", "花销", "花了", "支出", "垫付",
    # rides and taxis
    "打车", "车费", "乘车", "乘车费", "用车", "叫车", "车资",
    "的士", "的士票", "出租车票",
    # meals and hosting
    "餐费", "吃饭", "用餐", "宴请", "请客",
    # lodging, documents, travel
    "住宿", "发票", "票据", "差旅", "客户现场",
)

# Receivable-side vocabulary.
AR_CORE_KEYWORDS = (
    "应收",
    "回款",
    "收款",
    "账龄",
    "欠款",
    "未回款",
)

# Payable-side vocabulary.
AP_CORE_KEYWORDS = (
    "应付",
    "付款",
    "请款",
    "待付",
    "打款",
    "未付款",
)
# Short requests that mention reimbursement without giving any detail
# (a mutable set of strings).
# NOTE(review): presumably compared against the whole message rather than
# searched as substrings — confirm against the caller.
GENERIC_EXPENSE_PROMPTS = {
    "报销",
    "我要报销", "我想报销", "帮我报销",
    "我要申请报销",
    "发起报销", "提交报销",
}

# Slot identifier -> Chinese display label for that slot.
MISSING_SLOT_LABELS = {
    # what was spent
    "expense_type": "费用类型",
    "amount": "金额",
    # who was involved
    "customer_name": "客户单位",
    "vendor_name": "供应商",
    "participants": "参与人员",
    # evidence and context
    "attachments": "票据附件",
    "time_range": "发生时间",
    "reason": "事由说明",
    "document_id": "单据号",
}
# Chinese status wording -> status code. The table is written as one group
# per status code and flattened into a plain dict; groups and keywords are
# listed in the order the resulting dict iterates in.
#
# NOTE(review): 入账 maps to "archived" while 已入账 maps to "paid". 入账 is a
# substring of 已入账 and comes first, so a first-substring-hit lookup would
# report "archived" for text saying 已入账 — confirm this is intended. The
# same containment exists for 退回 / 已退回, 待审 / 待审批 and 归档 / 已归档,
# but those pairs share a code.
STATUS_KEYWORDS = {
    keyword: status_code
    for status_code, keywords in (
        ("draft", ("草稿", "待提交")),
        ("supplement", ("待补充",)),
        ("returned", ("退回", "已退回")),
        ("review", ("进行中", "审批中", "审核中", "流转中")),
        ("submitted", ("已提交",)),
        ("overdue", ("逾期",)),
        ("pending", ("待审批", "待审")),
        ("approved", ("已审批", "已通过", "已审核")),
        ("archived", ("归档", "已归档", "入账")),
        ("paid", ("已入账", "已付款")),
        ("unpaid", ("未付款",)),
        ("unreceived", ("未回款",)),
    )
    for keyword in keywords
}
# City names recognised as locations (twenty entries, fixed order).
LOCATION_KEYWORDS = (
    "北京", "上海", "广州", "深圳", "杭州",
    "南京", "苏州", "成都", "重庆", "天津",
    "武汉", "西安", "郑州", "长沙", "青岛",
    "厦门", "宁波", "合肥", "济南", "福州",
)

# Role codes (a mutable set of strings).
# NOTE(review): the name suggests these roles get elevated access — confirm
# what the caller grants them.
PRIVILEGED_ROLE_CODES = {
    "manager",
    "finance",
    "approver",
    "executive",
}

# Scenario names (a mutable set of strings).
# NOTE(review): presumably the scenarios that may be inherited from
# conversation context — confirm against the caller.
CONTEXTUAL_SCENARIOS = {
    "expense",
    "accounts_receivable",
    "accounts_payable",
    "knowledge",
}

# Intent names (a mutable set of strings).
# NOTE(review): presumably the intents allowed for the knowledge scenario —
# confirm against the caller.
KNOWLEDGE_INTENTS = {
    "query",
    "explain",
    "compare",
}
@dataclass(slots=True)
class ReferenceCatalog:
employees: list[str]
departments: list[str]
customers: list[str]
vendors: list[str]
projects: list[str]
# A single entity hint carried inside an LLM parse result. Documented with
# comments rather than a class docstring on purpose: pydantic copies a model
# docstring into the generated JSON schema as its description.
class LlmOntologyEntityHint(BaseModel):
    # Keys in the input that are not declared below are dropped, not rejected.
    model_config = ConfigDict(extra="ignore")

    # Required fields.
    type: str
    value: str
    # Optional fields with their defaults.
    normalized_value: str | None = None
    role: str = "target"
    # Validated to lie in [0.0, 1.0]; 0.72 applies when the input omits it.
    confidence: float = Field(default=0.72, ge=0.0, le=1.0)
# Structured parse result with a scenario, an intent, a confidence and
# optional clarification data. Every field has a default, so an empty payload
# validates. Documented with comments rather than a class docstring on
# purpose: pydantic copies a model docstring into the generated JSON schema
# as its description.
class LlmOntologyParseResult(BaseModel):
    # Keys in the input that are not declared below are dropped, not rejected.
    model_config = ConfigDict(extra="ignore")

    # Classification; the defaults apply when the input leaves these out.
    scenario: OntologyScenario = Field(default="unknown")
    intent: OntologyIntent = Field(default="query")
    # Validated to lie in [0.0, 1.0].
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)

    # Clarification request, if any.
    clarification_required: bool = False
    clarification_question: str | None = None

    # default_factory gives every instance its own fresh list.
    missing_slots: list[str] = Field(default_factory=list)
    ambiguity: list[str] = Field(default_factory=list)
    entity_hints: list[LlmOntologyEntityHint] = Field(default_factory=list)