新增前端归档中心视图及相关工具函数,扩充知识库文档分类和提取器支持多种格式,增强编排器报销查询的多维度检索,优化本体规则和用户代理审核消息,前端完善报销创建和审批详情交互细节,补充单元测试覆盖。
308 lines
7.4 KiB
Python
308 lines
7.4 KiB
Python
from __future__ import annotations
|
||
|
||
import re
|
||
from dataclasses import dataclass
|
||
|
||
from pydantic import BaseModel, ConfigDict, Field
|
||
|
||
from app.schemas.ontology import OntologyIntent, OntologyScenario
|
||
|
||
# Pre-compiled patterns for extracting dates, amounts and "top N" limits from
# text.  Every pattern exposes its parts through named groups.

# Range between two full dates written as YYYY-M-D, joined by 到 / 至 / ~ / -.
DATE_RANGE_PATTERN = re.compile(
    r"(?P<start>\d{4}-\d{1,2}-\d{1,2})\s*(?:到|至|~|-)\s*(?P<end>\d{4}-\d{1,2}-\d{1,2})"
)

# Year and month written with the 年 / 月 markers (no day part).
EXPLICIT_MONTH_PATTERN = re.compile(r"(?P<year>\d{4})年(?P<month>\d{1,2})月")

# Full date; 年, 月, "/" or "-" may separate the parts, trailing 日 is optional.
EXPLICIT_DATE_PATTERN = re.compile(
    r"(?P<year>\d{4})[年/-](?P<month>\d{1,2})[月/-](?P<day>\d{1,2})日?"
)

# Range between two month/day values that carry no year.
MONTH_DAY_RANGE_PATTERN = re.compile(
    r"(?P<start_month>\d{1,2})月(?P<start_day>\d{1,2})日?\s*(?:到|至|~|-)\s*"
    r"(?P<end_month>\d{1,2})月(?P<end_day>\d{1,2})日?"
)

# Single month/day value without a year.
MONTH_DAY_PATTERN = re.compile(r"(?P<month>\d{1,2})月(?P<day>\d{1,2})日?")

# Number with an optional comparison prefix and an optional currency unit.
# Only the numeric value is mandatory, so a bare number also matches.
# NOTE(review): the prefix alternation ends with "=|=", i.e. the same
# alternative twice as it appears here; one of them may have been a
# full-width "=" originally -- confirm against the repository copy.
AMOUNT_PATTERN = re.compile(
    r"(?P<prefix>超过|大于|高于|不少于|不低于|小于|低于|少于|至多|不超过|<=|>=|<|>|=|=)?\s*"
    r"(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>万元|万|元)?"
)

# "Top N" style limit: top / TOP / 前 / 最高(的) / 最低(的) followed by digits.
# The match is case-sensitive, so mixed-case "Top" is not recognised.
TOP_N_PATTERN = re.compile(r"(?:top|TOP|前|最高的?|最低的?)\s*(?P<top>\d+)")
|
||
|
||
# Scenario name -> tuple of (keyword, number) pairs.
# NOTE(review): the numbers look like per-keyword score weights used when
# classifying a message into a scenario -- confirm against the consumer.
SCENARIO_KEYWORDS = {
    "expense": (
        ("报销", 0.20),
        ("报销单", 0.20),
        ("单据报销", 0.18),
        ("报账", 0.20),
        ("差旅", 0.20),
        ("费用", 0.14),
        ("发票", 0.14),
        ("票据", 0.12),
        ("借款", 0.12),
        ("住宿", 0.10),
        ("餐费", 0.10),
        ("招待", 0.18),
        ("招待费", 0.18),
        ("花销", 0.16),
        ("花了", 0.14),
        ("支出", 0.14),
        ("垫付", 0.14),
    ),
    "accounts_receivable": (
        ("应收", 0.22),
        ("回款", 0.20),
        ("收款", 0.18),
        ("账龄", 0.18),
        ("客户欠款", 0.22),
    ),
    "accounts_payable": (
        ("应付", 0.22),
        ("付款", 0.20),
        ("请款", 0.18),
        ("供应商", 0.20),
        ("待付", 0.16),
        ("打款", 0.18),
    ),
    "knowledge": (
        ("制度", 0.20),
        ("规则", 0.20),
        ("办法", 0.18),
        ("依据", 0.18),
        ("政策", 0.16),
        ("知识库", 0.18),
    ),
}
|
||
|
||
# Keyword tuples grouped by the kind of request they signal.
# NOTE(review): presumably each tuple backs one intent (query / explain /
# compare / risk check / draft) -- confirm against the code that reads them.
QUERY_KEYWORDS = (
    "查",
    "查询",
    "查看",
    "列出",
    "统计",
    "汇总",
    "多少",
    "几笔",
    "金额",
    "明细",
)
EXPLAIN_KEYWORDS = ("为什么", "依据", "原因", "怎么处理", "是否可以", "能不能", "按什么规则")
COMPARE_KEYWORDS = ("对比", "比较", "相比", "差异", "变化")
RISK_KEYWORDS = ("风险", "异常", "重复", "超标", "超预算", "逾期", "验真", "巡检")
DRAFT_KEYWORDS = ("生成", "草稿", "起草", "拟一份", "创建", "发起", "准备")
|
||
# Phrases for continuing or amending something already in progress
# (continue, next step, change to ..., "the customer is ...", etc.).
# NOTE(review): presumably matched against follow-up messages on an existing
# draft -- confirm against the caller.
DRAFT_FOLLOW_UP_KEYWORDS = (
    "继续",
    "下一步",
    "核对",
    "补充",
    "补一下",
    "修改",
    "改成",
    "改为",
    "换成",
    "更新",
    "确认",
    "提交",
    "保存",
    "客户是",
    "地点是",
    "金额是",
    "日期是",
    "时间是",
)

# Set of action identifiers for the expense review step.
EXPENSE_REVIEW_ACTIONS = {
    "save_draft",
    "next_step",
    "edit_review",
    "link_to_existing_draft",
    "create_new_claim_from_documents",
}

# Phrases asking for a state-changing operation (pay, approve, reject,
# activate, disable, delete) rather than for information.
OPERATE_KEYWORDS = (
    "直接付款",
    "帮我付款",
    "安排付款",
    "发起付款",
    "直接审批",
    "审批通过",
    "帮我审批",
    "驳回",
    "上线",
    "激活",
    "停用",
    "删除",
)
|
||
|
||
# Chinese expense keyword -> expense type code.
# Insertion order is kept exactly as found, in case a consumer returns the
# first matching key.
# NOTE(review): "车费" is listed in EXPENSE_NARRATIVE_KEYWORDS but has no entry
# here (only "乘车费" / "停车费") -- confirm whether that is intentional.
EXPENSE_TYPE_KEYWORDS = {
    "差旅": "travel",
    "出差": "travel",
    "住宿": "hotel",
    "酒店": "hotel",
    "交通": "transport",
    "打车": "transport",
    "网约车": "transport",
    "出租车": "transport",
    "出租车票": "transport",
    "乘车": "transport",
    "乘车费": "transport",
    "用车": "transport",
    "叫车": "transport",
    "车资": "transport",
    "的士": "transport",
    "的士票": "transport",
    "停车费": "transport",
    "餐费": "meal",
    "用餐": "meal",
    "会务": "meeting",
    "招待费": "entertainment",
    "招待": "entertainment",
    "宴请": "entertainment",
    "办公费": "office",
    "办公用品": "office",
    "文具": "office",
    "耗材": "office",
    "办公耗材": "office",
    "打印纸": "office",
    "办公设备": "office",
    "培训费": "training",
    "培训": "training",
    "通讯费": "communication",
    "话费": "communication",
    "福利费": "welfare",
    "团建": "welfare",
}
|
||
|
||
# Words that show up when someone describes an expense in plain language
# (reimbursement, taxi, meals, lodging, invoices, ...).
# NOTE(review): presumably used to detect expense narratives in free text --
# confirm against the caller.
EXPENSE_NARRATIVE_KEYWORDS = (
    "报销",
    "报账",
    "招待",
    "招待费",
    "花销",
    "花了",
    "支出",
    "垫付",
    "打车",
    "车费",
    "乘车",
    "乘车费",
    "用车",
    "叫车",
    "车资",
    "的士",
    "的士票",
    "出租车票",
    "餐费",
    "吃饭",
    "用餐",
    "宴请",
    "请客",
    "住宿",
    "发票",
    "票据",
    "差旅",
    "客户现场",
)
|
||
|
||
# Core terms for receivables (AR) and payables (AP).
AR_CORE_KEYWORDS = ("应收", "回款", "收款", "账龄", "欠款", "未回款")
AP_CORE_KEYWORDS = ("应付", "付款", "请款", "待付", "打款", "未付款")

# Bare "I want to file an expense" prompts that carry no further detail.
# NOTE(review): presumably compared against the whole message -- confirm.
GENERIC_EXPENSE_PROMPTS = {
    "报销",
    "我要报销",
    "我想报销",
    "帮我报销",
    "我要申请报销",
    "发起报销",
    "提交报销",
}

# Slot identifier -> Chinese display label.
MISSING_SLOT_LABELS = {
    "expense_type": "费用类型",
    "amount": "金额",
    "customer_name": "客户单位",
    "vendor_name": "供应商",
    "participants": "参与人员",
    "attachments": "票据附件",
    "time_range": "发生时间",
    "reason": "事由说明",
    "document_id": "单据号",
}
|
||
|
||
# Chinese status wording -> status code.
# Insertion order is kept exactly as found.
# NOTE(review): "入账" maps to "archived" while "已入账" maps to "paid", and
# "入账" comes first.  If a consumer does first-match substring lookup in
# dict order, "已入账" would resolve to "archived" -- confirm the lookup
# strategy before relying on the "paid" mapping.
STATUS_KEYWORDS = {
    "草稿": "draft",
    "待提交": "draft",
    "待补充": "supplement",
    "退回": "returned",
    "已退回": "returned",
    "进行中": "review",
    "审批中": "review",
    "审核中": "review",
    "流转中": "review",
    "已提交": "submitted",
    "逾期": "overdue",
    "待审批": "pending",
    "待审": "pending",
    "已审批": "approved",
    "已通过": "approved",
    "已审核": "approved",
    "归档": "archived",
    "已归档": "archived",
    "入账": "archived",
    "已入账": "paid",
    "已付款": "paid",
    "未付款": "unpaid",
    "未回款": "unreceived",
}
|
||
|
||
# Fixed list of Chinese city names.
# NOTE(review): presumably used to spot a location mentioned in a message --
# confirm against the caller.
LOCATION_KEYWORDS = (
    "北京",
    "上海",
    "广州",
    "深圳",
    "杭州",
    "南京",
    "苏州",
    "成都",
    "重庆",
    "天津",
    "武汉",
    "西安",
    "郑州",
    "长沙",
    "青岛",
    "厦门",
    "宁波",
    "合肥",
    "济南",
    "福州",
)
|
||
|
||
# Role codes grouped as "privileged".
# NOTE(review): what the privilege grants is not visible here -- confirm.
PRIVILEGED_ROLE_CODES = {"manager", "finance", "approver", "executive"}

# Scenario names grouped as "contextual"; they are the same four names that
# key SCENARIO_KEYWORDS.
CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "knowledge"}

# Intent names grouped under "knowledge".
KNOWLEDGE_INTENTS = {"query", "explain", "compare"}
|
||
|
||
|
||
@dataclass(slots=True)
|
||
class ReferenceCatalog:
|
||
employees: list[str]
|
||
departments: list[str]
|
||
customers: list[str]
|
||
vendors: list[str]
|
||
projects: list[str]
|
||
|
||
|
||
class LlmOntologyEntityHint(BaseModel):
    """One entity hint: a typed value with a role and a confidence score.

    NOTE(review): the class name suggests these hints come from an LLM
    parsing step -- confirm against the producer.
    """

    # Unknown keys in the input are dropped instead of raising.
    model_config = ConfigDict(extra="ignore")

    # Entity type label (required).
    type: str
    # Entity value (required).
    value: str
    # Optional normalized form of ``value``; None when not supplied.
    normalized_value: str | None = None
    # Role of the entity; defaults to "target".
    role: str = "target"
    # Confidence constrained to [0.0, 1.0]; defaults to 0.72.
    confidence: float = Field(default=0.72, ge=0.0, le=1.0)
|
||
|
||
|
||
class LlmOntologyParseResult(BaseModel):
|
||
model_config = ConfigDict(extra="ignore")
|
||
|
||
scenario: OntologyScenario = Field(default="unknown")
|
||
intent: OntologyIntent = Field(default="query")
|
||
confidence: float = Field(default=0.0, ge=0.0, le=1.0)
|
||
clarification_required: bool = False
|
||
clarification_question: str | None = None
|
||
missing_slots: list[str] = Field(default_factory=list)
|
||
ambiguity: list[str] = Field(default_factory=list)
|
||
entity_hints: list[LlmOntologyEntityHint] = Field(default_factory=list)
|