refactor(server): split oversized backend services
This commit is contained in:
@@ -2,178 +2,20 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field, ValidationError
|
||||
from pydantic import ValidationError
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DocumentField:
|
||||
key: str
|
||||
label: str
|
||||
value: str
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DocumentInsight:
|
||||
document_type: str
|
||||
document_type_label: str
|
||||
scene_code: str
|
||||
scene_label: str
|
||||
expense_type: str
|
||||
fields: tuple[DocumentField, ...] = ()
|
||||
classification_source: str = "rule"
|
||||
classification_confidence: float = 0.0
|
||||
evidence: tuple[str, ...] = ()
|
||||
warnings: tuple[str, ...] = ()
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DocumentRule:
|
||||
document_type: str
|
||||
document_type_label: str
|
||||
scene_code: str
|
||||
scene_label: str
|
||||
expense_type: str
|
||||
keywords: tuple[str, ...]
|
||||
score_bias: float = 0.0
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RuleMatch:
|
||||
rule: DocumentRule | None
|
||||
confidence: float
|
||||
evidence: tuple[str, ...]
|
||||
score: float
|
||||
|
||||
|
||||
class LlmDocumentClassification(BaseModel):
    """Pydantic model for a document classification payload.

    Every attribute has a default, so an empty payload validates to the
    "other" classification with zero confidence and no evidence or fields.
    NOTE(review): the class name suggests the payload comes from an LLM
    response; the producer is not visible in this span.
    """

    document_type: str = Field(default="other")
    scene_code: str = Field(default="other")
    scene_label: str = Field(default="其他票据")
    expense_type: str = Field(default="other")
    # Validation rejects confidences outside the closed range [0.0, 1.0].
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)
    # default_factory gives each instance its own fresh list.
    evidence: list[str] = Field(default_factory=list)
    fields: list[DocumentField] = Field(default_factory=list)
|
||||
|
||||
|
||||
DEFAULT_RULE = DocumentRule(
|
||||
document_type="other",
|
||||
document_type_label="其他单据",
|
||||
scene_code="other",
|
||||
scene_label="其他票据",
|
||||
expense_type="other",
|
||||
keywords=(),
|
||||
score_bias=0.0,
|
||||
from app.services.document_intelligence_rules import DEFAULT_RULE, DOCUMENT_RULES, DOCUMENT_TYPE_RULE_MAP, SUPPORTED_DOCUMENT_TYPES
|
||||
from app.services.document_intelligence_types import (
|
||||
DocumentField,
|
||||
DocumentInsight,
|
||||
LlmDocumentClassification,
|
||||
RuleMatch,
|
||||
)
|
||||
|
||||
# Rule table: one DocumentRule per recognised document type. Each rule carries
# its type/scene/expense labels, the keywords associated with that type, and a
# per-rule score bias (positive for the travel/transport types, negative for
# the generic VAT-invoice and receipt types).
# NOTE(review): how keywords and score_bias are combined into a match score is
# decided by the scoring code, which is not part of this table.
DOCUMENT_RULES: tuple[DocumentRule, ...] = (
    DocumentRule(
        document_type="flight_itinerary",
        document_type_label="机票/航班行程单",
        scene_code="travel",
        scene_label="差旅票据",
        expense_type="travel",
        keywords=("电子行程单", "航班号", "航班", "机票", "登机", "航空", "客票"),
        score_bias=0.34,
    ),
    DocumentRule(
        document_type="train_ticket",
        document_type_label="火车/高铁票",
        scene_code="travel",
        scene_label="差旅票据",
        expense_type="travel",
        keywords=("铁路电子客票", "电子客票", "高铁", "火车", "动车", "铁路", "车次", "检票", "二等座", "一等座", "票价"),
        score_bias=0.32,
    ),
    DocumentRule(
        document_type="hotel_invoice",
        document_type_label="酒店住宿票据",
        scene_code="hotel",
        scene_label="住宿票据",
        expense_type="hotel",
        keywords=("住宿", "房费", "客房", "入住", "离店", "酒店", "宾馆", "间夜"),
        score_bias=0.16,
    ),
    DocumentRule(
        document_type="taxi_receipt",
        document_type_label="出租车/网约车票据",
        scene_code="transport",
        scene_label="交通票据",
        expense_type="transport",
        keywords=("滴滴出行", "滴滴", "网约车", "出租车", "打车", "乘车", "用车", "叫车", "车费", "车资", "的士", "快车", "专车", "订单号", "上车", "下车", "起点", "终点", "里程", "司机"),
        score_bias=0.38,
    ),
    DocumentRule(
        document_type="parking_toll_receipt",
        document_type_label="停车/通行费票据",
        scene_code="transport",
        scene_label="交通票据",
        expense_type="transport",
        keywords=("停车费", "通行费", "过路费", "收费站", "停车场", "停车"),
        score_bias=0.28,
    ),
    DocumentRule(
        document_type="meal_receipt",
        document_type_label="餐饮票据",
        scene_code="meal",
        scene_label="餐饮票据",
        expense_type="meal",
        keywords=("餐饮", "餐费", "用餐", "饭店", "酒楼", "餐厅", "食品", "外卖", "咖啡"),
        score_bias=0.14,
    ),
    DocumentRule(
        document_type="office_invoice",
        document_type_label="办公用品票据",
        scene_code="office",
        scene_label="办公用品票据",
        expense_type="office",
        keywords=("办公用品", "文具", "耗材", "打印纸", "墨盒", "硒鼓", "键盘", "鼠标"),
        score_bias=0.14,
    ),
    DocumentRule(
        document_type="meeting_invoice",
        document_type_label="会议/会务票据",
        scene_code="meeting",
        scene_label="会务票据",
        expense_type="meeting",
        keywords=("会议", "会务", "会展", "论坛", "会议室", "会场"),
        score_bias=0.12,
    ),
    DocumentRule(
        document_type="training_invoice",
        document_type_label="培训票据",
        scene_code="training",
        scene_label="培训票据",
        expense_type="training",
        keywords=("培训", "课程", "讲师", "教材", "学费", "认证"),
        score_bias=0.12,
    ),
    DocumentRule(
        document_type="vat_invoice",
        document_type_label="增值税发票",
        scene_code="other",
        scene_label="通用发票",
        expense_type="other",
        keywords=("发票代码", "发票号码", "价税合计", "增值税", "电子发票"),
        score_bias=-0.08,
    ),
    DocumentRule(
        document_type="receipt",
        document_type_label="一般收据/凭证",
        scene_code="other",
        scene_label="其他票据",
        expense_type="other",
        keywords=("收据", "凭证", "票据"),
        score_bias=-0.18,
    ),
)
|
||||
|
||||
# Lookup from document_type to its rule. If two rules ever share a
# document_type, the later entry in DOCUMENT_RULES wins.
DOCUMENT_TYPE_RULE_MAP: dict[str, DocumentRule] = {
    rule.document_type: rule for rule in DOCUMENT_RULES
}

# Every document_type that has a rule (in rule order), followed by the
# catch-all "other" type, which has no entry in the map.
SUPPORTED_DOCUMENT_TYPES: tuple[str, ...] = (*DOCUMENT_TYPE_RULE_MAP, "other")
|
||||
|
||||
AMOUNT_PATTERNS = (
|
||||
re.compile(
|
||||
|
||||
Reference in New Issue
Block a user