feat: 新增风险图谱算法与系统仪表盘及操作反馈体系
后端新增风险图谱算法模块、风险观察与反馈服务、规则 DSL 校验器和可解释性引擎,完善系统仪表盘和财务仪表盘统计,优化 agent 运行和编排执行链路,清理旧开发文档;前端新增系统趋势、负载热力图等多种仪表盘图表组件,完善操作反馈对话框和工作台日期选择器,优化报销创建和审批详情交互;补充单元测试覆盖。
This commit is contained in:
@@ -22,6 +22,30 @@ from app.schemas.receipt_folder import (
|
||||
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
|
||||
from app.services.ocr import SUPPORTED_SUFFIXES
|
||||
|
||||
# NOTE: OCR output of Chinese receipts frequently uses full-width punctuation.
# Wherever a colon or yen sign is accepted below, both the ASCII form and the
# full-width form (U+FF1A / U+FFE5) are listed explicitly via \uXXXX escapes.

# Calendar date with year 19xx/20xx, e.g. 2024-05-01, 2024/5/1, 2024.05.01, 2024年5月1日.
RECEIPT_DATE_PATTERN = re.compile(
    r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)"
)
# 24-hour clock time "H:MM" / "HH:MM" that is not embedded in a longer digit run.
RECEIPT_TIME_PATTERN = re.compile(r"(?<!\d)([01]?\d|2[0-3])[:\uff1a]([0-5]\d)(?!\d)")
# A date that directly follows an invoice-date label (开票日期 / 发票日期 / 开票时间).
TRAIN_INVOICE_DATE_PATTERN = re.compile(
    r"(?:开票日期|发票日期|开票时间)\s*[:\uff1a]?\s*"
    r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)"
)
# "<origin> 至/到/→/->/—/–/- <destination>", each side being 2-12 CJK characters.
TRAIN_ROUTE_PATTERN = re.compile(
    r"([\u4e00-\u9fa5]{2,12})站?\s*(?:至|到|→|->|—|–|-)\s*"
    r"([\u4e00-\u9fa5]{2,12})站?"
)
# Train number after a 车次 / 列车号 label: one of G C D Z K T L Y S plus 1-5 digits.
TRAIN_NO_PATTERN = re.compile(r"(?:车次|列车号)\s*[:\uff1a]?\s*([GCDZKTLYS]\d{1,5})", re.IGNORECASE)
# Unlabelled train number that is not part of a longer alphanumeric token.
TRAIN_STANDALONE_NO_PATTERN = re.compile(r"(?<![A-Z0-9])([GCDZKTLYS]\d{1,5})(?![A-Z0-9])", re.IGNORECASE)
# Passenger name (2-20 CJK characters or middle dots) after a name label.
TRAIN_PASSENGER_PATTERN = re.compile(r"(?:乘车人|旅客姓名|姓名)\s*[:\uff1a]?\s*([\u4e00-\u9fa5·]{2,20})")
# Possibly masked identity number (digits, X/x, '*') after an ID label.
TRAIN_ID_PATTERN = re.compile(
    r"(?:有效身份证件号码|身份证件号码|证件号码|身份证号)\s*[:\uff1a]?\s*([0-9Xx*]{6,24})"
)
# Unlabelled identity-number-like token: 6-17 digits followed by 2-8 digits/X/'*'.
TRAIN_ID_FALLBACK_PATTERN = re.compile(r"(?<![0-9A-Za-z])([0-9]{6,17}[0-9Xx*]{2,8})(?![0-9A-Za-z])")
# Electronic ticket number (6-32 letters/digits) after a 电子客票号 / 客票号 label.
TRAIN_ETICKET_PATTERN = re.compile(r"(?:电子客票号|客票号)\s*[:\uff1a]?\s*([A-Z0-9]{6,32})", re.IGNORECASE)
# Seat class keyword.
TRAIN_SEAT_CLASS_PATTERN = re.compile(r"(商务座|特等座|一等座|二等座|一等卧|二等卧|软卧|硬卧|软座|硬座|无座)")
# Carriage number (1-2 digits, optional trailing 车) after a 车厢 / 车厢号 label.
TRAIN_CARRIAGE_PATTERN = re.compile(r"(?:车厢|车厢号)\s*[:\uff1a]?\s*([0-9]{1,2}\s*车?)")
# Seat number (1-3 digits, optional A-F letter or 号) after a 座位 / 座位号 label.
TRAIN_SEAT_NO_PATTERN = re.compile(r"(?:座位|座位号)\s*[:\uff1a]?\s*([0-9]{1,3}[A-F号]?)", re.IGNORECASE)
# Combined "<carriage>车<seat>号" form such as "05车12A号".
TRAIN_COMBINED_SEAT_PATTERN = re.compile(r"([0-9]{1,2})车\s*([0-9]{1,3}[A-F])号?", re.IGNORECASE)
# Amount after a 票价 / 金额 label; colons, yen signs and whitespace may precede the digits.
TRAIN_FARE_PATTERN = re.compile(r"(?:票价|金额)\s*[:\uff1a¥\uffe5\s]*([0-9]+(?:[.,][0-9]{1,2})?)")
|
||||
|
||||
|
||||
class ReceiptFolderService:
|
||||
def __init__(self) -> None:
|
||||
@@ -372,8 +396,8 @@ class ReceiptFolderService:
|
||||
def _is_previewable(media_type: str) -> bool:
|
||||
return str(media_type or "").startswith("image/") or str(media_type or "") == "application/pdf"
|
||||
|
||||
@staticmethod
|
||||
def _build_document_meta(document: Any | None) -> dict[str, Any]:
|
||||
@classmethod
|
||||
def _build_document_meta(cls, document: Any | None) -> dict[str, Any]:
|
||||
fields = []
|
||||
for field in list(getattr(document, "document_fields", []) or []):
|
||||
if isinstance(field, dict):
|
||||
@@ -393,18 +417,33 @@ class ReceiptFolderService:
|
||||
}
|
||||
)
|
||||
fields = [field for field in fields if field["label"] and field["value"]]
|
||||
ocr_text = str(getattr(document, "text", "") or "")
|
||||
summary = str(getattr(document, "summary", "") or "")
|
||||
document_type = str(getattr(document, "document_type", "") or "other")
|
||||
document_type_label = str(getattr(document, "document_type_label", "") or "其他单据")
|
||||
scene_label = str(getattr(document, "scene_label", "") or "其他票据")
|
||||
if cls._is_train_ticket_values(
|
||||
document_type=document_type,
|
||||
document_type_label=document_type_label,
|
||||
scene_label=scene_label,
|
||||
text=f"{summary}\n{ocr_text}",
|
||||
):
|
||||
fields = cls._enrich_train_ticket_field_dicts(
|
||||
fields,
|
||||
text=f"{ocr_text}\n{summary}\n{str(getattr(document, 'filename', '') or '')}",
|
||||
)
|
||||
return {
|
||||
"engine": str(getattr(document, "engine", "") or ""),
|
||||
"model": str(getattr(document, "model", "") or ""),
|
||||
"ocr_text": str(getattr(document, "text", "") or ""),
|
||||
"summary": str(getattr(document, "summary", "") or ""),
|
||||
"ocr_text": ocr_text,
|
||||
"summary": summary,
|
||||
"ocr_avg_score": float(getattr(document, "avg_score", 0.0) or 0.0),
|
||||
"ocr_line_count": int(getattr(document, "line_count", 0) or 0),
|
||||
"page_count": int(getattr(document, "page_count", 1) or 1),
|
||||
"document_type": str(getattr(document, "document_type", "") or "other"),
|
||||
"document_type_label": str(getattr(document, "document_type_label", "") or "其他单据"),
|
||||
"document_type": document_type,
|
||||
"document_type_label": document_type_label,
|
||||
"scene_code": str(getattr(document, "scene_code", "") or "other"),
|
||||
"scene_label": str(getattr(document, "scene_label", "") or "其他票据"),
|
||||
"scene_label": scene_label,
|
||||
"ocr_classification_source": str(getattr(document, "classification_source", "") or ""),
|
||||
"ocr_classification_confidence": float(getattr(document, "classification_confidence", 0.0) or 0.0),
|
||||
"ocr_classification_evidence": [
|
||||
@@ -484,8 +523,8 @@ class ReceiptFolderService:
|
||||
scene_label=str(meta.get("scene_label") or "其他票据"),
|
||||
summary=str(meta.get("summary") or ""),
|
||||
amount=self._resolve_editable_or_field(meta, "amount", labels=("金额", "价税合计", "票价")),
|
||||
document_date=self._resolve_editable_or_field(meta, "document_date", labels=("日期", "开票日期", "乘车日期")),
|
||||
merchant_name=self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方")),
|
||||
document_date=self._resolve_receipt_document_date(meta),
|
||||
merchant_name=self._resolve_receipt_merchant_name(meta),
|
||||
avg_score=float(meta.get("ocr_avg_score") or 0.0),
|
||||
uploaded_at=self._parse_datetime(meta.get("uploaded_at")),
|
||||
linked_at=self._parse_datetime(meta.get("linked_at")),
|
||||
@@ -499,7 +538,7 @@ class ReceiptFolderService:
|
||||
)
|
||||
|
||||
def _resolve_fields(self, meta: dict[str, Any]) -> list[ReceiptFolderFieldRead]:
|
||||
return [
|
||||
fields = [
|
||||
ReceiptFolderFieldRead(
|
||||
key=str(field.get("key") or ""),
|
||||
label=str(field.get("label") or ""),
|
||||
@@ -508,6 +547,45 @@ class ReceiptFolderService:
|
||||
for field in list(meta.get("document_fields") or [])
|
||||
if isinstance(field, dict) and str(field.get("label") or "").strip()
|
||||
]
|
||||
if self._is_train_ticket_meta(meta):
|
||||
return [
|
||||
ReceiptFolderFieldRead(**field)
|
||||
for field in self._enrich_train_ticket_field_dicts(
|
||||
[field.model_dump() for field in fields],
|
||||
text=self._receipt_text(meta),
|
||||
)
|
||||
]
|
||||
return fields
|
||||
|
||||
def _resolve_receipt_document_date(self, meta: dict[str, Any]) -> str:
    """Pick the date to show for a receipt, preferring the invoice date.

    Lookup order:

    1. a non-blank ``meta["editable_fields"]["document_date"]`` (returned
       stripped, without further normalisation);
    2. a resolved field keyed ``invoice_date`` / ``issue_date`` or labelled
       开票日期 / 发票日期;
    3. for train tickets, an invoice date parsed from the receipt text;
    4. a resolved field keyed ``document_date`` or labelled 日期 / 乘车日期 /
       列车出发时间 / 行程日期.

    Values from steps 2 and 4 go through ``_normalize_receipt_date_value``.

    Args:
        meta: Stored receipt metadata.

    Returns:
        The chosen date string, or ``""`` when no source yields a value.
    """
    editable = meta.get("editable_fields")
    if isinstance(editable, dict):
        override = str(editable.get("document_date") or "").strip()
        if override:
            return override

    fields = self._resolve_fields(meta)

    for field in fields:
        if field.key in {"invoice_date", "issue_date"} or field.label in {"开票日期", "发票日期"}:
            normalized = self._normalize_receipt_date_value(field.value)
            # A blank invoice-date field must not suppress the fallbacks below.
            if normalized:
                return normalized

    if self._is_train_ticket_meta(meta):
        invoice_date = self._extract_train_invoice_date(self._receipt_text(meta))
        if invoice_date:
            return invoice_date

    for field in fields:
        if field.key == "document_date" or field.label in {"日期", "乘车日期", "列车出发时间", "行程日期"}:
            normalized = self._normalize_receipt_date_value(field.value)
            # Keep scanning when this field is blank; a later one may hold a date.
            if normalized:
                return normalized
    return ""
|
||||
|
||||
def _resolve_receipt_merchant_name(self, meta: dict[str, Any]) -> str:
    """Resolve the merchant name for a receipt.

    The editable/recognised value for ``merchant_name`` (labels 商户, 销售方,
    收款方, 开票方) wins when it is non-empty. Otherwise train tickets get the
    fixed name "中国铁路", and every other receipt gets an empty string.
    """
    merchant = self._resolve_editable_or_field(
        meta,
        "merchant_name",
        labels=("商户", "销售方", "收款方", "开票方"),
    )
    # The train-ticket check only runs when no explicit merchant was found.
    if not merchant and self._is_train_ticket_meta(meta):
        return "中国铁路"
    return merchant or ""
|
||||
|
||||
def _resolve_editable_or_field(self, meta: dict[str, Any], key: str, *, labels: tuple[str, ...]) -> str:
|
||||
editable = meta.get("editable_fields")
|
||||
@@ -521,6 +599,254 @@ class ReceiptFolderService:
|
||||
return field.value
|
||||
return ""
|
||||
|
||||
@classmethod
def _enrich_train_ticket_field_dicts(
    cls,
    fields: list[dict[str, Any]],
    *,
    text: str,
) -> list[dict[str, str]]:
    """Normalise recognised fields and append train-ticket fields parsed from text.

    Existing fields are copied with ``key``/``label``/``value`` coerced to
    stripped strings; entries with a blank label or value are dropped, and a
    ``trip_no`` field labelled "车次/航班" is relabelled "车次".

    Derived fields (invoice date, departure time, stations, route, train
    number, passenger, ID number, e-ticket number, seat class, carriage, seat
    number, fare) are then appended in that order. A derived field is skipped
    when its value is blank or when its key or its label is already present.

    Args:
        fields: Field dictionaries as stored on the document/meta.
        text: Receipt text that the extraction helpers search.

    Returns:
        A new list of ``{"key", "label", "value"}`` dictionaries.
    """
    normalized: list[dict[str, str]] = []
    # Sets mirror the keys/labels in `normalized` so dedup checks stay O(1).
    seen_keys: set[str] = set()
    seen_labels: set[str] = set()

    def remember(key: str, label: str, value: str) -> None:
        normalized.append({"key": key, "label": label, "value": value})
        if key:  # blank keys never block later fields
            seen_keys.add(key)
        seen_labels.add(label)

    for field in fields:
        key = str(field.get("key") or "").strip()
        label = str(field.get("label") or "").strip()
        value = str(field.get("value") or "").strip()
        if not label or not value:
            continue
        if key == "trip_no" and label == "车次/航班":
            label = "车次"
        remember(key, label, value)

    def add_field(key: str, label: str, value: str) -> None:
        cleaned = str(value or "").strip()
        if not cleaned or key in seen_keys or label in seen_labels:
            return
        remember(key, label, cleaned)

    add_field("invoice_date", "开票日期", cls._extract_train_invoice_date(text))
    add_field("trip_date", "列车出发时间", cls._extract_train_trip_datetime(text))

    departure, arrival = cls._extract_train_route_points(text)
    add_field("departure_station", "出发地点", departure)
    add_field("arrival_station", "到达地点", arrival)
    if departure and arrival:
        add_field("route", "行程", f"{departure}-{arrival}")

    add_field(
        "train_no",
        "车次",
        cls._extract_first(TRAIN_NO_PATTERN, text) or cls._extract_first(TRAIN_STANDALONE_NO_PATTERN, text),
    )
    # The ID number is extracted first because it anchors the passenger lookup.
    id_number = cls._extract_train_id_number(text)
    add_field("passenger_name", "乘车人", cls._extract_train_passenger_name(text, id_number=id_number))
    add_field("id_number", "身份证号", id_number)
    add_field("electronic_ticket_no", "电子客票号", cls._extract_first(TRAIN_ETICKET_PATTERN, text))
    add_field("seat_class", "席别", cls._extract_first(TRAIN_SEAT_CLASS_PATTERN, text))
    carriage_no, seat_no = cls._extract_train_carriage_and_seat(text)
    add_field("carriage_no", "车厢", carriage_no)
    add_field("seat_no", "座位号", seat_no)
    add_field("fare", "票价", cls._extract_train_fare(text))
    return normalized
|
||||
|
||||
@staticmethod
|
||||
def _is_train_ticket_values(
|
||||
*,
|
||||
document_type: str,
|
||||
document_type_label: str,
|
||||
scene_label: str,
|
||||
text: str,
|
||||
) -> bool:
|
||||
if str(document_type or "").strip().lower() == "train_ticket":
|
||||
return True
|
||||
compact = "".join([document_type_label, scene_label, text]).replace(" ", "")
|
||||
return any(token in compact for token in ("火车", "高铁", "动车", "铁路", "电子客票", "车次"))
|
||||
|
||||
@classmethod
|
||||
def _is_train_ticket_meta(cls, meta: dict[str, Any]) -> bool:
|
||||
return cls._is_train_ticket_values(
|
||||
document_type=str(meta.get("document_type") or ""),
|
||||
document_type_label=str(meta.get("document_type_label") or ""),
|
||||
scene_label=str(meta.get("scene_label") or ""),
|
||||
text=cls._receipt_text(meta),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _receipt_text(meta: dict[str, Any]) -> str:
|
||||
field_text = "\n".join(
|
||||
f"{field.get('label', '')} {field.get('value', '')}"
|
||||
for field in list(meta.get("document_fields") or [])
|
||||
if isinstance(field, dict)
|
||||
)
|
||||
return "\n".join(
|
||||
value
|
||||
for value in (
|
||||
str(meta.get("ocr_text") or ""),
|
||||
str(meta.get("summary") or ""),
|
||||
str(meta.get("file_name") or ""),
|
||||
field_text,
|
||||
)
|
||||
if value
|
||||
)
|
||||
|
||||
@classmethod
def _extract_train_invoice_date(cls, text: str) -> str:
    """Return the normalised date following an invoice-date label, or ``""``.

    Only the first ``TRAIN_INVOICE_DATE_PATTERN`` match in ``text`` is used.
    """
    found = TRAIN_INVOICE_DATE_PATTERN.search(str(text or ""))
    return cls._normalize_receipt_date_value(found.group(1)) if found else ""
|
||||
|
||||
@classmethod
def _extract_train_trip_datetime(cls, text: str) -> str:
    """Pick the date (with time when available) that most likely is the departure.

    Every date in ``text`` is a candidate unless an invoice-date label
    (开票日期 / 发票日期 / 开票时间) appears in the window from 14 characters
    before to 8 characters after it. Candidates are scored:

    * +8 when a clock time was attached to the date;
    * +6 when a departure/boarding keyword occurs within 32 characters;
    * +3 when a seat-class keyword occurs within 32 characters.

    The highest score wins; ties go to the earliest date in the text.

    Returns:
        "YYYY-MM-DD" or "YYYY-MM-DD HH:MM", or ``""`` when no candidate exists.
    """
    raw_text = str(text or "")
    best_score = -1  # below any real score, so the first candidate always wins
    best_value = ""
    for match in RECEIPT_DATE_PATTERN.finditer(raw_text):
        label_window = raw_text[max(0, match.start() - 14): match.end() + 8].replace(" ", "")
        if any(token in label_window for token in ("开票日期", "发票日期", "开票时间")):
            continue

        value = cls._format_date_match_with_time(raw_text, match)
        context = raw_text[max(0, match.start() - 32): match.end() + 32].replace(" ", "")

        score = 0
        # The formatter always emits an ASCII colon, so one check is sufficient.
        if ":" in value:
            score += 8
        if any(token in context for token in ("开车时间", "发车时间", "乘车日期", "乘车时间", "检票", "车次")):
            score += 6
        if any(token in context for token in ("二等座", "一等座", "商务座", "硬座", "软卧", "硬卧")):
            score += 3

        # Strict comparison keeps the earliest candidate among equal scores.
        if score > best_score:
            best_score, best_value = score, value
    return best_value
|
||||
|
||||
@classmethod
def _format_date_match_with_time(cls, text: str, match: re.Match[str]) -> str:
    """Format a date match as "YYYY-MM-DD", adding " HH:MM" when a time is nearby.

    The time is the first ``RECEIPT_TIME_PATTERN`` hit in the slice from 18
    characters before the date to 24 characters after it. Returns ``""`` when
    the matched date normalises to an empty string.
    """
    date_part = cls._normalize_receipt_date_value(match.group(1))
    if not date_part:
        return ""

    source = str(text or "")
    window_start = max(0, match.start() - 18)
    window_end = match.end() + 24
    clock = RECEIPT_TIME_PATTERN.search(source[window_start:window_end])
    if clock is None:
        return date_part

    hour, minute = (str(piece).zfill(2) for piece in clock.groups())
    return f"{date_part} {hour}:{minute}"
|
||||
|
||||
@staticmethod
def _normalize_receipt_date_value(value: str) -> str:
    """Rewrite the first date found in ``value`` as zero-padded "YYYY-MM-DD".

    When ``value`` contains no recognisable date, the stripped input is
    returned unchanged. Anything surrounding the date is discarded.
    """
    raw = str(value or "").strip()
    found = RECEIPT_DATE_PATTERN.search(raw)
    if found is None:
        return raw

    matched = found.group(1)
    # Map every accepted separator to "-" and drop the trailing 日 marker.
    separators = str.maketrans({"年": "-", "月": "-", "/": "-", ".": "-", "日": None})
    pieces = [piece for piece in matched.translate(separators).split("-") if piece]
    if len(pieces) != 3:
        return matched

    year, month, day = pieces
    return "-".join((year.zfill(4), month.zfill(2), day.zfill(2)))
|
||||
|
||||
@classmethod
def _extract_train_route_points(cls, text: str) -> tuple[str, str]:
    """Return ``(departure, arrival)`` station names found in ``text``.

    First choice: the first two distinct lines that end with 站 and whose
    cleaned name contains none of 发票 / 客票 / 铁路 / 票价 / 日期. Fallback:
    the first "<A> 至/到/→ <B>" style match, accepted only when both cleaned
    names are non-empty and different. Returns ``("", "")`` otherwise.
    """
    raw_text = str(text or "")
    excluded_tokens = ("发票", "客票", "铁路", "票价", "日期")

    stations: list[str] = []
    for line in raw_text.replace("\r", "\n").splitlines():
        name = cls._clean_train_station(line)
        ends_with_station = str(line or "").strip().endswith("站")
        if (
            name
            and name not in stations
            and ends_with_station
            and not any(token in name for token in excluded_tokens)
        ):
            stations.append(name)
            # Only the first two stations are ever used, so stop here.
            if len(stations) == 2:
                return stations[0], stations[1]

    route = TRAIN_ROUTE_PATTERN.search(raw_text)
    if route is None:
        return "", ""

    departure, arrival = (cls._clean_train_station(part) for part in route.groups())
    if departure and arrival and departure != arrival:
        return departure, arrival
    return "", ""
|
||||
|
||||
@staticmethod
|
||||
def _clean_train_station(value: str) -> str:
|
||||
cleaned = re.sub(r"[^A-Za-z0-9\u4e00-\u9fa5()()·]", "", str(value or ""))
|
||||
cleaned = re.sub(r"(?:火车站|高铁站|站)$", "", cleaned)
|
||||
return cleaned.strip()
|
||||
|
||||
@staticmethod
|
||||
def _extract_first(pattern: re.Pattern[str], text: str) -> str:
|
||||
match = pattern.search(str(text or ""))
|
||||
return str(match.group(1) or "").strip() if match else ""
|
||||
|
||||
@classmethod
def _extract_train_passenger_name(cls, text: str, *, id_number: str = "") -> str:
    """Find the passenger name on a train ticket.

    Strategies, in order:

    1. a name following a 乘车人 / 旅客姓名 / 姓名 label;
    2. when ``id_number`` is known, the first plausible name on the line
       after, the line before, or two lines after a line containing it;
    3. the text following a 购买方名称 label.

    Args:
        text: Receipt text to search.
        id_number: Identity number already extracted from the ticket; used
            as the anchor for strategy 2. Strategy 2 is skipped when blank.

    Returns:
        The passenger name, or ``""`` when no strategy produces one.
    """
    labeled = cls._extract_first(TRAIN_PASSENGER_PATTERN, text)
    if labeled:
        return labeled

    lines = [line.strip() for line in str(text or "").replace("\r", "\n").splitlines() if line.strip()]

    # Without an ID number there is no anchor line; scanning around every
    # line would return arbitrary nearby text as the passenger name.
    if id_number:
        for index, line in enumerate(lines):
            # Spaces are ignored so an ID split by OCR spacing is still found.
            if id_number not in line.replace(" ", ""):
                continue
            for offset in (1, -1, 2):
                target_index = index + offset
                if not 0 <= target_index < len(lines):
                    continue
                candidate = cls._clean_train_passenger_candidate(lines[target_index])
                if candidate:
                    return candidate

    for line in lines:
        if "购买方名称" in line:
            # Take what follows the label itself; the cleaner strips any colon
            # (ASCII or full-width), so the label can never leak into the name.
            candidate = cls._clean_train_passenger_candidate(line.split("购买方名称", 1)[-1])
            if candidate:
                return candidate
    return ""
|
||||
|
||||
@staticmethod
|
||||
def _clean_train_passenger_candidate(value: str) -> str:
|
||||
cleaned = re.sub(r"[^·\u4e00-\u9fa5]", "", str(value or "")).strip()
|
||||
if not 2 <= len(cleaned) <= 8:
|
||||
return ""
|
||||
if any(token in cleaned for token in ("电子", "客票", "铁路", "发票", "税务", "湖北省", "中国铁路", "开票", "日期")):
|
||||
return ""
|
||||
return cleaned
|
||||
|
||||
@classmethod
def _extract_train_id_number(cls, text: str) -> str:
    """Return the passenger's (possibly masked) identity number, or ``""``.

    A number following an ID label is preferred. Otherwise each line is
    searched, with ASCII spaces removed, for an ID-like token; lines that
    mention an invoice, ticket or order number label are skipped.
    """
    labeled = cls._extract_first(TRAIN_ID_PATTERN, text)
    if labeled:
        return labeled

    other_number_labels = ("发票号码", "电子客票号", "客票号", "订单号")
    for line in str(text or "").replace("\r", "\n").splitlines():
        squeezed = line.replace(" ", "")
        if any(label in squeezed for label in other_number_labels):
            continue
        found = TRAIN_ID_FALLBACK_PATTERN.search(squeezed)
        if found:
            return str(found.group(1) or "").strip()
    return ""
|
||||
|
||||
@staticmethod
def _extract_train_carriage_and_seat(text: str) -> tuple[str, str]:
    """Return ``(carriage, seat)`` parsed from ticket text.

    A combined "<n>车<seat>号" occurrence is preferred and yields
    ``("<n>车", "<seat>")``. Otherwise the labelled carriage (ASCII spaces
    removed) and labelled seat number are looked up independently; either
    may be ``""``.
    """
    combined = TRAIN_COMBINED_SEAT_PATTERN.search(str(text or ""))
    if combined is not None:
        carriage_digits, seat = combined.groups()
        return f"{carriage_digits}车", seat

    first = ReceiptFolderService._extract_first
    carriage_no = first(TRAIN_CARRIAGE_PATTERN, text).replace(" ", "")
    seat_no = first(TRAIN_SEAT_NO_PATTERN, text)
    return carriage_no, seat_no
|
||||
|
||||
@staticmethod
def _extract_train_fare(text: str) -> str:
    """Return the fare following a 票价 / 金额 label as "<amount>元", or ``""``.

    A comma in the captured amount is rewritten to a dot before the unit is
    appended.
    """
    found = TRAIN_FARE_PATTERN.search(str(text or ""))
    if found is None:
        return ""
    amount = str(found.group(1) or "").replace(",", ".").strip()
    if not amount:
        return ""
    return f"{amount}元"
|
||||
|
||||
@staticmethod
|
||||
def _parse_datetime(value: Any) -> datetime | None:
|
||||
raw = str(value or "").strip()
|
||||
|
||||
Reference in New Issue
Block a user