feat: 新增风险图谱算法与系统仪表盘及操作反馈体系

后端：新增风险图谱算法模块、风险观察与反馈服务、规则 DSL 校验器和可解释性引擎；完善系统仪表盘和财务仪表盘统计；优化 agent 运行和编排执行链路。清理旧开发文档。前端：新增系统趋势、负载热力图等多种仪表盘图表组件；完善操作反馈对话框和工作台日期选择器；优化报销创建和审批详情交互。补充单元测试覆盖。
2026-05-30 15:46:51 +08:00
parent 4c59941ec6
commit 7989f3a159
314 changed files with 30073 additions and 20626 deletions
--- a/server/src/app/services/receipt_folder.py
+++ b/server/src/app/services/receipt_folder.py
@@ -22,6 +22,30 @@ from app.schemas.receipt_folder import (
 from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
 from app.services.ocr import SUPPORTED_SUFFIXES

+RECEIPT_DATE_PATTERN = re.compile(
+    r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)"
+)
+RECEIPT_TIME_PATTERN = re.compile(r"(?<!\d)([01]?\d|2[0-3])[:：]([0-5]\d)(?!\d)")
+TRAIN_INVOICE_DATE_PATTERN = re.compile(
+    r"(?:开票日期|发票日期|开票时间)\s*[:：]?\s*"
+    r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)"
+)
+TRAIN_ROUTE_PATTERN = re.compile(
+    r"([\u4e00-\u9fa5]{2,12})站?\s*(?:至|到|→|->|—|–|-)\s*"
+    r"([\u4e00-\u9fa5]{2,12})站?"
+)
+TRAIN_NO_PATTERN = re.compile(r"(?:车次|列车号)\s*[:：]?\s*([GCDZKTLYS]\d{1,5})", re.IGNORECASE)
+TRAIN_STANDALONE_NO_PATTERN = re.compile(r"(?<![A-Z0-9])([GCDZKTLYS]\d{1,5})(?![A-Z0-9])", re.IGNORECASE)
+TRAIN_PASSENGER_PATTERN = re.compile(r"(?:乘车人|旅客姓名|姓名)\s*[:：]?\s*([\u4e00-\u9fa5·]{2,20})")
+TRAIN_ID_PATTERN = re.compile(r"(?:有效身份证件号码|身份证件号码|证件号码|身份证号)\s*[:：]?\s*([0-9Xx*]{6,24})")
+TRAIN_ID_FALLBACK_PATTERN = re.compile(r"(?<![0-9A-Za-z])([0-9]{6,17}[0-9Xx*]{2,8})(?![0-9A-Za-z])")
+TRAIN_ETICKET_PATTERN = re.compile(r"(?:电子客票号|客票号)\s*[:：]?\s*([A-Z0-9]{6,32})", re.IGNORECASE)
+TRAIN_SEAT_CLASS_PATTERN = re.compile(r"(商务座|特等座|一等座|二等座|一等卧|二等卧|软卧|硬卧|软座|硬座|无座)")
+TRAIN_CARRIAGE_PATTERN = re.compile(r"(?:车厢|车厢号)\s*[:：]?\s*([0-9]{1,2}\s*车?)")
+TRAIN_SEAT_NO_PATTERN = re.compile(r"(?:座位|座位号)\s*[:：]?\s*([0-9]{1,3}[A-F号]?)", re.IGNORECASE)
+TRAIN_COMBINED_SEAT_PATTERN = re.compile(r"([0-9]{1,2})车\s*([0-9]{1,3}[A-F])号?", re.IGNORECASE)
+TRAIN_FARE_PATTERN = re.compile(r"(?:票价|金额)\s*[:：￥¥\s]*([0-9]+(?:[.,][0-9]{1,2})?)")
+

 class ReceiptFolderService:
    def __init__(self) -> None:
@@ -372,8 +396,8 @@ class ReceiptFolderService:
    def _is_previewable(media_type: str) -> bool:
        return str(media_type or "").startswith("image/") or str(media_type or "") == "application/pdf"

-    @staticmethod
-    def _build_document_meta(document: Any | None) -> dict[str, Any]:
+    @classmethod
+    def _build_document_meta(cls, document: Any | None) -> dict[str, Any]:
        fields = []
        for field in list(getattr(document, "document_fields", []) or []):
            if isinstance(field, dict):
@@ -393,18 +417,33 @@ class ReceiptFolderService:
                    }
                )
        fields = [field for field in fields if field["label"] and field["value"]]
+        ocr_text = str(getattr(document, "text", "") or "")
+        summary = str(getattr(document, "summary", "") or "")
+        document_type = str(getattr(document, "document_type", "") or "other")
+        document_type_label = str(getattr(document, "document_type_label", "") or "其他单据")
+        scene_label = str(getattr(document, "scene_label", "") or "其他票据")
+        if cls._is_train_ticket_values(
+            document_type=document_type,
+            document_type_label=document_type_label,
+            scene_label=scene_label,
+            text=f"{summary}\n{ocr_text}",
+        ):
+            fields = cls._enrich_train_ticket_field_dicts(
+                fields,
+                text=f"{ocr_text}\n{summary}\n{str(getattr(document, 'filename', '') or '')}",
+            )
        return {
            "engine": str(getattr(document, "engine", "") or ""),
            "model": str(getattr(document, "model", "") or ""),
-            "ocr_text": str(getattr(document, "text", "") or ""),
-            "summary": str(getattr(document, "summary", "") or ""),
+            "ocr_text": ocr_text,
+            "summary": summary,
            "ocr_avg_score": float(getattr(document, "avg_score", 0.0) or 0.0),
            "ocr_line_count": int(getattr(document, "line_count", 0) or 0),
            "page_count": int(getattr(document, "page_count", 1) or 1),
-            "document_type": str(getattr(document, "document_type", "") or "other"),
-            "document_type_label": str(getattr(document, "document_type_label", "") or "其他单据"),
+            "document_type": document_type,
+            "document_type_label": document_type_label,
            "scene_code": str(getattr(document, "scene_code", "") or "other"),
-            "scene_label": str(getattr(document, "scene_label", "") or "其他票据"),
+            "scene_label": scene_label,
            "ocr_classification_source": str(getattr(document, "classification_source", "") or ""),
            "ocr_classification_confidence": float(getattr(document, "classification_confidence", 0.0) or 0.0),
            "ocr_classification_evidence": [
@@ -484,8 +523,8 @@ class ReceiptFolderService:
            scene_label=str(meta.get("scene_label") or "其他票据"),
            summary=str(meta.get("summary") or ""),
            amount=self._resolve_editable_or_field(meta, "amount", labels=("金额", "价税合计", "票价")),
-            document_date=self._resolve_editable_or_field(meta, "document_date", labels=("日期", "开票日期", "乘车日期")),
-            merchant_name=self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方")),
+            document_date=self._resolve_receipt_document_date(meta),
+            merchant_name=self._resolve_receipt_merchant_name(meta),
            avg_score=float(meta.get("ocr_avg_score") or 0.0),
            uploaded_at=self._parse_datetime(meta.get("uploaded_at")),
            linked_at=self._parse_datetime(meta.get("linked_at")),
@@ -499,7 +538,7 @@ class ReceiptFolderService:
        )

    def _resolve_fields(self, meta: dict[str, Any]) -> list[ReceiptFolderFieldRead]:
-        return [
+        fields = [
            ReceiptFolderFieldRead(
                key=str(field.get("key") or ""),
                label=str(field.get("label") or ""),
@@ -508,6 +547,45 @@ class ReceiptFolderService:
            for field in list(meta.get("document_fields") or [])
            if isinstance(field, dict) and str(field.get("label") or "").strip()
        ]
+        if self._is_train_ticket_meta(meta):
+            return [
+                ReceiptFolderFieldRead(**field)
+                for field in self._enrich_train_ticket_field_dicts(
+                    [field.model_dump() for field in fields],
+                    text=self._receipt_text(meta),
+                )
+            ]
+        return fields
+
+    def _resolve_receipt_document_date(self, meta: dict[str, Any]) -> str:
+        editable = meta.get("editable_fields")
+        if isinstance(editable, dict):
+            value = str(editable.get("document_date") or "").strip()
+            if value:
+                return value
+
+        fields = self._resolve_fields(meta)
+        for field in fields:
+            if field.key in {"invoice_date", "issue_date"} or field.label in {"开票日期", "发票日期"}:
+                return self._normalize_receipt_date_value(field.value)
+
+        if self._is_train_ticket_meta(meta):
+            invoice_date = self._extract_train_invoice_date(self._receipt_text(meta))
+            if invoice_date:
+                return invoice_date
+
+        for field in fields:
+            if field.key == "document_date" or field.label in {"日期", "乘车日期", "列车出发时间", "行程日期"}:
+                return self._normalize_receipt_date_value(field.value)
+        return ""
+
+    def _resolve_receipt_merchant_name(self, meta: dict[str, Any]) -> str:
+        value = self._resolve_editable_or_field(meta, "merchant_name", labels=("商户", "销售方", "收款方", "开票方"))
+        if value:
+            return value
+        if self._is_train_ticket_meta(meta):
+            return "中国铁路"
+        return ""

    def _resolve_editable_or_field(self, meta: dict[str, Any], key: str, *, labels: tuple[str, ...]) -> str:
        editable = meta.get("editable_fields")
@@ -521,6 +599,254 @@ class ReceiptFolderService:
                return field.value
        return ""

+    @classmethod
+    def _enrich_train_ticket_field_dicts(
+        cls,
+        fields: list[dict[str, Any]],
+        *,
+        text: str,
+    ) -> list[dict[str, str]]:
+        normalized: list[dict[str, str]] = []
+        for field in fields:
+            key = str(field.get("key") or "").strip()
+            label = str(field.get("label") or "").strip()
+            value = str(field.get("value") or "").strip()
+            if not label or not value:
+                continue
+            if key == "trip_no" and label == "车次/航班":
+                label = "车次"
+            if key == "route" and label == "行程":
+                label = "行程"
+            normalized.append({"key": key, "label": label, "value": value})
+
+        def add_field(key: str, label: str, value: str) -> None:
+            cleaned = str(value or "").strip()
+            if not cleaned:
+                return
+            if any(item["key"] == key for item in normalized if item["key"]):
+                return
+            if any(item["label"] == label for item in normalized):
+                return
+            normalized.append({"key": key, "label": label, "value": cleaned})
+
+        invoice_date = cls._extract_train_invoice_date(text)
+        add_field("invoice_date", "开票日期", invoice_date)
+
+        trip_datetime = cls._extract_train_trip_datetime(text)
+        add_field("trip_date", "列车出发时间", trip_datetime)
+
+        departure, arrival = cls._extract_train_route_points(text)
+        add_field("departure_station", "出发地点", departure)
+        add_field("arrival_station", "到达地点", arrival)
+        if departure and arrival:
+            add_field("route", "行程", f"{departure}-{arrival}")
+
+        add_field("train_no", "车次", cls._extract_first(TRAIN_NO_PATTERN, text) or cls._extract_first(TRAIN_STANDALONE_NO_PATTERN, text))
+        id_number = cls._extract_train_id_number(text)
+        add_field("passenger_name", "乘车人", cls._extract_train_passenger_name(text, id_number=id_number))
+        add_field("id_number", "身份证号", id_number)
+        add_field("electronic_ticket_no", "电子客票号", cls._extract_first(TRAIN_ETICKET_PATTERN, text))
+        add_field("seat_class", "席别", cls._extract_first(TRAIN_SEAT_CLASS_PATTERN, text))
+        carriage_no, seat_no = cls._extract_train_carriage_and_seat(text)
+        add_field("carriage_no", "车厢", carriage_no)
+        add_field("seat_no", "座位号", seat_no)
+        add_field("fare", "票价", cls._extract_train_fare(text))
+        return normalized
+
+    @staticmethod
+    def _is_train_ticket_values(
+        *,
+        document_type: str,
+        document_type_label: str,
+        scene_label: str,
+        text: str,
+    ) -> bool:
+        if str(document_type or "").strip().lower() == "train_ticket":
+            return True
+        compact = "".join([document_type_label, scene_label, text]).replace(" ", "")
+        return any(token in compact for token in ("火车", "高铁", "动车", "铁路", "电子客票", "车次"))
+
+    @classmethod
+    def _is_train_ticket_meta(cls, meta: dict[str, Any]) -> bool:
+        return cls._is_train_ticket_values(
+            document_type=str(meta.get("document_type") or ""),
+            document_type_label=str(meta.get("document_type_label") or ""),
+            scene_label=str(meta.get("scene_label") or ""),
+            text=cls._receipt_text(meta),
+        )
+
+    @staticmethod
+    def _receipt_text(meta: dict[str, Any]) -> str:
+        field_text = "\n".join(
+            f"{field.get('label', '')} {field.get('value', '')}"
+            for field in list(meta.get("document_fields") or [])
+            if isinstance(field, dict)
+        )
+        return "\n".join(
+            value
+            for value in (
+                str(meta.get("ocr_text") or ""),
+                str(meta.get("summary") or ""),
+                str(meta.get("file_name") or ""),
+                field_text,
+            )
+            if value
+        )
+
+    @classmethod
+    def _extract_train_invoice_date(cls, text: str) -> str:
+        match = TRAIN_INVOICE_DATE_PATTERN.search(str(text or ""))
+        if not match:
+            return ""
+        return cls._normalize_receipt_date_value(match.group(1))
+
+    @classmethod
+    def _extract_train_trip_datetime(cls, text: str) -> str:
+        raw_text = str(text or "")
+        candidates: list[tuple[int, int, str]] = []
+        for index, match in enumerate(RECEIPT_DATE_PATTERN.finditer(raw_text)):
+            window = raw_text[max(0, match.start() - 14): match.end() + 8].replace(" ", "")
+            if any(token in window for token in ("开票日期", "发票日期", "开票时间")):
+                continue
+            value = cls._format_date_match_with_time(raw_text, match)
+            score = 0
+            nearby = raw_text[max(0, match.start() - 32): match.end() + 32]
+            compact = nearby.replace(" ", "")
+            if ":" in value or "：" in value:
+                score += 8
+            if any(token in compact for token in ("开车时间", "发车时间", "乘车日期", "乘车时间", "检票", "车次")):
+                score += 6
+            if any(token in compact for token in ("二等座", "一等座", "商务座", "硬座", "软卧", "硬卧")):
+                score += 3
+            candidates.append((score, -index, value))
+        if not candidates:
+            return ""
+        return max(candidates, key=lambda item: (item[0], item[1]))[2]
+
+    @classmethod
+    def _format_date_match_with_time(cls, text: str, match: re.Match[str]) -> str:
+        date_value = cls._normalize_receipt_date_value(match.group(1))
+        if not date_value:
+            return ""
+        surrounding = str(text or "")[max(0, match.start() - 18): match.end() + 24]
+        time_match = RECEIPT_TIME_PATTERN.search(surrounding)
+        if not time_match:
+            return date_value
+        return f"{date_value} {str(time_match.group(1)).zfill(2)}:{str(time_match.group(2)).zfill(2)}"
+
+    @staticmethod
+    def _normalize_receipt_date_value(value: str) -> str:
+        raw = str(value or "").strip()
+        match = RECEIPT_DATE_PATTERN.search(raw)
+        if not match:
+            return raw
+        normalized = match.group(1).replace("年", "-").replace("月", "-").replace("日", "")
+        normalized = normalized.replace("/", "-").replace(".", "-")
+        parts = [part for part in normalized.split("-") if part]
+        if len(parts) != 3:
+            return match.group(1)
+        year, month, day = parts
+        return f"{year.zfill(4)}-{month.zfill(2)}-{day.zfill(2)}"
+
+    @classmethod
+    def _extract_train_route_points(cls, text: str) -> tuple[str, str]:
+        raw_text = str(text or "")
+        station_candidates: list[str] = []
+        for line in raw_text.replace("\r", "\n").splitlines():
+            candidate = cls._clean_train_station(line)
+            if not candidate or candidate in station_candidates:
+                continue
+            if not str(line or "").strip().endswith("站"):
+                continue
+            if any(token in candidate for token in ("发票", "客票", "铁路", "票价", "日期")):
+                continue
+            station_candidates.append(candidate)
+            if len(station_candidates) >= 2:
+                return station_candidates[0], station_candidates[1]
+
+        match = TRAIN_ROUTE_PATTERN.search(raw_text)
+        if match:
+            departure = cls._clean_train_station(match.group(1))
+            arrival = cls._clean_train_station(match.group(2))
+            if departure and arrival and departure != arrival:
+                return departure, arrival
+        return "", ""
+
+    @staticmethod
+    def _clean_train_station(value: str) -> str:
+        cleaned = re.sub(r"[^A-Za-z0-9\u4e00-\u9fa5（）()·]", "", str(value or ""))
+        cleaned = re.sub(r"(?:火车站|高铁站|站)$", "", cleaned)
+        return cleaned.strip()
+
+    @staticmethod
+    def _extract_first(pattern: re.Pattern[str], text: str) -> str:
+        match = pattern.search(str(text or ""))
+        return str(match.group(1) or "").strip() if match else ""
+
+    @classmethod
+    def _extract_train_passenger_name(cls, text: str, *, id_number: str = "") -> str:
+        labeled = cls._extract_first(TRAIN_PASSENGER_PATTERN, text)
+        if labeled:
+            return labeled
+
+        lines = [line.strip() for line in str(text or "").replace("\r", "\n").splitlines() if line.strip()]
+        for index, line in enumerate(lines):
+            if id_number and id_number not in line:
+                continue
+            for offset in (1, -1, 2):
+                target_index = index + offset
+                if target_index < 0 or target_index >= len(lines):
+                    continue
+                candidate = cls._clean_train_passenger_candidate(lines[target_index])
+                if candidate:
+                    return candidate
+        for line in lines:
+            if "购买方名称" in line:
+                candidate = cls._clean_train_passenger_candidate(line.split(":", 1)[-1].split("：", 1)[-1])
+                if candidate:
+                    return candidate
+        return ""
+
+    @staticmethod
+    def _clean_train_passenger_candidate(value: str) -> str:
+        cleaned = re.sub(r"[^·\u4e00-\u9fa5]", "", str(value or "")).strip()
+        if not 2 <= len(cleaned) <= 8:
+            return ""
+        if any(token in cleaned for token in ("电子", "客票", "铁路", "发票", "税务", "湖北省", "中国铁路", "开票", "日期")):
+            return ""
+        return cleaned
+
+    @classmethod
+    def _extract_train_id_number(cls, text: str) -> str:
+        labeled = cls._extract_first(TRAIN_ID_PATTERN, text)
+        if labeled:
+            return labeled
+        for line in str(text or "").replace("\r", "\n").splitlines():
+            compact_line = line.replace(" ", "")
+            if any(token in compact_line for token in ("发票号码", "电子客票号", "客票号", "订单号")):
+                continue
+            match = TRAIN_ID_FALLBACK_PATTERN.search(compact_line)
+            if match:
+                return str(match.group(1) or "").strip()
+        return ""
+
+    @staticmethod
+    def _extract_train_carriage_and_seat(text: str) -> tuple[str, str]:
+        combined_match = TRAIN_COMBINED_SEAT_PATTERN.search(str(text or ""))
+        if combined_match:
+            return f"{combined_match.group(1)}车", combined_match.group(2)
+        carriage_no = ReceiptFolderService._extract_first(TRAIN_CARRIAGE_PATTERN, text).replace(" ", "")
+        seat_no = ReceiptFolderService._extract_first(TRAIN_SEAT_NO_PATTERN, text)
+        return carriage_no, seat_no
+
+    @staticmethod
+    def _extract_train_fare(text: str) -> str:
+        match = TRAIN_FARE_PATTERN.search(str(text or ""))
+        if not match:
+            return ""
+        value = str(match.group(1) or "").replace(",", ".").strip()
+        return f"{value}元" if value else ""
+
    @staticmethod
    def _parse_datetime(value: Any) -> datetime | None:
        raw = str(value or "").strip()