feat: 财务看板口径重构与半年模拟数据及报销状态注册表

- 重构 finance_dashboard 口径计算，新增模拟公司画像数据生成与筛选
- 引入 expense_claim_status_registry 统一报销状态流转
- 完善报销草稿流程、Item Sync 与本体解析器
- 优化总览页趋势图、分页组件与请求进度步骤
- 增强报销申请快速预览、本体工具与详情展示
- 新增半年报销模拟数据种子脚本与状态审计工具
- 补充财务看板、报销状态注册与模拟数据测试覆盖
2026-06-02 16:22:59 +08:00
parent ca691f3ee0
commit 0c74b4ab4a
54 changed files with 6810 additions and 1238 deletions
--- a/server/scripts/audit_expense_claim_statuses.py
+++ b/server/scripts/audit_expense_claim_statuses.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import Any
+
+from sqlalchemy import select
+
+SERVER_DIR = Path(__file__).resolve().parents[1]
+SRC_DIR = SERVER_DIR / "src"
+if str(SRC_DIR) not in sys.path:
+    sys.path.insert(0, str(SRC_DIR))
+
+from app.db.session import get_session_factory  # noqa: E402
+from app.models.financial_record import ExpenseClaim  # noqa: E402
+from app.services.expense_claim_status_registry import (  # noqa: E402
+    is_known_approval_stage,
+    is_known_claim_status,
+    normalize_expense_claim_state,
+)
+
+
+def main() -> None:
+    """Load every expense claim, audit its status fields, and print the report as JSON."""
+    cli = argparse.ArgumentParser(description="Audit expense claim status consistency.")
+    cli.add_argument("--sample-limit", type=int, default=20)
+    options = cli.parse_args()
+    # A negative limit is clamped to zero, which disables sample collection.
+    sample_limit = max(options.sample_limit, 0)
+
+    # Claims are read ordered by claim number, then by creation time.
+    ordered_claims = select(ExpenseClaim).order_by(
+        ExpenseClaim.claim_no.asc(),
+        ExpenseClaim.created_at.asc(),
+    )
+    open_session = get_session_factory()
+    with open_session() as db:
+        rows = list(db.scalars(ordered_claims).all())
+        report = audit_claims(rows, sample_limit=sample_limit)
+        print(json.dumps(report, ensure_ascii=False, indent=2))
+
+
+def audit_claims(claims: list[ExpenseClaim], *, sample_limit: int) -> dict[str, Any]:
+    """Tally status/stage usage across ``claims`` and report states needing normalization.
+
+    Blank status or stage values are counted under the ``<empty>`` label.  A claim
+    needs normalization when ``normalize_expense_claim_state`` reports ``changed``;
+    at most ``sample_limit`` of those claims are returned as samples, in input order.
+    """
+    empty_label = "<empty>"
+    per_status: Counter[str] = Counter()
+    per_stage: Counter[str] = Counter()
+    per_pair: Counter[str] = Counter()
+    per_kind: Counter[str] = Counter()
+    odd_statuses: Counter[str] = Counter()
+    odd_stages: Counter[str] = Counter()
+    transitions: Counter[str] = Counter()
+    examples: list[dict[str, Any]] = []
+
+    for claim in claims:
+        raw_status = str(claim.status or "").strip()
+        raw_stage = str(claim.approval_stage or "").strip()
+        kind = _doc_type(claim)
+        status_label = raw_status or empty_label
+        stage_label = raw_stage or empty_label
+
+        per_status[status_label] += 1
+        per_stage[stage_label] += 1
+        per_pair[f"{status_label} | {stage_label}"] += 1
+        per_kind[kind] += 1
+
+        if not is_known_claim_status(raw_status):
+            odd_statuses[status_label] += 1
+        if not is_known_approval_stage(raw_stage):
+            odd_stages[stage_label] += 1
+
+        state = normalize_expense_claim_state(
+            raw_status,
+            raw_stage,
+            claim_no=claim.claim_no,
+            expense_type=claim.expense_type,
+        )
+        if not state.changed:
+            continue
+
+        transitions[
+            f"{status_label}/{stage_label} -> {state.status}/{state.approval_stage}"
+        ] += 1
+        if len(examples) >= sample_limit:
+            continue
+        # Samples keep the raw (possibly blank) values, not the "<empty>" labels.
+        examples.append(
+            {
+                "claim_no": claim.claim_no,
+                "doc_type": kind,
+                "status": raw_status,
+                "approval_stage": raw_stage,
+                "normalized_status": state.status,
+                "normalized_approval_stage": state.approval_stage,
+                "status_code": state.status_code,
+            }
+        )
+
+    return {
+        "claim_count": len(claims),
+        "doc_type_counts": dict(per_kind),
+        "status_counts": dict(per_status),
+        "approval_stage_counts": dict(per_stage),
+        "status_stage_counts": dict(per_pair),
+        "unknown_statuses": dict(odd_statuses),
+        "unknown_approval_stages": dict(odd_stages),
+        "normalization_needed": sum(transitions.values()),
+        "normalization_counts": dict(transitions),
+        "normalization_samples": examples,
+    }
+
+
+def _doc_type(claim: ExpenseClaim) -> str:
+    """Classify a claim as ``application``, ``sim_reimbursement`` or ``reimbursement``.
+
+    The claim number is compared upper-cased and the expense type lower-cased,
+    both after trimming; missing values are treated as empty strings.
+    """
+    number = str(claim.claim_no or "").strip().upper()
+    kind = str(claim.expense_type or "").strip().lower()
+    looks_like_application = number.startswith(("AP-", "APP-")) or kind.endswith(
+        "_application"
+    )
+    if looks_like_application:
+        return "application"
+    return "sim_reimbursement" if number.startswith("SIM-EXP-2026") else "reimbursement"
+
+
+# Run the audit only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/server/scripts/mock_half_year_expense_demo_attachments.py
+++ b/server/scripts/mock_half_year_expense_demo_attachments.py
@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from dataclasses import asdict, dataclass
+from datetime import UTC, datetime
+from decimal import Decimal
+from pathlib import Path
+from typing import Any
+
+from sqlalchemy import select
+from sqlalchemy.orm import selectinload
+
+SERVER_DIR = Path(__file__).resolve().parents[1]
+SRC_DIR = SERVER_DIR / "src"
+if str(SRC_DIR) not in sys.path:
+    sys.path.insert(0, str(SRC_DIR))
+
+from app.db.session import get_session_factory  # noqa: E402
+from app.models.financial_record import ExpenseClaim, ExpenseClaimItem  # noqa: E402
+from app.services.demo_company_simulation_catalog import SIM_CLAIM_PREFIX  # noqa: E402
+from app.services.expense_claim_attachment_storage import (  # noqa: E402
+    ExpenseClaimAttachmentStorage,
+)
+
+# Maps a lower-cased expense item type to the mock document description used for it.
+# Each value is (document_type, document_label, scene_code, scene_label), the order in
+# which _write_mock_attachment unpacks it.  Item types not listed here fall back to a
+# generic invoice tuple in _document_meta.
+DOCUMENT_BY_ITEM_TYPE = {
+    "hotel": ("hotel_invoice", "酒店住宿票据", "hotel", "住宿票据"),
+    "hotel_ticket": ("hotel_invoice", "酒店住宿票据", "hotel", "住宿票据"),
+    "transport": ("transport_receipt", "乘车票据", "transport", "交通票据"),
+    "train_ticket": ("train_ticket", "火车/高铁票", "travel", "差旅票据"),
+    "flight_ticket": ("flight_itinerary", "航空行程单", "travel", "差旅票据"),
+    "ride_ticket": ("taxi_receipt", "出租车/网约车票据", "transport", "交通票据"),
+    "meal": ("meal_receipt", "餐饮发票", "meal", "餐饮票据"),
+    "entertainment": ("meal_receipt", "餐饮发票", "meal", "餐饮票据"),
+    "office": ("office_invoice", "办公用品发票", "office", "办公票据"),
+    "communication": ("telecom_invoice", "通信服务发票", "communication", "通信票据"),
+    "travel_allowance": ("allowance_sheet", "差旅补贴测算单", "travel", "差旅测算"),
+}
+
+
+@dataclass(frozen=True, slots=True)
+class MockAttachmentSummary:
+    """Immutable counters describing one run of ``mock_attachments``."""
+
+    # "apply" when changes were written, "dry-run" otherwise.
+    mode: str
+    # Number of simulated claims that were scanned.
+    sim_claims: int
+    # Total number of items across those claims.
+    sim_items: int
+    # Items selected to receive a mock attachment in this run.
+    attachments_to_mock: int
+    # Items deliberately left without an attachment.
+    missing_material_items: int
+    # Split of attachments_to_mock into compliant and violation samples.
+    compliant_attachments: int
+    violation_attachments: int
+    # Items skipped because a mock attachment from this script already exists.
+    already_mocked: int
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the counters as a plain dict via ``dataclasses.asdict``."""
+        return asdict(self)
+
+
+def main() -> None:
+    """Run the attachment mocker and print its summary as JSON.
+
+    Changes are committed only with ``--apply``; any exception rolls the session
+    back and is re-raised.
+    """
+    cli = argparse.ArgumentParser(
+        description="Mock attachment files and OCR metadata for half-year simulated claims."
+    )
+    cli.add_argument("--apply", action="store_true", help="Write mock attachment files.")
+    write_files = cli.parse_args().apply
+
+    open_session = get_session_factory()
+    with open_session() as db:
+        try:
+            result = mock_attachments(db, apply=write_files)
+            if write_files:
+                db.commit()
+            report = json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
+            print(report)
+            if not write_files:
+                print("dry-run only; pass --apply after confirmation to write mock attachments.")
+        except Exception:
+            db.rollback()
+            raise
+
+
+def mock_attachments(db, *, apply: bool) -> MockAttachmentSummary:
+    """Decide, per simulated claim item, whether to mock, skip, or leave it without material.
+
+    Claims and items are numbered from 1 in iteration order; those positions drive
+    the "leave missing" and "violation sample" decisions.  With ``apply`` false the
+    function only counts; with ``apply`` true it writes attachments, clears
+    ``invoice_id`` on items left missing, and recomputes each claim's
+    ``invoice_count``.  Committing is left to the caller.
+    """
+    claims = _sim_claims(db)
+    storage = ExpenseClaimAttachmentStorage()
+    tally = {
+        "items": 0,
+        "to_mock": 0,
+        "missing": 0,
+        "compliant": 0,
+        "violation": 0,
+        "existing": 0,
+    }
+
+    for claim_pos, claim in enumerate(claims, start=1):
+        claim_items = list(claim.items or [])
+        tally["items"] += len(claim_items)
+
+        for item_pos, item in enumerate(claim_items, start=1):
+            if _has_existing_mock(storage, item):
+                tally["existing"] += 1
+            elif _should_leave_missing(claim_pos, item_pos, claim):
+                tally["missing"] += 1
+                if apply:
+                    item.invoice_id = None
+            else:
+                flagged = _is_violation_sample(claim_pos, item_pos, claim)
+                tally["to_mock"] += 1
+                tally["violation" if flagged else "compliant"] += 1
+                if apply:
+                    _write_mock_attachment(
+                        storage=storage,
+                        claim=claim,
+                        item=item,
+                        claim_index=claim_pos,
+                        item_index=item_pos,
+                        violated=flagged,
+                    )
+
+        if apply:
+            # Count only items that ended up with a non-blank invoice reference.
+            claim.invoice_count = len(
+                [entry for entry in claim_items if str(entry.invoice_id or "").strip()]
+            )
+
+    return MockAttachmentSummary(
+        mode="apply" if apply else "dry-run",
+        sim_claims=len(claims),
+        sim_items=tally["items"],
+        attachments_to_mock=tally["to_mock"],
+        missing_material_items=tally["missing"],
+        compliant_attachments=tally["compliant"],
+        violation_attachments=tally["violation"],
+        already_mocked=tally["existing"],
+    )
+
+
+def _sim_claims(db) -> list[ExpenseClaim]:
+    """Return claims whose number starts with ``SIM_CLAIM_PREFIX``, items eagerly loaded."""
+    statement = (
+        select(ExpenseClaim)
+        .options(selectinload(ExpenseClaim.items))
+        .where(ExpenseClaim.claim_no.like(f"{SIM_CLAIM_PREFIX}%"))
+        .order_by(ExpenseClaim.claim_no.asc())
+    )
+    return list(db.scalars(statement).all())
+
+
+def _has_existing_mock(storage: ExpenseClaimAttachmentStorage, item: ExpenseClaimItem) -> bool:
+    """Tell whether ``item`` already has an attachment file written by this script.
+
+    True only when the resolved path exists and its metadata ``source`` equals
+    ``half_year_expense_demo_mock``.
+    """
+    attachment = storage.resolve_item_path(item)
+    if attachment is not None and attachment.exists():
+        origin = storage.read_meta(attachment).get("source")
+        return str(origin or "") == "half_year_expense_demo_mock"
+    return False
+
+
+def _should_leave_missing(claim_index: int, item_index: int, claim: ExpenseClaim) -> bool:
+    """Decide whether this item is deliberately left without an attachment.
+
+    Draft and returned claims lose every 4th (claim_index + item_index) slot;
+    all other claims lose every 19th.
+    """
+    state = str(claim.status or "").strip().lower()
+    modulus = 4 if state in {"draft", "returned"} else 19
+    return (claim_index + item_index) % modulus == 0
+
+
+def _is_violation_sample(claim_index: int, item_index: int, claim: ExpenseClaim) -> bool:
+    """Decide whether the mock attachment for this item is a violation sample.
+
+    Claims with a truthy ``hermes_risk_flag`` or ``risk_flags_json`` always are;
+    otherwise the item is picked when ``7 * claim_index + 3 * item_index`` is a
+    multiple of 11.
+    """
+    risk_marked = bool(claim.hermes_risk_flag or claim.risk_flags_json)
+    on_schedule = (7 * claim_index + 3 * item_index) % 11 == 0
+    return risk_marked or on_schedule
+
+
+def _write_mock_attachment(
+    *,
+    storage: ExpenseClaimAttachmentStorage,
+    claim: ExpenseClaim,
+    item: ExpenseClaimItem,
+    claim_index: int,
+    item_index: int,
+    violated: bool,
+) -> None:
+    """Write a mock text attachment plus metadata for ``item`` and link it to the item.
+
+    The file is named ``<claim_no>-<item_index>-<document_type>.txt`` inside the
+    directory returned by ``storage.build_item_dir``.  Its content is the mock OCR
+    text; ``item.invoice_id`` is set to the file's storage key.
+    """
+    document_type, document_label, scene_code, scene_label = _document_meta(item.item_type)
+    filename = f"{claim.claim_no}-{item_index:02d}-{document_type}.txt"
+    attachment_dir = storage.build_item_dir(claim.id, item.id)
+    attachment_dir.mkdir(parents=True, exist_ok=True)
+    file_path = attachment_dir / filename
+    ocr_text = _ocr_text(
+        claim=claim,
+        item=item,
+        document_label=document_label,
+        claim_index=claim_index,
+        item_index=item_index,
+        violated=violated,
+    )
+    # The file must exist before the metadata is built: _meta_payload stats it for its size.
+    file_path.write_text(ocr_text, encoding="utf-8")
+    item.invoice_id = storage.to_storage_key(file_path)
+    storage.write_meta(
+        file_path,
+        _meta_payload(
+            storage_key=item.invoice_id,
+            filename=filename,
+            file_path=file_path,
+            claim=claim,
+            item=item,
+            document_type=document_type,
+            document_label=document_label,
+            scene_code=scene_code,
+            scene_label=scene_label,
+            ocr_text=ocr_text,
+            violated=violated,
+        ),
+    )
+
+
+def _document_meta(item_type: str) -> tuple[str, str, str, str]:
+    """Look up (document_type, document_label, scene_code, scene_label) for an item type.
+
+    The item type is trimmed and lower-cased first; unknown or missing types get
+    the generic invoice tuple.
+    """
+    lookup_key = str(item_type or "").strip().lower()
+    generic = ("invoice", "费用发票", "other", "其他票据")
+    return DOCUMENT_BY_ITEM_TYPE.get(lookup_key, generic)
+
+
+def _ocr_text(
+    *,
+    claim: ExpenseClaim,
+    item: ExpenseClaimItem,
+    document_label: str,
+    claim_index: int,
+    item_index: int,
+    violated: bool,
+) -> str:
+    """Render the mock OCR text for one attachment, one ``label：value`` pair per line.
+
+    The invoice number is ``MOCK`` followed by the zero-padded claim and item
+    positions.  The last line is a review hint that depends on ``violated``.
+    """
+    invoice_no = f"MOCK{claim_index:04d}{item_index:02d}"
+    amount = _display_amount(item.item_amount)
+    merchant = _merchant_name(item.item_type, violated)
+    # A missing department or location renders as blank rather than the literal
+    # "None", matching how _meta_payload treats a missing location.
+    buyer = str(claim.department_name or "")
+    location = str(item.item_location or "")
+    violation_line = (
+        "校验提示：票据金额或场景需要人工复核。"
+        if violated
+        else "校验提示：票据字段与报销明细一致。"
+    )
+    return "\n".join(
+        [
+            f"票据类型：{document_label}",
+            f"发票号码：{invoice_no}",
+            f"开票方：{merchant}",
+            f"购买方：{buyer}",
+            f"发生日期：{item.item_date.isoformat()}",
+            f"发生地点：{location}",
+            f"金额：{amount}",
+            f"关联报销单：{claim.claim_no}",
+            violation_line,
+        ]
+    )
+
+
+def _merchant_name(item_type: str, violated: bool) -> str:
+    """Pick the mock issuer name for an item type, from the violation or compliant table.
+
+    The item type is trimmed and lower-cased; types missing from the chosen table
+    get that table's generic supplier name.
+    """
+    key = str(item_type or "").strip().lower()
+    if not violated:
+        compliant_names = {
+            "hotel": "合规住宿服务有限公司",
+            "transport": "合规出行服务有限公司",
+            "travel_allowance": "系统差旅补贴测算",
+            "office": "合规办公用品有限公司",
+            "communication": "合规通信服务有限公司",
+            "meal": "合规餐饮服务有限公司",
+        }
+        return compliant_names.get(key, "合规票据供应商")
+
+    suspicious_names = {
+        "hotel": "上海云栖酒店有限公司",
+        "transport": "跨城交通服务商",
+        "office": "综合采购供应商",
+        "meal": "高端商务餐饮有限公司",
+    }
+    return suspicious_names.get(key, "异常样本供应商")
+
+
+def _meta_payload(
+    *,
+    storage_key: str,
+    filename: str,
+    file_path: Path,
+    claim: ExpenseClaim,
+    item: ExpenseClaimItem,
+    document_type: str,
+    document_label: str,
+    scene_code: str,
+    scene_label: str,
+    ocr_text: str,
+    violated: bool,
+) -> dict[str, Any]:
+    """Build the metadata dict stored next to a mock attachment file.
+
+    The payload carries file facts (name, storage key, size read from
+    ``file_path``), the mock OCR text and scores, the structured document fields,
+    and the analysis and requirement-check sections.  ``file_path`` must already
+    exist because its size is read with ``stat()``.
+    """
+    amount_text = _display_amount(item.item_amount)
+    # The structured invoice number must agree with the one printed in the OCR
+    # text of the same attachment; the filename-derived value is only a fallback
+    # for OCR text that carries no invoice-number line.
+    invoice_no = _invoice_no_from_ocr_text(ocr_text) or _invoice_no(filename)
+    document_info = {
+        "document_type": document_type,
+        "document_type_label": document_label,
+        "scene_code": scene_code,
+        "scene_label": scene_label,
+        "fields": [
+            {"key": "invoice_no", "label": "发票号码", "value": invoice_no},
+            {"key": "invoice_date", "label": "开票日期", "value": item.item_date.isoformat()},
+            {"key": "amount", "label": "金额", "value": amount_text},
+            {"key": "location", "label": "地点", "value": str(item.item_location or "")},
+            {
+                "key": "merchant",
+                "label": "开票方",
+                "value": _merchant_name(item.item_type, violated),
+            },
+        ],
+    }
+    requirement_check = _requirement_payload(
+        violated,
+        item,
+        document_type,
+        document_label,
+        scene_code,
+        scene_label,
+    )
+    ocr_summary = f"{document_label}，金额 {amount_text}，{'需复核' if violated else '字段匹配'}。"
+    return {
+        # This marker is what _has_existing_mock looks for on later runs.
+        "source": "half_year_expense_demo_mock",
+        "file_name": filename,
+        "storage_key": storage_key,
+        "media_type": "text/plain",
+        "size_bytes": file_path.stat().st_size,
+        "uploaded_at": datetime.now(UTC).isoformat(),
+        "previewable": False,
+        "preview_kind": "",
+        "preview_storage_key": "",
+        "preview_media_type": "",
+        "preview_file_name": "",
+        "analysis": _analysis_payload(violated, claim, item),
+        "document_info": document_info,
+        "requirement_check": requirement_check,
+        "ocr_status": "mocked",
+        "ocr_error": "",
+        "ocr_text": ocr_text,
+        "ocr_summary": ocr_summary,
+        "ocr_avg_score": 0.97 if not violated else 0.81,
+        "ocr_line_count": len(ocr_text.splitlines()),
+        "ocr_classification_source": "mock_rule",
+        "ocr_classification_confidence": 0.96 if not violated else 0.78,
+        "ocr_classification_evidence": [document_label, scene_label],
+        "ocr_warnings": ["mock违规样本"] if violated else [],
+    }
+
+
+def _invoice_no_from_ocr_text(ocr_text: str) -> str:
+    """Return the value of the first ``发票号码：`` line in ``ocr_text``, or ``""`` if absent."""
+    prefix = "发票号码："
+    for line in ocr_text.splitlines():
+        if line.startswith(prefix):
+            return line[len(prefix):].strip()
+    return ""
+
+
+def _analysis_payload(
+    violated: bool,
+    claim: ExpenseClaim,
+    item: ExpenseClaimItem,
+) -> dict[str, Any]:
+    """Build the ``analysis`` section of the mock metadata.
+
+    Violation samples get a ``warning`` verdict that names the claim; all other
+    samples get a ``success`` verdict.
+    """
+    amount_text = _display_amount(item.item_amount)
+    if not violated:
+        return {
+            "severity": "success",
+            "label": "合规",
+            "headline": "票据字段与报销明细一致",
+            "summary": "系统 mock 的 OCR 字段已覆盖金额、日期、地点和票据类型。",
+            "points": [
+                f"金额 {amount_text} 与费用明细一致。",
+                f"票据类型匹配 {item.item_reason}。",
+            ],
+            "rule_basis": ["基础票据完整性", "金额一致性"],
+            "suggestion": "当前材料可作为演示合规样本。",
+        }
+
+    return {
+        "severity": "warning",
+        "label": "需复核",
+        "headline": "票据字段存在合规疑点",
+        "summary": "系统 mock 的 OCR 字段与报销场景存在偏差，用于演示违规样本。",
+        "points": [
+            f"报销单 {claim.claim_no} 金额或场景需要人工复核。",
+            f"费用明细：{item.item_reason}，金额 {amount_text}。",
+        ],
+        "rule_basis": ["票据金额与费用明细一致性", "票据场景与费用科目匹配"],
+        "suggestion": "请核对票据原件、业务事由和费用归口后再提交或付款。",
+    }
+
+
+def _requirement_payload(
+    violated: bool,
+    item: ExpenseClaimItem,
+    document_type: str,
+    document_label: str,
+    scene_code: str,
+    scene_label: str,
+) -> dict[str, Any]:
+    """Build the ``requirement_check`` section of the mock metadata.
+
+    ``matches`` is the inverse of ``violated``.  The recognized scene doubles as
+    the only allowed scene, and a missing item type or reason falls back to
+    ``other`` / ``费用明细``.
+    """
+    matches = not violated
+    expense_type = str(item.item_type or "other")
+    expense_label = str(item.item_reason or "费用明细")
+    if matches:
+        message = "材料匹配，可继续处理。"
+    else:
+        message = "材料存在疑点，建议人工复核。"
+    return {
+        "matches": matches,
+        "current_expense_type": expense_type,
+        "current_expense_type_label": expense_label,
+        "allowed_scene_labels": [scene_label],
+        "recognized_scene_code": scene_code,
+        "recognized_scene_label": scene_label,
+        "recognized_document_type": document_type,
+        "recognized_document_type_label": document_label,
+        "message": message,
+    }
+
+
+def _invoice_no(filename: str) -> str:
+    """Derive an invoice number from a filename: stem without dashes, upper-cased, last 20 chars."""
+    stem = Path(filename).stem
+    compact = stem.replace("-", "").upper()
+    return compact[-20:]
+
+
+def _display_amount(value: Decimal | float | int | str | None) -> str:
+    """Format an amount with exactly two decimals; any falsy value renders as ``0.00``."""
+    raw = value or "0"
+    cents = Decimal("0.01")
+    rounded = Decimal(str(raw)).quantize(cents)
+    return format(rounded, ".2f")
+
+
+# Run the mocker only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
--- a/server/scripts/repair_half_year_expense_demo_distribution.py
+++ b/server/scripts/repair_half_year_expense_demo_distribution.py
@@ -0,0 +1,570 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+import uuid
+from collections import defaultdict
+from dataclasses import asdict, dataclass
+from datetime import UTC, date, datetime
+from decimal import Decimal
+from pathlib import Path
+from typing import Any
+
+from sqlalchemy import select
+from sqlalchemy.orm import selectinload
+
+SERVER_DIR = Path(__file__).resolve().parents[1]
+SRC_DIR = SERVER_DIR / "src"
+if str(SRC_DIR) not in sys.path:
+    sys.path.insert(0, str(SRC_DIR))
+
+from app.db.session import get_session_factory  # noqa: E402
+from app.models.budget import BudgetAllocation, BudgetReservation, BudgetTransaction  # noqa: E402
+from app.models.employee import Employee  # noqa: E402
+from app.models.financial_record import ExpenseClaim  # noqa: E402
+from app.models.organization import OrganizationUnit  # noqa: E402
+from app.services.demo_company_simulation_catalog import (  # noqa: E402
+    BUDGETED_STATUSES,
+    PENDING_STATUSES,
+    SIM_BUDGET_PREFIX,
+    SIM_CLAIM_PREFIX,
+    SIM_EMPLOYEE_PREFIX,
+    SIM_PROJECT_CODE,
+    SIM_RESERVATION_PREFIX,
+    SIM_TRANSACTION_PREFIX,
+    SUBJECT_LABELS,
+    SUCCESS_STATUSES,
+    target_budget_usage,
+)
+from app.services.demo_company_simulation_filters import is_admin_employee_like  # noqa: E402
+from app.services.employee_behavior_profile_service import (  # noqa: E402
+    EmployeeBehaviorProfileService,
+)
+from app.services.expense_claim_status_registry import (  # noqa: E402
+    normalize_expense_claim_state,
+)
+
+# (organization unit code, share) pairs.  The shares add up to 1.00 and are used to
+# split simulated employees and claims; the tuple order is also the order in which
+# consecutive slices of employees/claims are handed to departments.
+DEPARTMENT_PLAN = (
+    ("TECH-DEPT", Decimal("0.30")),
+    ("MARKET-DEPT", Decimal("0.24")),
+    ("PRODUCTION-DEPT", Decimal("0.18")),
+    ("FINANCE-DEPT", Decimal("0.12")),
+    ("HR-DEPT", Decimal("0.10")),
+    ("PRESIDENT-OFFICE", Decimal("0.06")),
+)
+# Number of claims per department that are forced into a pending ("submitted") state.
+RECENT_PENDING_PER_DEPARTMENT = 3
+# Submission timestamps for those pending claims, indexed by the claim's position
+# within its department; needs at least RECENT_PENDING_PER_DEPARTMENT entries.
+RECENT_DATES = (
+    datetime(2026, 6, 1, 10, 0, tzinfo=UTC),
+    datetime(2026, 6, 1, 15, 0, tzinfo=UTC),
+    datetime(2026, 6, 2, 6, 0, tzinfo=UTC),
+)
+
+
+@dataclass(frozen=True, slots=True)
+class RepairSummary:
+    """Immutable report of one ``repair_distribution`` run."""
+
+    # "apply" when changes were made, "dry-run" otherwise.
+    mode: str
+    # Number of simulated employees and claims found.
+    sim_employees: int
+    sim_claims: int
+    # Target head count / claim count per department code, derived from DEPARTMENT_PLAN.
+    employee_department_plan: dict[str, int]
+    claim_department_plan: dict[str, int]
+    # Fixed number of pending claims per department code.
+    recent_pending_plan: dict[str, int]
+    # Planned budget row counts computed from the post-repair (or previewed) claims.
+    rebuilt_budget_allocations: int
+    rebuilt_budget_transactions: int
+    rebuilt_budget_reservations: int
+    # Amount snapshots taken before and after the repair, keyed by string.
+    # NOTE(review): keys are presumably department names or codes; confirm against
+    # _department_amounts / _recent_pending_amounts.
+    before_all_department_amounts: dict[str, str]
+    before_recent_pending_amounts: dict[str, str]
+    after_all_department_amounts: dict[str, str]
+    after_recent_pending_amounts: dict[str, str]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the report as a plain dict via ``dataclasses.asdict``."""
+        return asdict(self)
+
+
+def main() -> None:
+    """Parse CLI flags, run the distribution repair, and print the summary as JSON.
+
+    Nothing is committed without ``--apply``.  ``--refresh-profiles`` only takes
+    effect together with ``--apply``.  Any exception rolls the session back and
+    is re-raised.
+    """
+    cli = argparse.ArgumentParser(
+        description="Repair simulated half-year demo data distribution."
+    )
+    cli.add_argument("--apply", action="store_true", help="Apply repair. Default is dry-run.")
+    cli.add_argument(
+        "--refresh-profiles",
+        action="store_true",
+        help="After --apply, refresh employee behavior profile snapshots for simulated employees.",
+    )
+    cli.add_argument("--profile-limit", type=int, default=120)
+    options = cli.parse_args()
+    applying = options.apply
+
+    open_session = get_session_factory()
+    with open_session() as db:
+        try:
+            outcome = repair_distribution(db, apply=applying)
+            refreshed = None
+            if applying:
+                if options.refresh_profiles:
+                    refreshed = _refresh_company_profiles(db, limit=options.profile_limit)
+                db.commit()
+
+            report = outcome.to_dict()
+            if refreshed is not None:
+                report["profile_refresh"] = refreshed
+            print(json.dumps(report, ensure_ascii=False, indent=2))
+
+            if not applying:
+                print("dry-run only; pass --apply after confirmation to repair simulated data.")
+            elif not options.refresh_profiles:
+                print("pass --refresh-profiles to generate employee behavior profile snapshots.")
+        except Exception:
+            db.rollback()
+            raise
+
+
+def repair_distribution(db, *, apply: bool) -> RepairSummary:
+    """Rebalance simulated employees, claims and budgets across the planned departments.
+
+    With ``apply`` false nothing is modified and the "after" figures come from
+    ``_preview_claims``.  With ``apply`` true the claim workflow states are
+    normalized, employees and claims are reassigned, the pending claims are
+    rewritten, the simulated budget rows are rebuilt, and the "after" figures are
+    re-read from the database.  Changes are flushed but not committed here.
+
+    Raises:
+        RuntimeError: if a department code from ``DEPARTMENT_PLAN`` is not found.
+    """
+    departments = _canonical_departments(db)
+    if len(departments) < len(DEPARTMENT_PLAN):
+        missing = [code for code, _ in DEPARTMENT_PLAN if code not in departments]
+        raise RuntimeError(f"missing canonical departments: {missing}")
+
+    sim_employees = _sim_employees(db)
+    sim_claims = _sim_claims(db)
+    before_all = _department_amounts(sim_claims)
+    before_recent = _recent_pending_amounts(sim_claims)
+
+    employee_plan = _counts_by_weight(len(sim_employees))
+    claim_plan = _counts_by_weight(len(sim_claims))
+    recent_claims = _recent_claims(sim_claims)
+    fixed_recent_plan = {code: RECENT_PENDING_PER_DEPARTMENT for code, _ in DEPARTMENT_PLAN}
+    # Regular claims per department = planned total minus the fixed pending ones.
+    regular_plan = {
+        code: max(claim_plan.get(code, 0) - fixed_recent_plan.get(code, 0), 0)
+        for code, _ in DEPARTMENT_PLAN
+    }
+
+    if apply:
+        _normalize_sim_claim_workflow(sim_claims)
+        _redistribute_employees(sim_employees, departments, employee_plan)
+        db.flush()
+        employees_by_dept = _employees_by_department(db)
+        # Build the membership set once; rebuilding it for every claim made this
+        # filter quadratic in the number of claims.
+        recent_set = set(recent_claims)
+        regular_claims = [claim for claim in sim_claims if claim not in recent_set]
+        _redistribute_regular_claims(
+            regular_claims,
+            departments,
+            employees_by_dept,
+            regular_plan,
+        )
+        _repair_recent_pending_claims(recent_claims, departments, employees_by_dept)
+        db.flush()
+        _rebuild_sim_budget(db, sim_claims, departments)
+        db.flush()
+
+    after_claims = (
+        _sim_claims(db)
+        if apply
+        else _preview_claims(sim_claims, departments, claim_plan)
+    )
+    after_all = _department_amounts(after_claims)
+    after_recent = _recent_pending_amounts(after_claims)
+    allocation_count, transaction_count, reservation_count = _planned_budget_counts(after_claims)
+
+    return RepairSummary(
+        mode="apply" if apply else "dry-run",
+        sim_employees=len(sim_employees),
+        sim_claims=len(sim_claims),
+        employee_department_plan=employee_plan,
+        claim_department_plan=claim_plan,
+        recent_pending_plan=fixed_recent_plan,
+        rebuilt_budget_allocations=allocation_count,
+        rebuilt_budget_transactions=transaction_count,
+        rebuilt_budget_reservations=reservation_count,
+        before_all_department_amounts=before_all,
+        before_recent_pending_amounts=before_recent,
+        after_all_department_amounts=after_all,
+        after_recent_pending_amounts=after_recent,
+    )
+
+
+def _refresh_company_profiles(db, *, limit: int) -> dict[str, object]:
+    """Refresh behavior-profile snapshots for non-admin employees, then commit.
+
+    ``limit`` falls back to 120 when falsy and is clamped to the range 1..500.
+    Employees are taken in ``employee_no`` order after admin-like ones are
+    filtered out.  Returns a small report with the target and snapshot counts.
+    """
+    task_type = "half_year_expense_demo_repair"
+    windows = (30, 90, 180)
+    max_targets = max(1, min(int(limit or 120), 500))
+
+    everyone = list(db.scalars(select(Employee).order_by(Employee.employee_no.asc())).all())
+    eligible_ids = [person.id for person in everyone if not is_admin_employee_like(person)]
+    target_ids = eligible_ids[:max_targets]
+
+    profiles = EmployeeBehaviorProfileService(db)
+    total_snapshots = 0
+    for target_id in target_ids:
+        # The service is told not to commit; this function commits once at the end.
+        created = profiles.refresh_employee_profiles(
+            employee_id=target_id,
+            window_days=windows,
+            expense_type_scope="overall",
+            source_task_type=task_type,
+            commit=False,
+        )
+        total_snapshots += len(created)
+
+    db.commit()
+    return {
+        "target_employee_count": len(target_ids),
+        "snapshot_count": total_snapshots,
+        "window_days": list(windows),
+        "source_task_type": task_type,
+        "scope": "all_non_admin_employees",
+    }
+
+
+def _canonical_departments(db) -> dict[str, OrganizationUnit]:
+    """Fetch the organization units named in ``DEPARTMENT_PLAN``, keyed by unit code."""
+    wanted_codes = [entry[0] for entry in DEPARTMENT_PLAN]
+    statement = select(OrganizationUnit).where(OrganizationUnit.unit_code.in_(wanted_codes))
+    found: dict[str, OrganizationUnit] = {}
+    for unit in db.scalars(statement).all():
+        found[unit.unit_code] = unit
+    return found
+
+
+def _sim_employees(db) -> list[Employee]:
+    """Return employees whose number starts with ``SIM_EMPLOYEE_PREFIX``, ordered by number."""
+    statement = (
+        select(Employee)
+        .options(selectinload(Employee.organization_unit))
+        .where(Employee.employee_no.like(f"{SIM_EMPLOYEE_PREFIX}%"))
+        .order_by(Employee.employee_no.asc())
+    )
+    return list(db.scalars(statement).all())
+
+
+def _sim_claims(db) -> list[ExpenseClaim]:
+    """Return claims whose number starts with ``SIM_CLAIM_PREFIX``, ordered by claim number.
+
+    Each claim's items are loaded eagerly.
+    """
+    with_items = select(ExpenseClaim).options(selectinload(ExpenseClaim.items))
+    simulated_only = with_items.where(ExpenseClaim.claim_no.like(f"{SIM_CLAIM_PREFIX}%"))
+    ordered = simulated_only.order_by(ExpenseClaim.claim_no.asc())
+    return list(db.scalars(ordered).all())
+
+
+def _normalize_sim_claim_workflow(claims: list[ExpenseClaim]) -> None:
+    """Overwrite each claim's status and approval stage with their normalized values.
+
+    Every claim is normalized as a non-application claim.
+    """
+    for claim in claims:
+        state = normalize_expense_claim_state(
+            claim.status,
+            claim.approval_stage,
+            claim_no=claim.claim_no,
+            expense_type=claim.expense_type,
+            is_application_claim=False,
+        )
+        claim.status, claim.approval_stage = state.status, state.approval_stage
+
+
+def _counts_by_weight(total: int) -> dict[str, int]:
+    """Split ``total`` into whole-number counts per department using ``DEPARTMENT_PLAN``.
+
+    Each department first gets its share truncated to an integer; the units lost to
+    truncation then go, one each, to the departments with the largest fractional
+    parts (ties keep plan order).
+    """
+    shares = [(code, total * weight) for code, weight in DEPARTMENT_PLAN]
+    counts = {code: int(share) for code, share in shares}
+    leftover = total - sum(counts.values())
+
+    def fractional_part(entry) -> Decimal:
+        return entry[1] - int(entry[1])
+
+    by_largest_fraction = sorted(shares, key=fractional_part, reverse=True)
+    for code, _share in by_largest_fraction[:leftover]:
+        counts[code] += 1
+    return counts
+
+
+def _redistribute_employees(
+    employees: list[Employee],
+    departments: dict[str, OrganizationUnit],
+    plan: dict[str, int],
+) -> None:
+    """Move consecutive slices of ``employees`` into departments following ``plan``.
+
+    Departments are visited in ``DEPARTMENT_PLAN`` order; each takes the next
+    ``plan[code]`` employees and stamps its cost center, location and finance
+    owner name on them.
+    """
+    cursor = 0
+    for code, _weight in DEPARTMENT_PLAN:
+        unit = departments[code]
+        quota = plan.get(code, 0)
+        for member in employees[cursor : cursor + quota]:
+            member.organization_unit = unit
+            member.cost_center = unit.cost_center
+            member.location = unit.location
+            member.finance_owner_name = f"{unit.name}财务BP"
+        cursor += quota
+
+
+def _employees_by_department(db) -> dict[str, list[Employee]]:
+    """Group every employee that has an organization unit by that unit's code.
+
+    Employees are read in ``employee_no`` order; units without a code are skipped.
+    """
+    statement = (
+        select(Employee)
+        .options(selectinload(Employee.organization_unit))
+        .where(Employee.organization_unit_id.is_not(None))
+        .order_by(Employee.employee_no.asc())
+    )
+    by_code: dict[str, list[Employee]] = defaultdict(list)
+    for member in db.scalars(statement).all():
+        unit = member.organization_unit
+        if unit is None or not unit.unit_code:
+            continue
+        by_code[unit.unit_code].append(member)
+    return by_code
+
+
+def _redistribute_regular_claims(
+    claims: list[ExpenseClaim],
+    departments: dict[str, OrganizationUnit],
+    employees_by_dept: dict[str, list[Employee]],
+    plan: dict[str, int],
+) -> None:
+    """Assign consecutive slices of ``claims`` to departments following ``plan``.
+
+    Within a department, claims are handed to its employees round-robin; when the
+    department has no employees only the department fields are updated.
+    """
+    cursor = 0
+    for code, _weight in DEPARTMENT_PLAN:
+        unit = departments[code]
+        staff = employees_by_dept.get(code) or []
+        quota = plan.get(code, 0)
+        for position, claim in enumerate(claims[cursor : cursor + quota]):
+            owner = None
+            if staff:
+                owner = staff[position % len(staff)]
+            _assign_claim_department(claim, unit, owner)
+        cursor += quota
+
+
+def _repair_recent_pending_claims(
+    claims: list[ExpenseClaim],
+    departments: dict[str, OrganizationUnit],
+    employees_by_dept: dict[str, list[Employee]],
+) -> None:
+    """Rewrite ``claims`` into freshly submitted, pending claims spread over all departments.
+
+    ``RECENT_PENDING_PER_DEPARTMENT`` claims are consumed per ``DEPARTMENT_PLAN``
+    entry, in order.  Each one is moved to the department (and to one of its
+    employees, round-robin, when it has any), set to ``submitted`` with an
+    alternating approval stage, and given timestamps derived from ``RECENT_DATES``.
+
+    Raises:
+        RuntimeError: if fewer claims are supplied than the plan consumes.
+    """
+    # Fail before touching anything instead of hitting a bare IndexError after
+    # some claims have already been rewritten.
+    required = len(DEPARTMENT_PLAN) * RECENT_PENDING_PER_DEPARTMENT
+    if len(claims) < required:
+        raise RuntimeError(
+            f"need {required} recent claims to repair pending distribution, got {len(claims)}"
+        )
+
+    index = 0
+    for code, _weight in DEPARTMENT_PLAN:
+        department = departments[code]
+        employees = employees_by_dept.get(code) or []
+        for offset in range(RECENT_PENDING_PER_DEPARTMENT):
+            claim = claims[index]
+            employee = employees[offset % len(employees)] if employees else None
+            _assign_claim_department(claim, department, employee)
+            claim.status = "submitted"
+            # Even offsets wait on finance, odd offsets on the direct manager.
+            claim.approval_stage = "财务审批" if offset % 2 == 0 else "直属领导审批"
+            # Occurred two hours before submission, last updated one hour after it.
+            claim.occurred_at = RECENT_DATES[offset] - _hours(2)
+            claim.submitted_at = RECENT_DATES[offset]
+            claim.updated_at = RECENT_DATES[offset] + _hours(1)
+            index += 1
+
+
+def _assign_claim_department(
+    claim: ExpenseClaim,
+    department: OrganizationUnit,
+    employee: Employee | None,
+) -> None:
+    """Point ``claim`` at ``department`` and, when given, at ``employee``.
+
+    The claim keeps its own location when the department has none.
+    """
+    claim.department_id, claim.department_name = department.id, department.name
+    if employee is not None:
+        claim.employee_id, claim.employee_name = employee.id, employee.name
+    claim.location = department.location or claim.location
+
+
+def _rebuild_sim_budget(
+    db,
+    claims: list[ExpenseClaim],
+    departments: dict[str, OrganizationUnit],
+) -> None:
+    """Drop the simulated budget rows and recreate them from ``claims``.
+
+    Steps:
+      1. Delete every transaction, reservation and allocation whose number
+         starts with the matching ``SIM_*_PREFIX`` and flush.
+      2. Group the claims whose status is in ``BUDGETED_STATUSES`` by
+         (year, quarter period key, department id, cost center, subject code).
+      3. For each group, in sorted key order, add one ``BudgetAllocation``,
+         one transaction per claim and one reservation per claim whose status
+         is in ``PENDING_STATUSES``.
+
+    The function only adds, deletes and flushes; it never commits.
+
+    Args:
+        db: Session-like object providing ``scalars``, ``delete``, ``add`` and
+            ``flush``.
+        claims: Claims to budget; ``occurred_at`` is dereferenced for every
+            budgeted claim, so it must be set on those.
+        departments: Organization units used to resolve each claim's cost center.
+    """
+    # Transactions and reservations are removed before allocations —
+    # presumably because they reference allocations via ``allocation_id``.
+    for model, field, prefix in (
+        (BudgetTransaction, BudgetTransaction.transaction_no, SIM_TRANSACTION_PREFIX),
+        (BudgetReservation, BudgetReservation.reservation_no, SIM_RESERVATION_PREFIX),
+        (BudgetAllocation, BudgetAllocation.budget_no, SIM_BUDGET_PREFIX),
+    ):
+        for row in db.scalars(select(model).where(field.like(f"{prefix}%"))).all():
+            db.delete(row)
+    db.flush()
+
+    groups: dict[tuple[int, str, str, str, str], list[ExpenseClaim]] = defaultdict(list)
+    for claim in claims:
+        if claim.status not in BUDGETED_STATUSES:
+            continue
+        # Entertainment spend is budgeted under the "meal" subject.
+        subject_code = "meal" if claim.expense_type == "entertainment" else claim.expense_type
+        quarter = ((claim.occurred_at.month - 1) // 3) + 1
+        period_key = f"{claim.occurred_at.year}Q{quarter}"
+        # May be None when no department matches the claim's department_id.
+        cost_center = _claim_cost_center(claim, departments)
+        key = (claim.occurred_at.year, period_key, claim.department_id, cost_center, subject_code)
+        groups[key].append(claim)
+
+    # Running numbers for the generated budget_no / transaction_no values.
+    allocation_index = 1
+    transaction_index = 1
+    # Groups are visited in sorted key order, so the numbering follows that order.
+    for key, group_claims in sorted(groups.items()):
+        year, period_key, department_id, cost_center, subject_code = key
+        total_used = sum((Decimal(claim.amount or 0) for claim in group_claims), Decimal("0.00"))
+        # NOTE(review): target_budget_usage presumably returns the intended
+        # used/allocated ratio, so spend / ratio gives the pool size — confirm
+        # against its definition (and that it can never return zero).
+        original_amount = (
+            total_used / target_budget_usage(period_key, subject_code, allocation_index)
+        ).quantize(Decimal("0.01"))
+        allocation = BudgetAllocation(
+            # Name-based UUID: the same group key always yields the same id.
+            id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{SIM_BUDGET_PREFIX}:{key}")),
+            budget_no=f"{SIM_BUDGET_PREFIX}-R{allocation_index:04d}",
+            fiscal_year=year,
+            period_type="quarter",
+            period_key=period_key,
+            department_id=department_id,
+            department_name=group_claims[0].department_name,
+            cost_center=cost_center,
+            project_code=SIM_PROJECT_CODE,
+            subject_code=subject_code,
+            subject_name=SUBJECT_LABELS.get(subject_code, subject_code),
+            # Every pool is at least 3000.00, however small the group's spend.
+            original_amount=max(original_amount, Decimal("3000.00")),
+            adjusted_amount=Decimal("0.00"),
+            status="active",
+            warning_threshold=Decimal("80.00"),
+            control_action="warn",
+            description="半年报销模拟数据部门分布修复预算池",
+            created_by="simulation",
+            updated_by="simulation",
+        )
+        db.add(allocation)
+        # Flush the allocation before the rows that carry its id are added.
+        db.flush()
+        for claim in group_claims:
+            db.add(_budget_transaction(allocation.id, claim, transaction_index))
+            if claim.status in PENDING_STATUSES:
+                # The reservation shares the transaction's running number, so
+                # reservation numbers are not necessarily contiguous.
+                db.add(_budget_reservation(allocation.id, claim, transaction_index))
+            transaction_index += 1
+        allocation_index += 1
+
+
+def _budget_transaction(allocation_id: str, claim: ExpenseClaim, index: int) -> BudgetTransaction:
+    """Build the simulated budget ledger entry for one claim.
+
+    The entry is numbered ``{SIM_TRANSACTION_PREFIX}-R{index:04d}`` and its id is
+    a name-based UUID derived from that number. Claims whose status is in
+    ``SUCCESS_STATUSES`` are recorded as ``"consume"``, all others as
+    ``"reserve"``. Both available-amount snapshots are stored as ``0.00``.
+
+    Args:
+        allocation_id: Id of the budget allocation the entry belongs to.
+        claim: Source claim supplying id, number, amount and timestamps.
+        index: Running number used to build the transaction number.
+
+    Returns:
+        A newly constructed ``BudgetTransaction``; it is not added to any
+        session here.
+    """
+    number = f"{SIM_TRANSACTION_PREFIX}-R{index:04d}"
+    if claim.status in SUCCESS_STATUSES:
+        kind = "consume"
+    else:
+        kind = "reserve"
+    zero = Decimal("0.00")
+    values = {
+        "id": str(uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{number}")),
+        "transaction_no": number,
+        "allocation_id": allocation_id,
+        "source_type": "claim",
+        "source_id": claim.id,
+        "source_no": claim.claim_no,
+        "transaction_type": kind,
+        "amount": Decimal(claim.amount or 0),
+        "before_available_amount": zero,
+        "after_available_amount": zero,
+        "operator": "simulation",
+        "reason": "修复后模拟数据预算台账",
+        "context_json": {"project_code": SIM_PROJECT_CODE, "simulated": True, "repair": True},
+        # Fall back to the occurrence time when the claim was never submitted.
+        "created_at": claim.submitted_at or claim.occurred_at,
+    }
+    return BudgetTransaction(**values)
+
+
+def _budget_reservation(allocation_id: str, claim: ExpenseClaim, index: int) -> BudgetReservation:
+    """Build the simulated budget reservation that holds funds for one claim.
+
+    The reservation is numbered ``{SIM_RESERVATION_PREFIX}-R{index:04d}``, gets a
+    name-based UUID derived from that number and is always created with
+    ``source_status="active"``.
+
+    Args:
+        allocation_id: Id of the budget allocation the reservation draws on.
+        claim: Source claim supplying id, number, amount and timestamps.
+        index: Running number used to build the reservation number.
+
+    Returns:
+        A newly constructed ``BudgetReservation``; it is not added to any
+        session here.
+    """
+    number = f"{SIM_RESERVATION_PREFIX}-R{index:04d}"
+    values = {
+        "id": str(uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{number}")),
+        "reservation_no": number,
+        "allocation_id": allocation_id,
+        "source_type": "claim",
+        "source_id": claim.id,
+        "source_no": claim.claim_no,
+        "source_status": "active",
+        "amount": Decimal(claim.amount or 0),
+        "context_json": {"project_code": SIM_PROJECT_CODE, "simulated": True, "repair": True},
+        # Fall back to the occurrence time when the claim was never submitted.
+        "created_at": claim.submitted_at or claim.occurred_at,
+    }
+    return BudgetReservation(**values)
+
+
+def _recent_claims(claims: list[ExpenseClaim]) -> list[ExpenseClaim]:
+    """Pick the claims that will be rewritten as recent pending claims.
+
+    Despite the name, selection is by amount rather than by date: the claims
+    are ordered from largest to smallest amount (a missing amount counts as 0)
+    and the first ``RECENT_PENDING_PER_DEPARTMENT * len(DEPARTMENT_PLAN)`` are
+    returned. Fewer are returned when ``claims`` is shorter than that.
+    """
+    quota = RECENT_PENDING_PER_DEPARTMENT * len(DEPARTMENT_PLAN)
+    largest_first = sorted(
+        claims,
+        key=lambda candidate: Decimal(candidate.amount or 0),
+        reverse=True,
+    )
+    return largest_first[:quota]
+
+
+def _department_amounts(claims: list[ExpenseClaim]) -> dict[str, str]:
+    """Sum claim amounts per department name and format them for display.
+
+    Claims without a department name are collected under ``"待补充"`` and a
+    missing amount counts as 0. Formatting and ordering are delegated to
+    ``_format_amounts``.
+    """
+    totals: dict[str, Decimal] = defaultdict(Decimal)
+    for item in claims:
+        label = item.department_name or "待补充"
+        totals[label] = totals[label] + Decimal(item.amount or 0)
+    return _format_amounts(totals)
+
+
+def _recent_pending_amounts(claims: list[ExpenseClaim]) -> dict[str, str]:
+    """Sum, per department, the pending claims dated 2026-06-01 or 2026-06-02.
+
+    Only claims whose status is in ``PENDING_STATUSES`` are considered. The
+    date comes from ``submitted_at``, falling back to ``occurred_at``; claims
+    with neither are skipped. Claims without a department name are collected
+    under ``"待补充"``. Formatting and ordering are delegated to
+    ``_format_amounts``.
+    """
+    # NOTE(review): the window is hard-coded here; it presumably mirrors
+    # RECENT_DATES — confirm they stay in sync.
+    window_start, window_end = date(2026, 6, 1), date(2026, 6, 2)
+    totals: dict[str, Decimal] = defaultdict(Decimal)
+    for item in claims:
+        is_pending = item.status in PENDING_STATUSES
+        stamp = (item.submitted_at or item.occurred_at) if is_pending else None
+        if stamp and window_start <= stamp.date() <= window_end:
+            totals[item.department_name or "待补充"] += Decimal(item.amount or 0)
+    return _format_amounts(totals)
+
+
+def _preview_claims(
+    claims: list[ExpenseClaim],
+    departments: dict[str, OrganizationUnit],
+    claim_plan: dict[str, int],
+) -> list[ExpenseClaim]:
+    """Build lightweight previews of the claims without touching the originals.
+
+    The claims chosen by ``_recent_claims`` are set aside. The remaining
+    claims are dealt out to the departments of ``DEPARTMENT_PLAN`` in order,
+    each department receiving its planned count minus
+    ``RECENT_PENDING_PER_DEPARTMENT`` (never less than zero) and keeping the
+    claim's own status and submission time. Afterwards every department gets
+    ``RECENT_PENDING_PER_DEPARTMENT`` of the set-aside claims, previewed as
+    ``"submitted"`` at ``RECENT_DATES[offset]``.
+
+    Args:
+        claims: Source claims; only read.
+        departments: Organization units keyed by department code.
+        claim_plan: Planned claim count per department code; a missing code
+            counts as 0.
+
+    Returns:
+        Preview objects produced by ``_clone_claim``, regular ones first.
+    """
+    set_aside = _recent_claims(claims)
+    set_aside_lookup = set(set_aside)
+    ordinary = [candidate for candidate in claims if candidate not in set_aside_lookup]
+
+    previews: list[ExpenseClaim] = []
+
+    # Pass 1: regular claims, consumed as consecutive slices of ``ordinary``.
+    cursor = 0
+    for code, _weight in DEPARTMENT_PLAN:
+        unit = departments[code]
+        quota = max(claim_plan.get(code, 0) - RECENT_PENDING_PER_DEPARTMENT, 0)
+        previews.extend(
+            _clone_claim(candidate, unit.name, candidate.status, candidate.submitted_at)
+            for candidate in ordinary[cursor : cursor + quota]
+        )
+        cursor += quota
+
+    # Pass 2: the set-aside claims, one contiguous run per department.
+    for dept_pos, (code, _weight) in enumerate(DEPARTMENT_PLAN):
+        unit = departments[code]
+        first_slot = dept_pos * RECENT_PENDING_PER_DEPARTMENT
+        for offset in range(RECENT_PENDING_PER_DEPARTMENT):
+            source = set_aside[first_slot + offset]
+            previews.append(_clone_claim(source, unit.name, "submitted", RECENT_DATES[offset]))
+    return previews
+
+
+def _clone_claim(
+    claim: ExpenseClaim,
+    department_name: str,
+    status: str,
+    submitted_at: datetime | None,
+) -> Any:
+    """Return a detached stand-in for ``claim`` with overridden fields.
+
+    A throwaway class named ``ClaimPreview`` is created on every call and one
+    instance of it is returned. The department name, status and submission
+    time come from the arguments; ``occurred_at``, ``expense_type`` and
+    ``amount`` are copied from ``claim``. The values live on the class, not on
+    the instance.
+    """
+    snapshot: dict[str, Any] = {
+        "department_name": department_name,
+        "status": status,
+        "submitted_at": submitted_at,
+    }
+    # Fields carried over unchanged from the source claim.
+    for carried in ("occurred_at", "expense_type", "amount"):
+        snapshot[carried] = getattr(claim, carried)
+    preview_cls = type("ClaimPreview", (), snapshot)
+    return preview_cls()
+
+
+def _planned_budget_counts(claims: list[Any]) -> tuple[int, int, int]:
+    """Estimate how many budget rows rebuilding the budget would create.
+
+    Only claims whose status is in ``BUDGETED_STATUSES`` count. Every such
+    claim yields one transaction, and one reservation as well when its status
+    is in ``PENDING_STATUSES``. Allocations are counted as distinct
+    (quarter period key, department name, subject code) combinations.
+
+    The subject code follows ``_rebuild_sim_budget``: ``"entertainment"``
+    spend is booked under ``"meal"``, so both expense types share a single
+    planned allocation instead of being counted twice.
+
+    Args:
+        claims: Claims or claim previews exposing ``status``, ``submitted_at``,
+            ``occurred_at``, ``department_name`` and optionally
+            ``expense_type``.
+
+    Returns:
+        ``(allocation_count, transaction_count, reservation_count)``.
+    """
+    allocation_keys: set[tuple[str, Any, Any]] = set()
+    transaction_count = 0
+    reservation_count = 0
+    for claim in claims:
+        if claim.status not in BUDGETED_STATUSES:
+            continue
+        # NOTE(review): the quarter is taken from submitted_at first, whereas
+        # _rebuild_sim_budget groups by occurred_at; previews of re-dated
+        # claims only carry the new date in submitted_at — confirm this is the
+        # intended approximation.
+        submitted_at = claim.submitted_at or claim.occurred_at
+        period_key = f"{submitted_at.year}Q{((submitted_at.month - 1) // 3) + 1}"
+        expense_type = getattr(claim, "expense_type", "")
+        # Same folding as the real rebuild, so the planned count matches it.
+        subject_code = "meal" if expense_type == "entertainment" else expense_type
+        allocation_keys.add((period_key, claim.department_name, subject_code))
+        transaction_count += 1
+        if claim.status in PENDING_STATUSES:
+            reservation_count += 1
+    return len(allocation_keys), transaction_count, reservation_count
+
+
+def _claim_cost_center(
+    claim: ExpenseClaim,
+    departments: dict[str, OrganizationUnit],
+) -> str | None:
+    """Look up the cost center of the department that owns ``claim``.
+
+    Scans ``departments`` in insertion order and returns the ``cost_center``
+    of the first unit whose ``id`` equals ``claim.department_id``, or ``None``
+    when no unit matches.
+    """
+    matching_cost_centers = (
+        unit.cost_center
+        for unit in departments.values()
+        if unit.id == claim.department_id
+    )
+    return next(matching_cost_centers, None)
+
+
+def _format_amounts(buckets: dict[str, Decimal]) -> dict[str, str]:
+    """Render amounts as two-decimal strings, largest amount first.
+
+    Entries with equal amounts keep the order they had in ``buckets``.
+    """
+    cent = Decimal("0.01")
+    largest_first = sorted(buckets.items(), key=lambda pair: pair[1], reverse=True)
+    formatted: dict[str, str] = {}
+    for label, total in largest_first:
+        formatted[label] = str(total.quantize(cent))
+    return formatted
+
+
+def _hours(value: int):
+    """Return a ``timedelta`` lasting ``value`` hours."""
+    import datetime as _datetime
+
+    span = _datetime.timedelta(hours=value)
+    return span
+
+
+# Script entry point: call main() only when this file is executed directly,
+# not when it is imported.
+if __name__ == "__main__":
+    main()
--- a/server/scripts/seed_half_year_expense_demo.py
+++ b/server/scripts/seed_half_year_expense_demo.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import sys
+from datetime import date
+from pathlib import Path
+
+from sqlalchemy import select
+
+SERVER_DIR = Path(__file__).resolve().parents[1]
+SRC_DIR = SERVER_DIR / "src"
+if str(SRC_DIR) not in sys.path:
+    sys.path.insert(0, str(SRC_DIR))
+
+from app.db.session import get_session_factory  # noqa: E402
+from app.models.employee import Employee  # noqa: E402
+from app.services.demo_company_simulation_filters import is_admin_employee_like  # noqa: E402
+from app.services.demo_company_simulation_seed import (  # noqa: E402
+    HalfYearExpenseSimulationSeeder,
+    SimulationConfig,
+)
+from app.services.employee_behavior_profile_service import (  # noqa: E402
+    EmployeeBehaviorProfileService,
+)
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options of the half-year demo seeding script.
+
+    Options (with defaults): ``--target-employees`` (100), ``--start-date``
+    (ISO date, 2026-01-01), ``--months`` (6), ``--seed`` (20260602),
+    ``--apply`` (off; without it the run is a dry-run), ``--refresh-profiles``
+    (off) and ``--profile-limit`` (120).
+
+    Returns:
+        The namespace parsed from ``sys.argv``.
+    """
+    cli = argparse.ArgumentParser(
+        description="Seed half-year simulated reimbursement, budget, and employee data.",
+    )
+
+    # Options that take a value: (flag, converter, default).
+    valued_options = (
+        ("--target-employees", int, 100),
+        ("--start-date", date.fromisoformat, date(2026, 1, 1)),
+        ("--months", int, 6),
+        ("--seed", int, 20260602),
+    )
+    for flag, converter, fallback in valued_options:
+        cli.add_argument(flag, type=converter, default=fallback)
+
+    # Boolean switches: (flag, help text).
+    switches = (
+        ("--apply", "Write data. Default is dry-run only."),
+        (
+            "--refresh-profiles",
+            "After --apply, refresh employee behavior profile snapshots for simulated employees.",
+        ),
+    )
+    for flag, help_text in switches:
+        cli.add_argument(flag, action="store_true", help=help_text)
+
+    cli.add_argument("--profile-limit", type=int, default=120)
+    return cli.parse_args()
+
+
+def main() -> None:
+    """Run the half-year expense simulation seeder from the command line.
+
+    Without ``--apply`` only ``seeder.preview()`` runs and nothing is committed
+    here. With ``--apply`` the seeder writes its data and the session is
+    committed, either directly or by ``refresh_company_profiles`` when
+    ``--refresh-profiles`` is given as well. The resulting summary is printed
+    as JSON, followed by a hint about the next flag to pass where applicable.
+    Any exception rolls the session back and is re-raised.
+    """
+    dry_run_hint = "dry-run only; pass --apply after confirmation to write simulated data."
+    refresh_hint = "pass --refresh-profiles to generate employee behavior profile snapshots."
+
+    args = parse_args()
+    config = SimulationConfig(
+        target_employees=args.target_employees,
+        start_date=args.start_date,
+        months=args.months,
+        seed=args.seed,
+    )
+    open_session = get_session_factory()
+    with open_session() as db:
+        seeder = HalfYearExpenseSimulationSeeder(db, config)
+        try:
+            profile_refresh = None
+            if not args.apply:
+                summary = seeder.preview()
+                hint = dry_run_hint
+            else:
+                summary = seeder.apply()
+                if args.refresh_profiles:
+                    # refresh_company_profiles commits the session itself.
+                    profile_refresh = refresh_company_profiles(db, limit=args.profile_limit)
+                    hint = None
+                else:
+                    db.commit()
+                    hint = refresh_hint
+
+            payload = summary.to_dict()
+            if profile_refresh is not None:
+                payload["profile_refresh"] = profile_refresh
+            print(json.dumps(payload, ensure_ascii=False, indent=2))
+            if hint is not None:
+                print(hint)
+        except Exception:
+            db.rollback()
+            raise
+
+
+def refresh_company_profiles(db, *, limit: int) -> dict[str, object]:
+    """Refresh behavior profile snapshots for non-admin employees and commit.
+
+    Employees are loaded ordered by ``employee_no``; those flagged by
+    ``is_admin_employee_like`` are dropped and the first ``limit`` of the rest
+    are refreshed for the 30, 90 and 180 day windows with
+    ``expense_type_scope="overall"``. The session is committed once, after
+    all employees were processed.
+
+    Args:
+        db: Session-like object providing ``scalars`` and ``commit``.
+        limit: Maximum number of employees to refresh. Clamped to the range
+            1..500; a falsy value falls back to 120.
+
+    Returns:
+        A summary with the number of employees processed, the number of
+        snapshots produced, the windows, the task type and the scope label.
+    """
+    windows = (30, 90, 180)
+    task_type = "half_year_expense_demo_seed"
+
+    capped_limit = max(1, min(int(limit or 120), 500))
+    ordered_employees = db.scalars(select(Employee).order_by(Employee.employee_no.asc())).all()
+    eligible_ids = [
+        person.id for person in ordered_employees if not is_admin_employee_like(person)
+    ]
+    employee_ids = eligible_ids[:capped_limit]
+
+    service = EmployeeBehaviorProfileService(db)
+    snapshot_count = sum(
+        len(
+            service.refresh_employee_profiles(
+                employee_id=employee_id,
+                window_days=windows,
+                expense_type_scope="overall",
+                source_task_type=task_type,
+                commit=False,
+            )
+        )
+        for employee_id in employee_ids
+    )
+
+    db.commit()
+    return {
+        "target_employee_count": len(employee_ids),
+        "snapshot_count": snapshot_count,
+        "window_days": list(windows),
+        "source_task_type": task_type,
+        "scope": "all_non_admin_employees",
+    }
+
+
+# Script entry point: call main() only when this file is executed directly,
+# not when it is imported.
+if __name__ == "__main__":
+    main()