feat: 财务看板口径重构与半年模拟数据及报销状态注册表

- 重构 finance_dashboard 口径计算,新增模拟公司画像数据生成与筛选
- 引入 expense_claim_status_registry 统一报销状态流转
- 完善报销草稿流程、Item Sync 与本体解析器
- 优化总览页趋势图、分页组件与请求进度步骤
- 增强报销申请快速预览、本体工具与详情展示
- 新增半年报销模拟数据种子脚本与状态审计工具
- 补充财务看板、报销状态注册与模拟数据测试覆盖
This commit is contained in:
caoxiaozhu
2026-06-02 16:22:59 +08:00
parent ca691f3ee0
commit 0c74b4ab4a
54 changed files with 6810 additions and 1238 deletions

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import sys
from collections import Counter
from pathlib import Path
from typing import Any
from sqlalchemy import select
SERVER_DIR = Path(__file__).resolve().parents[1]
SRC_DIR = SERVER_DIR / "src"
if str(SRC_DIR) not in sys.path:
sys.path.insert(0, str(SRC_DIR))
from app.db.session import get_session_factory # noqa: E402
from app.models.financial_record import ExpenseClaim # noqa: E402
from app.services.expense_claim_status_registry import ( # noqa: E402
is_known_approval_stage,
is_known_claim_status,
normalize_expense_claim_state,
)
def main() -> None:
    """Run the expense-claim status audit from the command line.

    Loads every expense claim ordered by claim number and creation time,
    passes the rows to ``audit_claims`` and prints the resulting report as
    indented JSON with non-ASCII characters left unescaped.
    """
    cli = argparse.ArgumentParser(description="Audit expense claim status consistency.")
    cli.add_argument("--sample-limit", type=int, default=20)
    options = cli.parse_args()

    # A negative sample limit is clamped to zero before it reaches the audit.
    sample_limit = max(options.sample_limit, 0)

    open_session = get_session_factory()
    with open_session() as db:
        ordered_claims = select(ExpenseClaim).order_by(
            ExpenseClaim.claim_no.asc(),
            ExpenseClaim.created_at.asc(),
        )
        rows = list(db.scalars(ordered_claims).all())
        report = audit_claims(rows, sample_limit=sample_limit)
    print(json.dumps(report, ensure_ascii=False, indent=2))
def audit_claims(claims: list[ExpenseClaim], *, sample_limit: int) -> dict[str, Any]:
    """Summarise status/stage usage and pending normalizations for claims.

    Args:
        claims: Claims to inspect.
        sample_limit: Maximum number of normalization samples to keep.

    Returns:
        A dict with the claim count, per-dimension counters, the statuses and
        stages the registry does not recognise, and the normalizations the
        registry reports as changes (count, per-transition tally, samples).
    """
    placeholder = "<empty>"
    by_status: Counter[str] = Counter()
    by_stage: Counter[str] = Counter()
    by_status_and_stage: Counter[str] = Counter()
    by_doc_type: Counter[str] = Counter()
    unrecognised_statuses: Counter[str] = Counter()
    unrecognised_stages: Counter[str] = Counter()
    transitions: Counter[str] = Counter()
    examples: list[dict[str, Any]] = []

    for claim in claims:
        raw_status = str(claim.status or "").strip()
        raw_stage = str(claim.approval_stage or "").strip()
        kind = _doc_type(claim)
        # Blank values are reported under a visible placeholder key.
        status_label = raw_status or placeholder
        stage_label = raw_stage or placeholder

        by_status[status_label] += 1
        by_stage[stage_label] += 1
        by_status_and_stage[f"{status_label} | {stage_label}"] += 1
        by_doc_type[kind] += 1

        if not is_known_claim_status(raw_status):
            unrecognised_statuses[status_label] += 1
        if not is_known_approval_stage(raw_stage):
            unrecognised_stages[stage_label] += 1

        normalized = normalize_expense_claim_state(
            raw_status,
            raw_stage,
            claim_no=claim.claim_no,
            expense_type=claim.expense_type,
        )
        if not normalized.changed:
            continue

        transition = (
            f"{status_label}/{stage_label}"
            f" -> {normalized.status}/{normalized.approval_stage}"
        )
        transitions[transition] += 1
        if len(examples) < sample_limit:
            examples.append(
                {
                    "claim_no": claim.claim_no,
                    "doc_type": kind,
                    "status": raw_status,
                    "approval_stage": raw_stage,
                    "normalized_status": normalized.status,
                    "normalized_approval_stage": normalized.approval_stage,
                    "status_code": normalized.status_code,
                }
            )

    report: dict[str, Any] = {
        "claim_count": len(claims),
        "doc_type_counts": dict(by_doc_type),
        "status_counts": dict(by_status),
        "approval_stage_counts": dict(by_stage),
        "status_stage_counts": dict(by_status_and_stage),
        "unknown_statuses": dict(unrecognised_statuses),
        "unknown_approval_stages": dict(unrecognised_stages),
        "normalization_needed": sum(transitions.values()),
        "normalization_counts": dict(transitions),
        "normalization_samples": examples,
    }
    return report
def _doc_type(claim: ExpenseClaim) -> str:
claim_no = str(claim.claim_no or "").strip().upper()
expense_type = str(claim.expense_type or "").strip().lower()
if claim_no.startswith(("AP-", "APP-")) or expense_type.endswith("_application"):
return "application"
if claim_no.startswith("SIM-EXP-2026"):
return "sim_reimbursement"
return "reimbursement"
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,396 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import sys
from dataclasses import asdict, dataclass
from datetime import UTC, datetime
from decimal import Decimal
from pathlib import Path
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import selectinload
SERVER_DIR = Path(__file__).resolve().parents[1]
SRC_DIR = SERVER_DIR / "src"
if str(SRC_DIR) not in sys.path:
sys.path.insert(0, str(SRC_DIR))
from app.db.session import get_session_factory # noqa: E402
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem # noqa: E402
from app.services.demo_company_simulation_catalog import SIM_CLAIM_PREFIX # noqa: E402
from app.services.expense_claim_attachment_storage import ( # noqa: E402
ExpenseClaimAttachmentStorage,
)
# Maps a lower-cased expense item type to the mock document metadata tuple
# (document_type, document_label, scene_code, scene_label) that
# _document_meta returns; item types missing here fall back to a generic
# invoice tuple defined in _document_meta.
DOCUMENT_BY_ITEM_TYPE = {
"hotel": ("hotel_invoice", "酒店住宿票据", "hotel", "住宿票据"),
"hotel_ticket": ("hotel_invoice", "酒店住宿票据", "hotel", "住宿票据"),
"transport": ("transport_receipt", "乘车票据", "transport", "交通票据"),
"train_ticket": ("train_ticket", "火车/高铁票", "travel", "差旅票据"),
"flight_ticket": ("flight_itinerary", "航空行程单", "travel", "差旅票据"),
"ride_ticket": ("taxi_receipt", "出租车/网约车票据", "transport", "交通票据"),
"meal": ("meal_receipt", "餐饮发票", "meal", "餐饮票据"),
"entertainment": ("meal_receipt", "餐饮发票", "meal", "餐饮票据"),
"office": ("office_invoice", "办公用品发票", "office", "办公票据"),
"communication": ("telecom_invoice", "通信服务发票", "communication", "通信票据"),
"travel_allowance": ("allowance_sheet", "差旅补贴测算单", "travel", "差旅测算"),
}
@dataclass(frozen=True, slots=True)
class MockAttachmentSummary:
mode: str
sim_claims: int
sim_items: int
attachments_to_mock: int
missing_material_items: int
compliant_attachments: int
violation_attachments: int
already_mocked: int
def to_dict(self) -> dict[str, Any]:
return asdict(self)
def main() -> None:
    """Command-line entry point for mocking simulated-claim attachments.

    Without ``--apply`` the run only reports what would happen. With it, the
    session is committed after ``mock_attachments`` returns. Any exception
    rolls the session back and is re-raised.
    """
    cli = argparse.ArgumentParser(
        description="Mock attachment files and OCR metadata for half-year simulated claims."
    )
    cli.add_argument("--apply", action="store_true", help="Write mock attachment files.")
    should_apply = cli.parse_args().apply

    open_session = get_session_factory()
    with open_session() as db:
        try:
            summary = mock_attachments(db, apply=should_apply)
            if should_apply:
                db.commit()
            print(json.dumps(summary.to_dict(), ensure_ascii=False, indent=2))
            if not should_apply:
                print("dry-run only; pass --apply after confirmation to write mock attachments.")
        except Exception:
            # Undo pending ORM changes, then surface the original error.
            db.rollback()
            raise
def mock_attachments(db, *, apply: bool) -> MockAttachmentSummary:
    """Plan (and optionally write) mock attachments for simulated claims.

    Each item of each simulated claim is either skipped because it already
    has a mock attachment, left without material, or given a mock attachment
    flagged as compliant or as a violation sample. Changes are only made
    when ``apply`` is true; the returned summary is computed either way.

    Args:
        db: Open database session used to load the simulated claims.
        apply: Write files and update ORM rows when true.

    Returns:
        Counters describing the run.
    """
    claims = _sim_claims(db)
    storage = ExpenseClaimAttachmentStorage()
    tally = dict.fromkeys(
        (
            "sim_items",
            "attachments_to_mock",
            "missing_material_items",
            "compliant_attachments",
            "violation_attachments",
            "already_mocked",
        ),
        0,
    )

    for claim_index, claim in enumerate(claims, start=1):
        items = list(claim.items or [])
        tally["sim_items"] += len(items)

        for item_index, item in enumerate(items, start=1):
            if _has_existing_mock(storage, item):
                tally["already_mocked"] += 1
            elif _should_leave_missing(claim_index, item_index, claim):
                tally["missing_material_items"] += 1
                if apply:
                    # Clear the attachment reference on the intentionally bare item.
                    item.invoice_id = None
            else:
                violated = _is_violation_sample(claim_index, item_index, claim)
                tally["attachments_to_mock"] += 1
                bucket = "violation_attachments" if violated else "compliant_attachments"
                tally[bucket] += 1
                if apply:
                    _write_mock_attachment(
                        storage=storage,
                        claim=claim,
                        item=item,
                        claim_index=claim_index,
                        item_index=item_index,
                        violated=violated,
                    )

        if apply:
            # Recount items that carry a non-blank attachment reference.
            claim.invoice_count = sum(
                1 for entry in items if str(entry.invoice_id or "").strip()
            )

    return MockAttachmentSummary(
        mode="apply" if apply else "dry-run",
        sim_claims=len(claims),
        **tally,
    )
def _sim_claims(db) -> list[ExpenseClaim]:
    """Load simulated claims with their items, ordered by claim number.

    A claim counts as simulated when its ``claim_no`` starts with
    ``SIM_CLAIM_PREFIX``.
    """
    query = (
        select(ExpenseClaim)
        .options(selectinload(ExpenseClaim.items))
        .where(ExpenseClaim.claim_no.like(f"{SIM_CLAIM_PREFIX}%"))
        .order_by(ExpenseClaim.claim_no.asc())
    )
    rows = db.scalars(query).all()
    return list(rows)
def _has_existing_mock(storage: ExpenseClaimAttachmentStorage, item: ExpenseClaimItem) -> bool:
file_path = storage.resolve_item_path(item)
if file_path is None or not file_path.exists():
return False
metadata = storage.read_meta(file_path)
return str(metadata.get("source") or "") == "half_year_expense_demo_mock"
def _should_leave_missing(claim_index: int, item_index: int, claim: ExpenseClaim) -> bool:
if str(claim.status or "").strip().lower() in {"draft", "returned"}:
return (claim_index + item_index) % 4 == 0
return (claim_index + item_index) % 19 == 0
def _is_violation_sample(claim_index: int, item_index: int, claim: ExpenseClaim) -> bool:
if claim.hermes_risk_flag or claim.risk_flags_json:
return True
return (claim_index * 7 + item_index * 3) % 11 == 0
def _write_mock_attachment(
    *,
    storage: ExpenseClaimAttachmentStorage,
    claim: ExpenseClaim,
    item: ExpenseClaimItem,
    claim_index: int,
    item_index: int,
    violated: bool,
) -> None:
    """Write one mock attachment text file plus its metadata for an item.

    The file is named after the claim number, the item position and the
    document type, and stored in the directory the storage builds for the
    claim/item pair. The item's ``invoice_id`` is pointed at the new file
    before the metadata is written.
    """
    meta = _document_meta(item.item_type)
    document_type, document_label, scene_code, scene_label = meta
    filename = f"{claim.claim_no}-{item_index:02d}-{document_type}.txt"

    target_dir = storage.build_item_dir(claim.id, item.id)
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / filename

    text = _ocr_text(
        claim=claim,
        item=item,
        document_label=document_label,
        claim_index=claim_index,
        item_index=item_index,
        violated=violated,
    )
    # The file must exist before the metadata is built: the payload records
    # its size on disk.
    target.write_text(text, encoding="utf-8")
    item.invoice_id = storage.to_storage_key(target)

    payload = _meta_payload(
        storage_key=item.invoice_id,
        filename=filename,
        file_path=target,
        claim=claim,
        item=item,
        document_type=document_type,
        document_label=document_label,
        scene_code=scene_code,
        scene_label=scene_label,
        ocr_text=text,
        violated=violated,
    )
    storage.write_meta(target, payload)
def _document_meta(item_type: str) -> tuple[str, str, str, str]:
    """Look up (document_type, document_label, scene_code, scene_label).

    The item type is stripped and lower-cased first; unknown or missing
    types resolve to a generic invoice tuple.
    """
    lookup_key = str(item_type or "").strip().lower()
    generic_invoice = ("invoice", "费用发票", "other", "其他票据")
    return DOCUMENT_BY_ITEM_TYPE.get(lookup_key, generic_invoice)
def _ocr_text(
    *,
    claim: ExpenseClaim,
    item: ExpenseClaimItem,
    document_label: str,
    claim_index: int,
    item_index: int,
    violated: bool,
) -> str:
    """Build the newline-separated mock OCR text for one attachment.

    Args:
        claim: Claim the attachment belongs to.
        item: Expense item the attachment documents.
        document_label: Human-readable document type shown on the first line.
        claim_index: 1-based claim position, used in the mock invoice number.
        item_index: 1-based item position, used in the mock invoice number.
        violated: Whether the attachment is a violation sample.

    Returns:
        Nine lines of text joined with ``"\\n"``.
    """
    invoice_no = f"MOCK{claim_index:04d}{item_index:02d}"
    amount = _display_amount(item.item_amount)
    merchant = _merchant_name(item.item_type, violated)
    violation_line = (
        "校验提示:票据金额或场景需要人工复核。"
        if violated
        else "校验提示:票据字段与报销明细一致。"
    )
    # Missing optional text fields are rendered as blanks instead of the
    # literal string "None", the same way _meta_payload treats the location.
    buyer = str(claim.department_name or "")
    location = str(item.item_location or "")
    return "\n".join(
        [
            f"票据类型:{document_label}",
            f"发票号码:{invoice_no}",
            f"开票方:{merchant}",
            f"购买方:{buyer}",
            f"发生日期:{item.item_date.isoformat()}",
            f"发生地点:{location}",
            f"金额:{amount}",
            f"关联报销单:{claim.claim_no}",
            violation_line,
        ]
    )
def _merchant_name(item_type: str, violated: bool) -> str:
normalized = str(item_type or "").strip().lower()
if violated:
return {
"hotel": "上海云栖酒店有限公司",
"transport": "跨城交通服务商",
"office": "综合采购供应商",
"meal": "高端商务餐饮有限公司",
}.get(normalized, "异常样本供应商")
return {
"hotel": "合规住宿服务有限公司",
"transport": "合规出行服务有限公司",
"travel_allowance": "系统差旅补贴测算",
"office": "合规办公用品有限公司",
"communication": "合规通信服务有限公司",
"meal": "合规餐饮服务有限公司",
}.get(normalized, "合规票据供应商")
def _meta_payload(
    *,
    storage_key: str,
    filename: str,
    file_path: Path,
    claim: ExpenseClaim,
    item: ExpenseClaimItem,
    document_type: str,
    document_label: str,
    scene_code: str,
    scene_label: str,
    ocr_text: str,
    violated: bool,
) -> dict[str, Any]:
    """Assemble the metadata dict stored next to a mock attachment.

    The payload combines file facts (name, storage key, size on disk, upload
    time in UTC), the mocked OCR result and the analysis / requirement-check
    sections. Scores and confidences are fixed values that depend only on
    ``violated``.
    """
    amount_text = _display_amount(item.item_amount)
    recognised_fields = [
        {"key": "invoice_no", "label": "发票号码", "value": _invoice_no(filename)},
        {"key": "invoice_date", "label": "开票日期", "value": item.item_date.isoformat()},
        {"key": "amount", "label": "金额", "value": amount_text},
        {"key": "location", "label": "地点", "value": str(item.item_location or "")},
        {
            "key": "merchant",
            "label": "开票方",
            "value": _merchant_name(item.item_type, violated),
        },
    ]
    document_info = {
        "document_type": document_type,
        "document_type_label": document_label,
        "scene_code": scene_code,
        "scene_label": scene_label,
        "fields": recognised_fields,
    }
    requirement_check = _requirement_payload(
        violated,
        item,
        document_type,
        document_label,
        scene_code,
        scene_label,
    )
    verdict = "需复核" if violated else "字段匹配"
    ocr_summary = f"{document_label},金额 {amount_text}{verdict}"

    payload: dict[str, Any] = {
        "source": "half_year_expense_demo_mock",
        "file_name": filename,
        "storage_key": storage_key,
        "media_type": "text/plain",
        "size_bytes": file_path.stat().st_size,
        "uploaded_at": datetime.now(UTC).isoformat(),
        "previewable": False,
        "preview_kind": "",
        "preview_storage_key": "",
        "preview_media_type": "",
        "preview_file_name": "",
        "analysis": _analysis_payload(violated, claim, item),
        "document_info": document_info,
        "requirement_check": requirement_check,
        "ocr_status": "mocked",
        "ocr_error": "",
        "ocr_text": ocr_text,
        "ocr_summary": ocr_summary,
        "ocr_avg_score": 0.81 if violated else 0.97,
        "ocr_line_count": len(ocr_text.splitlines()),
        "ocr_classification_source": "mock_rule",
        "ocr_classification_confidence": 0.78 if violated else 0.96,
        "ocr_classification_evidence": [document_label, scene_label],
        "ocr_warnings": ["mock违规样本"] if violated else [],
    }
    return payload
def _analysis_payload(
    violated: bool,
    claim: ExpenseClaim,
    item: ExpenseClaimItem,
) -> dict[str, Any]:
    """Build the ``analysis`` section of the mock attachment metadata.

    Compliant samples get a ``success`` verdict; violation samples get a
    ``warning`` verdict that names the claim and the expense item.
    """
    amount_text = _display_amount(item.item_amount)

    if not violated:
        return {
            "severity": "success",
            "label": "合规",
            "headline": "票据字段与报销明细一致",
            "summary": "系统 mock 的 OCR 字段已覆盖金额、日期、地点和票据类型。",
            "points": [
                f"金额 {amount_text} 与费用明细一致。",
                f"票据类型匹配 {item.item_reason}",
            ],
            "rule_basis": ["基础票据完整性", "金额一致性"],
            "suggestion": "当前材料可作为演示合规样本。",
        }

    return {
        "severity": "warning",
        "label": "需复核",
        "headline": "票据字段存在合规疑点",
        "summary": "系统 mock 的 OCR 字段与报销场景存在偏差,用于演示违规样本。",
        "points": [
            f"报销单 {claim.claim_no} 金额或场景需要人工复核。",
            f"费用明细:{item.item_reason},金额 {amount_text}",
        ],
        "rule_basis": ["票据金额与费用明细一致性", "票据场景与费用科目匹配"],
        "suggestion": "请核对票据原件、业务事由和费用归口后再提交或付款。",
    }
def _requirement_payload(
violated: bool,
item: ExpenseClaimItem,
document_type: str,
document_label: str,
scene_code: str,
scene_label: str,
) -> dict[str, Any]:
return {
"matches": not violated,
"current_expense_type": str(item.item_type or "other"),
"current_expense_type_label": str(item.item_reason or "费用明细"),
"allowed_scene_labels": [scene_label],
"recognized_scene_code": scene_code,
"recognized_scene_label": scene_label,
"recognized_document_type": document_type,
"recognized_document_type_label": document_label,
"message": "材料匹配,可继续处理。" if not violated else "材料存在疑点,建议人工复核。",
}
def _invoice_no(filename: str) -> str:
return Path(filename).stem.replace("-", "").upper()[-20:]
def _display_amount(value: Decimal | float | int | str | None) -> str:
amount = Decimal(str(value or "0")).quantize(Decimal("0.01"))
return f"{amount:.2f}"
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,570 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import sys
import uuid
from collections import defaultdict
from dataclasses import asdict, dataclass
from datetime import UTC, date, datetime
from decimal import Decimal
from pathlib import Path
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import selectinload
SERVER_DIR = Path(__file__).resolve().parents[1]
SRC_DIR = SERVER_DIR / "src"
if str(SRC_DIR) not in sys.path:
sys.path.insert(0, str(SRC_DIR))
from app.db.session import get_session_factory # noqa: E402
from app.models.budget import BudgetAllocation, BudgetReservation, BudgetTransaction # noqa: E402
from app.models.employee import Employee # noqa: E402
from app.models.financial_record import ExpenseClaim # noqa: E402
from app.models.organization import OrganizationUnit # noqa: E402
from app.services.demo_company_simulation_catalog import ( # noqa: E402
BUDGETED_STATUSES,
PENDING_STATUSES,
SIM_BUDGET_PREFIX,
SIM_CLAIM_PREFIX,
SIM_EMPLOYEE_PREFIX,
SIM_PROJECT_CODE,
SIM_RESERVATION_PREFIX,
SIM_TRANSACTION_PREFIX,
SUBJECT_LABELS,
SUCCESS_STATUSES,
target_budget_usage,
)
from app.services.demo_company_simulation_filters import is_admin_employee_like # noqa: E402
from app.services.employee_behavior_profile_service import ( # noqa: E402
EmployeeBehaviorProfileService,
)
from app.services.expense_claim_status_registry import ( # noqa: E402
normalize_expense_claim_state,
)
# Canonical department unit codes paired with a weight; _counts_by_weight
# multiplies a total by each weight to plan per-department counts, and the
# tuple order is the order in which departments are filled.
DEPARTMENT_PLAN = (
("TECH-DEPT", Decimal("0.30")),
("MARKET-DEPT", Decimal("0.24")),
("PRODUCTION-DEPT", Decimal("0.18")),
("FINANCE-DEPT", Decimal("0.12")),
("HR-DEPT", Decimal("0.10")),
("PRESIDENT-OFFICE", Decimal("0.06")),
)
# Number of claims per department that the repair rewrites into a recent
# "submitted" state (see _repair_recent_pending_claims).
RECENT_PENDING_PER_DEPARTMENT = 3
# Submission timestamps (UTC) given to those claims, indexed by the
# per-department offset.
RECENT_DATES = (
datetime(2026, 6, 1, 10, 0, tzinfo=UTC),
datetime(2026, 6, 1, 15, 0, tzinfo=UTC),
datetime(2026, 6, 2, 6, 0, tzinfo=UTC),
)
@dataclass(frozen=True, slots=True)
class RepairSummary:
mode: str
sim_employees: int
sim_claims: int
employee_department_plan: dict[str, int]
claim_department_plan: dict[str, int]
recent_pending_plan: dict[str, int]
rebuilt_budget_allocations: int
rebuilt_budget_transactions: int
rebuilt_budget_reservations: int
before_all_department_amounts: dict[str, str]
before_recent_pending_amounts: dict[str, str]
after_all_department_amounts: dict[str, str]
after_recent_pending_amounts: dict[str, str]
def to_dict(self) -> dict[str, Any]:
return asdict(self)
def main() -> None:
    """Command-line entry point for repairing the simulated data distribution.

    Without ``--apply`` the run only prints the planned result. With it, the
    repair is executed and committed; ``--refresh-profiles`` additionally
    regenerates behavior profile snapshots before the commit. Any exception
    rolls the session back and is re-raised.
    """
    cli = argparse.ArgumentParser(
        description="Repair simulated half-year demo data distribution."
    )
    cli.add_argument("--apply", action="store_true", help="Apply repair. Default is dry-run.")
    cli.add_argument(
        "--refresh-profiles",
        action="store_true",
        help="After --apply, refresh employee behavior profile snapshots for simulated employees.",
    )
    cli.add_argument("--profile-limit", type=int, default=120)
    options = cli.parse_args()

    open_session = get_session_factory()
    with open_session() as db:
        try:
            summary = repair_distribution(db, apply=options.apply)
            profile_refresh = None
            if options.apply:
                if options.refresh_profiles:
                    profile_refresh = _refresh_company_profiles(
                        db, limit=options.profile_limit
                    )
                db.commit()

            payload = summary.to_dict()
            if profile_refresh is not None:
                payload["profile_refresh"] = profile_refresh
            print(json.dumps(payload, ensure_ascii=False, indent=2))

            if not options.apply:
                print("dry-run only; pass --apply after confirmation to repair simulated data.")
            elif not options.refresh_profiles:
                print("pass --refresh-profiles to generate employee behavior profile snapshots.")
        except Exception:
            # Undo pending ORM changes, then surface the original error.
            db.rollback()
            raise
def repair_distribution(db, *, apply: bool) -> RepairSummary:
    """Rebalance simulated employees, claims and budgets across departments.

    In dry-run mode nothing is modified and the "after" figures come from an
    in-memory preview. In apply mode claim workflow states are normalized,
    employees and claims are reassigned according to ``DEPARTMENT_PLAN``, the
    highest-amount claims become recent pending claims, and the simulated
    budget rows are rebuilt. The caller is responsible for committing.

    Args:
        db: Open database session.
        apply: Perform the repair when true; preview only when false.

    Returns:
        Summary with the plans and the before/after amount distributions.

    Raises:
        RuntimeError: If any department listed in ``DEPARTMENT_PLAN`` is
            missing from the database.
    """
    departments = _canonical_departments(db)
    if len(departments) < len(DEPARTMENT_PLAN):
        missing = [code for code, _ in DEPARTMENT_PLAN if code not in departments]
        raise RuntimeError(f"missing canonical departments: {missing}")

    sim_employees = _sim_employees(db)
    sim_claims = _sim_claims(db)
    before_all = _department_amounts(sim_claims)
    before_recent = _recent_pending_amounts(sim_claims)

    employee_plan = _counts_by_weight(len(sim_employees))
    claim_plan = _counts_by_weight(len(sim_claims))
    recent_claims = _recent_claims(sim_claims)
    fixed_recent_plan = {code: RECENT_PENDING_PER_DEPARTMENT for code, _ in DEPARTMENT_PLAN}
    # Regular claims fill whatever the plan leaves after the fixed recent ones.
    regular_plan = {
        code: max(claim_plan.get(code, 0) - fixed_recent_plan.get(code, 0), 0)
        for code, _ in DEPARTMENT_PLAN
    }

    if apply:
        _normalize_sim_claim_workflow(sim_claims)
        _redistribute_employees(sim_employees, departments, employee_plan)
        db.flush()
        employees_by_dept = _employees_by_department(db)
        # Build the membership set once instead of once per claim.
        recent_claim_set = set(recent_claims)
        regular_claims = [claim for claim in sim_claims if claim not in recent_claim_set]
        _redistribute_regular_claims(
            regular_claims,
            departments,
            employees_by_dept,
            regular_plan,
        )
        _repair_recent_pending_claims(recent_claims, departments, employees_by_dept)
        db.flush()
        _rebuild_sim_budget(db, sim_claims, departments)
        db.flush()

    after_claims = (
        _sim_claims(db)
        if apply
        else _preview_claims(sim_claims, departments, claim_plan)
    )
    after_all = _department_amounts(after_claims)
    after_recent = _recent_pending_amounts(after_claims)
    allocation_count, transaction_count, reservation_count = _planned_budget_counts(after_claims)

    return RepairSummary(
        mode="apply" if apply else "dry-run",
        sim_employees=len(sim_employees),
        sim_claims=len(sim_claims),
        employee_department_plan=employee_plan,
        claim_department_plan=claim_plan,
        recent_pending_plan=fixed_recent_plan,
        rebuilt_budget_allocations=allocation_count,
        rebuilt_budget_transactions=transaction_count,
        rebuilt_budget_reservations=reservation_count,
        before_all_department_amounts=before_all,
        before_recent_pending_amounts=before_recent,
        after_all_department_amounts=after_all,
        after_recent_pending_amounts=after_recent,
    )
def _refresh_company_profiles(db, *, limit: int) -> dict[str, object]:
    """Refresh behavior profile snapshots for non-admin employees and commit.

    Args:
        db: Open database session.
        limit: Maximum number of employees to refresh; a falsy value means
            120 and the result is clamped to the range 1..500.

    Returns:
        Counts and parameters describing the refresh.
    """
    windows = (30, 90, 180)
    task_type = "half_year_expense_demo_repair"
    capped_limit = max(1, min(int(limit or 120), 500))

    roster = db.scalars(select(Employee).order_by(Employee.employee_no.asc())).all()
    eligible_ids = []
    for employee in list(roster):
        if is_admin_employee_like(employee):
            continue
        eligible_ids.append(employee.id)
    target_ids = eligible_ids[:capped_limit]

    service = EmployeeBehaviorProfileService(db)
    snapshot_total = 0
    for employee_id in target_ids:
        # Each refresh defers its commit; one commit follows the loop.
        produced = service.refresh_employee_profiles(
            employee_id=employee_id,
            window_days=windows,
            expense_type_scope="overall",
            source_task_type=task_type,
            commit=False,
        )
        snapshot_total += len(produced)
    db.commit()

    return {
        "target_employee_count": len(target_ids),
        "snapshot_count": snapshot_total,
        "window_days": list(windows),
        "source_task_type": task_type,
        "scope": "all_non_admin_employees",
    }
def _canonical_departments(db) -> dict[str, OrganizationUnit]:
    """Load the organization units named in ``DEPARTMENT_PLAN``, keyed by unit code."""
    wanted_codes = [code for code, _weight in DEPARTMENT_PLAN]
    query = select(OrganizationUnit).where(OrganizationUnit.unit_code.in_(wanted_codes))
    found: dict[str, OrganizationUnit] = {}
    for unit in db.scalars(query).all():
        found[unit.unit_code] = unit
    return found
def _sim_employees(db) -> list[Employee]:
    """Load simulated employees with their organization unit, ordered by number.

    An employee counts as simulated when ``employee_no`` starts with
    ``SIM_EMPLOYEE_PREFIX``.
    """
    query = (
        select(Employee)
        .options(selectinload(Employee.organization_unit))
        .where(Employee.employee_no.like(f"{SIM_EMPLOYEE_PREFIX}%"))
        .order_by(Employee.employee_no.asc())
    )
    rows = db.scalars(query).all()
    return list(rows)
def _sim_claims(db) -> list[ExpenseClaim]:
    """Load simulated claims with their items, ordered by claim number.

    A claim counts as simulated when ``claim_no`` starts with
    ``SIM_CLAIM_PREFIX``.
    """
    query = (
        select(ExpenseClaim)
        .options(selectinload(ExpenseClaim.items))
        .where(ExpenseClaim.claim_no.like(f"{SIM_CLAIM_PREFIX}%"))
        .order_by(ExpenseClaim.claim_no.asc())
    )
    rows = db.scalars(query).all()
    return list(rows)
def _normalize_sim_claim_workflow(claims: list[ExpenseClaim]) -> None:
    """Overwrite each claim's status and approval stage with the normalized pair.

    Every claim is passed to the status registry as a non-application claim
    and updated in place with the values it returns.
    """
    for claim in claims:
        state = normalize_expense_claim_state(
            claim.status,
            claim.approval_stage,
            claim_no=claim.claim_no,
            expense_type=claim.expense_type,
            is_application_claim=False,
        )
        claim.status, claim.approval_stage = state.status, state.approval_stage
def _counts_by_weight(total: int) -> dict[str, int]:
    """Split ``total`` into per-department integer counts by plan weight.

    Each department first receives the truncated share ``total * weight``;
    the units still unassigned are then handed out one each to the
    departments with the largest fractional remainders.
    """
    exact_shares = [(code, total * weight) for code, weight in DEPARTMENT_PLAN]

    counts: dict[str, int] = {}
    for code, share in exact_shares:
        counts[code] = int(share)
    leftover = total - sum(counts.values())

    def fractional_part(entry):
        return entry[1] - int(entry[1])

    ranked = sorted(exact_shares, key=fractional_part, reverse=True)
    for code, _share in ranked[:leftover]:
        counts[code] += 1
    return counts
def _redistribute_employees(
    employees: list[Employee],
    departments: dict[str, OrganizationUnit],
    plan: dict[str, int],
) -> None:
    """Assign consecutive slices of ``employees`` to departments in plan order.

    Each moved employee also takes over the department's cost center and
    location and gets a finance owner name derived from the department name.
    """
    cursor = 0
    for code, _weight in DEPARTMENT_PLAN:
        department = departments[code]
        quota = plan.get(code, 0)
        batch = employees[cursor : cursor + quota]
        for employee in batch:
            employee.organization_unit = department
            employee.cost_center = department.cost_center
            employee.location = department.location
            employee.finance_owner_name = f"{department.name}财务BP"
        cursor += quota
def _employees_by_department(db) -> dict[str, list[Employee]]:
    """Group employees that have an organization unit by that unit's code.

    Employees are read in ``employee_no`` order; those whose unit is missing
    or has an empty code are skipped.
    """
    query = (
        select(Employee)
        .options(selectinload(Employee.organization_unit))
        .where(Employee.organization_unit_id.is_not(None))
        .order_by(Employee.employee_no.asc())
    )
    by_code: dict[str, list[Employee]] = defaultdict(list)
    for employee in db.scalars(query).all():
        unit = employee.organization_unit
        if unit is None or not unit.unit_code:
            continue
        by_code[unit.unit_code].append(employee)
    return by_code
def _redistribute_regular_claims(
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
    employees_by_dept: dict[str, list[Employee]],
    plan: dict[str, int],
) -> None:
    """Assign consecutive slices of ``claims`` to departments in plan order.

    Within a department the claims rotate through that department's
    employees; a department without employees only gets the department
    fields updated.
    """
    cursor = 0
    for code, _weight in DEPARTMENT_PLAN:
        department = departments[code]
        staff = employees_by_dept.get(code) or []
        quota = plan.get(code, 0)
        for position, claim in enumerate(claims[cursor : cursor + quota]):
            owner = staff[position % len(staff)] if staff else None
            _assign_claim_department(claim, department, owner)
        cursor += quota
def _repair_recent_pending_claims(
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
    employees_by_dept: dict[str, list[Employee]],
) -> None:
    """Turn the given claims into recent submitted claims, a few per department.

    Claims are consumed in order, ``RECENT_PENDING_PER_DEPARTMENT`` for each
    department in plan order. Each one is reassigned, set to ``submitted``
    with an alternating approval stage, and given timestamps derived from
    ``RECENT_DATES``. Processing stops quietly once ``claims`` runs out, so
    a small data set no longer raises ``IndexError``.

    Args:
        claims: Claims to rewrite, already ordered by priority.
        departments: Canonical departments keyed by unit code.
        employees_by_dept: Employees grouped by unit code.
    """
    index = 0
    for code, _weight in DEPARTMENT_PLAN:
        department = departments[code]
        employees = employees_by_dept.get(code) or []
        for offset in range(RECENT_PENDING_PER_DEPARTMENT):
            if index >= len(claims):
                # Fewer claims than slots: nothing left to repair.
                return
            claim = claims[index]
            employee = employees[offset % len(employees)] if employees else None
            _assign_claim_department(claim, department, employee)
            claim.status = "submitted"
            claim.approval_stage = "财务审批" if offset % 2 == 0 else "直属领导审批"
            # Wrap around so a larger per-department count cannot overrun the
            # list of timestamps.
            submitted_at = RECENT_DATES[offset % len(RECENT_DATES)]
            claim.occurred_at = submitted_at - _hours(2)
            claim.submitted_at = submitted_at
            claim.updated_at = submitted_at + _hours(1)
            index += 1
def _assign_claim_department(
claim: ExpenseClaim,
department: OrganizationUnit,
employee: Employee | None,
) -> None:
claim.department_id = department.id
claim.department_name = department.name
if employee is not None:
claim.employee_id = employee.id
claim.employee_name = employee.name
claim.location = department.location or claim.location
def _rebuild_sim_budget(
    db,
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
) -> None:
    """Delete the simulated budget rows and recreate them from the claims.

    Claims whose status is in ``BUDGETED_STATUSES`` are grouped by year,
    quarter, department, cost center and subject ("entertainment" is booked
    under "meal"). Each group becomes one allocation; each claim in it gets a
    transaction, plus a reservation when its status is pending.
    """
    # Transactions and reservations are listed before the allocations they
    # reference.
    purge_targets = (
        (BudgetTransaction, BudgetTransaction.transaction_no, SIM_TRANSACTION_PREFIX),
        (BudgetReservation, BudgetReservation.reservation_no, SIM_RESERVATION_PREFIX),
        (BudgetAllocation, BudgetAllocation.budget_no, SIM_BUDGET_PREFIX),
    )
    for model, number_column, prefix in purge_targets:
        stale_rows = db.scalars(select(model).where(number_column.like(f"{prefix}%"))).all()
        for stale in stale_rows:
            db.delete(stale)
    db.flush()

    pools: dict[tuple[int, str, str, str, str], list[ExpenseClaim]] = defaultdict(list)
    for claim in claims:
        if claim.status not in BUDGETED_STATUSES:
            continue
        subject_code = claim.expense_type
        if subject_code == "entertainment":
            subject_code = "meal"
        quarter = ((claim.occurred_at.month - 1) // 3) + 1
        period_key = f"{claim.occurred_at.year}Q{quarter}"
        cost_center = _claim_cost_center(claim, departments)
        pool_key = (
            claim.occurred_at.year,
            period_key,
            claim.department_id,
            cost_center,
            subject_code,
        )
        pools[pool_key].append(claim)

    minimum_budget = Decimal("3000.00")
    allocation_seq = 1
    transaction_seq = 1
    for pool_key in sorted(pools):
        members = pools[pool_key]
        year, period_key, department_id, cost_center, subject_code = pool_key
        total_used = sum((Decimal(member.amount or 0) for member in members), Decimal("0.00"))
        # The budget is sized so that the used total matches the target usage
        # ratio, with a fixed floor applied below.
        usage_ratio = target_budget_usage(period_key, subject_code, allocation_seq)
        original_amount = (total_used / usage_ratio).quantize(Decimal("0.01"))
        allocation = BudgetAllocation(
            id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{SIM_BUDGET_PREFIX}:{pool_key}")),
            budget_no=f"{SIM_BUDGET_PREFIX}-R{allocation_seq:04d}",
            fiscal_year=year,
            period_type="quarter",
            period_key=period_key,
            department_id=department_id,
            department_name=members[0].department_name,
            cost_center=cost_center,
            project_code=SIM_PROJECT_CODE,
            subject_code=subject_code,
            subject_name=SUBJECT_LABELS.get(subject_code, subject_code),
            original_amount=max(original_amount, minimum_budget),
            adjusted_amount=Decimal("0.00"),
            status="active",
            warning_threshold=Decimal("80.00"),
            control_action="warn",
            description="半年报销模拟数据部门分布修复预算池",
            created_by="simulation",
            updated_by="simulation",
        )
        db.add(allocation)
        db.flush()

        for member in members:
            db.add(_budget_transaction(allocation.id, member, transaction_seq))
            if member.status in PENDING_STATUSES:
                db.add(_budget_reservation(allocation.id, member, transaction_seq))
            transaction_seq += 1
        allocation_seq += 1
def _budget_transaction(allocation_id: str, claim: ExpenseClaim, index: int) -> BudgetTransaction:
    """Build the budget transaction row for one claim.

    The type is ``consume`` for claims in ``SUCCESS_STATUSES`` and
    ``reserve`` otherwise. The row id is a UUIDv5 of the transaction number,
    so the same index always yields the same id.
    """
    number = f"{SIM_TRANSACTION_PREFIX}-R{index:04d}"
    if claim.status in SUCCESS_STATUSES:
        kind = "consume"
    else:
        kind = "reserve"
    row_id = uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{number}")
    zero = Decimal("0.00")
    return BudgetTransaction(
        id=str(row_id),
        transaction_no=number,
        allocation_id=allocation_id,
        source_type="claim",
        source_id=claim.id,
        source_no=claim.claim_no,
        transaction_type=kind,
        amount=Decimal(claim.amount or 0),
        before_available_amount=zero,
        after_available_amount=zero,
        operator="simulation",
        reason="修复后模拟数据预算台账",
        context_json={"project_code": SIM_PROJECT_CODE, "simulated": True, "repair": True},
        created_at=claim.submitted_at or claim.occurred_at,
    )
def _budget_reservation(allocation_id: str, claim: ExpenseClaim, index: int) -> BudgetReservation:
    """Build the active budget reservation row for one claim.

    The row id is a UUIDv5 of the reservation number, so the same index
    always yields the same id.
    """
    number = f"{SIM_RESERVATION_PREFIX}-R{index:04d}"
    row_id = uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{number}")
    context = {"project_code": SIM_PROJECT_CODE, "simulated": True, "repair": True}
    return BudgetReservation(
        id=str(row_id),
        reservation_no=number,
        allocation_id=allocation_id,
        source_type="claim",
        source_id=claim.id,
        source_no=claim.claim_no,
        source_status="active",
        amount=Decimal(claim.amount or 0),
        context_json=context,
        created_at=claim.submitted_at or claim.occurred_at,
    )
def _recent_claims(claims: list[ExpenseClaim]) -> list[ExpenseClaim]:
    """Pick the highest-amount claims, enough to fill every department's quota.

    The quota is ``RECENT_PENDING_PER_DEPARTMENT`` per planned department; a
    missing amount counts as zero.
    """

    def amount_of(claim):
        return Decimal(claim.amount or 0)

    needed = RECENT_PENDING_PER_DEPARTMENT * len(DEPARTMENT_PLAN)
    by_amount_desc = sorted(claims, key=amount_of, reverse=True)
    return by_amount_desc[:needed]
def _department_amounts(claims: list[ExpenseClaim]) -> dict[str, str]:
    """Total claim amounts per department name, formatted as strings.

    Claims without a department name are collected under ``"待补充"``.
    """
    totals: dict[str, Decimal] = defaultdict(Decimal)
    for claim in claims:
        department = claim.department_name or "待补充"
        totals[department] += Decimal(claim.amount or 0)
    return _format_amounts(totals)
def _recent_pending_amounts(claims: list[ExpenseClaim]) -> dict[str, str]:
    """Total pending claim amounts per department for 2026-06-01..2026-06-02.

    Only claims whose status is in ``PENDING_STATUSES`` count. The date is
    taken from ``submitted_at`` and falls back to ``occurred_at``; claims
    with neither are ignored.
    """
    window_start = date(2026, 6, 1)
    window_end = date(2026, 6, 2)
    totals: dict[str, Decimal] = defaultdict(Decimal)
    for claim in claims:
        if claim.status in PENDING_STATUSES:
            stamp = claim.submitted_at or claim.occurred_at
            if stamp and window_start <= stamp.date() <= window_end:
                totals[claim.department_name or "待补充"] += Decimal(claim.amount or 0)
    return _format_amounts(totals)
def _preview_claims(
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
    claim_plan: dict[str, int],
) -> list[ExpenseClaim]:
    """Build in-memory stand-ins showing the claims as a repair would leave them.

    Regular claims are spread over the departments according to
    ``claim_plan`` minus the fixed recent quota; the highest-amount claims
    are shown as ``submitted`` on the ``RECENT_DATES`` timestamps. The
    original claims are not modified. When fewer recent claims exist than
    slots, the preview simply ends early instead of raising ``IndexError``.

    Args:
        claims: Simulated claims as currently stored.
        departments: Canonical departments keyed by unit code.
        claim_plan: Planned total claim count per department code.

    Returns:
        Preview objects exposing the attributes the summary helpers read.
    """
    preview: list[ExpenseClaim] = []
    recent_claims = _recent_claims(claims)
    recent_claim_set = set(recent_claims)
    regular_claims = [claim for claim in claims if claim not in recent_claim_set]

    index = 0
    for code, _weight in DEPARTMENT_PLAN:
        department = departments[code]
        count = max(claim_plan.get(code, 0) - RECENT_PENDING_PER_DEPARTMENT, 0)
        for claim in regular_claims[index : index + count]:
            preview.append(_clone_claim(claim, department.name, claim.status, claim.submitted_at))
        index += count

    remaining_recent = iter(recent_claims)
    for code, _weight in DEPARTMENT_PLAN:
        department = departments[code]
        for offset in range(RECENT_PENDING_PER_DEPARTMENT):
            claim = next(remaining_recent, None)
            if claim is None:
                # Fewer recent claims than slots: nothing more to preview.
                return preview
            preview.append(
                _clone_claim(
                    claim,
                    department.name,
                    "submitted",
                    # Wrap around so a larger quota cannot overrun the dates.
                    RECENT_DATES[offset % len(RECENT_DATES)],
                )
            )
    return preview
def _clone_claim(
claim: ExpenseClaim,
department_name: str,
status: str,
submitted_at: datetime | None,
) -> Any:
return type(
"ClaimPreview",
(),
{
"department_name": department_name,
"status": status,
"submitted_at": submitted_at,
"occurred_at": claim.occurred_at,
"expense_type": claim.expense_type,
"amount": claim.amount,
},
)()
def _planned_budget_counts(claims: list[Any]) -> tuple[int, int, int]:
    """Count the budget rows that the given claims translate into.

    Only claims whose status is in ``BUDGETED_STATUSES`` count. Allocations
    are counted per distinct (quarter, department name, subject), where
    "entertainment" is folded into "meal" exactly as ``_rebuild_sim_budget``
    does, so both expense types share one allocation in the count.

    Args:
        claims: Claims or preview objects exposing ``status``,
            ``submitted_at``, ``occurred_at`` and ``department_name``.

    Returns:
        ``(allocation_count, transaction_count, reservation_count)``.
    """
    allocation_keys: set[tuple[str, Any, Any]] = set()
    transaction_count = 0
    reservation_count = 0
    for claim in claims:
        if claim.status not in BUDGETED_STATUSES:
            continue
        submitted_at = claim.submitted_at or claim.occurred_at
        period_key = f"{submitted_at.year}Q{((submitted_at.month - 1) // 3) + 1}"
        expense_type = getattr(claim, "expense_type", "")
        # Entertainment is booked under the meal subject when budgets are built.
        subject_code = "meal" if expense_type == "entertainment" else expense_type
        allocation_keys.add((period_key, claim.department_name, subject_code))
        transaction_count += 1
        reservation_count += int(claim.status in PENDING_STATUSES)
    return len(allocation_keys), transaction_count, reservation_count
def _claim_cost_center(
claim: ExpenseClaim,
departments: dict[str, OrganizationUnit],
) -> str | None:
for department in departments.values():
if department.id == claim.department_id:
return department.cost_center
return None
def _format_amounts(buckets: dict[str, Decimal]) -> dict[str, str]:
return {
key: str(value.quantize(Decimal("0.01")))
for key, value in sorted(buckets.items(), key=lambda item: item[1], reverse=True)
}
def _hours(value: int):
from datetime import timedelta
return timedelta(hours=value)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,111 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import sys
from datetime import date
from pathlib import Path
from sqlalchemy import select
SERVER_DIR = Path(__file__).resolve().parents[1]
SRC_DIR = SERVER_DIR / "src"
if str(SRC_DIR) not in sys.path:
sys.path.insert(0, str(SRC_DIR))
from app.db.session import get_session_factory # noqa: E402
from app.models.employee import Employee # noqa: E402
from app.services.demo_company_simulation_filters import is_admin_employee_like # noqa: E402
from app.services.demo_company_simulation_seed import ( # noqa: E402
HalfYearExpenseSimulationSeeder,
SimulationConfig,
)
from app.services.employee_behavior_profile_service import ( # noqa: E402
EmployeeBehaviorProfileService,
)
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the seeding script's command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) parses
            ``sys.argv[1:]``, which keeps existing callers unchanged while
            letting other code pass an explicit list.

    Returns:
        Namespace with ``target_employees``, ``start_date`` (a ``date``),
        ``months``, ``seed``, ``apply``, ``refresh_profiles`` and
        ``profile_limit``.
    """
    parser = argparse.ArgumentParser(
        description="Seed half-year simulated reimbursement, budget, and employee data.",
    )
    parser.add_argument("--target-employees", type=int, default=100)
    # ISO "YYYY-MM-DD" text is converted to a date by the type callback.
    parser.add_argument("--start-date", type=date.fromisoformat, default=date(2026, 1, 1))
    parser.add_argument("--months", type=int, default=6)
    parser.add_argument("--seed", type=int, default=20260602)
    parser.add_argument("--apply", action="store_true", help="Write data. Default is dry-run only.")
    parser.add_argument(
        "--refresh-profiles",
        action="store_true",
        help="After --apply, refresh employee behavior profile snapshots for simulated employees.",
    )
    parser.add_argument("--profile-limit", type=int, default=120)
    return parser.parse_args(argv)
def main() -> None:
    """Command-line entry point for seeding the half-year simulation data.

    Without ``--apply`` the seeder only previews. With it, the data is
    written and then either the profile refresh runs (which commits itself)
    or the session is committed directly. Any exception rolls the session
    back and is re-raised.
    """
    args = parse_args()
    config = SimulationConfig(
        target_employees=args.target_employees,
        start_date=args.start_date,
        months=args.months,
        seed=args.seed,
    )
    open_session = get_session_factory()
    with open_session() as db:
        seeder = HalfYearExpenseSimulationSeeder(db, config)
        try:
            profile_refresh = None
            if args.apply:
                summary = seeder.apply()
                if args.refresh_profiles:
                    profile_refresh = refresh_company_profiles(db, limit=args.profile_limit)
                else:
                    db.commit()
            else:
                summary = seeder.preview()

            payload = summary.to_dict()
            if profile_refresh is not None:
                payload["profile_refresh"] = profile_refresh
            print(json.dumps(payload, ensure_ascii=False, indent=2))

            if not args.apply:
                print("dry-run only; pass --apply after confirmation to write simulated data.")
            elif not args.refresh_profiles:
                print("pass --refresh-profiles to generate employee behavior profile snapshots.")
        except Exception:
            # Undo pending ORM changes, then surface the original error.
            db.rollback()
            raise
def refresh_company_profiles(db, *, limit: int) -> dict[str, object]:
    """Refresh behavior profile snapshots for non-admin employees and commit.

    Args:
        db: Open database session.
        limit: Maximum number of employees to refresh; a falsy value means
            120 and the result is clamped to the range 1..500.

    Returns:
        Counts and parameters describing the refresh.
    """
    windows = (30, 90, 180)
    task_type = "half_year_expense_demo_seed"
    capped_limit = max(1, min(int(limit or 120), 500))

    roster = db.scalars(select(Employee).order_by(Employee.employee_no.asc())).all()
    eligible_ids = []
    for employee in list(roster):
        if is_admin_employee_like(employee):
            continue
        eligible_ids.append(employee.id)
    target_ids = eligible_ids[:capped_limit]

    service = EmployeeBehaviorProfileService(db)
    snapshot_total = 0
    for employee_id in target_ids:
        # Each refresh defers its commit; one commit follows the loop.
        produced = service.refresh_employee_profiles(
            employee_id=employee_id,
            window_days=windows,
            expense_type_scope="overall",
            source_task_type=task_type,
            commit=False,
        )
        snapshot_total += len(produced)
    db.commit()

    return {
        "target_employee_count": len(target_ids),
        "snapshot_count": snapshot_total,
        "window_days": list(windows),
        "source_task_type": task_type,
        "scope": "all_non_admin_employees",
    }
if __name__ == "__main__":
main()