from __future__ import annotations import random import uuid from datetime import UTC, date, datetime, timedelta from decimal import Decimal from typing import Any from sqlalchemy import or_, select from sqlalchemy.orm import Session, selectinload from app.core.security import hash_password from app.db.base import Base from app.models.budget import BudgetAllocation, BudgetReservation, BudgetTransaction from app.models.employee import Employee from app.models.financial_record import ExpenseClaim, ExpenseClaimItem from app.models.organization import OrganizationUnit from app.models.risk_observation import RiskObservation from app.models.role import Role from app.services.demo_company_simulation_catalog import ( BUDGETED_STATUSES, DEFAULT_DEPARTMENTS, DEFAULT_PASSWORD, DEPARTMENT_CLAIM_WEIGHTS, DEPARTMENT_EMPLOYEE_WEIGHTS, GRADE_FACTORS, MONTH_FACTORS, PENDING_STATUSES, SIM_BUDGET_PREFIX, SIM_CLAIM_ID_NAMESPACE, SIM_EMPLOYEE_PREFIX, SIM_PROJECT_CODE, SIM_RESERVATION_PREFIX, SIM_RISK_PREFIX, SIM_TRANSACTION_PREFIX, SUBJECT_BASE_AMOUNTS, SUBJECT_LABELS, SUCCESS_STATUSES, AllocationPlan, ClaimItemPlan, ClaimPlan, DepartmentRef, EmployeeRef, SimulationConfig, SimulationSummary, build_employee_name, build_simulation_reimbursement_no, claim_location, claim_reason, department_from_row, grade_for_index, item_reason, position_for_grade, risk_type, target_budget_usage, updated_at_for_claim_plan, ) from app.services.demo_company_simulation_filters import ( is_admin_employee_like, next_simulation_number, simulation_claim_count, simulation_claim_day, simulation_month_starts, simulation_period_end, ) class HalfYearExpenseSimulationSeeder: def __init__(self, db: Session, config: SimulationConfig | None = None) -> None: self.db = db self.config = config or SimulationConfig() self.rng = random.Random(self.config.seed) def preview(self) -> SimulationSummary: return self._run(apply=False) def apply(self) -> SimulationSummary: return self._run(apply=True) def _run(self, *, apply: bool) -> SimulationSummary: Base.metadata.create_all(bind=self.db.get_bind()) departments = self._department_refs(apply=apply) current_employee_count = self._employee_count() planned_employees = self._build_new_employee_refs(departments, current_employee_count) if apply: self._ensure_user_role() self._create_missing_employees(planned_employees) self.db.flush() employees = self._employee_refs(departments) if not apply: employees = [*employees, *planned_employees] selected_employees = self._select_company_employees(employees) claim_plans = self._build_claim_plans(selected_employees) allocation_plans = self._build_allocation_plans(claim_plans) allocation_map, allocation_count = self._ensure_allocations( allocation_plans, apply=apply, ) claim_count, item_count = self._ensure_claims(claim_plans, apply=apply) transaction_count, reservation_count = self._ensure_budget_usage( claim_plans, allocation_map, apply=apply, ) risk_count = self._ensure_risk_observations(claim_plans, apply=apply) return SimulationSummary( mode="apply" if apply else "dry-run", current_employee_count=current_employee_count, target_employee_count=self.config.target_employees, selected_employee_count=len(selected_employees), employees_to_create=len(planned_employees), claims_to_create=claim_count, claim_items_to_create=item_count, budget_allocations_to_create=allocation_count, budget_transactions_to_create=transaction_count, budget_reservations_to_create=reservation_count, risk_observations_to_create=risk_count, period_start=self.config.start_date.isoformat(), period_end=simulation_period_end(self.config).isoformat(), ) def _department_refs(self, *, apply: bool) -> list[DepartmentRef]: rows = list( self.db.scalars( select(OrganizationUnit) .where(OrganizationUnit.unit_type == "department") .order_by(OrganizationUnit.unit_code.asc()) ).all() ) if rows: return [department_from_row(row) for row in rows] if not apply: return list(DEFAULT_DEPARTMENTS) for item in DEFAULT_DEPARTMENTS: self.db.add( OrganizationUnit( id=item.id, unit_code=item.unit_code, name=item.name, unit_type="department", cost_center=item.cost_center, location=item.location, manager_name=item.manager_name, ) ) self.db.flush() return list(DEFAULT_DEPARTMENTS) def _employee_count(self) -> int: employees = list(self.db.scalars(select(Employee)).all()) return sum(1 for employee in employees if not is_admin_employee_like(employee)) def _build_new_employee_refs( self, departments: list[DepartmentRef], current_employee_count: int, ) -> list[EmployeeRef]: missing_count = max(self.config.target_employees - current_employee_count, 0) if missing_count <= 0: return [] existing_nos = set(self.db.scalars(select(Employee.employee_no)).all()) refs: list[EmployeeRef] = [] next_index = 1 while len(refs) < missing_count: employee_no = f"{SIM_EMPLOYEE_PREFIX}{next_index:03d}" next_index += 1 if employee_no in existing_nos: continue department = self._weighted_department(departments, len(refs)) grade = grade_for_index(len(refs)) refs.append( EmployeeRef( id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"x-financial:{employee_no}")), employee_no=employee_no, name=build_employee_name(len(refs)), email=f"{employee_no.lower()}@xf.com", grade=grade, position=position_for_grade(grade), department=department, is_new=True, ) ) return refs def _ensure_user_role(self) -> Role: role = self.db.scalar(select(Role).where(Role.role_code == "user")) if role is not None: return role role = Role( role_code="user", name="使用者", description="可以发起费用申请、报销和查看个人单据。", ) self.db.add(role) self.db.flush() return role def _create_missing_employees(self, refs: list[EmployeeRef]) -> None: if not refs: return user_role = self._ensure_user_role() existing_nos = set(self.db.scalars(select(Employee.employee_no)).all()) departments_by_id = {row.id: row for row in self.db.scalars(select(OrganizationUnit)).all()} for ref in refs: if ref.employee_no in existing_nos: continue employee = Employee( id=ref.id, employee_no=ref.employee_no, name=ref.name, email=ref.email, gender="女" if int(ref.employee_no[-1]) % 2 == 0 else "男", phone=f"139{int(ref.employee_no[-3:]):08d}", join_date=date(2025, (int(ref.employee_no[-3:]) % 12) + 1, 10), location=ref.department.location, position=ref.position, grade=ref.grade, cost_center=ref.department.cost_center, finance_owner_name=f"{ref.department.name}财务BP", bank_name="招商银行", bank_account_no=f"622588{int(ref.employee_no[-3:]):013d}", bank_account_name=ref.name, password_hash=hash_password(DEFAULT_PASSWORD), employment_status="在职", sync_state="已同步", compliance_score=92 + int(ref.employee_no[-3:]) % 8, organization_unit=departments_by_id.get(ref.department.id), roles=[user_role], last_sync_at=datetime.now(UTC), ) self.db.add(employee) def _employee_refs(self, departments: list[DepartmentRef]) -> list[EmployeeRef]: department_by_id = {item.id: item for item in departments} fallback_departments = departments or list(DEFAULT_DEPARTMENTS) rows = list( self.db.scalars( select(Employee) .options(selectinload(Employee.organization_unit)) .order_by(Employee.employee_no.asc()) ).all() ) refs: list[EmployeeRef] = [] for index, employee in enumerate(rows): department = ( department_by_id.get(str(employee.organization_unit_id or "")) or department_from_row(employee.organization_unit) if employee.organization_unit is not None else fallback_departments[index % len(fallback_departments)] ) refs.append( EmployeeRef( id=employee.id, employee_no=employee.employee_no, name=employee.name, email=employee.email, grade=employee.grade or "P4", position=employee.position or "员工", department=department, is_new=False, ) ) return refs def _select_company_employees(self, employees: list[EmployeeRef]) -> list[EmployeeRef]: sorted_employees = sorted( (employee for employee in employees if not is_admin_employee_like(employee)), key=lambda item: item.employee_no, ) target = max(1, self.config.target_employees) return sorted_employees[:target] if len(sorted_employees) > target else sorted_employees def _build_claim_plans(self, employees: list[EmployeeRef]) -> list[ClaimPlan]: plans: list[ClaimPlan] = [] months = simulation_month_starts(self.config) period_end = simulation_period_end(self.config) claim_index = 1 for employee_index, employee in enumerate(employees): count = simulation_claim_count(employee, employee_index) for local_index in range(count): occurred_day = simulation_claim_day( self.rng, months, employee_index=employee_index, local_index=local_index, claim_index=claim_index, period_end=period_end, ) expense_type = self._expense_type_for_employee(employee) amount = self._claim_amount(employee, expense_type, occurred_day) status, stage = self._status_for_claim(employee_index, local_index) risk_flags = self._risk_flags(employee, expense_type, amount, claim_index) submitted_at = None if status != "draft": submitted_at = datetime.combine(occurred_day, datetime.min.time(), tzinfo=UTC) submitted_at += timedelta(hours=9 + (claim_index % 7)) occurred_at = datetime.combine(occurred_day, datetime.min.time(), tzinfo=UTC) occurred_at += timedelta(hours=8 + (claim_index % 9)) plans.append( ClaimPlan( id=str( uuid.uuid5( uuid.NAMESPACE_DNS, f"x-financial:{SIM_CLAIM_ID_NAMESPACE}:{claim_index}", ) ), claim_no=self._simulation_claim_no(occurred_at, claim_index), employee=employee, expense_type=expense_type, reason=claim_reason( expense_type, employee.department.name, occurred_day, ), location=claim_location(employee.department.location, claim_index), amount=amount, invoice_count=1 + (claim_index % 3), occurred_at=occurred_at, submitted_at=submitted_at, status=status, approval_stage=stage, risk_flags=risk_flags, hermes_risk_flag=bool(risk_flags and claim_index % 2 == 0), items=self._claim_items(expense_type, amount, occurred_day, claim_index), ) ) claim_index += 1 return plans def _build_allocation_plans(self, claim_plans: list[ClaimPlan]) -> list[AllocationPlan]: bucket_amounts: dict[tuple[int, str, str, str, str], Decimal] = {} bucket_departments: dict[tuple[int, str, str, str, str], DepartmentRef] = {} for plan in claim_plans: if plan.status not in BUDGETED_STATUSES: continue department = plan.employee.department key = ( plan.occurred_at.year, plan.period_key, department.id, department.cost_center, plan.budget_subject_code, ) bucket_amounts[key] = bucket_amounts.get(key, Decimal("0.00")) + plan.amount bucket_departments[key] = department plans: list[AllocationPlan] = [] for index, (key, used_amount) in enumerate(sorted(bucket_amounts.items())): year, period_key, _department_id, _cost_center, subject_code = key target_usage = target_budget_usage(period_key, subject_code, index) original_amount = max( (used_amount / target_usage).quantize(Decimal("0.01")), Decimal("3000.00"), ) plans.append( AllocationPlan( key=key, department=bucket_departments[key], subject_code=subject_code, subject_name=SUBJECT_LABELS.get(subject_code, subject_code), period_key=period_key, original_amount=original_amount, ) ) return plans def _ensure_allocations( self, plans: list[AllocationPlan], *, apply: bool, ) -> tuple[dict[tuple[int, str, str, str, str], str], int]: allocation_map: dict[tuple[int, str, str, str, str], str] = {} created_count = 0 used_budget_nos = set( self.db.scalars( select(BudgetAllocation.budget_no).where( BudgetAllocation.budget_no.like(f"{SIM_BUDGET_PREFIX}%") ) ).all() ) budget_no_cursor = 1 for plan in plans: existing = self._find_sim_allocation(plan) if existing is not None: allocation_map[plan.key] = existing.id continue created_count += 1 budget_no, budget_no_cursor = next_simulation_number( SIM_BUDGET_PREFIX, used_budget_nos, budget_no_cursor, ) allocation_id = str( uuid.uuid5( uuid.NAMESPACE_DNS, f"x-financial:{SIM_BUDGET_PREFIX}:{plan.key}", ) ) allocation_map[plan.key] = allocation_id if not apply: continue self.db.add( BudgetAllocation( id=allocation_id, budget_no=budget_no, fiscal_year=plan.key[0], period_type="quarter", period_key=plan.period_key, department_id=plan.department.id, department_name=plan.department.name, cost_center=plan.department.cost_center, project_code=SIM_PROJECT_CODE, subject_code=plan.subject_code, subject_name=plan.subject_name, original_amount=plan.original_amount, adjusted_amount=Decimal("0.00"), status="active", warning_threshold=Decimal("80.00"), control_action="warn", description="半年报销模拟数据预算池", created_by="simulation", updated_by="simulation", ) ) if apply: self.db.flush() return allocation_map, created_count def _ensure_claims(self, plans: list[ClaimPlan], *, apply: bool) -> tuple[int, int]: existing_rows = list( self.db.execute( select(ExpenseClaim.id, ExpenseClaim.claim_no).where( ExpenseClaim.project_code == SIM_PROJECT_CODE ) ).all() ) existing_claim_ids = {str(row.id) for row in existing_rows} existing_claim_nos = set(self.db.scalars(select(ExpenseClaim.claim_no)).all()) claim_count = 0 item_count = 0 for plan in plans: if plan.id in existing_claim_ids or plan.claim_no in existing_claim_nos: continue claim_count += 1 item_count += len(plan.items) if not apply: continue claim = ExpenseClaim( id=plan.id, claim_no=plan.claim_no, employee_id=plan.employee.id, employee_name=plan.employee.name, department_id=plan.employee.department.id, department_name=plan.employee.department.name, project_code=SIM_PROJECT_CODE, expense_type=plan.expense_type, reason=plan.reason, location=plan.location, amount=plan.amount, currency="CNY", invoice_count=plan.invoice_count, occurred_at=plan.occurred_at, submitted_at=plan.submitted_at, status=plan.status, approval_stage=plan.approval_stage, risk_flags_json=plan.risk_flags, hermes_risk_flag=plan.hermes_risk_flag, created_at=plan.occurred_at, updated_at=updated_at_for_claim_plan(plan), ) claim.items = [ ExpenseClaimItem( id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"x-financial:{plan.claim_no}:{index}")), item_date=item.item_date, item_type=item.item_type, item_reason=item.item_reason, item_location=item.item_location, item_amount=item.item_amount, invoice_id=item.invoice_id, ) for index, item in enumerate(plan.items, start=1) ] self.db.add(claim) if apply: self.db.flush() return claim_count, item_count def _ensure_budget_usage( self, plans: list[ClaimPlan], allocation_map: dict[tuple[int, str, str, str, str], str], *, apply: bool, ) -> tuple[int, int]: existing_transactions = set( self.db.scalars( select(BudgetTransaction.transaction_no).where( BudgetTransaction.transaction_no.like(f"{SIM_TRANSACTION_PREFIX}%") ) ).all() ) existing_reservations = set( self.db.scalars( select(BudgetReservation.reservation_no).where( BudgetReservation.reservation_no.like(f"{SIM_RESERVATION_PREFIX}%") ) ).all() ) transaction_count = 0 reservation_count = 0 for index, plan in enumerate(plans, start=1): if plan.status not in BUDGETED_STATUSES: continue allocation_id = allocation_map.get(self._allocation_key(plan)) if not allocation_id: continue transaction_no = f"{SIM_TRANSACTION_PREFIX}-{index:04d}" if transaction_no not in existing_transactions: transaction_count += 1 if apply: self.db.add(self._transaction_for_plan(plan, allocation_id, transaction_no)) if plan.status in PENDING_STATUSES: reservation_no = f"{SIM_RESERVATION_PREFIX}-{index:04d}" if reservation_no not in existing_reservations: reservation_count += 1 if apply: self.db.add(self._reservation_for_plan(plan, allocation_id, reservation_no)) if apply: self.db.flush() return transaction_count, reservation_count def _ensure_risk_observations(self, plans: list[ClaimPlan], *, apply: bool) -> int: existing_keys = set( self.db.scalars( select(RiskObservation.observation_key).where( RiskObservation.observation_key.like(f"{SIM_RISK_PREFIX}%") ) ).all() ) count = 0 for index, plan in enumerate(plans, start=1): if not plan.risk_flags: continue key = f"{SIM_RISK_PREFIX}-{index:04d}" if key in existing_keys: continue count += 1 if not apply: continue first_flag = plan.risk_flags[0] self.db.add( RiskObservation( id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"x-financial:{key}")), observation_key=key, subject_type="expense_claim", subject_key=plan.claim_no, subject_label=plan.claim_no, claim_id=plan.id, claim_no=plan.claim_no, risk_type="simulation", risk_signal=str(first_flag.get("event_type") or "amount_outlier"), title=str(first_flag.get("label") or "模拟风险观察"), description=str(first_flag.get("message") or ""), risk_score=int(first_flag.get("risk_score") or 72), risk_level=str(first_flag.get("severity") or "medium"), confidence_score=0.78, control_stage="reimbursement", control_mode="manual_review", automation_mode="simulation", source="half_year_expense_simulation", algorithm_version="simulation.v1", status="pending_review", evidence_json=[ {"label": "报销单号", "value": plan.claim_no}, {"label": "金额", "value": str(plan.amount)}, ], ontology_json={"scenario": "expense", "intent": "risk_check"}, created_at=plan.submitted_at or plan.occurred_at, updated_at=updated_at_for_claim_plan(plan), ) ) if apply: self.db.flush() return count def _find_sim_allocation(self, plan: AllocationPlan) -> BudgetAllocation | None: year, period_key, department_id, cost_center, subject_code = plan.key stmt = ( select(BudgetAllocation) .where(BudgetAllocation.fiscal_year == year) .where(BudgetAllocation.period_key == period_key) .where(BudgetAllocation.subject_code == subject_code) .where(BudgetAllocation.project_code == SIM_PROJECT_CODE) .where( or_( BudgetAllocation.department_id == department_id, BudgetAllocation.cost_center == cost_center, BudgetAllocation.department_name == plan.department.name, ) ) .limit(1) ) return self.db.scalar(stmt) def _transaction_for_plan( self, plan: ClaimPlan, allocation_id: str, transaction_no: str, ) -> BudgetTransaction: transaction_type = "consume" if plan.status in SUCCESS_STATUSES else "reserve" return BudgetTransaction( id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"x-financial:{transaction_no}")), transaction_no=transaction_no, allocation_id=allocation_id, source_type="claim", source_id=plan.id, source_no=plan.claim_no, transaction_type=transaction_type, amount=plan.amount, before_available_amount=Decimal("0.00"), after_available_amount=Decimal("0.00"), operator="simulation", reason=( "半年报销模拟数据预算核销" if transaction_type == "consume" else "半年报销模拟数据预算预占" ), context_json={"project_code": SIM_PROJECT_CODE, "simulated": True}, created_at=plan.submitted_at or plan.occurred_at, ) def _reservation_for_plan( self, plan: ClaimPlan, allocation_id: str, reservation_no: str, ) -> BudgetReservation: return BudgetReservation( id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"x-financial:{reservation_no}")), reservation_no=reservation_no, allocation_id=allocation_id, source_type="claim", source_id=plan.id, source_no=plan.claim_no, source_status="active", amount=plan.amount, consumed_amount=Decimal("0.00"), released_amount=Decimal("0.00"), context_json={"project_code": SIM_PROJECT_CODE, "simulated": True}, created_at=plan.submitted_at or plan.occurred_at, ) def _allocation_key(self, plan: ClaimPlan) -> tuple[int, str, str, str, str]: department = plan.employee.department return ( plan.occurred_at.year, plan.period_key, department.id, department.cost_center, plan.budget_subject_code, ) def _weighted_department(self, departments: list[DepartmentRef], index: int) -> DepartmentRef: weighted: list[DepartmentRef] = [] by_code = {item.unit_code: item for item in departments} for code, weight in DEPARTMENT_EMPLOYEE_WEIGHTS.items(): if code in by_code: weighted.extend([by_code[code]] * weight) weighted = weighted or departments or list(DEFAULT_DEPARTMENTS) return weighted[index % len(weighted)] def _expense_type_for_employee(self, employee: EmployeeRef) -> str: weights = DEPARTMENT_CLAIM_WEIGHTS.get( employee.department.unit_code, {"travel": 3, "meal": 2, "office": 2, "communication": 1}, ) subjects = list(weights) return self.rng.choices(subjects, weights=[weights[item] for item in subjects], k=1)[0] def _claim_amount( self, employee: EmployeeRef, expense_type: str, occurred_day: date, ) -> Decimal: subject = "meal" if expense_type == "entertainment" else expense_type base = SUBJECT_BASE_AMOUNTS.get(subject, Decimal("1000.00")) grade_factor = GRADE_FACTORS.get(employee.grade, Decimal("1.00")) month_factor = MONTH_FACTORS.get(occurred_day.month, Decimal("1.00")) department_factor = ( Decimal("1.18") if employee.department.unit_code == "MARKET-DEPT" else Decimal("1.00") ) noise = Decimal(str(self.rng.uniform(0.72, 1.42))).quantize(Decimal("0.01")) return (base * grade_factor * month_factor * department_factor * noise).quantize( Decimal("0.01") ) @staticmethod def _simulation_claim_no(occurred_at: datetime, claim_index: int) -> str: return build_simulation_reimbursement_no(occurred_at, claim_index) def _status_for_claim(self, employee_index: int, local_index: int) -> tuple[str, str | None]: selector = (employee_index * 11 + local_index * 17 + self.config.seed) % 100 if selector < 42: return "paid", "已付款" if selector < 62: return "approved", "归档入账" if selector < 75: return "pending_payment", "待付款" if selector < 84: return "submitted", "财务审批" if selector < 92: return "submitted", "直属领导审批" if selector < 96: return "returned", "待补充" if selector < 99: return "rejected", "已驳回" return "draft", "待提交" def _risk_flags( self, employee: EmployeeRef, expense_type: str, amount: Decimal, claim_index: int, ) -> list[dict[str, Any]]: base_probability = Decimal("0.10") if amount >= SUBJECT_BASE_AMOUNTS.get(expense_type, Decimal("1000.00")) * Decimal("1.55"): base_probability += Decimal("0.08") if employee.department.unit_code in {"MARKET-DEPT", "PRESIDENT-OFFICE"}: base_probability += Decimal("0.04") if Decimal(str(self.rng.random())) > base_probability: return [] event_type, label = risk_type(claim_index, expense_type) severity = "high" if amount > Decimal("9000.00") or claim_index % 7 == 0 else "medium" return [ { "source": "half_year_expense_simulation", "event_type": event_type, "severity": severity, "label": label, "message": ( f"{employee.name} 的" f"{SUBJECT_LABELS.get(expense_type, expense_type)}样本触发{label}。" ), "risk_score": 82 if severity == "high" else 68, "created_at": datetime.now(UTC).isoformat(), } ] def _claim_items( self, expense_type: str, amount: Decimal, occurred_day: date, claim_index: int, ) -> list[ClaimItemPlan]: if expense_type == "travel": hotel = (amount * Decimal("0.48")).quantize(Decimal("0.01")) transport = (amount * Decimal("0.37")).quantize(Decimal("0.01")) allowance = amount - hotel - transport return [ self._item("hotel", "项目出差住宿", hotel, occurred_day, claim_index, 1), self._item("transport", "项目往返交通", transport, occurred_day, claim_index, 2), self._item("travel_allowance", "差旅补贴", allowance, occurred_day, claim_index, 3), ] return [ self._item( expense_type, item_reason(expense_type), amount, occurred_day, claim_index, 1, ) ] def _item( self, item_type: str, reason: str, amount: Decimal, item_date: date, claim_index: int, item_index: int, ) -> ClaimItemPlan: return ClaimItemPlan( item_date=item_date, item_type=item_type, item_reason=reason, item_location=claim_location("上海", claim_index + item_index), item_amount=amount.quantize(Decimal("0.01")), invoice_id=f"SIM-INV-2026-{claim_index:04d}-{item_index}", )