Files
X-Financial/server/scripts/repair_half_year_expense_demo_distribution.py

590 lines
21 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import sys
import uuid
from collections import defaultdict
from dataclasses import asdict, dataclass
from datetime import UTC, date, datetime
from decimal import Decimal
from pathlib import Path
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import selectinload
# Put the sibling ``src`` directory (under the directory two levels above this
# file) at the front of ``sys.path`` so the ``app.*`` imports below resolve
# when the script is executed directly.
SERVER_DIR = Path(__file__).resolve().parents[1]
SRC_DIR = SERVER_DIR.joinpath("src")
if sys.path.count(str(SRC_DIR)) == 0:
    sys.path.insert(0, str(SRC_DIR))
from app.db.session import get_session_factory # noqa: E402
from app.models.budget import BudgetAllocation, BudgetReservation, BudgetTransaction # noqa: E402
from app.models.employee import Employee # noqa: E402
from app.models.financial_record import ExpenseClaim # noqa: E402
from app.models.organization import OrganizationUnit # noqa: E402
from app.services.demo_company_simulation_catalog import ( # noqa: E402
BUDGETED_STATUSES,
PENDING_STATUSES,
SIM_BUDGET_PREFIX,
SIM_EMPLOYEE_PREFIX,
SIM_PROJECT_CODE,
SIM_RESERVATION_PREFIX,
SIM_TRANSACTION_PREFIX,
SUBJECT_LABELS,
SUCCESS_STATUSES,
target_budget_usage,
)
from app.services.demo_company_simulation_filters import is_admin_employee_like # noqa: E402
from app.services.employee_behavior_profile_service import ( # noqa: E402
EmployeeBehaviorProfileService,
)
from app.services.expense_claim_status_registry import ( # noqa: E402
normalize_expense_claim_state,
)
DEPARTMENT_PLAN = (
("TECH-DEPT", Decimal("0.30")),
("MARKET-DEPT", Decimal("0.24")),
("PRODUCTION-DEPT", Decimal("0.18")),
("FINANCE-DEPT", Decimal("0.12")),
("HR-DEPT", Decimal("0.10")),
("PRESIDENT-OFFICE", Decimal("0.06")),
)
RECENT_PENDING_PER_DEPARTMENT = 3
RECENT_DATES = (
datetime(2026, 6, 1, 10, 0, tzinfo=UTC),
datetime(2026, 6, 1, 15, 0, tzinfo=UTC),
datetime(2026, 6, 2, 6, 0, tzinfo=UTC),
)
PERIOD_START = date(2026, 1, 1)
PERIOD_END = date(2026, 6, 2)
@dataclass(frozen=True, slots=True)
class RepairSummary:
mode: str
sim_employees: int
sim_claims: int
employee_department_plan: dict[str, int]
claim_department_plan: dict[str, int]
recent_pending_plan: dict[str, int]
rebuilt_budget_allocations: int
rebuilt_budget_transactions: int
rebuilt_budget_reservations: int
before_all_department_amounts: dict[str, str]
before_recent_pending_amounts: dict[str, str]
after_all_department_amounts: dict[str, str]
after_recent_pending_amounts: dict[str, str]
def to_dict(self) -> dict[str, Any]:
return asdict(self)
def main() -> None:
    """Command-line entry point for the simulated-data distribution repair.

    Runs ``repair_distribution`` in dry-run mode unless ``--apply`` is given,
    optionally refreshes behavior profile snapshots afterwards, and prints the
    resulting summary as JSON. Any exception rolls the session back and is
    re-raised.
    """
    parser = argparse.ArgumentParser(
        description="Repair simulated half-year demo data distribution."
    )
    parser.add_argument("--apply", action="store_true", help="Apply repair. Default is dry-run.")
    parser.add_argument(
        "--refresh-profiles",
        action="store_true",
        # The refresh targets every non-admin employee (see the "scope" value
        # reported by _refresh_company_profiles), not only simulated ones.
        help=(
            "After --apply, refresh employee behavior profile snapshots for "
            "non-admin employees (capped by --profile-limit)."
        ),
    )
    parser.add_argument(
        "--profile-limit",
        type=int,
        default=120,
        help="Maximum number of employees refreshed by --refresh-profiles (clamped to 1-500).",
    )
    args = parser.parse_args()

    session_factory = get_session_factory()
    with session_factory() as db:
        try:
            summary = repair_distribution(db, apply=args.apply)
            profile_refresh = None
            if args.apply and args.refresh_profiles:
                profile_refresh = _refresh_company_profiles(db, limit=args.profile_limit)
            if args.apply:
                db.commit()

            payload = summary.to_dict()
            if profile_refresh is not None:
                payload["profile_refresh"] = profile_refresh
            print(json.dumps(payload, ensure_ascii=False, indent=2))

            if not args.apply:
                print("dry-run only; pass --apply after confirmation to repair simulated data.")
                if args.refresh_profiles:
                    # Previously this combination was silently ignored.
                    print("--refresh-profiles is ignored in dry-run mode; combine it with --apply.")
            elif not args.refresh_profiles:
                print("pass --refresh-profiles to generate employee behavior profile snapshots.")
        except Exception:
            # Top-level boundary: undo any pending changes, then surface the error.
            db.rollback()
            raise
def repair_distribution(db, *, apply: bool) -> RepairSummary:
    """Plan, and optionally apply, the department redistribution of simulated data.

    Args:
        db: Database session used for all queries and flushes. This function
            flushes but never commits.
        apply: When true, mutate employees, claims and budget rows. When
            false, only compute a preview of the resulting distribution.

    Returns:
        A ``RepairSummary`` with the plans and the before/after amount totals.

    Raises:
        RuntimeError: If any department code in ``DEPARTMENT_PLAN`` is missing.
    """
    departments = _canonical_departments(db)
    if len(departments) < len(DEPARTMENT_PLAN):
        missing = [code for code, _ in DEPARTMENT_PLAN if code not in departments]
        raise RuntimeError(f"missing canonical departments: {missing}")

    sim_employees = _sim_employees(db)
    sim_claims = _sim_claims(db)
    # Snapshot the totals before anything is mutated.
    before_all = _department_amounts(sim_claims)
    before_recent = _recent_pending_amounts(sim_claims)

    employee_plan = _counts_by_weight(len(sim_employees))
    claim_plan = _counts_by_weight(len(sim_claims))
    recent_claims = _recent_claims(sim_claims)
    fixed_recent_plan = {code: RECENT_PENDING_PER_DEPARTMENT for code, _ in DEPARTMENT_PLAN}
    # Each department's regular quota is its overall quota minus the forced
    # pending claims, never below zero.
    regular_plan = {
        code: max(claim_plan.get(code, 0) - fixed_recent_plan.get(code, 0), 0)
        for code, _ in DEPARTMENT_PLAN
    }

    if apply:
        _normalize_sim_claim_workflow(sim_claims)
        _clamp_sim_claim_dates(sim_claims)
        _redistribute_employees(sim_employees, departments, employee_plan)
        db.flush()
        employees_by_dept = _employees_by_department(db)
        # Build the membership set once instead of once per claim.
        recent_claim_set = set(recent_claims)
        regular_claims = [claim for claim in sim_claims if claim not in recent_claim_set]
        _redistribute_regular_claims(
            regular_claims,
            departments,
            employees_by_dept,
            regular_plan,
        )
        _repair_recent_pending_claims(recent_claims, departments, employees_by_dept)
        db.flush()
        _rebuild_sim_budget(db, sim_claims, departments)
        db.flush()

    # After applying, re-read the claims; otherwise build lightweight previews.
    after_claims = (
        _sim_claims(db)
        if apply
        else _preview_claims(sim_claims, departments, claim_plan)
    )
    after_all = _department_amounts(after_claims)
    after_recent = _recent_pending_amounts(after_claims)
    allocation_count, transaction_count, reservation_count = _planned_budget_counts(after_claims)

    return RepairSummary(
        mode="apply" if apply else "dry-run",
        sim_employees=len(sim_employees),
        sim_claims=len(sim_claims),
        employee_department_plan=employee_plan,
        claim_department_plan=claim_plan,
        recent_pending_plan=fixed_recent_plan,
        rebuilt_budget_allocations=allocation_count,
        rebuilt_budget_transactions=transaction_count,
        rebuilt_budget_reservations=reservation_count,
        before_all_department_amounts=before_all,
        before_recent_pending_amounts=before_recent,
        after_all_department_amounts=after_all,
        after_recent_pending_amounts=after_recent,
    )
def _refresh_company_profiles(db, *, limit: int) -> dict[str, object]:
    """Refresh behavior profile snapshots for non-admin employees.

    ``limit`` is clamped to the range 1-500 (a falsy value means 120).
    Employees are taken in ascending ``employee_no`` order. The session is
    committed once after all employees have been processed.

    Returns:
        A dictionary reporting how many employees were targeted and how many
        snapshots the profile service returned.
    """
    windows = (30, 90, 180)
    task_type = "half_year_expense_demo_repair"
    max_targets = max(1, min(int(limit or 120), 500))

    roster_query = select(Employee).order_by(Employee.employee_no.asc())
    roster = list(db.scalars(roster_query).all())
    eligible_ids = [person.id for person in roster if not is_admin_employee_like(person)]
    target_ids = eligible_ids[:max_targets]

    service = EmployeeBehaviorProfileService(db)
    snapshot_count = 0
    for target_id in target_ids:
        refreshed = service.refresh_employee_profiles(
            employee_id=target_id,
            window_days=windows,
            expense_type_scope="overall",
            source_task_type=task_type,
            commit=False,
        )
        snapshot_count = snapshot_count + len(refreshed)
    db.commit()

    return {
        "target_employee_count": len(target_ids),
        "snapshot_count": snapshot_count,
        "window_days": list(windows),
        "source_task_type": task_type,
        "scope": "all_non_admin_employees",
    }
def _canonical_departments(db) -> dict[str, OrganizationUnit]:
    """Load the organization units named in ``DEPARTMENT_PLAN``, keyed by unit code.

    Codes without a matching row are simply absent from the result.
    """
    wanted_codes = [entry[0] for entry in DEPARTMENT_PLAN]
    query = select(OrganizationUnit).where(OrganizationUnit.unit_code.in_(wanted_codes))
    by_code: dict[str, OrganizationUnit] = {}
    for unit in db.scalars(query).all():
        by_code[unit.unit_code] = unit
    return by_code
def _sim_employees(db) -> list[Employee]:
    """Return employees whose number starts with ``SIM_EMPLOYEE_PREFIX``.

    Rows are ordered by ``employee_no`` ascending and their organization unit
    is eagerly loaded.
    """
    query = (
        select(Employee)
        .options(selectinload(Employee.organization_unit))
        .where(Employee.employee_no.like(f"{SIM_EMPLOYEE_PREFIX}%"))
        .order_by(Employee.employee_no.asc())
    )
    result = db.scalars(query)
    return list(result.all())
def _sim_claims(db) -> list[ExpenseClaim]:
    """Return expense claims whose project code is ``SIM_PROJECT_CODE``.

    Rows are ordered by ``created_at`` then ``claim_no`` (both ascending) and
    their items are eagerly loaded.
    """
    query = (
        select(ExpenseClaim)
        .options(selectinload(ExpenseClaim.items))
        .where(ExpenseClaim.project_code == SIM_PROJECT_CODE)
        .order_by(ExpenseClaim.created_at.asc(), ExpenseClaim.claim_no.asc())
    )
    result = db.scalars(query)
    return list(result.all())
def _normalize_sim_claim_workflow(claims: list[ExpenseClaim]) -> None:
    """Overwrite each claim's status and approval stage with normalized values.

    The normalized pair comes from ``normalize_expense_claim_state``; claims
    are always treated as non-application claims.
    """
    for claim in claims:
        state = normalize_expense_claim_state(
            claim.status,
            claim.approval_stage,
            claim_no=claim.claim_no,
            expense_type=claim.expense_type,
            is_application_claim=False,
        )
        claim.status, claim.approval_stage = state.status, state.approval_stage
def _clamp_sim_claim_dates(claims: list[ExpenseClaim]) -> None:
    """Move claims dated outside the simulated period onto a recent anchor.

    The reference date is ``occurred_at``, falling back to ``submitted_at``.
    Claims with no reference date, or one inside
    ``PERIOD_START``..``PERIOD_END`` (inclusive), are left untouched. Others
    get their timestamps and item dates rewritten around an anchor picked from
    ``RECENT_DATES`` by list position.
    """
    for position, claim in enumerate(claims):
        reference = claim.occurred_at or claim.submitted_at
        if reference is None or PERIOD_START <= reference.date() <= PERIOD_END:
            continue

        anchor = RECENT_DATES[position % len(RECENT_DATES)]
        claim.occurred_at = anchor - _hours(2)
        # Only a draft that was never submitted keeps an empty submitted_at.
        if not (claim.submitted_at is None and claim.status == "draft"):
            claim.submitted_at = anchor
        claim.created_at = claim.occurred_at
        claim.updated_at = anchor + _hours(1)

        line_items = claim.items or []
        for line_item in line_items:
            line_item.item_date = claim.occurred_at.date()
def _counts_by_weight(total: int) -> dict[str, int]:
    """Split ``total`` into per-department integer counts by plan weight.

    Each department first receives the truncated value of ``total * weight``;
    the units still unassigned are then handed out one each to the departments
    with the largest fractional parts.
    """
    shares = [(code, total * weight) for code, weight in DEPARTMENT_PLAN]

    counts: dict[str, int] = {}
    for code, share in shares:
        counts[code] = int(share)

    leftover = total - sum(counts.values())
    ranked = list(shares)
    ranked.sort(key=lambda pair: pair[1] - int(pair[1]), reverse=True)
    for code, _share in ranked[:leftover]:
        counts[code] += 1
    return counts
def _redistribute_employees(
    employees: list[Employee],
    departments: dict[str, OrganizationUnit],
    plan: dict[str, int],
) -> None:
    """Assign consecutive slices of ``employees`` to departments per ``plan``.

    Departments are visited in ``DEPARTMENT_PLAN`` order. Each employee in a
    department's slice takes over that department's unit, cost center and
    location, plus a finance owner name derived from the department name.
    """
    cursor = 0
    for code, _weight in DEPARTMENT_PLAN:
        unit = departments[code]
        quota = plan.get(code, 0)
        members = employees[cursor : cursor + quota]
        for member in members:
            member.organization_unit = unit
            member.cost_center = unit.cost_center
            member.location = unit.location
            member.finance_owner_name = f"{unit.name}财务BP"
        cursor += quota
def _employees_by_department(db) -> dict[str, list[Employee]]:
    """Group employees that have an organization unit by that unit's code.

    Employees are read in ascending ``employee_no`` order. Those whose unit is
    missing or has an empty code are skipped.
    NOTE(review): the query is not restricted to simulated employees —
    confirm that is intended.
    """
    query = (
        select(Employee)
        .options(selectinload(Employee.organization_unit))
        .where(Employee.organization_unit_id.is_not(None))
        .order_by(Employee.employee_no.asc())
    )
    by_code: dict[str, list[Employee]] = defaultdict(list)
    for person in db.scalars(query).all():
        unit = person.organization_unit
        if unit is None or not unit.unit_code:
            continue
        by_code[unit.unit_code].append(person)
    return by_code
def _redistribute_regular_claims(
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
    employees_by_dept: dict[str, list[Employee]],
    plan: dict[str, int],
) -> None:
    """Assign consecutive slices of ``claims`` to departments per ``plan``.

    Within a department, claims are handed to its employees in rotation. When
    the department has no employees, only the department fields are updated.
    """
    cursor = 0
    for code, _weight in DEPARTMENT_PLAN:
        unit = departments[code]
        staff = employees_by_dept.get(code) or []
        quota = plan.get(code, 0)
        for offset, claim in enumerate(claims[cursor : cursor + quota]):
            if staff:
                assignee = staff[offset % len(staff)]
            else:
                assignee = None
            _assign_claim_department(claim, unit, assignee)
        cursor += quota
def _repair_recent_pending_claims(
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
    employees_by_dept: dict[str, list[Employee]],
) -> None:
    """Turn ``claims`` into recently submitted pending claims, spread by department.

    Departments are visited in ``DEPARTMENT_PLAN`` order and each consumes up
    to ``RECENT_PENDING_PER_DEPARTMENT`` claims. Every consumed claim is
    reassigned to the department (and to one of its employees, in rotation,
    when it has any), set to ``"submitted"`` with an alternating approval
    stage, and timestamped around an entry of ``RECENT_DATES``.

    If ``claims`` holds fewer entries than the departments could consume,
    processing stops once the list is exhausted instead of raising
    ``IndexError``.
    """
    remaining = iter(claims)
    for code, _weight in DEPARTMENT_PLAN:
        department = departments[code]
        employees = employees_by_dept.get(code) or []
        for offset in range(RECENT_PENDING_PER_DEPARTMENT):
            claim = next(remaining, None)
            if claim is None:
                # Fewer claims than slots: nothing left to repair.
                return
            employee = employees[offset % len(employees)] if employees else None
            _assign_claim_department(claim, department, employee)
            # Wrap around so the per-department count may exceed the number
            # of anchors, as _clamp_sim_claim_dates already does.
            anchor = RECENT_DATES[offset % len(RECENT_DATES)]
            claim.status = "submitted"
            claim.approval_stage = "财务审批" if offset % 2 == 0 else "直属领导审批"
            claim.occurred_at = anchor - _hours(2)
            claim.submitted_at = anchor
            claim.updated_at = anchor + _hours(1)
def _assign_claim_department(
claim: ExpenseClaim,
department: OrganizationUnit,
employee: Employee | None,
) -> None:
claim.department_id = department.id
claim.department_name = department.name
if employee is not None:
claim.employee_id = employee.id
claim.employee_name = employee.name
claim.location = department.location or claim.location
def _rebuild_sim_budget(
    db,
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
) -> None:
    """Delete and recreate the simulated budget rows from ``claims``.

    Existing transactions, reservations and allocations whose numbers start
    with the simulation prefixes are removed. Claims with a status in
    ``BUDGETED_STATUSES`` are then grouped by
    (year, quarter, department, cost center, subject) and each group gets one
    allocation, one transaction per claim, and one reservation per claim whose
    status is in ``PENDING_STATUSES``.

    The quarter is taken from ``occurred_at``, falling back to
    ``submitted_at``; claims with neither date are skipped because they cannot
    be placed in a period.
    """
    # Rows that reference an allocation are deleted before the allocations.
    for model, field, prefix in (
        (BudgetTransaction, BudgetTransaction.transaction_no, SIM_TRANSACTION_PREFIX),
        (BudgetReservation, BudgetReservation.reservation_no, SIM_RESERVATION_PREFIX),
        (BudgetAllocation, BudgetAllocation.budget_no, SIM_BUDGET_PREFIX),
    ):
        for row in db.scalars(select(model).where(field.like(f"{prefix}%"))).all():
            db.delete(row)
    db.flush()

    groups: dict[tuple[int, str, str, str | None, str], list[ExpenseClaim]] = defaultdict(list)
    for claim in claims:
        if claim.status not in BUDGETED_STATUSES:
            continue
        # occurred_at can be empty on claims that only carry submitted_at.
        reference_at = claim.occurred_at or claim.submitted_at
        if reference_at is None:
            continue
        # Entertainment spending is booked under the "meal" subject.
        subject_code = "meal" if claim.expense_type == "entertainment" else claim.expense_type
        quarter = ((reference_at.month - 1) // 3) + 1
        period_key = f"{reference_at.year}Q{quarter}"
        cost_center = _claim_cost_center(claim, departments)
        key = (reference_at.year, period_key, claim.department_id, cost_center, subject_code)
        groups[key].append(claim)

    allocation_index = 1
    transaction_index = 1
    for key, group_claims in sorted(groups.items()):
        year, period_key, department_id, cost_center, subject_code = key
        total_used = sum((Decimal(claim.amount or 0) for claim in group_claims), Decimal("0.00"))
        # Size the pool from the amount used divided by the catalog's target
        # usage value (presumably a ratio — confirm in the catalog module),
        # with a floor of 3000.00 applied below.
        original_amount = (
            total_used / target_budget_usage(period_key, subject_code, allocation_index)
        ).quantize(Decimal("0.01"))
        allocation = BudgetAllocation(
            # uuid5 keeps the id stable for the same group key across runs.
            id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{SIM_BUDGET_PREFIX}:{key}")),
            budget_no=f"{SIM_BUDGET_PREFIX}-R{allocation_index:04d}",
            fiscal_year=year,
            period_type="quarter",
            period_key=period_key,
            department_id=department_id,
            department_name=group_claims[0].department_name,
            cost_center=cost_center,
            project_code=SIM_PROJECT_CODE,
            subject_code=subject_code,
            subject_name=SUBJECT_LABELS.get(subject_code, subject_code),
            original_amount=max(original_amount, Decimal("3000.00")),
            adjusted_amount=Decimal("0.00"),
            status="active",
            warning_threshold=Decimal("80.00"),
            control_action="warn",
            description="半年报销模拟数据部门分布修复预算池",
            created_by="simulation",
            updated_by="simulation",
        )
        db.add(allocation)
        db.flush()
        for claim in group_claims:
            db.add(_budget_transaction(allocation.id, claim, transaction_index))
            if claim.status in PENDING_STATUSES:
                # The reservation shares its sequence number with the transaction.
                db.add(_budget_reservation(allocation.id, claim, transaction_index))
            transaction_index += 1
        allocation_index += 1
def _budget_transaction(allocation_id: str, claim: ExpenseClaim, index: int) -> BudgetTransaction:
    """Build the budget transaction row for ``claim`` under ``allocation_id``.

    The type is "consume" when the claim's status is in ``SUCCESS_STATUSES``
    and "reserve" otherwise. The id is a UUID5 derived from the transaction
    number, which is built from ``index``.
    """
    transaction_no = f"{SIM_TRANSACTION_PREFIX}-R{index:04d}"
    if claim.status in SUCCESS_STATUSES:
        transaction_type = "consume"
    else:
        transaction_type = "reserve"
    stable_id = uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{transaction_no}")
    context = {"project_code": SIM_PROJECT_CODE, "simulated": True, "repair": True}

    return BudgetTransaction(
        id=str(stable_id),
        transaction_no=transaction_no,
        allocation_id=allocation_id,
        source_type="claim",
        source_id=claim.id,
        source_no=claim.claim_no,
        transaction_type=transaction_type,
        amount=Decimal(claim.amount or 0),
        before_available_amount=Decimal("0.00"),
        after_available_amount=Decimal("0.00"),
        operator="simulation",
        reason="修复后模拟数据预算台账",
        context_json=context,
        created_at=claim.submitted_at or claim.occurred_at,
    )
def _budget_reservation(allocation_id: str, claim: ExpenseClaim, index: int) -> BudgetReservation:
    """Build the active budget reservation row for ``claim`` under ``allocation_id``.

    The id is a UUID5 derived from the reservation number, which is built
    from ``index``.
    """
    reservation_no = f"{SIM_RESERVATION_PREFIX}-R{index:04d}"
    stable_id = uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{reservation_no}")
    context = {"project_code": SIM_PROJECT_CODE, "simulated": True, "repair": True}

    return BudgetReservation(
        id=str(stable_id),
        reservation_no=reservation_no,
        allocation_id=allocation_id,
        source_type="claim",
        source_id=claim.id,
        source_no=claim.claim_no,
        source_status="active",
        amount=Decimal(claim.amount or 0),
        context_json=context,
        created_at=claim.submitted_at or claim.occurred_at,
    )
def _recent_claims(claims: list[ExpenseClaim]) -> list[ExpenseClaim]:
    """Pick the claims that will become the forced pending claims.

    Returns the highest-amount claims, largest first, up to
    ``RECENT_PENDING_PER_DEPARTMENT`` per planned department. Selection is by
    amount, not by date; a missing amount counts as zero.
    """
    needed = RECENT_PENDING_PER_DEPARTMENT * len(DEPARTMENT_PLAN)
    ranked = list(claims)
    ranked.sort(key=lambda claim: Decimal(claim.amount or 0), reverse=True)
    return ranked[:needed]
def _department_amounts(claims: list[ExpenseClaim]) -> dict[str, str]:
    """Total claim amounts per department name, formatted as strings.

    Claims without a department name are collected under a placeholder label.
    """
    totals: dict[str, Decimal] = defaultdict(Decimal)
    for claim in claims:
        label = claim.department_name or "待补充"
        totals[label] = totals[label] + Decimal(claim.amount or 0)
    return _format_amounts(totals)
def _recent_pending_amounts(claims: list[ExpenseClaim]) -> dict[str, str]:
    """Total pending claim amounts per department for 2026-06-01..2026-06-02.

    Only claims whose status is in ``PENDING_STATUSES`` count. The date is
    taken from ``submitted_at``, falling back to ``occurred_at``; claims with
    neither are ignored.
    """
    window_start, window_end = date(2026, 6, 1), date(2026, 6, 2)
    totals: dict[str, Decimal] = defaultdict(Decimal)
    for claim in claims:
        if claim.status not in PENDING_STATUSES:
            continue
        stamp = claim.submitted_at or claim.occurred_at
        if not stamp:
            continue
        if not (window_start <= stamp.date() <= window_end):
            continue
        label = claim.department_name or "待补充"
        totals[label] += Decimal(claim.amount or 0)
    return _format_amounts(totals)
def _preview_claims(
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
    claim_plan: dict[str, int],
) -> list[ExpenseClaim]:
    """Build read-only previews of the claims as they would look after repair.

    Regular claims are sliced across departments per ``claim_plan`` minus the
    forced pending count and keep their status and ``submitted_at``. The
    claims chosen by ``_recent_claims`` are previewed as ``"submitted"`` with
    a ``RECENT_DATES`` timestamp, up to ``RECENT_PENDING_PER_DEPARTMENT`` per
    department. The input claims are never modified.

    If there are fewer claims than forced pending slots, the preview simply
    ends once they are exhausted instead of raising ``IndexError``.
    """
    preview: list[ExpenseClaim] = []
    recent_claims = _recent_claims(claims)
    recent_claim_set = set(recent_claims)
    regular_claims = [claim for claim in claims if claim not in recent_claim_set]

    index = 0
    for code, _weight in DEPARTMENT_PLAN:
        department = departments[code]
        count = max(claim_plan.get(code, 0) - RECENT_PENDING_PER_DEPARTMENT, 0)
        for claim in regular_claims[index : index + count]:
            preview.append(_clone_claim(claim, department.name, claim.status, claim.submitted_at))
        index += count

    remaining_recent = iter(recent_claims)
    for code, _weight in DEPARTMENT_PLAN:
        department = departments[code]
        for offset in range(RECENT_PENDING_PER_DEPARTMENT):
            claim = next(remaining_recent, None)
            if claim is None:
                # Fewer claims than slots: the preview is complete.
                return preview
            preview.append(
                _clone_claim(
                    claim,
                    department.name,
                    "submitted",
                    # Wrap around so the per-department count may exceed the
                    # number of anchors.
                    RECENT_DATES[offset % len(RECENT_DATES)],
                )
            )
    return preview
def _clone_claim(
claim: ExpenseClaim,
department_name: str,
status: str,
submitted_at: datetime | None,
) -> Any:
return type(
"ClaimPreview",
(),
{
"department_name": department_name,
"status": status,
"submitted_at": submitted_at,
"occurred_at": claim.occurred_at,
"expense_type": claim.expense_type,
"amount": claim.amount,
},
)()
def _planned_budget_counts(claims: list[Any]) -> tuple[int, int, int]:
    """Count the budget rows implied by ``claims``.

    Only claims whose status is in ``BUDGETED_STATUSES`` count. Allocations
    are counted per distinct (quarter, department name, subject). As in
    ``_rebuild_sim_budget``, "entertainment" is folded into the "meal"
    subject so both kinds share one allocation. The quarter comes from
    ``submitted_at``, falling back to ``occurred_at``; claims with neither
    date are skipped.

    Returns:
        ``(allocation_count, transaction_count, reservation_count)``. Each
        counted claim adds one transaction, and one reservation when its
        status is in ``PENDING_STATUSES``.
    """
    allocation_keys = set()
    transaction_count = 0
    reservation_count = 0
    for claim in claims:
        if claim.status not in BUDGETED_STATUSES:
            continue
        reference_at = claim.submitted_at or claim.occurred_at
        if reference_at is None:
            # No date means no budget period to attach the claim to.
            continue
        expense_type = getattr(claim, "expense_type", "")
        subject_code = "meal" if expense_type == "entertainment" else expense_type
        period_key = f"{reference_at.year}Q{((reference_at.month - 1) // 3) + 1}"
        allocation_keys.add((period_key, claim.department_name, subject_code))
        transaction_count += 1
        reservation_count += int(claim.status in PENDING_STATUSES)
    return len(allocation_keys), transaction_count, reservation_count
def _claim_cost_center(
claim: ExpenseClaim,
departments: dict[str, OrganizationUnit],
) -> str | None:
for department in departments.values():
if department.id == claim.department_id:
return department.cost_center
return None
def _format_amounts(buckets: dict[str, Decimal]) -> dict[str, str]:
return {
key: str(value.quantize(Decimal("0.01")))
for key, value in sorted(buckets.items(), key=lambda item: item[1], reverse=True)
}
def _hours(value: int):
from datetime import timedelta
return timedelta(hours=value)
# Run the repair CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()