feat: 财务看板口径重构与半年模拟数据及报销状态注册表
- 重构 finance_dashboard 口径计算,新增模拟公司画像数据生成与筛选 - 引入 expense_claim_status_registry 统一报销状态流转 - 完善报销草稿流程、Item Sync 与本体解析器 - 优化总览页趋势图、分页组件与请求进度步骤 - 增强报销申请快速预览、本体工具与详情展示 - 新增半年报销模拟数据种子脚本与状态审计工具 - 补充财务看板、报销状态注册与模拟数据测试覆盖
This commit is contained in:
120
server/scripts/audit_expense_claim_statuses.py
Normal file
120
server/scripts/audit_expense_claim_statuses.py
Normal file
@@ -0,0 +1,120 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
SERVER_DIR = Path(__file__).resolve().parents[1]
|
||||
SRC_DIR = SERVER_DIR / "src"
|
||||
if str(SRC_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(SRC_DIR))
|
||||
|
||||
from app.db.session import get_session_factory # noqa: E402
|
||||
from app.models.financial_record import ExpenseClaim # noqa: E402
|
||||
from app.services.expense_claim_status_registry import ( # noqa: E402
|
||||
is_known_approval_stage,
|
||||
is_known_claim_status,
|
||||
normalize_expense_claim_state,
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
    """Print a JSON audit of expense-claim status consistency to stdout.

    Accepts ``--sample-limit`` (default 20; negative values are clamped to 0),
    loads every expense claim ordered by claim number and creation time, and
    dumps the ``audit_claims`` payload as indented, non-ASCII-escaped JSON.
    """
    cli = argparse.ArgumentParser(description="Audit expense claim status consistency.")
    cli.add_argument("--sample-limit", type=int, default=20)
    options = cli.parse_args()

    make_session = get_session_factory()
    ordered_claims = select(ExpenseClaim).order_by(
        ExpenseClaim.claim_no.asc(),
        ExpenseClaim.created_at.asc(),
    )
    with make_session() as db:
        claims = list(db.scalars(ordered_claims).all())
        # Audit while the session is still open so ORM attributes stay readable.
        report = audit_claims(claims, sample_limit=max(options.sample_limit, 0))
        print(json.dumps(report, ensure_ascii=False, indent=2))
|
||||
|
||||
|
||||
def audit_claims(claims: list[ExpenseClaim], *, sample_limit: int) -> dict[str, Any]:
    """Summarise status and approval-stage usage across ``claims``.

    Args:
        claims: Expense claims to inspect.
        sample_limit: Maximum number of claims needing normalization to
            include in ``normalization_samples``.

    Returns:
        A JSON-serialisable dict with per-status, per-stage, per-pair and
        per-document-type counts, the statuses/stages the registry does not
        recognise, and the normalizations the registry would apply.
    """
    blank = "<empty>"
    by_status: Counter[str] = Counter()
    by_stage: Counter[str] = Counter()
    by_pair: Counter[str] = Counter()
    by_doc_type: Counter[str] = Counter()
    unrecognised_statuses: Counter[str] = Counter()
    unrecognised_stages: Counter[str] = Counter()
    by_transition: Counter[str] = Counter()
    samples: list[dict[str, Any]] = []

    for claim in claims:
        status = str(claim.status or "").strip()
        stage = str(claim.approval_stage or "").strip()
        doc_type = _doc_type(claim)
        # Empty values are reported under a visible placeholder key.
        status_label = status or blank
        stage_label = stage or blank

        by_status[status_label] += 1
        by_stage[stage_label] += 1
        by_pair[f"{status_label} | {stage_label}"] += 1
        by_doc_type[doc_type] += 1

        if not is_known_claim_status(status):
            unrecognised_statuses[status_label] += 1
        if not is_known_approval_stage(stage):
            unrecognised_stages[stage_label] += 1

        normalized = normalize_expense_claim_state(
            status,
            stage,
            claim_no=claim.claim_no,
            expense_type=claim.expense_type,
        )
        if not normalized.changed:
            continue

        transition = (
            f"{status_label}/{stage_label}"
            f" -> {normalized.status}/{normalized.approval_stage}"
        )
        by_transition[transition] += 1
        if len(samples) >= sample_limit:
            continue
        samples.append(
            {
                "claim_no": claim.claim_no,
                "doc_type": doc_type,
                "status": status,
                "approval_stage": stage,
                "normalized_status": normalized.status,
                "normalized_approval_stage": normalized.approval_stage,
                "status_code": normalized.status_code,
            }
        )

    return {
        "claim_count": len(claims),
        "doc_type_counts": dict(by_doc_type),
        "status_counts": dict(by_status),
        "approval_stage_counts": dict(by_stage),
        "status_stage_counts": dict(by_pair),
        "unknown_statuses": dict(unrecognised_statuses),
        "unknown_approval_stages": dict(unrecognised_stages),
        "normalization_needed": sum(by_transition.values()),
        "normalization_counts": dict(by_transition),
        "normalization_samples": samples,
    }
|
||||
|
||||
|
||||
def _doc_type(claim: ExpenseClaim) -> str:
|
||||
claim_no = str(claim.claim_no or "").strip().upper()
|
||||
expense_type = str(claim.expense_type or "").strip().lower()
|
||||
if claim_no.startswith(("AP-", "APP-")) or expense_type.endswith("_application"):
|
||||
return "application"
|
||||
if claim_no.startswith("SIM-EXP-2026"):
|
||||
return "sim_reimbursement"
|
||||
return "reimbursement"
|
||||
|
||||
|
||||
# Run the audit only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
396
server/scripts/mock_half_year_expense_demo_attachments.py
Normal file
396
server/scripts/mock_half_year_expense_demo_attachments.py
Normal file
@@ -0,0 +1,396 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from dataclasses import asdict, dataclass
|
||||
from datetime import UTC, datetime
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
SERVER_DIR = Path(__file__).resolve().parents[1]
|
||||
SRC_DIR = SERVER_DIR / "src"
|
||||
if str(SRC_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(SRC_DIR))
|
||||
|
||||
from app.db.session import get_session_factory # noqa: E402
|
||||
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem # noqa: E402
|
||||
from app.services.demo_company_simulation_catalog import SIM_CLAIM_PREFIX # noqa: E402
|
||||
from app.services.expense_claim_attachment_storage import ( # noqa: E402
|
||||
ExpenseClaimAttachmentStorage,
|
||||
)
|
||||
|
||||
# Each entry is (document_type, document_label, scene_code, scene_label).
# Tuples shared by more than one item type are named once and reused.
_HOTEL_DOCUMENT = ("hotel_invoice", "酒店住宿票据", "hotel", "住宿票据")
_MEAL_DOCUMENT = ("meal_receipt", "餐饮发票", "meal", "餐饮票据")

# Lookup from a lower-cased expense item type to its mock document metadata.
DOCUMENT_BY_ITEM_TYPE: dict[str, tuple[str, str, str, str]] = {
    "hotel": _HOTEL_DOCUMENT,
    "hotel_ticket": _HOTEL_DOCUMENT,
    "transport": ("transport_receipt", "乘车票据", "transport", "交通票据"),
    "train_ticket": ("train_ticket", "火车/高铁票", "travel", "差旅票据"),
    "flight_ticket": ("flight_itinerary", "航空行程单", "travel", "差旅票据"),
    "ride_ticket": ("taxi_receipt", "出租车/网约车票据", "transport", "交通票据"),
    "meal": _MEAL_DOCUMENT,
    "entertainment": _MEAL_DOCUMENT,
    "office": ("office_invoice", "办公用品发票", "office", "办公票据"),
    "communication": ("telecom_invoice", "通信服务发票", "communication", "通信票据"),
    "travel_allowance": ("allowance_sheet", "差旅补贴测算单", "travel", "差旅测算"),
}
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class MockAttachmentSummary:
|
||||
mode: str
|
||||
sim_claims: int
|
||||
sim_items: int
|
||||
attachments_to_mock: int
|
||||
missing_material_items: int
|
||||
compliant_attachments: int
|
||||
violation_attachments: int
|
||||
already_mocked: int
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return asdict(self)
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point for mocking attachments on simulated claims.

    Without ``--apply`` the run is a dry-run: the summary is printed and
    nothing is committed. With ``--apply`` the session is committed after
    ``mock_attachments`` returns. Any exception rolls the session back and
    is re-raised.
    """
    cli = argparse.ArgumentParser(
        description="Mock attachment files and OCR metadata for half-year simulated claims."
    )
    cli.add_argument("--apply", action="store_true", help="Write mock attachment files.")
    apply_changes = cli.parse_args().apply

    make_session = get_session_factory()
    with make_session() as db:
        try:
            summary = mock_attachments(db, apply=apply_changes)
            if apply_changes:
                db.commit()
            print(json.dumps(summary.to_dict(), ensure_ascii=False, indent=2))
            if not apply_changes:
                print("dry-run only; pass --apply after confirmation to write mock attachments.")
        except Exception:
            # Discard pending ORM changes, then let the error surface.
            db.rollback()
            raise
|
||||
|
||||
|
||||
def mock_attachments(db, *, apply: bool) -> MockAttachmentSummary:
    """Plan, and optionally write, mock attachments for every simulated claim item.

    Args:
        db: Session used to load the simulated claims.
        apply: When false only the counts are computed. When true, mock files
            are written, ``invoice_id`` is cleared on items left without
            material, and each claim's ``invoice_count`` is recomputed.

    Returns:
        A ``MockAttachmentSummary`` with the tallies for this run.
    """
    claims = _sim_claims(db)
    storage = ExpenseClaimAttachmentStorage()
    tally = dict.fromkeys(
        ("items", "to_mock", "missing", "compliant", "violation", "existing"), 0
    )

    for claim_index, claim in enumerate(claims, start=1):
        items = list(claim.items or [])
        tally["items"] += len(items)

        for item_index, item in enumerate(items, start=1):
            # Items that already carry a mock from this script are left alone.
            if _has_existing_mock(storage, item):
                tally["existing"] += 1
                continue

            if _should_leave_missing(claim_index, item_index, claim):
                tally["missing"] += 1
                if apply:
                    item.invoice_id = None
                continue

            violated = _is_violation_sample(claim_index, item_index, claim)
            tally["to_mock"] += 1
            tally["violation" if violated else "compliant"] += 1
            if not apply:
                continue
            _write_mock_attachment(
                storage=storage,
                claim=claim,
                item=item,
                claim_index=claim_index,
                item_index=item_index,
                violated=violated,
            )

        if apply:
            # Count the items that end up with a non-blank invoice reference.
            claim.invoice_count = sum(
                1 for entry in items if str(entry.invoice_id or "").strip()
            )

    return MockAttachmentSummary(
        mode="apply" if apply else "dry-run",
        sim_claims=len(claims),
        sim_items=tally["items"],
        attachments_to_mock=tally["to_mock"],
        missing_material_items=tally["missing"],
        compliant_attachments=tally["compliant"],
        violation_attachments=tally["violation"],
        already_mocked=tally["existing"],
    )
|
||||
|
||||
|
||||
def _sim_claims(db) -> list[ExpenseClaim]:
    """Load claims whose number starts with ``SIM_CLAIM_PREFIX``, ordered by claim number.

    The ``items`` relationship is loaded with ``selectinload``.
    """
    query = (
        select(ExpenseClaim)
        .options(selectinload(ExpenseClaim.items))
        .where(ExpenseClaim.claim_no.like(f"{SIM_CLAIM_PREFIX}%"))
        .order_by(ExpenseClaim.claim_no.asc())
    )
    rows = db.scalars(query).all()
    return list(rows)
|
||||
|
||||
|
||||
def _has_existing_mock(storage: ExpenseClaimAttachmentStorage, item: ExpenseClaimItem) -> bool:
|
||||
file_path = storage.resolve_item_path(item)
|
||||
if file_path is None or not file_path.exists():
|
||||
return False
|
||||
metadata = storage.read_meta(file_path)
|
||||
return str(metadata.get("source") or "") == "half_year_expense_demo_mock"
|
||||
|
||||
|
||||
def _should_leave_missing(claim_index: int, item_index: int, claim: ExpenseClaim) -> bool:
|
||||
if str(claim.status or "").strip().lower() in {"draft", "returned"}:
|
||||
return (claim_index + item_index) % 4 == 0
|
||||
return (claim_index + item_index) % 19 == 0
|
||||
|
||||
|
||||
def _is_violation_sample(claim_index: int, item_index: int, claim: ExpenseClaim) -> bool:
|
||||
if claim.hermes_risk_flag or claim.risk_flags_json:
|
||||
return True
|
||||
return (claim_index * 7 + item_index * 3) % 11 == 0
|
||||
|
||||
|
||||
def _write_mock_attachment(
    *,
    storage: ExpenseClaimAttachmentStorage,
    claim: ExpenseClaim,
    item: ExpenseClaimItem,
    claim_index: int,
    item_index: int,
    violated: bool,
) -> None:
    """Write one mock attachment file and its metadata, and link it to ``item``.

    The text file is named ``<claim_no>-<item_index>-<document_type>.txt``
    inside the item's storage directory, which is created if needed. The
    item's ``invoice_id`` is set to the storage key of that file.
    """
    doc_type, doc_label, scene_code, scene_label = _document_meta(item.item_type)
    filename = f"{claim.claim_no}-{item_index:02d}-{doc_type}.txt"

    target_dir = storage.build_item_dir(claim.id, item.id)
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / filename

    text = _ocr_text(
        claim=claim,
        item=item,
        document_label=doc_label,
        claim_index=claim_index,
        item_index=item_index,
        violated=violated,
    )
    # The file must exist before the metadata is built: it records the size.
    target.write_text(text, encoding="utf-8")
    item.invoice_id = storage.to_storage_key(target)

    metadata = _meta_payload(
        storage_key=item.invoice_id,
        filename=filename,
        file_path=target,
        claim=claim,
        item=item,
        document_type=doc_type,
        document_label=doc_label,
        scene_code=scene_code,
        scene_label=scene_label,
        ocr_text=text,
        violated=violated,
    )
    storage.write_meta(target, metadata)
|
||||
|
||||
|
||||
def _document_meta(item_type: str) -> tuple[str, str, str, str]:
    """Look up ``(document_type, document_label, scene_code, scene_label)`` for an item type.

    The item type is trimmed and lower-cased before the lookup; unknown or
    missing types fall back to a generic invoice entry.
    """
    generic_invoice = ("invoice", "费用发票", "other", "其他票据")
    lookup_key = str(item_type or "").strip().lower()
    return DOCUMENT_BY_ITEM_TYPE.get(lookup_key, generic_invoice)
|
||||
|
||||
|
||||
def _ocr_text(
    *,
    claim: ExpenseClaim,
    item: ExpenseClaimItem,
    document_label: str,
    claim_index: int,
    item_index: int,
    violated: bool,
) -> str:
    """Build the mock OCR text stored as the attachment's file content.

    Args:
        claim: Claim the item belongs to; supplies the buyer and claim number.
        item: Expense item; supplies type, date, location and amount.
        document_label: Human-readable document type shown on the first line.
        claim_index: 1-based claim position, used in the mock invoice number.
        item_index: 1-based item position, used in the mock invoice number.
        violated: Whether the closing hint asks for manual review.

    Returns:
        Newline-joined ``label:value`` lines ending with the validation hint.
    """
    # NOTE(review): _meta_payload derives its invoice number from the file
    # name, so it differs from this MOCK... value — confirm that is intended.
    invoice_no = f"MOCK{claim_index:04d}{item_index:02d}"
    amount = _display_amount(item.item_amount)
    merchant = _merchant_name(item.item_type, violated)
    # Missing values render as empty text rather than the literal "None",
    # matching how _meta_payload treats a missing location.
    buyer = str(claim.department_name or "")
    location = str(item.item_location or "")
    violation_line = (
        "校验提示:票据金额或场景需要人工复核。"
        if violated
        else "校验提示:票据字段与报销明细一致。"
    )
    return "\n".join(
        [
            f"票据类型:{document_label}",
            f"发票号码:{invoice_no}",
            f"开票方:{merchant}",
            f"购买方:{buyer}",
            f"发生日期:{item.item_date.isoformat()}",
            f"发生地点:{location}",
            f"金额:{amount}",
            f"关联报销单:{claim.claim_no}",
            violation_line,
        ]
    )
|
||||
|
||||
|
||||
def _merchant_name(item_type: str, violated: bool) -> str:
|
||||
normalized = str(item_type or "").strip().lower()
|
||||
if violated:
|
||||
return {
|
||||
"hotel": "上海云栖酒店有限公司",
|
||||
"transport": "跨城交通服务商",
|
||||
"office": "综合采购供应商",
|
||||
"meal": "高端商务餐饮有限公司",
|
||||
}.get(normalized, "异常样本供应商")
|
||||
return {
|
||||
"hotel": "合规住宿服务有限公司",
|
||||
"transport": "合规出行服务有限公司",
|
||||
"travel_allowance": "系统差旅补贴测算",
|
||||
"office": "合规办公用品有限公司",
|
||||
"communication": "合规通信服务有限公司",
|
||||
"meal": "合规餐饮服务有限公司",
|
||||
}.get(normalized, "合规票据供应商")
|
||||
|
||||
|
||||
def _meta_payload(
    *,
    storage_key: str,
    filename: str,
    file_path: Path,
    claim: ExpenseClaim,
    item: ExpenseClaimItem,
    document_type: str,
    document_label: str,
    scene_code: str,
    scene_label: str,
    ocr_text: str,
    violated: bool,
) -> dict[str, Any]:
    """Assemble the metadata dict stored next to a mock attachment file.

    ``file_path`` must already exist because its size is read with ``stat()``.
    Scores, confidence and warnings take one of two fixed value sets depending
    on ``violated``. ``uploaded_at`` is the current UTC time in ISO format.
    """
    amount_text = _display_amount(item.item_amount)
    recognised_fields = [
        {"key": "invoice_no", "label": "发票号码", "value": _invoice_no(filename)},
        {"key": "invoice_date", "label": "开票日期", "value": item.item_date.isoformat()},
        {"key": "amount", "label": "金额", "value": amount_text},
        {"key": "location", "label": "地点", "value": str(item.item_location or "")},
        {
            "key": "merchant",
            "label": "开票方",
            "value": _merchant_name(item.item_type, violated),
        },
    ]
    document_info = {
        "document_type": document_type,
        "document_type_label": document_label,
        "scene_code": scene_code,
        "scene_label": scene_label,
        "fields": recognised_fields,
    }
    requirement_check = _requirement_payload(
        violated,
        item,
        document_type,
        document_label,
        scene_code,
        scene_label,
    )

    if violated:
        verdict, avg_score, confidence, warnings = "需复核", 0.81, 0.78, ["mock违规样本"]
    else:
        verdict, avg_score, confidence, warnings = "字段匹配", 0.97, 0.96, []
    ocr_summary = f"{document_label},金额 {amount_text},{verdict}。"

    return {
        "source": "half_year_expense_demo_mock",
        "file_name": filename,
        "storage_key": storage_key,
        "media_type": "text/plain",
        "size_bytes": file_path.stat().st_size,
        "uploaded_at": datetime.now(UTC).isoformat(),
        "previewable": False,
        "preview_kind": "",
        "preview_storage_key": "",
        "preview_media_type": "",
        "preview_file_name": "",
        "analysis": _analysis_payload(violated, claim, item),
        "document_info": document_info,
        "requirement_check": requirement_check,
        "ocr_status": "mocked",
        "ocr_error": "",
        "ocr_text": ocr_text,
        "ocr_summary": ocr_summary,
        "ocr_avg_score": avg_score,
        "ocr_line_count": len(ocr_text.splitlines()),
        "ocr_classification_source": "mock_rule",
        "ocr_classification_confidence": confidence,
        "ocr_classification_evidence": [document_label, scene_label],
        "ocr_warnings": warnings,
    }
|
||||
|
||||
|
||||
def _analysis_payload(
    violated: bool,
    claim: ExpenseClaim,
    item: ExpenseClaimItem,
) -> dict[str, Any]:
    """Return the ``analysis`` section of the attachment metadata.

    Compliant samples get a ``success`` verdict; violation samples get a
    ``warning`` verdict that names the claim and asks for manual review.
    """
    if not violated:
        return {
            "severity": "success",
            "label": "合规",
            "headline": "票据字段与报销明细一致",
            "summary": "系统 mock 的 OCR 字段已覆盖金额、日期、地点和票据类型。",
            "points": [
                f"金额 {_display_amount(item.item_amount)} 与费用明细一致。",
                f"票据类型匹配 {item.item_reason}。",
            ],
            "rule_basis": ["基础票据完整性", "金额一致性"],
            "suggestion": "当前材料可作为演示合规样本。",
        }

    review_points = [
        f"报销单 {claim.claim_no} 金额或场景需要人工复核。",
        f"费用明细:{item.item_reason},金额 {_display_amount(item.item_amount)}。",
    ]
    return {
        "severity": "warning",
        "label": "需复核",
        "headline": "票据字段存在合规疑点",
        "summary": "系统 mock 的 OCR 字段与报销场景存在偏差,用于演示违规样本。",
        "points": review_points,
        "rule_basis": ["票据金额与费用明细一致性", "票据场景与费用科目匹配"],
        "suggestion": "请核对票据原件、业务事由和费用归口后再提交或付款。",
    }
|
||||
|
||||
|
||||
def _requirement_payload(
|
||||
violated: bool,
|
||||
item: ExpenseClaimItem,
|
||||
document_type: str,
|
||||
document_label: str,
|
||||
scene_code: str,
|
||||
scene_label: str,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"matches": not violated,
|
||||
"current_expense_type": str(item.item_type or "other"),
|
||||
"current_expense_type_label": str(item.item_reason or "费用明细"),
|
||||
"allowed_scene_labels": [scene_label],
|
||||
"recognized_scene_code": scene_code,
|
||||
"recognized_scene_label": scene_label,
|
||||
"recognized_document_type": document_type,
|
||||
"recognized_document_type_label": document_label,
|
||||
"message": "材料匹配,可继续处理。" if not violated else "材料存在疑点,建议人工复核。",
|
||||
}
|
||||
|
||||
|
||||
def _invoice_no(filename: str) -> str:
|
||||
return Path(filename).stem.replace("-", "").upper()[-20:]
|
||||
|
||||
|
||||
def _display_amount(value: Decimal | float | int | str | None) -> str:
|
||||
amount = Decimal(str(value or "0")).quantize(Decimal("0.01"))
|
||||
return f"{amount:.2f}"
|
||||
|
||||
|
||||
# Run the attachment mocker only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
570
server/scripts/repair_half_year_expense_demo_distribution.py
Normal file
570
server/scripts/repair_half_year_expense_demo_distribution.py
Normal file
@@ -0,0 +1,570 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from dataclasses import asdict, dataclass
|
||||
from datetime import UTC, date, datetime
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import selectinload
|
||||
|
||||
SERVER_DIR = Path(__file__).resolve().parents[1]
|
||||
SRC_DIR = SERVER_DIR / "src"
|
||||
if str(SRC_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(SRC_DIR))
|
||||
|
||||
from app.db.session import get_session_factory # noqa: E402
|
||||
from app.models.budget import BudgetAllocation, BudgetReservation, BudgetTransaction # noqa: E402
|
||||
from app.models.employee import Employee # noqa: E402
|
||||
from app.models.financial_record import ExpenseClaim # noqa: E402
|
||||
from app.models.organization import OrganizationUnit # noqa: E402
|
||||
from app.services.demo_company_simulation_catalog import ( # noqa: E402
|
||||
BUDGETED_STATUSES,
|
||||
PENDING_STATUSES,
|
||||
SIM_BUDGET_PREFIX,
|
||||
SIM_CLAIM_PREFIX,
|
||||
SIM_EMPLOYEE_PREFIX,
|
||||
SIM_PROJECT_CODE,
|
||||
SIM_RESERVATION_PREFIX,
|
||||
SIM_TRANSACTION_PREFIX,
|
||||
SUBJECT_LABELS,
|
||||
SUCCESS_STATUSES,
|
||||
target_budget_usage,
|
||||
)
|
||||
from app.services.demo_company_simulation_filters import is_admin_employee_like # noqa: E402
|
||||
from app.services.employee_behavior_profile_service import ( # noqa: E402
|
||||
EmployeeBehaviorProfileService,
|
||||
)
|
||||
from app.services.expense_claim_status_registry import ( # noqa: E402
|
||||
normalize_expense_claim_state,
|
||||
)
|
||||
|
||||
DEPARTMENT_PLAN = (
|
||||
("TECH-DEPT", Decimal("0.30")),
|
||||
("MARKET-DEPT", Decimal("0.24")),
|
||||
("PRODUCTION-DEPT", Decimal("0.18")),
|
||||
("FINANCE-DEPT", Decimal("0.12")),
|
||||
("HR-DEPT", Decimal("0.10")),
|
||||
("PRESIDENT-OFFICE", Decimal("0.06")),
|
||||
)
|
||||
RECENT_PENDING_PER_DEPARTMENT = 3
|
||||
RECENT_DATES = (
|
||||
datetime(2026, 6, 1, 10, 0, tzinfo=UTC),
|
||||
datetime(2026, 6, 1, 15, 0, tzinfo=UTC),
|
||||
datetime(2026, 6, 2, 6, 0, tzinfo=UTC),
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RepairSummary:
|
||||
mode: str
|
||||
sim_employees: int
|
||||
sim_claims: int
|
||||
employee_department_plan: dict[str, int]
|
||||
claim_department_plan: dict[str, int]
|
||||
recent_pending_plan: dict[str, int]
|
||||
rebuilt_budget_allocations: int
|
||||
rebuilt_budget_transactions: int
|
||||
rebuilt_budget_reservations: int
|
||||
before_all_department_amounts: dict[str, str]
|
||||
before_recent_pending_amounts: dict[str, str]
|
||||
after_all_department_amounts: dict[str, str]
|
||||
after_recent_pending_amounts: dict[str, str]
|
||||
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
return asdict(self)
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point for repairing the simulated demo data distribution.

    Without ``--apply`` the run is a dry-run: the summary is printed and
    nothing is committed. With ``--apply`` the repair is committed, and
    ``--refresh-profiles`` additionally refreshes behavior profile snapshots.
    ``--refresh-profiles`` has no effect without ``--apply``. Any exception
    rolls the session back and is re-raised.
    """
    parser = argparse.ArgumentParser(
        description="Repair simulated half-year demo data distribution."
    )
    parser.add_argument("--apply", action="store_true", help="Apply repair. Default is dry-run.")
    parser.add_argument(
        "--refresh-profiles",
        action="store_true",
        # The refresh helper targets every non-admin employee, not only the
        # simulated ones, so the help text says so.
        help=(
            "After --apply, refresh employee behavior profile snapshots for "
            "all non-admin employees (capped by --profile-limit)."
        ),
    )
    parser.add_argument(
        "--profile-limit",
        type=int,
        default=120,
        help="Maximum number of employees to refresh (clamped to 1-500).",
    )
    args = parser.parse_args()

    session_factory = get_session_factory()
    with session_factory() as db:
        try:
            summary = repair_distribution(db, apply=args.apply)
            profile_refresh = None
            if args.apply and args.refresh_profiles:
                profile_refresh = _refresh_company_profiles(db, limit=args.profile_limit)
            if args.apply:
                db.commit()
            payload = summary.to_dict()
            if profile_refresh is not None:
                payload["profile_refresh"] = profile_refresh
            print(json.dumps(payload, ensure_ascii=False, indent=2))
            if not args.apply:
                print("dry-run only; pass --apply after confirmation to repair simulated data.")
            elif not args.refresh_profiles:
                print("pass --refresh-profiles to generate employee behavior profile snapshots.")
        except Exception:
            # Discard pending ORM changes, then let the error surface.
            db.rollback()
            raise
|
||||
|
||||
|
||||
def repair_distribution(db, *, apply: bool) -> RepairSummary:
    """Rebalance simulated employees, claims and budgets across ``DEPARTMENT_PLAN``.

    Args:
        db: Session used for all reads and, when applying, all writes.
        apply: When false only a preview is computed. When true the claim
            workflow is normalized, employees and claims are redistributed,
            the simulated budget is rebuilt, and changes are flushed (not
            committed).

    Returns:
        A ``RepairSummary`` with the plans and the before/after amounts.

    Raises:
        RuntimeError: If any department listed in ``DEPARTMENT_PLAN`` is
            missing from the database.
    """
    departments = _canonical_departments(db)
    if len(departments) < len(DEPARTMENT_PLAN):
        missing = [code for code, _ in DEPARTMENT_PLAN if code not in departments]
        raise RuntimeError(f"missing canonical departments: {missing}")

    sim_employees = _sim_employees(db)
    sim_claims = _sim_claims(db)
    # Snapshot the amounts before anything is mutated.
    before_all = _department_amounts(sim_claims)
    before_recent = _recent_pending_amounts(sim_claims)

    employee_plan = _counts_by_weight(len(sim_employees))
    claim_plan = _counts_by_weight(len(sim_claims))
    recent_claims = _recent_claims(sim_claims)
    fixed_recent_plan = {code: RECENT_PENDING_PER_DEPARTMENT for code, _ in DEPARTMENT_PLAN}
    # Regular claims fill whatever the plan leaves after the fixed recent ones.
    regular_plan = {
        code: max(claim_plan.get(code, 0) - fixed_recent_plan.get(code, 0), 0)
        for code, _ in DEPARTMENT_PLAN
    }

    if apply:
        _normalize_sim_claim_workflow(sim_claims)
        _redistribute_employees(sim_employees, departments, employee_plan)
        db.flush()
        employees_by_dept = _employees_by_department(db)
        # Build the membership set once instead of once per claim.
        recent_claim_set = set(recent_claims)
        regular_claims = [claim for claim in sim_claims if claim not in recent_claim_set]
        _redistribute_regular_claims(
            regular_claims,
            departments,
            employees_by_dept,
            regular_plan,
        )
        _repair_recent_pending_claims(recent_claims, departments, employees_by_dept)
        db.flush()
        _rebuild_sim_budget(db, sim_claims, departments)
        db.flush()

    after_claims = (
        _sim_claims(db)
        if apply
        else _preview_claims(sim_claims, departments, claim_plan)
    )
    after_all = _department_amounts(after_claims)
    after_recent = _recent_pending_amounts(after_claims)
    allocation_count, transaction_count, reservation_count = _planned_budget_counts(after_claims)

    return RepairSummary(
        mode="apply" if apply else "dry-run",
        sim_employees=len(sim_employees),
        sim_claims=len(sim_claims),
        employee_department_plan=employee_plan,
        claim_department_plan=claim_plan,
        recent_pending_plan=fixed_recent_plan,
        rebuilt_budget_allocations=allocation_count,
        rebuilt_budget_transactions=transaction_count,
        rebuilt_budget_reservations=reservation_count,
        before_all_department_amounts=before_all,
        before_recent_pending_amounts=before_recent,
        after_all_department_amounts=after_all,
        after_recent_pending_amounts=after_recent,
    )
|
||||
|
||||
|
||||
def _refresh_company_profiles(db, *, limit: int) -> dict[str, object]:
    """Refresh behavior profile snapshots for non-admin employees and commit.

    Args:
        db: Session handed to ``EmployeeBehaviorProfileService``.
        limit: Maximum number of employees to refresh; a falsy value means
            120 and the result is clamped to the range 1-500.

    Returns:
        A dict describing how many employees were targeted and how many
        snapshots were produced.
    """
    windows = (30, 90, 180)
    task_type = "half_year_expense_demo_repair"
    capped_limit = max(1, min(int(limit or 120), 500))

    roster = list(db.scalars(select(Employee).order_by(Employee.employee_no.asc())).all())
    # Filter first, then cap, so admin-like rows never consume the limit.
    eligible_ids = [person.id for person in roster if not is_admin_employee_like(person)]
    target_ids = eligible_ids[:capped_limit]

    service = EmployeeBehaviorProfileService(db)
    snapshot_total = 0
    for target_id in target_ids:
        produced = service.refresh_employee_profiles(
            employee_id=target_id,
            window_days=windows,
            expense_type_scope="overall",
            source_task_type=task_type,
            commit=False,
        )
        snapshot_total += len(produced)

    db.commit()
    return {
        "target_employee_count": len(target_ids),
        "snapshot_count": snapshot_total,
        "window_days": list(windows),
        "source_task_type": task_type,
        "scope": "all_non_admin_employees",
    }
|
||||
|
||||
|
||||
def _canonical_departments(db) -> dict[str, OrganizationUnit]:
    """Load the organization units named in ``DEPARTMENT_PLAN``, keyed by unit code.

    Codes without a matching row are simply absent from the result.
    """
    wanted_codes = [code for code, _weight in DEPARTMENT_PLAN]
    query = select(OrganizationUnit).where(OrganizationUnit.unit_code.in_(wanted_codes))
    by_code: dict[str, OrganizationUnit] = {}
    for unit in db.scalars(query).all():
        by_code[unit.unit_code] = unit
    return by_code
|
||||
|
||||
|
||||
def _sim_employees(db) -> list[Employee]:
    """Load employees whose number starts with ``SIM_EMPLOYEE_PREFIX``, ordered by number.

    The ``organization_unit`` relationship is loaded with ``selectinload``.
    """
    query = (
        select(Employee)
        .options(selectinload(Employee.organization_unit))
        .where(Employee.employee_no.like(f"{SIM_EMPLOYEE_PREFIX}%"))
        .order_by(Employee.employee_no.asc())
    )
    rows = db.scalars(query).all()
    return list(rows)
|
||||
|
||||
|
||||
def _sim_claims(db) -> list[ExpenseClaim]:
    """Load claims whose number starts with ``SIM_CLAIM_PREFIX``, ordered by claim number.

    The ``items`` relationship is loaded with ``selectinload``.
    """
    query = (
        select(ExpenseClaim)
        .options(selectinload(ExpenseClaim.items))
        .where(ExpenseClaim.claim_no.like(f"{SIM_CLAIM_PREFIX}%"))
        .order_by(ExpenseClaim.claim_no.asc())
    )
    rows = db.scalars(query).all()
    return list(rows)
|
||||
|
||||
|
||||
def _normalize_sim_claim_workflow(claims: list[ExpenseClaim]) -> None:
    """Overwrite each claim's status and approval stage with the registry's normalized values.

    Every claim is normalized with ``is_application_claim=False``; the claims
    are mutated in place.
    """
    for entry in claims:
        canonical = normalize_expense_claim_state(
            entry.status,
            entry.approval_stage,
            claim_no=entry.claim_no,
            expense_type=entry.expense_type,
            is_application_claim=False,
        )
        entry.status, entry.approval_stage = canonical.status, canonical.approval_stage
|
||||
|
||||
|
||||
def _counts_by_weight(total: int) -> dict[str, int]:
    """Split ``total`` into whole-number counts per department by plan weight.

    Each department first receives the integer part of ``total * weight``.
    The units left over are handed out one each to the departments with the
    largest fractional parts; ties keep ``DEPARTMENT_PLAN`` order.
    """
    shares = [(code, total * weight) for code, weight in DEPARTMENT_PLAN]
    counts = {code: int(share) for code, share in shares}
    leftover = total - sum(counts.values())

    def fractional_part(pair):
        return pair[1] - int(pair[1])

    by_largest_fraction = sorted(shares, key=fractional_part, reverse=True)
    for code, _share in by_largest_fraction[:leftover]:
        counts[code] += 1
    return counts
|
||||
|
||||
|
||||
def _redistribute_employees(
    employees: list[Employee],
    departments: dict[str, OrganizationUnit],
    plan: dict[str, int],
) -> None:
    """Assign consecutive slices of ``employees`` to departments in plan order.

    Each department takes the next ``plan[code]`` employees; their
    organization unit, cost center, location and finance owner name are
    overwritten from that department.
    """
    start = 0
    for code, _weight in DEPARTMENT_PLAN:
        unit = departments[code]
        quota = plan.get(code, 0)
        for member in employees[start : start + quota]:
            member.organization_unit = unit
            member.cost_center = unit.cost_center
            member.location = unit.location
            member.finance_owner_name = f"{unit.name}财务BP"
        start += quota
|
||||
|
||||
|
||||
def _employees_by_department(db) -> dict[str, list[Employee]]:
    """Group employees that have an organization unit by that unit's code.

    Employees are read in employee-number order, so each group keeps that
    order. Units with an empty code are skipped.
    """
    query = (
        select(Employee)
        .options(selectinload(Employee.organization_unit))
        .where(Employee.organization_unit_id.is_not(None))
        .order_by(Employee.employee_no.asc())
    )
    by_code: dict[str, list[Employee]] = defaultdict(list)
    for person in db.scalars(query).all():
        unit = person.organization_unit
        if unit is None or not unit.unit_code:
            continue
        by_code[unit.unit_code].append(person)
    return by_code
|
||||
|
||||
|
||||
def _redistribute_regular_claims(
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
    employees_by_dept: dict[str, list[Employee]],
    plan: dict[str, int],
) -> None:
    """Assign consecutive slices of ``claims`` to departments in plan order.

    Within a department the claims are handed to its employees round-robin;
    a department without employees still receives the claims, with no
    employee passed to ``_assign_claim_department``.
    """
    start = 0
    for code, _weight in DEPARTMENT_PLAN:
        unit = departments[code]
        staff = employees_by_dept.get(code) or []
        quota = plan.get(code, 0)
        for position, claim in enumerate(claims[start : start + quota]):
            assignee = staff[position % len(staff)] if staff else None
            _assign_claim_department(claim, unit, assignee)
        start += quota
|
||||
|
||||
|
||||
def _repair_recent_pending_claims(
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
    employees_by_dept: dict[str, list[Employee]],
) -> None:
    """Turn ``claims`` into recent submitted claims, a fixed number per department.

    Claims are consumed in order, ``RECENT_PENDING_PER_DEPARTMENT`` for each
    department in plan order. Each one is reassigned, marked ``submitted``,
    given an alternating approval stage, and timestamped from
    ``RECENT_DATES``: occurred two hours before submission, updated one hour
    after. ``claims`` must hold enough entries for every department.
    """
    cursor = 0
    for code, _weight in DEPARTMENT_PLAN:
        unit = departments[code]
        staff = employees_by_dept.get(code) or []
        for slot in range(RECENT_PENDING_PER_DEPARTMENT):
            claim = claims[cursor]
            assignee = staff[slot % len(staff)] if staff else None
            _assign_claim_department(claim, unit, assignee)
            claim.status = "submitted"
            # Even slots wait on finance, odd slots on the direct manager.
            claim.approval_stage = "直属领导审批" if slot % 2 else "财务审批"
            claim.occurred_at = RECENT_DATES[slot] - _hours(2)
            claim.submitted_at = RECENT_DATES[slot]
            claim.updated_at = RECENT_DATES[slot] + _hours(1)
            cursor += 1
|
||||
|
||||
|
||||
def _assign_claim_department(
|
||||
claim: ExpenseClaim,
|
||||
department: OrganizationUnit,
|
||||
employee: Employee | None,
|
||||
) -> None:
|
||||
claim.department_id = department.id
|
||||
claim.department_name = department.name
|
||||
if employee is not None:
|
||||
claim.employee_id = employee.id
|
||||
claim.employee_name = employee.name
|
||||
claim.location = department.location or claim.location
|
||||
|
||||
|
||||
def _rebuild_sim_budget(
    db,
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
) -> None:
    """Delete the simulated budget rows and recreate them from ``claims``.

    Existing transactions, reservations and allocations whose number starts
    with the simulation prefixes are deleted first. Claims whose status is in
    ``BUDGETED_STATUSES`` are then grouped by (year, quarter, department id,
    cost center, subject code); each group gets one ``BudgetAllocation``, each
    claim one ``BudgetTransaction``, and each claim whose status is in
    ``PENDING_STATUSES`` additionally one ``BudgetReservation``.

    Args:
        db: session-like object providing ``scalars``, ``delete``, ``add`` and
            ``flush``. Nothing is committed here.
        claims: claims to derive the budget rows from.
        departments: department lookup used to resolve each claim's cost center.
    """
    # Purge earlier simulated rows, matched by number prefix. Transactions and
    # reservations go before allocations.
    # NOTE(review): presumably so dependent rows are removed before the
    # allocations they reference - confirm against the foreign keys.
    for model, field, prefix in (
        (BudgetTransaction, BudgetTransaction.transaction_no, SIM_TRANSACTION_PREFIX),
        (BudgetReservation, BudgetReservation.reservation_no, SIM_RESERVATION_PREFIX),
        (BudgetAllocation, BudgetAllocation.budget_no, SIM_BUDGET_PREFIX),
    ):
        for row in db.scalars(select(model).where(field.like(f"{prefix}%"))).all():
            db.delete(row)
        db.flush()

    # NOTE(review): the key annotation says every element after the year is a
    # str, but _claim_cost_center can return None - confirm and widen if needed.
    groups: dict[tuple[int, str, str, str, str], list[ExpenseClaim]] = defaultdict(list)
    for claim in claims:
        if claim.status not in BUDGETED_STATUSES:
            continue
        # Entertainment spending is booked under the meal subject.
        subject_code = "meal" if claim.expense_type == "entertainment" else claim.expense_type
        quarter = ((claim.occurred_at.month - 1) // 3) + 1
        period_key = f"{claim.occurred_at.year}Q{quarter}"
        cost_center = _claim_cost_center(claim, departments)
        key = (claim.occurred_at.year, period_key, claim.department_id, cost_center, subject_code)
        groups[key].append(claim)

    # Running counters feed the generated budget / transaction numbers.
    allocation_index = 1
    transaction_index = 1
    for key, group_claims in sorted(groups.items()):
        year, period_key, department_id, cost_center, subject_code = key
        total_used = sum((Decimal(claim.amount or 0) for claim in group_claims), Decimal("0.00"))
        # Size the pool by dividing actual spend by the value from
        # target_budget_usage (presumably a usage ratio - TODO confirm).
        original_amount = (
            total_used / target_budget_usage(period_key, subject_code, allocation_index)
        ).quantize(Decimal("0.01"))
        allocation = BudgetAllocation(
            # uuid5 over the group key gives the same id for the same key.
            id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{SIM_BUDGET_PREFIX}:{key}")),
            budget_no=f"{SIM_BUDGET_PREFIX}-R{allocation_index:04d}",
            fiscal_year=year,
            period_type="quarter",
            period_key=period_key,
            department_id=department_id,
            department_name=group_claims[0].department_name,
            cost_center=cost_center,
            project_code=SIM_PROJECT_CODE,
            subject_code=subject_code,
            subject_name=SUBJECT_LABELS.get(subject_code, subject_code),
            # Never create a pool smaller than 3000.00.
            original_amount=max(original_amount, Decimal("3000.00")),
            adjusted_amount=Decimal("0.00"),
            status="active",
            warning_threshold=Decimal("80.00"),
            control_action="warn",
            description="半年报销模拟数据部门分布修复预算池",
            created_by="simulation",
            updated_by="simulation",
        )
        db.add(allocation)
        db.flush()
        for claim in group_claims:
            db.add(_budget_transaction(allocation.id, claim, transaction_index))
            if claim.status in PENDING_STATUSES:
                # The reservation reuses its transaction's sequence number.
                db.add(_budget_reservation(allocation.id, claim, transaction_index))
            transaction_index += 1
        allocation_index += 1
|
||||
|
||||
|
||||
def _budget_transaction(allocation_id: str, claim: ExpenseClaim, index: int) -> BudgetTransaction:
    """Build the simulated ledger entry that books ``claim`` on an allocation.

    The transaction number is ``SIM_TRANSACTION_PREFIX`` plus a zero-padded
    ``index``; the id is a uuid5 derived from that number. Claims whose status
    is in ``SUCCESS_STATUSES`` become ``"consume"`` entries, all others
    ``"reserve"``. Before/after available amounts are written as zero.
    """
    number = f"{SIM_TRANSACTION_PREFIX}-R{index:04d}"
    if claim.status in SUCCESS_STATUSES:
        kind = "consume"
    else:
        kind = "reserve"

    row_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{number}"))
    booked_amount = Decimal(claim.amount or 0)
    context = {"project_code": SIM_PROJECT_CODE, "simulated": True, "repair": True}
    booked_at = claim.submitted_at or claim.occurred_at

    return BudgetTransaction(
        id=row_id,
        transaction_no=number,
        allocation_id=allocation_id,
        source_type="claim",
        source_id=claim.id,
        source_no=claim.claim_no,
        transaction_type=kind,
        amount=booked_amount,
        before_available_amount=Decimal("0.00"),
        after_available_amount=Decimal("0.00"),
        operator="simulation",
        reason="修复后模拟数据预算台账",
        context_json=context,
        created_at=booked_at,
    )
|
||||
|
||||
|
||||
def _budget_reservation(allocation_id: str, claim: ExpenseClaim, index: int) -> BudgetReservation:
    """Build the simulated active reservation holding ``claim``'s amount.

    The reservation number is ``SIM_RESERVATION_PREFIX`` plus a zero-padded
    ``index``; the id is a uuid5 derived from that number. The creation time is
    the claim's submission time, or its occurrence time when not submitted.
    """
    number = f"{SIM_RESERVATION_PREFIX}-R{index:04d}"
    row_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"repair:{number}"))
    held_amount = Decimal(claim.amount or 0)
    context = {"project_code": SIM_PROJECT_CODE, "simulated": True, "repair": True}
    reserved_at = claim.submitted_at or claim.occurred_at

    return BudgetReservation(
        id=row_id,
        reservation_no=number,
        allocation_id=allocation_id,
        source_type="claim",
        source_id=claim.id,
        source_no=claim.claim_no,
        source_status="active",
        amount=held_amount,
        context_json=context,
        created_at=reserved_at,
    )
|
||||
|
||||
|
||||
def _recent_claims(claims: list[ExpenseClaim]) -> list[ExpenseClaim]:
    """Pick the claims that get rewritten as recent pending ones.

    Returns the highest-amount claims, ``RECENT_PENDING_PER_DEPARTMENT`` for
    every entry of ``DEPARTMENT_PLAN``, ordered by amount descending. A missing
    amount counts as zero. Fewer claims come back when ``claims`` is short.
    """

    def amount_of(claim: ExpenseClaim) -> Decimal:
        return Decimal(claim.amount or 0)

    quota = RECENT_PENDING_PER_DEPARTMENT * len(DEPARTMENT_PLAN)
    by_amount_desc = sorted(claims, key=amount_of, reverse=True)
    return by_amount_desc[:quota]
|
||||
|
||||
|
||||
def _department_amounts(claims: list[ExpenseClaim]) -> dict[str, str]:
    """Sum claim amounts per department name and format the totals.

    Claims without a department name are collected under ``"待补充"``; a
    missing amount counts as zero. Formatting and ordering are delegated to
    ``_format_amounts``.
    """
    totals: dict[str, Decimal] = defaultdict(Decimal)
    for claim in claims:
        label = claim.department_name or "待补充"
        totals[label] += Decimal(claim.amount or 0)
    return _format_amounts(totals)
|
||||
|
||||
|
||||
def _recent_pending_amounts(claims: list[ExpenseClaim]) -> dict[str, str]:
    """Sum pending claim amounts per department for 2026-06-01..2026-06-02.

    Only claims whose status is in ``PENDING_STATUSES`` are counted. The date
    checked is the submission time, or the occurrence time when the claim has
    no submission time; claims with neither are ignored. Claims without a
    department name are collected under ``"待补充"``.
    """
    window_start = date(2026, 6, 1)
    window_end = date(2026, 6, 2)

    totals: dict[str, Decimal] = defaultdict(Decimal)
    for claim in claims:
        if claim.status in PENDING_STATUSES:
            stamp = claim.submitted_at or claim.occurred_at
            if stamp and window_start <= stamp.date() <= window_end:
                label = claim.department_name or "待补充"
                totals[label] += Decimal(claim.amount or 0)
    return _format_amounts(totals)
|
||||
|
||||
|
||||
def _preview_claims(
    claims: list[ExpenseClaim],
    departments: dict[str, OrganizationUnit],
    claim_plan: dict[str, int],
) -> list[ExpenseClaim]:
    """Build lightweight stand-ins showing how claims would be redistributed.

    Nothing in ``claims`` is modified. The claims chosen by ``_recent_claims``
    are set aside; the remaining ones are handed out to departments in
    ``DEPARTMENT_PLAN`` order, each department receiving its planned count
    minus ``RECENT_PENDING_PER_DEPARTMENT`` (never below zero) with status and
    submission time unchanged. The set-aside claims are then spread
    ``RECENT_PENDING_PER_DEPARTMENT`` per department as ``"submitted"`` clones
    dated from ``RECENT_DATES``.

    Raises:
        KeyError: if a planned department code is missing in ``departments``.
        IndexError: if there are fewer set-aside claims or recent dates than needed.
    """
    recent_claims = _recent_claims(claims)
    recent_lookup = set(recent_claims)
    regular_claims = [claim for claim in claims if claim not in recent_lookup]

    preview: list[ExpenseClaim] = []

    cursor = 0
    for code, _weight in DEPARTMENT_PLAN:
        department = departments[code]
        quota = max(claim_plan.get(code, 0) - RECENT_PENDING_PER_DEPARTMENT, 0)
        preview.extend(
            _clone_claim(claim, department.name, claim.status, claim.submitted_at)
            for claim in regular_claims[cursor : cursor + quota]
        )
        cursor += quota

    recent_cursor = 0
    for code, _weight in DEPARTMENT_PLAN:
        department = departments[code]
        for slot in range(RECENT_PENDING_PER_DEPARTMENT):
            clone = _clone_claim(
                recent_claims[recent_cursor],
                department.name,
                "submitted",
                RECENT_DATES[slot],
            )
            preview.append(clone)
            recent_cursor += 1

    return preview
|
||||
|
||||
|
||||
def _clone_claim(
    claim: ExpenseClaim,
    department_name: str,
    status: str,
    submitted_at: datetime | None,
) -> Any:
    """Return a detached stand-in for ``claim`` with overridden fields.

    The result is an instance of a throwaway ``ClaimPreview`` class carrying
    the given department name, status and submission time, plus the claim's
    own occurrence time, expense type and amount. ``claim`` is not modified.
    """
    snapshot = {
        "department_name": department_name,
        "status": status,
        "submitted_at": submitted_at,
        "occurred_at": claim.occurred_at,
        "expense_type": claim.expense_type,
        "amount": claim.amount,
    }
    # A fresh class per call keeps the values as class attributes of the clone.
    preview_cls = type("ClaimPreview", (), snapshot)
    return preview_cls()
|
||||
|
||||
|
||||
def _planned_budget_counts(claims: list[Any]) -> tuple[int, int, int]:
    """Predict how many budget rows a rebuild would create for ``claims``.

    Only claims whose status is in ``BUDGETED_STATUSES`` are counted. Every
    such claim yields one transaction; those whose status is also in
    ``PENDING_STATUSES`` yield one reservation as well. Allocations are counted
    as distinct (quarter, department name, subject code) combinations, where
    the quarter comes from the submission time (or the occurrence time when
    the claim has none).

    Args:
        claims: claims or preview stand-ins exposing ``status``,
            ``submitted_at``, ``occurred_at``, ``department_name`` and,
            optionally, ``expense_type``.

    Returns:
        ``(allocation_count, transaction_count, reservation_count)``.
    """
    allocation_keys: set[tuple[str, Any, Any]] = set()
    transaction_count = 0
    reservation_count = 0
    for claim in claims:
        if claim.status not in BUDGETED_STATUSES:
            continue
        submitted_at = claim.submitted_at or claim.occurred_at
        quarter = ((submitted_at.month - 1) // 3) + 1
        period_key = f"{submitted_at.year}Q{quarter}"
        expense_type = getattr(claim, "expense_type", "")
        # _rebuild_sim_budget books entertainment under the meal subject, so
        # both types share one allocation; counting them separately would
        # overstate the planned allocation count.
        subject_code = "meal" if expense_type == "entertainment" else expense_type
        allocation_keys.add((period_key, claim.department_name, subject_code))
        transaction_count += 1
        reservation_count += int(claim.status in PENDING_STATUSES)
    return len(allocation_keys), transaction_count, reservation_count
|
||||
|
||||
|
||||
def _claim_cost_center(
    claim: ExpenseClaim,
    departments: dict[str, OrganizationUnit],
) -> str | None:
    """Look up the cost center of the department ``claim`` belongs to.

    Scans ``departments`` for the first entry whose ``id`` equals the claim's
    ``department_id`` and returns its ``cost_center``; returns ``None`` when no
    department matches.
    """
    return next(
        (
            unit.cost_center
            for unit in departments.values()
            if unit.id == claim.department_id
        ),
        None,
    )
|
||||
|
||||
|
||||
def _format_amounts(buckets: dict[str, Decimal]) -> dict[str, str]:
    """Render bucket totals as two-decimal strings, largest total first.

    The returned dict keeps insertion order, so iterating it yields the
    buckets from highest to lowest amount; equal amounts keep their original
    relative order.
    """
    ranked = sorted(buckets.items(), key=lambda pair: pair[1], reverse=True)
    cent = Decimal("0.01")
    formatted: dict[str, str] = {}
    for label, total in ranked:
        formatted[label] = str(total.quantize(cent))
    return formatted
|
||||
|
||||
|
||||
def _hours(value: int):
    """Return a ``timedelta`` spanning ``value`` hours."""
    # Imported locally and under an alias so module-level names stay untouched.
    import datetime as _dt

    return _dt.timedelta(hours=value)
|
||||
|
||||
|
||||
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
111
server/scripts/seed_half_year_expense_demo.py
Normal file
111
server/scripts/seed_half_year_expense_demo.py
Normal file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
SERVER_DIR = Path(__file__).resolve().parents[1]
|
||||
SRC_DIR = SERVER_DIR / "src"
|
||||
if str(SRC_DIR) not in sys.path:
|
||||
sys.path.insert(0, str(SRC_DIR))
|
||||
|
||||
from app.db.session import get_session_factory # noqa: E402
|
||||
from app.models.employee import Employee # noqa: E402
|
||||
from app.services.demo_company_simulation_filters import is_admin_employee_like # noqa: E402
|
||||
from app.services.demo_company_simulation_seed import ( # noqa: E402
|
||||
HalfYearExpenseSimulationSeeder,
|
||||
SimulationConfig,
|
||||
)
|
||||
from app.services.employee_behavior_profile_service import ( # noqa: E402
|
||||
EmployeeBehaviorProfileService,
|
||||
)
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the seeding script's command-line options.

    Without ``--apply`` the parsed namespace describes a dry run.
    ``--start-date`` accepts an ISO date (``YYYY-MM-DD``).
    """
    cli = argparse.ArgumentParser(
        description="Seed half-year simulated reimbursement, budget, and employee data.",
    )
    add = cli.add_argument

    # Shape of the simulated data set.
    add("--target-employees", type=int, default=100)
    add("--start-date", type=date.fromisoformat, default=date(2026, 1, 1))
    add("--months", type=int, default=6)
    add("--seed", type=int, default=20260602)

    # Write / post-processing switches.
    add("--apply", action="store_true", help="Write data. Default is dry-run only.")
    add(
        "--refresh-profiles",
        action="store_true",
        help="After --apply, refresh employee behavior profile snapshots for simulated employees.",
    )
    add("--profile-limit", type=int, default=120)

    return cli.parse_args()
|
||||
|
||||
|
||||
def main() -> None:
    """Preview or apply the half-year expense simulation and print a summary.

    Without ``--apply`` only the seeder's preview runs. With ``--apply`` the
    data is written and then either committed directly or, when
    ``--refresh-profiles`` is also given, handed to
    ``refresh_company_profiles`` (which commits itself). The summary is printed
    as JSON, followed by a hint about the next flag to pass. Any exception
    rolls the session back and is re-raised.
    """
    options = parse_args()
    simulation = SimulationConfig(
        target_employees=options.target_employees,
        start_date=options.start_date,
        months=options.months,
        seed=options.seed,
    )

    open_session = get_session_factory()
    with open_session() as db:
        seeder = HalfYearExpenseSimulationSeeder(db, simulation)
        try:
            write_mode = options.apply
            summary = seeder.apply() if write_mode else seeder.preview()

            profile_refresh = None
            if write_mode and options.refresh_profiles:
                profile_refresh = refresh_company_profiles(db, limit=options.profile_limit)
            elif write_mode:
                db.commit()

            payload = summary.to_dict()
            if profile_refresh is not None:
                payload["profile_refresh"] = profile_refresh
            print(json.dumps(payload, ensure_ascii=False, indent=2))

            if not write_mode:
                print("dry-run only; pass --apply after confirmation to write simulated data.")
            elif not options.refresh_profiles:
                print("pass --refresh-profiles to generate employee behavior profile snapshots.")
        except Exception:
            db.rollback()
            raise
|
||||
|
||||
|
||||
def refresh_company_profiles(db, *, limit: int) -> dict[str, object]:
    """Refresh behavior-profile snapshots for non-admin employees and commit.

    Employees are read ordered by employee number, those matching
    ``is_admin_employee_like`` are dropped, and the first ``limit`` remaining
    ones are refreshed for 30, 90 and 180 day windows. ``limit`` is clamped to
    the range 1..500; a falsy value is treated as 120.

    Returns:
        A summary dict with the number of employees processed, the number of
        snapshots produced and the fixed refresh parameters.
    """
    capped_limit = max(1, min(int(limit or 120), 500))

    roster = db.scalars(select(Employee).order_by(Employee.employee_no.asc())).all()
    eligible_ids = []
    for employee in roster:
        if is_admin_employee_like(employee):
            continue
        eligible_ids.append(employee.id)
    employee_ids = eligible_ids[:capped_limit]

    service = EmployeeBehaviorProfileService(db)
    snapshot_count = 0
    for employee_id in employee_ids:
        # commit=False: everything is committed once, after the loop.
        produced = service.refresh_employee_profiles(
            employee_id=employee_id,
            window_days=(30, 90, 180),
            expense_type_scope="overall",
            source_task_type="half_year_expense_demo_seed",
            commit=False,
        )
        snapshot_count += len(produced)

    db.commit()

    report: dict[str, object] = {
        "target_employee_count": len(employee_ids),
        "snapshot_count": snapshot_count,
        "window_days": [30, 90, 180],
        "source_task_type": "half_year_expense_demo_seed",
        "scope": "all_non_admin_employees",
    }
    return report
|
||||
|
||||
|
||||
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user