From 222ba0bfdc4e538aa558d4115e8fdafc88dd9a70 Mon Sep 17 00:00:00 2001 From: caoxiaozhu Date: Fri, 22 May 2026 10:42:31 +0800 Subject: [PATCH] refactor(server): split oversized backend services --- .../app/services/agent_asset_json_rules.py | 98 + .../app/services/agent_asset_onlyoffice.py | 450 ++ .../agent_asset_spreadsheet_helpers.py | 298 + .../src/app/services/agent_asset_timeline.py | 132 + server/src/app/services/agent_assets.py | 935 +-- server/src/app/services/agent_foundation.py | 2250 +----- .../agent_foundation_asset_helpers.py | 322 + .../services/agent_foundation_asset_seed.py | 599 ++ .../services/agent_foundation_asset_topup.py | 667 ++ .../services/agent_foundation_constants.py | 207 + .../agent_foundation_financial_seed.py | 726 ++ .../app/services/agent_foundation_markdown.py | 202 + .../services/agent_foundation_risk_rules.py | 474 ++ .../services/agent_foundation_spreadsheets.py | 400 ++ .../src/app/services/document_intelligence.py | 172 +- .../services/document_intelligence_rules.py | 120 + .../services/document_intelligence_types.py | 53 + server/src/app/services/employee.py | 403 +- server/src/app/services/employee_import.py | 331 + server/src/app/services/employee_seed.py | 1013 +-- .../services/employee_seed_organizations.py | 112 + .../src/app/services/employee_seed_part1.py | 434 ++ .../src/app/services/employee_seed_part2.py | 412 ++ .../src/app/services/employee_seed_roles.py | 52 + .../app/services/employee_serialization.py | 126 + .../services/expense_claim_access_policy.py | 401 ++ .../expense_claim_attachment_analysis.py | 668 ++ .../expense_claim_attachment_document.py | 336 + .../expense_claim_attachment_operations.py | 495 ++ .../expense_claim_attachment_presentation.py | 138 + .../expense_claim_attachment_storage.py | 129 + .../app/services/expense_claim_constants.py | 361 + .../expense_claim_document_item_builder.py | 560 ++ .../expense_claim_document_parsing.py | 396 ++ .../app/services/expense_claim_draft_flow.py | 612 ++ 
.../expense_claim_draft_persistence.py | 343 + .../src/app/services/expense_claim_errors.py | 7 + .../app/services/expense_claim_item_sync.py | 461 ++ .../expense_claim_ontology_resolvers.py | 392 ++ .../services/expense_claim_platform_risk.py | 733 ++ .../services/expense_claim_policy_review.py | 654 ++ .../app/services/expense_claim_read_model.py | 269 + .../services/expense_claim_review_preview.py | 393 ++ .../app/services/expense_claim_risk_review.py | 177 + server/src/app/services/expense_claims.py | 6218 +---------------- .../src/app/services/expense_rule_runtime.py | 580 +- .../services/expense_rule_runtime_defaults.py | 299 + .../services/expense_rule_runtime_models.py | 116 + .../expense_rule_runtime_standards.py | 166 + server/src/app/services/knowledge.py | 1763 ++--- .../src/app/services/knowledge_constants.py | 66 + .../services/knowledge_document_extractors.py | 223 + .../src/app/services/knowledge_file_utils.py | 112 + .../app/services/knowledge_ingest_status.py | 69 + .../src/app/services/knowledge_onlyoffice.py | 166 + server/src/app/services/knowledge_preview.py | 157 + server/src/app/services/knowledge_rag.py | 1215 +--- .../src/app/services/knowledge_rag_runtime.py | 672 ++ server/src/app/services/ontology.py | 2280 ++---- server/src/app/services/ontology_detection.py | 451 ++ .../src/app/services/ontology_extraction.py | 529 ++ server/src/app/services/ontology_rules.py | 298 + .../src/app/services/ontology_validation.py | 285 + server/src/app/services/orchestrator.py | 1965 ++---- .../app/services/orchestrator_execution.py | 626 ++ .../services/orchestrator_expense_query.py | 535 ++ server/src/app/services/user_agent.py | 5116 +------------- .../src/app/services/user_agent_constants.py | 179 + .../src/app/services/user_agent_documents.py | 380 + .../src/app/services/user_agent_knowledge.py | 627 ++ .../user_agent_knowledge_constants.py | 54 + .../services/user_agent_knowledge_helpers.py | 322 + .../src/app/services/user_agent_response.py | 726 
++ .../app/services/user_agent_review_core.py | 528 ++ .../services/user_agent_review_messages.py | 673 ++ .../app/services/user_agent_review_profile.py | 465 ++ .../app/services/user_agent_review_slots.py | 706 ++ .../user_agent_review_travel_policy.py | 360 + .../user_agent_review_travel_receipts.py | 625 ++ .../tests/test_agent_foundation_endpoints.py | 6 +- server/tests/test_expense_claim_service.py | 33 +- server/tests/test_orchestrator_review_flow.py | 9 + server/tests/test_reimbursement_endpoints.py | 11 +- server/tests/test_user_agent_service.py | 37 + 84 files changed, 26263 insertions(+), 21898 deletions(-) create mode 100644 server/src/app/services/agent_asset_json_rules.py create mode 100644 server/src/app/services/agent_asset_onlyoffice.py create mode 100644 server/src/app/services/agent_asset_spreadsheet_helpers.py create mode 100644 server/src/app/services/agent_asset_timeline.py create mode 100644 server/src/app/services/agent_foundation_asset_helpers.py create mode 100644 server/src/app/services/agent_foundation_asset_seed.py create mode 100644 server/src/app/services/agent_foundation_asset_topup.py create mode 100644 server/src/app/services/agent_foundation_constants.py create mode 100644 server/src/app/services/agent_foundation_financial_seed.py create mode 100644 server/src/app/services/agent_foundation_markdown.py create mode 100644 server/src/app/services/agent_foundation_risk_rules.py create mode 100644 server/src/app/services/agent_foundation_spreadsheets.py create mode 100644 server/src/app/services/document_intelligence_rules.py create mode 100644 server/src/app/services/document_intelligence_types.py create mode 100644 server/src/app/services/employee_import.py create mode 100644 server/src/app/services/employee_seed_organizations.py create mode 100644 server/src/app/services/employee_seed_part1.py create mode 100644 server/src/app/services/employee_seed_part2.py create mode 100644 server/src/app/services/employee_seed_roles.py create 
mode 100644 server/src/app/services/employee_serialization.py create mode 100644 server/src/app/services/expense_claim_access_policy.py create mode 100644 server/src/app/services/expense_claim_attachment_analysis.py create mode 100644 server/src/app/services/expense_claim_attachment_document.py create mode 100644 server/src/app/services/expense_claim_attachment_operations.py create mode 100644 server/src/app/services/expense_claim_attachment_presentation.py create mode 100644 server/src/app/services/expense_claim_attachment_storage.py create mode 100644 server/src/app/services/expense_claim_constants.py create mode 100644 server/src/app/services/expense_claim_document_item_builder.py create mode 100644 server/src/app/services/expense_claim_document_parsing.py create mode 100644 server/src/app/services/expense_claim_draft_flow.py create mode 100644 server/src/app/services/expense_claim_draft_persistence.py create mode 100644 server/src/app/services/expense_claim_errors.py create mode 100644 server/src/app/services/expense_claim_item_sync.py create mode 100644 server/src/app/services/expense_claim_ontology_resolvers.py create mode 100644 server/src/app/services/expense_claim_platform_risk.py create mode 100644 server/src/app/services/expense_claim_policy_review.py create mode 100644 server/src/app/services/expense_claim_read_model.py create mode 100644 server/src/app/services/expense_claim_review_preview.py create mode 100644 server/src/app/services/expense_claim_risk_review.py create mode 100644 server/src/app/services/expense_rule_runtime_defaults.py create mode 100644 server/src/app/services/expense_rule_runtime_models.py create mode 100644 server/src/app/services/expense_rule_runtime_standards.py create mode 100644 server/src/app/services/knowledge_constants.py create mode 100644 server/src/app/services/knowledge_document_extractors.py create mode 100644 server/src/app/services/knowledge_file_utils.py create mode 100644 
server/src/app/services/knowledge_ingest_status.py create mode 100644 server/src/app/services/knowledge_onlyoffice.py create mode 100644 server/src/app/services/knowledge_preview.py create mode 100644 server/src/app/services/knowledge_rag_runtime.py create mode 100644 server/src/app/services/ontology_detection.py create mode 100644 server/src/app/services/ontology_extraction.py create mode 100644 server/src/app/services/ontology_rules.py create mode 100644 server/src/app/services/ontology_validation.py create mode 100644 server/src/app/services/orchestrator_execution.py create mode 100644 server/src/app/services/orchestrator_expense_query.py create mode 100644 server/src/app/services/user_agent_constants.py create mode 100644 server/src/app/services/user_agent_documents.py create mode 100644 server/src/app/services/user_agent_knowledge.py create mode 100644 server/src/app/services/user_agent_knowledge_constants.py create mode 100644 server/src/app/services/user_agent_knowledge_helpers.py create mode 100644 server/src/app/services/user_agent_response.py create mode 100644 server/src/app/services/user_agent_review_core.py create mode 100644 server/src/app/services/user_agent_review_messages.py create mode 100644 server/src/app/services/user_agent_review_profile.py create mode 100644 server/src/app/services/user_agent_review_slots.py create mode 100644 server/src/app/services/user_agent_review_travel_policy.py create mode 100644 server/src/app/services/user_agent_review_travel_receipts.py diff --git a/server/src/app/services/agent_asset_json_rules.py b/server/src/app/services/agent_asset_json_rules.py new file mode 100644 index 0000000..3300783 --- /dev/null +++ b/server/src/app/services/agent_asset_json_rules.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +from app.models.agent_asset import AgentAsset +from app.schemas.agent_asset import AgentAssetRuleJsonRead, AgentAssetRuleJsonWrite +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY, 
RULE_LIBRARY_NAMES + + +class AgentAssetJsonRuleMixin: + def _resolve_json_risk_rule_document(self, asset: AgentAsset) -> tuple[str, str]: + config_json = dict(asset.config_json or {}) + detail_mode = str(config_json.get("detail_mode") or "").strip().lower() + if detail_mode != "json_risk": + raise ValueError("当前资产不是 JSON 风险规则。") + + rule_library = str(config_json.get("rule_library") or RISK_RULES_LIBRARY).strip() + if rule_library not in RULE_LIBRARY_NAMES: + raise ValueError("规则库目录不合法。") + + rule_document = config_json.get("rule_document") + if not isinstance(rule_document, dict): + raise ValueError("规则资产缺少 rule_document 配置。") + + file_name = str(rule_document.get("file_name") or "").strip() + if not file_name: + raise ValueError("规则资产缺少 JSON 文件名。") + return rule_library, file_name + + def read_rule_json(self, asset_id: str) -> AgentAssetRuleJsonRead: + asset = self.repository.get(asset_id) + if asset is None: + raise LookupError("资产不存在。") + + rule_library, file_name = self._resolve_json_risk_rule_document(asset) + payload = self.rule_library_manager.read_rule_library_json( + library=rule_library, + file_name=file_name, + ) + return AgentAssetRuleJsonRead( + file_name=file_name, + rule_code=str(payload.get("rule_code") or asset.code or ""), + name=str(payload.get("name") or asset.name or ""), + description=str(payload.get("description") or asset.description or "").strip(), + evaluator=str(payload.get("evaluator") or ""), + ontology_signal=str(payload.get("ontology_signal") or "") or None, + inputs=payload.get("inputs") if isinstance(payload.get("inputs"), dict) else {}, + outcomes=payload.get("outcomes") if isinstance(payload.get("outcomes"), dict) else {}, + payload=payload, + ) + + def write_rule_json( + self, + asset_id: str, + *, + body: AgentAssetRuleJsonWrite, + actor: str, + request_id: str | None = None, + ) -> AgentAssetRuleJsonRead: + asset = self.repository.get(asset_id) + if asset is None: + raise LookupError("资产不存在。") + + rule_library, file_name = 
self._resolve_json_risk_rule_document(asset) + payload = dict(body.payload or {}) + asset_code = str(asset.code or "").strip() + if asset_code and str(payload.get("rule_code") or "").strip() not in {"", asset_code}: + raise ValueError("规则 JSON 的 rule_code 必须与资产编码一致。") + if asset_code and not str(payload.get("rule_code") or "").strip(): + payload["rule_code"] = asset_code + + saved = self.rule_library_manager.write_rule_library_json( + library=rule_library, + file_name=file_name, + payload=payload, + ) + rule_description = str(saved.get("description") or "").strip() + if rule_description: + asset.description = rule_description + rule_name = str(saved.get("name") or "").strip() + if rule_name: + asset.name = rule_name + risk_category = str(saved.get("risk_category") or "").strip() + if risk_category: + config_json = dict(asset.config_json or {}) + config_json["risk_category"] = risk_category + asset.config_json = config_json + asset.scenario_json = [risk_category] + self.audit_service.log_action( + actor=actor, + action="update_agent_asset_rule_json", + resource_type=asset.asset_type, + resource_id=asset.id, + before_json={"file_name": file_name}, + after_json={"file_name": file_name, "rule_code": saved.get("rule_code")}, + request_id=request_id, + ) + self.db.commit() + return self.read_rule_json(asset_id) + diff --git a/server/src/app/services/agent_asset_onlyoffice.py b/server/src/app/services/agent_asset_onlyoffice.py new file mode 100644 index 0000000..2c99410 --- /dev/null +++ b/server/src/app/services/agent_asset_onlyoffice.py @@ -0,0 +1,450 @@ +from __future__ import annotations + +from dataclasses import dataclass +from datetime import UTC, datetime +from pathlib import Path +from typing import Any +from urllib.parse import quote +from urllib.request import Request, urlopen + +import jwt + +from app.api.deps import CurrentUserContext +from app.core.config import get_settings +from app.schemas.agent_asset import AgentAssetOnlyOfficeConfigRead +from 
app.services.agent_asset_spreadsheet import ( + COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, + FINANCE_RULES_LIBRARY, + SPREADSHEET_MIME_TYPE, + AgentAssetSpreadsheetManager, + RuleSpreadsheetMeta, +) +from app.services.settings import resolve_onlyoffice_settings + +PREVIEW_RULE_ASSET_ID = "preview-rule-expense-company-travel-expense" +PREVIEW_RULE_CURRENT_VERSION = "v1.2.0" +PREVIEW_RULE_VERSION_FILENAMES = { + PREVIEW_RULE_CURRENT_VERSION: COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, + "v1.1.0": "鍏徃宸梾璐规姤閿€瑙勫垯-v1.1.0.xlsx", + "v1.0.0": "鍏徃宸梾璐规姤閿€瑙勫垯-v1.0.0.xlsx", +} + + +@dataclass(slots=True) +class OnlyOfficeCallbackPayload: + status: int + download_url: str + users: list[str] + + +class AgentAssetOnlyOfficeMixin: + @staticmethod + def _resolve_onlyoffice_settings(): + from app.services import agent_assets + + return agent_assets.resolve_onlyoffice_settings() + + def build_rule_spreadsheet_onlyoffice_config( + self, + asset_id: str, + current_user: CurrentUserContext, + *, + version: str | None = None, + ) -> AgentAssetOnlyOfficeConfigRead: + self._ensure_ready() + if asset_id == PREVIEW_RULE_ASSET_ID: + resolved_version, metadata = self._ensure_preview_rule_spreadsheet(version=version) + return self._build_onlyoffice_spreadsheet_config( + asset_id=asset_id, + current_user=current_user, + metadata=metadata, + editable=resolved_version == PREVIEW_RULE_CURRENT_VERSION, + ) + + asset = self._require_spreadsheet_rule(asset_id) + _, metadata = self._resolve_current_spreadsheet_meta(asset) + editable = self._can_edit_current_spreadsheet(current_user) + return self._build_onlyoffice_spreadsheet_config( + asset_id=asset.id, + current_user=current_user, + metadata=metadata, + editable=editable, + ) + + def get_rule_spreadsheet_content( + self, + asset_id: str, + *, + version: str | None = None, + ) -> tuple[Path, str, str]: + self._ensure_ready() + if asset_id == PREVIEW_RULE_ASSET_ID: + _, metadata = self._ensure_preview_rule_spreadsheet(version=version) + file_path = 
self.spreadsheet_manager.resolve_storage_path(metadata.storage_key) + if not file_path.exists(): + raise FileNotFoundError(metadata.file_name) + return file_path, metadata.mime_type, metadata.file_name + + asset = self._require_spreadsheet_rule(asset_id) + requested_version = str(version or "").strip() + if requested_version and requested_version != "current": + _, metadata = self._resolve_spreadsheet_version_meta(asset, version=requested_version) + else: + _, metadata = self._resolve_current_spreadsheet_meta(asset) + file_path = self.spreadsheet_manager.resolve_storage_path(metadata.storage_key) + if not file_path.exists(): + raise FileNotFoundError(metadata.file_name) + return file_path, metadata.mime_type, metadata.file_name + + def validate_rule_spreadsheet_access_token( + self, + asset_id: str, + access_token: str, + ) -> None: + onlyoffice_settings = self._resolve_onlyoffice_settings() + try: + payload = jwt.decode( + access_token, + onlyoffice_settings.jwt_secret, + algorithms=["HS256"], + ) + except jwt.PyJWTError as exc: + raise ValueError("ONLYOFFICE 文件访问令牌无效。") from exc + + if ( + payload.get("scope") != "agent-asset-spreadsheet" + or payload.get("asset_id") != asset_id + ): + raise ValueError("ONLYOFFICE 文件访问令牌无效。") + + def upload_rule_spreadsheet( + self, + asset_id: str, + *, + filename: str, + content: bytes, + actor: str, + request_id: str | None = None, + change_note: str | None = None, + source: str = "upload", + ) -> AgentAssetRead: + self._ensure_ready() + asset = self._require_spreadsheet_rule(asset_id) + normalized_name = Path(str(filename or "").strip()).name.strip() + if not normalized_name: + raise ValueError("规则表文件名不能为空。") + if Path(normalized_name).suffix.lower() != ".xlsx": + raise ValueError("当前仅支持上传 .xlsx 格式的规则表。") + if not content: + raise ValueError("规则表文件内容不能为空。") + + _, current_metadata = self._resolve_current_spreadsheet_meta(asset) + file_name = current_metadata.file_name or self._resolve_default_spreadsheet_file_name(asset) + 
sheet_changes, cell_changes = self._collect_workbook_changes_from_content( + current_metadata, + content, + ) + changed_sheet_count = self._count_changed_sheets(sheet_changes, cell_changes) + changed_cell_count = len(cell_changes) + + metadata = self._store_current_rule_spreadsheet( + asset, + file_name=file_name, + content=content, + actor=actor, + source=source, + ) + summary = self._build_spreadsheet_change_summary( + sheet_changes, + cell_changes, + ) + self.audit_service.log_action( + actor=actor, + action="edit_rule_spreadsheet", + resource_type=asset.asset_type, + resource_id=asset.id, + before_json={"storage_key": current_metadata.storage_key}, + after_json={ + "summary": summary, + "changed_sheet_count": changed_sheet_count, + "changed_cell_count": changed_cell_count, + "sheet_changes": [item.model_dump() for item in sheet_changes], + "cell_changes": [item.model_dump() for item in cell_changes[:500]], + "storage_key": metadata.storage_key, + }, + request_id=request_id, + ) + return self.get_asset(asset.id) # type: ignore[return-value] + + def import_rule_spreadsheet_content( + self, + asset_id: str, + *, + filename: str, + content: bytes, + actor: str, + request_id: str | None = None, + ) -> AgentAssetRead: + self._ensure_ready() + asset = self._require_spreadsheet_rule(asset_id) + normalized_name = Path(str(filename or "").strip()).name.strip() + if not normalized_name: + raise ValueError("待导入表格文件名不能为空。") + if Path(normalized_name).suffix.lower() != ".xlsx": + raise ValueError("当前仅支持导入 .xlsx 格式的规则表。") + + _, current_metadata = self._resolve_current_spreadsheet_meta(asset) + imported_content = self.spreadsheet_manager.rebuild_from_uploaded_content(content) + return self.upload_rule_spreadsheet( + asset.id, + filename=current_metadata.file_name, + content=imported_content, + actor=actor, + request_id=request_id, + change_note=f"导入 Excel 表格内容:{normalized_name}", + source="content-import", + ) + + def handle_rule_spreadsheet_onlyoffice_callback( + self, + 
asset_id: str, + *, + version: str | None = None, + payload: dict[str, Any], + actor_name: str | None = None, + ) -> None: + self._ensure_ready() + if asset_id == PREVIEW_RULE_ASSET_ID: + self._handle_preview_rule_spreadsheet_onlyoffice_callback( + version=version, + payload=payload, + ) + return + + asset = self._require_spreadsheet_rule(asset_id) + callback = self._parse_onlyoffice_callback(payload) + if callback.status not in {2, 6} or not callback.download_url: + return + + _, current_metadata = self._resolve_current_spreadsheet_meta(asset) + request = Request( + callback.download_url, + headers={"User-Agent": "x-financial-onlyoffice-agent-asset"}, + ) + with urlopen(request, timeout=30) as response: # noqa: S310 + content = response.read() + + if current_metadata.checksum and current_metadata.checksum == self._hash_bytes(content): + return + + resolved_actor_name = str(actor_name or "").strip() or ( + callback.users[0] if callback.users else "ONLYOFFICE" + ) + self.upload_rule_spreadsheet( + asset.id, + filename=current_metadata.file_name, + content=content, + actor=resolved_actor_name, + source="onlyoffice", + ) + + + @staticmethod + def _can_edit_current_spreadsheet(current_user: CurrentUserContext) -> bool: + role_codes = {str(item).strip() for item in current_user.role_codes} + return current_user.is_admin or "manager" in role_codes or "finance" in role_codes + + @staticmethod + def _build_onlyoffice_document_key( + asset_id: str, + metadata: RuleSpreadsheetMeta, + ) -> str: + fingerprint = metadata.checksum or metadata.updated_at or metadata.file_name + raw_key = f"{asset_id}-{fingerprint}" + return "".join( + character if character.isalnum() or character in {"-", "_", ".", "="} else "_" + for character in raw_key + ) + + def _build_onlyoffice_access_token(self, asset_id: str) -> str: + onlyoffice_settings = self._resolve_onlyoffice_settings() + payload = { + "scope": "agent-asset-spreadsheet", + "asset_id": asset_id, + } + return jwt.encode(payload, 
onlyoffice_settings.jwt_secret, algorithm="HS256") + + @staticmethod + def _parse_onlyoffice_callback(payload: dict[str, Any]) -> OnlyOfficeCallbackPayload: + return OnlyOfficeCallbackPayload( + status=int(payload.get("status") or 0), + download_url=str(payload.get("url") or "").strip(), + users=[str(item).strip() for item in payload.get("users") or [] if str(item).strip()], + ) + + + + def _build_onlyoffice_spreadsheet_config( + self, + *, + asset_id: str, + current_user: CurrentUserContext, + metadata: RuleSpreadsheetMeta, + editable: bool, + ) -> AgentAssetOnlyOfficeConfigRead: + onlyoffice_settings = self._resolve_onlyoffice_settings() + settings = get_settings() + if not onlyoffice_settings.enabled: + raise ValueError("ONLYOFFICE 预览未启用。") + if not onlyoffice_settings.public_url or not onlyoffice_settings.backend_url: + raise ValueError("ONLYOFFICE 地址配置不完整。") + if not onlyoffice_settings.jwt_secret: + raise ValueError("ONLYOFFICE JWT 密钥未配置。") + + backend_base_url = onlyoffice_settings.backend_url.rstrip("/") + public_url = onlyoffice_settings.public_url.rstrip("/") + access_token = self._build_onlyoffice_access_token(asset_id) + document_url = ( + f"{backend_base_url}{settings.api_v1_prefix}/agent-assets/{asset_id}/spreadsheet/onlyoffice/content" + f"?access_token={access_token}" + ) + callback_url = ( + f"{backend_base_url}{settings.api_v1_prefix}/agent-assets/{asset_id}/spreadsheet/onlyoffice/callback" + f"?actor_name={quote(current_user.name)}" + ) + + config: dict[str, Any] = { + "documentType": "cell", + "document": { + "fileType": Path(metadata.file_name).suffix.lstrip(".").lower() or "xlsx", + "key": self._build_onlyoffice_document_key(asset_id, metadata), + "title": metadata.file_name, + "url": document_url, + "permissions": { + "download": True, + "edit": editable, + "print": True, + "copy": True, + }, + }, + "editorConfig": { + "mode": "edit" if editable else "view", + "lang": "zh-CN", + "callbackUrl": callback_url, + "user": { + "id": 
current_user.username, + "name": current_user.name, + }, + "customization": { + "compactHeader": True, + "compactToolbar": False, + "toolbarNoTabs": False, + "autosave": False, + "forcesave": editable, + }, + }, + "width": "100%", + "height": "100%", + } + config["token"] = jwt.encode(config, onlyoffice_settings.jwt_secret, algorithm="HS256") + return AgentAssetOnlyOfficeConfigRead(documentServerUrl=public_url, config=config) + + def _ensure_preview_rule_spreadsheet( + self, + *, + version: str | None = None, + ) -> tuple[str, RuleSpreadsheetMeta]: + resolved_version = str(version or PREVIEW_RULE_CURRENT_VERSION).strip() + if resolved_version not in PREVIEW_RULE_VERSION_FILENAMES: + raise LookupError(f"版本 {resolved_version} 不存在") + + file_name = PREVIEW_RULE_VERSION_FILENAMES[resolved_version] + storage_key = ( + Path("rules") + / FINANCE_RULES_LIBRARY + / ".versions" + / PREVIEW_RULE_ASSET_ID + / resolved_version + / file_name + ).as_posix() + try: + file_path = self.spreadsheet_manager.resolve_storage_path(storage_key) + except FileNotFoundError: + file_path = None + + if file_path is not None and file_path.exists(): + content = file_path.read_bytes() + updated_at = datetime.fromtimestamp(file_path.stat().st_mtime, UTC).isoformat() + return resolved_version, RuleSpreadsheetMeta( + file_name=file_name, + storage_key=storage_key, + mime_type=SPREADSHEET_MIME_TYPE, + size_bytes=file_path.stat().st_size, + checksum=self._hash_bytes(content), + updated_at=updated_at, + updated_by="ONLYOFFICE 预览", + source="preview", + ) + + metadata = self.spreadsheet_manager.store_rule_library_spreadsheet_snapshot( + library=FINANCE_RULES_LIBRARY, + asset_id=PREVIEW_RULE_ASSET_ID, + version=resolved_version, + file_name=file_name, + content=AgentAssetSpreadsheetManager.build_company_travel_rule_template(), + actor_name="ONLYOFFICE 预览", + source="preview", + ) + return resolved_version, metadata + + def _handle_preview_rule_spreadsheet_onlyoffice_callback( + self, + *, + version: str, 
+ payload: dict[str, Any], + ) -> None: + callback = self._parse_onlyoffice_callback(payload) + if callback.status not in {2, 6} or not callback.download_url: + return + + resolved_version, metadata = self._ensure_preview_rule_spreadsheet(version=version) + request = Request( + callback.download_url, + headers={"User-Agent": "x-financial-onlyoffice-agent-asset-preview"}, + ) + with urlopen(request, timeout=30) as response: # noqa: S310 + content = response.read() + + if metadata.checksum and metadata.checksum == self._hash_bytes(content): + return + + actor_name = callback.users[0] if callback.users else "ONLYOFFICE" + self.spreadsheet_manager.store_rule_library_spreadsheet_snapshot( + library=FINANCE_RULES_LIBRARY, + asset_id=PREVIEW_RULE_ASSET_ID, + version=resolved_version, + file_name=metadata.file_name, + content=content, + actor_name=actor_name, + source="onlyoffice-preview", + ) + + @staticmethod + def _read_current_rule_document_meta(asset: AgentAsset) -> RuleSpreadsheetMeta | None: + payload = (asset.config_json or {}).get("rule_document") + if not isinstance(payload, dict): + return None + + return RuleSpreadsheetMeta( + file_name=str(payload.get("file_name") or "").strip(), + storage_key=str(payload.get("storage_key") or "").strip(), + mime_type=( + str(payload.get("mime_type") or "").strip() + or "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + ), + size_bytes=int(payload.get("size_bytes") or 0), + checksum=str(payload.get("checksum") or "").strip(), + updated_at=str(payload.get("updated_at") or "").strip(), + updated_by=str(payload.get("updated_by") or "system").strip() or "system", + source=str(payload.get("source") or "upload").strip() or "upload", + ) diff --git a/server/src/app/services/agent_asset_spreadsheet_helpers.py b/server/src/app/services/agent_asset_spreadsheet_helpers.py new file mode 100644 index 0000000..e789689 --- /dev/null +++ b/server/src/app/services/agent_asset_spreadsheet_helpers.py @@ -0,0 +1,298 @@ +from 
__future__ import annotations + +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from app.core.agent_enums import AgentAssetType +from app.models.agent_asset import AgentAsset +from app.schemas.agent_asset import ( + AgentAssetSpreadsheetDiffCellRead, + AgentAssetSpreadsheetDiffSheetRead, +) +from app.services.agent_asset_spreadsheet import ( + COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, + COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME, + COMPANY_TRAVEL_EXPENSE_RULE_CODE, + COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, + FINANCE_RULES_LIBRARY, + RULE_LIBRARY_NAMES, + SPREADSHEET_MIME_TYPE, + AgentAssetSpreadsheetManager, + RuleSpreadsheetMeta, +) + + +class AgentAssetSpreadsheetHelperMixin: + def _require_spreadsheet_rule(self, asset_id: str) -> AgentAsset: + asset = self.repository.get(asset_id) + if asset is None: + raise LookupError("Asset not found") + if asset.asset_type != AgentAssetType.RULE.value: + raise ValueError("仅规则资产支持 Excel 规则表。") + detail_mode = str((asset.config_json or {}).get("detail_mode") or "").strip().lower() + if detail_mode != "spreadsheet": + raise ValueError("当前规则未配置 Excel 规则表。") + return asset + + def _resolve_spreadsheet_version_meta( + self, + asset: AgentAsset, + *, + version: str | None = None, + ) -> tuple[str, RuleSpreadsheetMeta]: + resolved_version = str(version or self._resolve_working_version(asset) or "").strip() + if not resolved_version: + raise ValueError("当前规则尚未配置表格版本。") + + version_row = self.repository.get_version(asset.id, resolved_version) + if version_row is None: + raise LookupError(f"版本 {resolved_version} 不存在") + + # 版本记录中的快照才是不变的事实来源。`/rules` 下的工作簿只是当前 + # 可编辑副本,后续写入不应该反向污染某个已存在版本的内容。 + metadata = self.spreadsheet_manager.parse_version_markdown(str(version_row.content or "")) + if metadata is None and self._resolve_working_version(asset) == resolved_version: + metadata = self._read_current_rule_document_meta(asset) + if metadata is None: + raise FileNotFoundError("规则表版本快照不存在。") + return 
resolved_version, metadata + + def _resolve_current_spreadsheet_meta( + self, + asset: AgentAsset, + ) -> tuple[str, RuleSpreadsheetMeta]: + config_json = dict(asset.config_json or {}) + current_meta = self._read_current_rule_document_meta(asset) + file_name = ( + current_meta.file_name + if current_meta is not None and current_meta.file_name + else self._resolve_default_spreadsheet_file_name(asset) + ) + library = self._resolve_spreadsheet_rule_library(asset) + storage_key = (Path("rules") / library / file_name).as_posix() + file_path = self.spreadsheet_manager.resolve_storage_path(storage_key) + + if not file_path.exists(): + content: bytes | None = None + if current_meta is not None and current_meta.storage_key: + try: + legacy_path = self.spreadsheet_manager.resolve_storage_path( + current_meta.storage_key + ) + except FileNotFoundError: + legacy_path = None + if legacy_path is not None and legacy_path.exists(): + content = legacy_path.read_bytes() + if content is None: + content = AgentAssetSpreadsheetManager.build_blank_rule_workbook( + Path(file_name).stem or "规则表" + ) + meta = self.spreadsheet_manager.store_rule_library_spreadsheet( + library=library, + file_name=file_name, + content=content, + actor_name=( + current_meta.updated_by + if current_meta is not None and current_meta.updated_by + else "system" + ), + source="current-rule", + ) + else: + content = file_path.read_bytes() + meta = RuleSpreadsheetMeta( + file_name=file_name, + storage_key=storage_key, + mime_type=( + current_meta.mime_type + if current_meta is not None and current_meta.mime_type + else SPREADSHEET_MIME_TYPE + ), + size_bytes=file_path.stat().st_size, + checksum=self._hash_bytes(content), + updated_at=datetime.fromtimestamp(file_path.stat().st_mtime, UTC).isoformat(), + updated_by=( + current_meta.updated_by + if current_meta is not None and current_meta.updated_by + else "system" + ), + source=( + current_meta.source + if current_meta is not None and current_meta.source + else 
"current-rule" + ), + ) + + expected_document = { + **self.spreadsheet_manager.build_rule_document_config( + meta, + asset_version="current", + ), + "storage_key": meta.storage_key, + } + if config_json.get("rule_document") != expected_document: + config_json["detail_mode"] = "spreadsheet" + config_json["tag"] = str(config_json.get("tag") or "财务规则").strip() or "财务规则" + config_json["rule_library"] = library + config_json["rule_document"] = expected_document + asset.config_json = config_json + self.repository.save_asset(asset) + + return "current", meta + + def _store_current_rule_spreadsheet( + self, + asset: AgentAsset, + *, + file_name: str, + content: bytes, + actor: str, + source: str, + ) -> RuleSpreadsheetMeta: + library = self._resolve_spreadsheet_rule_library(asset) + metadata = self.spreadsheet_manager.store_rule_library_spreadsheet( + library=library, + file_name=file_name, + content=content, + actor_name=actor, + source=source, + ) + config_json = dict(asset.config_json or {}) + config_json["detail_mode"] = "spreadsheet" + config_json["tag"] = str(config_json.get("tag") or "财务规则").strip() or "财务规则" + config_json["rule_library"] = library + config_json["rule_document"] = { + **self.spreadsheet_manager.build_rule_document_config( + metadata, + asset_version="current", + ), + "storage_key": metadata.storage_key, + } + asset.config_json = config_json + self.repository.save_asset(asset) + return metadata + + @staticmethod + def _resolve_spreadsheet_rule_library(asset: AgentAsset) -> str: + config_json = dict(asset.config_json or {}) + library = str(config_json.get("rule_library") or FINANCE_RULES_LIBRARY).strip() + if library not in RULE_LIBRARY_NAMES: + return FINANCE_RULES_LIBRARY + return library + + @staticmethod + def _resolve_default_spreadsheet_file_name(asset: AgentAsset) -> str: + if asset.code == COMPANY_TRAVEL_EXPENSE_RULE_CODE: + return COMPANY_TRAVEL_EXPENSE_RULE_FILENAME + if asset.code == COMPANY_COMMUNICATION_EXPENSE_RULE_CODE: + return 
def _load_spreadsheet_for_compare(self, metadata: RuleSpreadsheetMeta):
    """Load the stored workbook described by ``metadata`` for diffing.

    The file is read fully into memory and parsed from that buffer with
    ``read_only=False`` and ``data_only=False``.

    Args:
        metadata: Spreadsheet metadata whose ``storage_key`` locates the file.

    Returns:
        The workbook object returned by ``openpyxl.load_workbook``.

    Raises:
        FileNotFoundError: If the resolved path does not exist; the message
            is ``metadata.file_name``.
    """
    from io import BytesIO

    from openpyxl import load_workbook

    workbook_path = self.spreadsheet_manager.resolve_storage_path(metadata.storage_key)
    if workbook_path.exists():
        raw_bytes = workbook_path.read_bytes()
        return load_workbook(BytesIO(raw_bytes), read_only=False, data_only=False)
    raise FileNotFoundError(metadata.file_name)
+ max_row = max(base_sheet.max_row, target_sheet.max_row) + max_column = max(base_sheet.max_column, target_sheet.max_column) + for row_index in range(1, max_row + 1): + for column_index in range(1, max_column + 1): + before_value = base_sheet.cell(row=row_index, column=column_index).value + after_value = target_sheet.cell(row=row_index, column=column_index).value + if before_value == after_value: + continue + if before_value in (None, ""): + change_type = "added" + elif after_value in (None, ""): + change_type = "removed" + else: + change_type = "modified" + cell_changes.append( + AgentAssetSpreadsheetDiffCellRead( + sheet_name=sheet_name, + cell=target_sheet.cell(row=row_index, column=column_index).coordinate, + change_type=change_type, + before_value=before_value, + after_value=after_value, + ) + ) + + for sheet_name in sorted({item.sheet_name for item in cell_changes}): + sheet_changes.append( + AgentAssetSpreadsheetDiffSheetRead(sheet_name=sheet_name, change_type="modified") + ) + + return sheet_changes, cell_changes + + @staticmethod + def _count_changed_sheets( + sheet_changes: list[AgentAssetSpreadsheetDiffSheetRead], + cell_changes: list[AgentAssetSpreadsheetDiffCellRead], + ) -> int: + return len( + {item.sheet_name for item in sheet_changes} + | {item.sheet_name for item in cell_changes} + ) + + @staticmethod + def _build_spreadsheet_change_summary( + sheet_changes: list[AgentAssetSpreadsheetDiffSheetRead], + cell_changes: list[AgentAssetSpreadsheetDiffCellRead], + ) -> str: + sheet_names = sorted( + {item.sheet_name for item in sheet_changes} + | {item.sheet_name for item in cell_changes} + ) + if not sheet_names: + return "文件内容已保存,未发现单元格级差异。" + + preview = "、".join(sheet_names[:3]) + if len(sheet_names) > 3: + preview = f"{preview} 等" + sheet_text = f"涉及 {len(sheet_names)} 个工作表({preview})" + if cell_changes: + return f"{sheet_text},共 {len(cell_changes)} 处单元格改动。" + return f"{sheet_text},工作表结构发生变化。" + + diff --git 
a/server/src/app/services/agent_asset_timeline.py b/server/src/app/services/agent_asset_timeline.py new file mode 100644 index 0000000..f37374d --- /dev/null +++ b/server/src/app/services/agent_asset_timeline.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +from app.core.agent_enums import AgentReviewStatus +from app.schemas.agent_asset import ( + AgentAssetSpreadsheetChangeRecordRead, + AgentAssetSpreadsheetDiffCellRead, + AgentAssetSpreadsheetDiffSheetRead, + AgentAssetVersionTimelineItemRead, +) + + +class AgentAssetTimelineMixin: + def list_version_timeline(self, asset_id: str) -> list[AgentAssetVersionTimelineItemRead]: + self._ensure_ready() + asset = self.repository.get(asset_id) + if asset is None: + raise LookupError("Asset not found") + + events: list[AgentAssetVersionTimelineItemRead] = [] + versions = self.repository.list_versions(asset_id) + for version in versions: + source_version = self._extract_restore_source_version(version.change_note) + events.append( + AgentAssetVersionTimelineItemRead( + event_type="restored" if source_version else "created", + version=version.version, + actor=version.created_by, + event_time=version.created_at, + title="恢复生成工作稿" if source_version else "创建工作版本", + description=version.change_note or "生成新版本", + note=version.change_note, + source_version=source_version, + ) + ) + + for review in self.repository.list_reviews(asset_id): + event_type = { + AgentReviewStatus.PENDING.value: "submitted", + AgentReviewStatus.APPROVED.value: "approved", + AgentReviewStatus.REJECTED.value: "rejected", + }.get(review.review_status, "reviewed") + title = { + "submitted": "提交审核", + "approved": "审核通过", + "rejected": "审核驳回", + }.get(event_type, "审核处理") + events.append( + AgentAssetVersionTimelineItemRead( + event_type=event_type, + version=review.version, + actor=review.reviewer, + event_time=review.reviewed_at or review.created_at, + title=title, + description=review.review_note or "", + note=review.review_note, + ) + ) + + 
audit_logs = self.audit_service.repository.list( + resource_type=asset.asset_type, + resource_id=asset.id, + limit=200, + ) + for log in audit_logs: + if log.action != "activate_agent_asset": + continue + after_json = log.after_json or {} + version = str( + after_json.get("published_version") + or after_json.get("current_version") + or "" + ).strip() + if not version: + continue + events.append( + AgentAssetVersionTimelineItemRead( + event_type="published", + version=version, + actor=log.actor, + event_time=log.created_at, + title="正式上线", + description="该版本已切换为线上正式版本。", + ) + ) + + return sorted(events, key=lambda item: item.event_time) + + def list_spreadsheet_change_records( + self, + asset_id: str, + *, + limit: int = 30, + ) -> list[AgentAssetSpreadsheetChangeRecordRead]: + self._ensure_ready() + asset = self._require_spreadsheet_rule(asset_id) + logs = self.audit_service.repository.list( + resource_type=asset.asset_type, + resource_id=asset.id, + action="edit_rule_spreadsheet", + limit=min(max(limit, 1), 30), + ) + return [ + AgentAssetSpreadsheetChangeRecordRead( + id=log.id, + actor=log.actor, + changed_at=log.created_at, + summary=str((log.after_json or {}).get("summary") or "表格内容已保存。"), + sheet_changes=[ + AgentAssetSpreadsheetDiffSheetRead.model_validate(item) + for item in ((log.after_json or {}).get("sheet_changes") or []) + ], + cell_changes=[ + AgentAssetSpreadsheetDiffCellRead.model_validate(item) + for item in ((log.after_json or {}).get("cell_changes") or []) + ], + changed_sheet_count=int((log.after_json or {}).get("changed_sheet_count") or 0), + changed_cell_count=int((log.after_json or {}).get("changed_cell_count") or 0), + ) + for log in logs + ] + + + @staticmethod + def _extract_restore_source_version(change_note: str | None) -> str | None: + normalized = str(change_note or "").strip() + prefix = "基于历史版本 " + suffix = " 恢复生成工作稿" + if not normalized.startswith(prefix) or suffix not in normalized: + return None + return 
normalized.removeprefix(prefix).split(suffix, 1)[0].strip() or None + + diff --git a/server/src/app/services/agent_assets.py b/server/src/app/services/agent_assets.py index 9a21fee..b985f7f 100644 --- a/server/src/app/services/agent_assets.py +++ b/server/src/app/services/agent_assets.py @@ -2,80 +2,48 @@ from __future__ import annotations import json from collections import defaultdict -from dataclasses import dataclass from datetime import UTC, datetime -from pathlib import Path from typing import Any -from urllib.parse import quote -from urllib.request import Request, urlopen - -import jwt from sqlalchemy.orm import Session -from app.api.deps import CurrentUserContext from app.core.agent_enums import ( AgentAssetContentType, AgentAssetStatus, AgentAssetType, AgentReviewStatus, ) -from app.core.config import get_settings from app.core.logging import get_logger from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion from app.repositories.agent_asset import AgentAssetRepository from app.schemas.agent_asset import ( AgentAssetCreate, AgentAssetListItem, - AgentAssetOnlyOfficeConfigRead, AgentAssetRead, AgentAssetReviewCreate, AgentAssetReviewRead, - AgentAssetRuleJsonRead, - AgentAssetRuleJsonWrite, - AgentAssetSpreadsheetChangeRecordRead, - AgentAssetSpreadsheetDiffCellRead, - AgentAssetSpreadsheetDiffSheetRead, AgentAssetUpdate, AgentAssetVersionCreate, AgentAssetVersionRead, - AgentAssetVersionTimelineItemRead, ) +from app.services.agent_asset_json_rules import AgentAssetJsonRuleMixin +from app.services.agent_asset_onlyoffice import AgentAssetOnlyOfficeMixin from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager -from app.services.agent_asset_spreadsheet import ( - COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, - COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME, - COMPANY_TRAVEL_EXPENSE_RULE_CODE, - COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, - FINANCE_RULES_LIBRARY, - RISK_RULES_LIBRARY, - RULE_LIBRARY_NAMES, - 
SPREADSHEET_MIME_TYPE, - AgentAssetSpreadsheetManager, - RuleSpreadsheetMeta, -) +from app.services.agent_asset_spreadsheet_helpers import AgentAssetSpreadsheetHelperMixin +from app.services.agent_asset_timeline import AgentAssetTimelineMixin +from app.services.agent_asset_spreadsheet import AgentAssetSpreadsheetManager from app.services.agent_foundation import AgentFoundationService from app.services.audit import AuditLogService from app.services.settings import resolve_onlyoffice_settings logger = get_logger("app.services.agent_assets") -PREVIEW_RULE_ASSET_ID = "preview-rule-expense-company-travel-expense" -PREVIEW_RULE_CURRENT_VERSION = "v1.2.0" -PREVIEW_RULE_VERSION_FILENAMES = { - PREVIEW_RULE_CURRENT_VERSION: COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, - "v1.1.0": "公司差旅费报销规则-v1.1.0.xlsx", - "v1.0.0": "公司差旅费报销规则-v1.0.0.xlsx", -} - -@dataclass(slots=True) -class OnlyOfficeCallbackPayload: - status: int - download_url: str - users: list[str] - - -class AgentAssetService: +class AgentAssetService( + AgentAssetOnlyOfficeMixin, + AgentAssetSpreadsheetHelperMixin, + AgentAssetTimelineMixin, + AgentAssetJsonRuleMixin, +): def __init__(self, db: Session) -> None: self.db = db self.repository = AgentAssetRepository(db) @@ -497,210 +465,6 @@ class AgentAssetService: logger.info("Activated agent asset id=%s code=%s", updated.id, updated.code) return self.get_asset(updated.id) # type: ignore[return-value] - def build_rule_spreadsheet_onlyoffice_config( - self, - asset_id: str, - current_user: CurrentUserContext, - *, - version: str | None = None, - ) -> AgentAssetOnlyOfficeConfigRead: - self._ensure_ready() - if asset_id == PREVIEW_RULE_ASSET_ID: - resolved_version, metadata = self._ensure_preview_rule_spreadsheet(version=version) - return self._build_onlyoffice_spreadsheet_config( - asset_id=asset_id, - current_user=current_user, - metadata=metadata, - editable=resolved_version == PREVIEW_RULE_CURRENT_VERSION, - ) - - asset = self._require_spreadsheet_rule(asset_id) - _, 
metadata = self._resolve_current_spreadsheet_meta(asset) - editable = self._can_edit_current_spreadsheet(current_user) - return self._build_onlyoffice_spreadsheet_config( - asset_id=asset.id, - current_user=current_user, - metadata=metadata, - editable=editable, - ) - - def get_rule_spreadsheet_content( - self, - asset_id: str, - *, - version: str | None = None, - ) -> tuple[Path, str, str]: - self._ensure_ready() - if asset_id == PREVIEW_RULE_ASSET_ID: - _, metadata = self._ensure_preview_rule_spreadsheet(version=version) - file_path = self.spreadsheet_manager.resolve_storage_path(metadata.storage_key) - if not file_path.exists(): - raise FileNotFoundError(metadata.file_name) - return file_path, metadata.mime_type, metadata.file_name - - asset = self._require_spreadsheet_rule(asset_id) - requested_version = str(version or "").strip() - if requested_version and requested_version != "current": - _, metadata = self._resolve_spreadsheet_version_meta(asset, version=requested_version) - else: - _, metadata = self._resolve_current_spreadsheet_meta(asset) - file_path = self.spreadsheet_manager.resolve_storage_path(metadata.storage_key) - if not file_path.exists(): - raise FileNotFoundError(metadata.file_name) - return file_path, metadata.mime_type, metadata.file_name - - def validate_rule_spreadsheet_access_token( - self, - asset_id: str, - access_token: str, - ) -> None: - onlyoffice_settings = resolve_onlyoffice_settings() - try: - payload = jwt.decode( - access_token, - onlyoffice_settings.jwt_secret, - algorithms=["HS256"], - ) - except jwt.PyJWTError as exc: - raise ValueError("ONLYOFFICE 文件访问令牌无效。") from exc - - if ( - payload.get("scope") != "agent-asset-spreadsheet" - or payload.get("asset_id") != asset_id - ): - raise ValueError("ONLYOFFICE 文件访问令牌无效。") - - def upload_rule_spreadsheet( - self, - asset_id: str, - *, - filename: str, - content: bytes, - actor: str, - request_id: str | None = None, - change_note: str | None = None, - source: str = "upload", - ) -> 
AgentAssetRead: - self._ensure_ready() - asset = self._require_spreadsheet_rule(asset_id) - normalized_name = Path(str(filename or "").strip()).name.strip() - if not normalized_name: - raise ValueError("规则表文件名不能为空。") - if Path(normalized_name).suffix.lower() != ".xlsx": - raise ValueError("当前仅支持上传 .xlsx 格式的规则表。") - if not content: - raise ValueError("规则表文件内容不能为空。") - - _, current_metadata = self._resolve_current_spreadsheet_meta(asset) - file_name = current_metadata.file_name or self._resolve_default_spreadsheet_file_name(asset) - sheet_changes, cell_changes = self._collect_workbook_changes_from_content( - current_metadata, - content, - ) - changed_sheet_count = self._count_changed_sheets(sheet_changes, cell_changes) - changed_cell_count = len(cell_changes) - - metadata = self._store_current_rule_spreadsheet( - asset, - file_name=file_name, - content=content, - actor=actor, - source=source, - ) - summary = self._build_spreadsheet_change_summary( - sheet_changes, - cell_changes, - ) - self.audit_service.log_action( - actor=actor, - action="edit_rule_spreadsheet", - resource_type=asset.asset_type, - resource_id=asset.id, - before_json={"storage_key": current_metadata.storage_key}, - after_json={ - "summary": summary, - "changed_sheet_count": changed_sheet_count, - "changed_cell_count": changed_cell_count, - "sheet_changes": [item.model_dump() for item in sheet_changes], - "cell_changes": [item.model_dump() for item in cell_changes[:500]], - "storage_key": metadata.storage_key, - }, - request_id=request_id, - ) - return self.get_asset(asset.id) # type: ignore[return-value] - - def import_rule_spreadsheet_content( - self, - asset_id: str, - *, - filename: str, - content: bytes, - actor: str, - request_id: str | None = None, - ) -> AgentAssetRead: - self._ensure_ready() - asset = self._require_spreadsheet_rule(asset_id) - normalized_name = Path(str(filename or "").strip()).name.strip() - if not normalized_name: - raise ValueError("待导入表格文件名不能为空。") - if 
Path(normalized_name).suffix.lower() != ".xlsx": - raise ValueError("当前仅支持导入 .xlsx 格式的规则表。") - - _, current_metadata = self._resolve_current_spreadsheet_meta(asset) - imported_content = self.spreadsheet_manager.rebuild_from_uploaded_content(content) - return self.upload_rule_spreadsheet( - asset.id, - filename=current_metadata.file_name, - content=imported_content, - actor=actor, - request_id=request_id, - change_note=f"导入 Excel 表格内容:{normalized_name}", - source="content-import", - ) - - def handle_rule_spreadsheet_onlyoffice_callback( - self, - asset_id: str, - *, - version: str | None = None, - payload: dict[str, Any], - actor_name: str | None = None, - ) -> None: - self._ensure_ready() - if asset_id == PREVIEW_RULE_ASSET_ID: - self._handle_preview_rule_spreadsheet_onlyoffice_callback( - version=version, - payload=payload, - ) - return - - asset = self._require_spreadsheet_rule(asset_id) - callback = self._parse_onlyoffice_callback(payload) - if callback.status not in {2, 6} or not callback.download_url: - return - - _, current_metadata = self._resolve_current_spreadsheet_meta(asset) - request = Request( - callback.download_url, - headers={"User-Agent": "x-financial-onlyoffice-agent-asset"}, - ) - with urlopen(request, timeout=30) as response: # noqa: S310 - content = response.read() - - if current_metadata.checksum and current_metadata.checksum == self._hash_bytes(content): - return - - resolved_actor_name = str(actor_name or "").strip() or ( - callback.users[0] if callback.users else "ONLYOFFICE" - ) - self.upload_rule_spreadsheet( - asset.id, - filename=current_metadata.file_name, - content=content, - actor=resolved_actor_name, - source="onlyoffice", - ) - def _ensure_ready(self) -> None: AgentFoundationService(self.db).ensure_foundation_ready() @@ -804,115 +568,6 @@ class AgentAssetService: ) return restored # type: ignore[return-value] - def list_version_timeline(self, asset_id: str) -> list[AgentAssetVersionTimelineItemRead]: - self._ensure_ready() - asset 
= self.repository.get(asset_id) - if asset is None: - raise LookupError("Asset not found") - - events: list[AgentAssetVersionTimelineItemRead] = [] - versions = self.repository.list_versions(asset_id) - for version in versions: - source_version = self._extract_restore_source_version(version.change_note) - events.append( - AgentAssetVersionTimelineItemRead( - event_type="restored" if source_version else "created", - version=version.version, - actor=version.created_by, - event_time=version.created_at, - title="恢复生成工作稿" if source_version else "创建工作版本", - description=version.change_note or "生成新版本", - note=version.change_note, - source_version=source_version, - ) - ) - - for review in self.repository.list_reviews(asset_id): - event_type = { - AgentReviewStatus.PENDING.value: "submitted", - AgentReviewStatus.APPROVED.value: "approved", - AgentReviewStatus.REJECTED.value: "rejected", - }.get(review.review_status, "reviewed") - title = { - "submitted": "提交审核", - "approved": "审核通过", - "rejected": "审核驳回", - }.get(event_type, "审核处理") - events.append( - AgentAssetVersionTimelineItemRead( - event_type=event_type, - version=review.version, - actor=review.reviewer, - event_time=review.reviewed_at or review.created_at, - title=title, - description=review.review_note or "", - note=review.review_note, - ) - ) - - audit_logs = self.audit_service.repository.list( - resource_type=asset.asset_type, - resource_id=asset.id, - limit=200, - ) - for log in audit_logs: - if log.action != "activate_agent_asset": - continue - after_json = log.after_json or {} - version = str( - after_json.get("published_version") - or after_json.get("current_version") - or "" - ).strip() - if not version: - continue - events.append( - AgentAssetVersionTimelineItemRead( - event_type="published", - version=version, - actor=log.actor, - event_time=log.created_at, - title="正式上线", - description="该版本已切换为线上正式版本。", - ) - ) - - return sorted(events, key=lambda item: item.event_time) - - def 
list_spreadsheet_change_records( - self, - asset_id: str, - *, - limit: int = 30, - ) -> list[AgentAssetSpreadsheetChangeRecordRead]: - self._ensure_ready() - asset = self._require_spreadsheet_rule(asset_id) - logs = self.audit_service.repository.list( - resource_type=asset.asset_type, - resource_id=asset.id, - action="edit_rule_spreadsheet", - limit=min(max(limit, 1), 30), - ) - return [ - AgentAssetSpreadsheetChangeRecordRead( - id=log.id, - actor=log.actor, - changed_at=log.created_at, - summary=str((log.after_json or {}).get("summary") or "表格内容已保存。"), - sheet_changes=[ - AgentAssetSpreadsheetDiffSheetRead.model_validate(item) - for item in ((log.after_json or {}).get("sheet_changes") or []) - ], - cell_changes=[ - AgentAssetSpreadsheetDiffCellRead.model_validate(item) - for item in ((log.after_json or {}).get("cell_changes") or []) - ], - changed_sheet_count=int((log.after_json or {}).get("changed_sheet_count") or 0), - changed_cell_count=int((log.after_json or {}).get("changed_cell_count") or 0), - ) - for log in logs - ] - def _serialize_version( self, version: AgentAssetVersion, asset: AgentAsset ) -> AgentAssetVersionRead: @@ -1037,337 +692,6 @@ class AgentAssetService: return version.content return json.loads(version.content) - def _require_spreadsheet_rule(self, asset_id: str) -> AgentAsset: - asset = self.repository.get(asset_id) - if asset is None: - raise LookupError("Asset not found") - if asset.asset_type != AgentAssetType.RULE.value: - raise ValueError("仅规则资产支持 Excel 规则表。") - detail_mode = str((asset.config_json or {}).get("detail_mode") or "").strip().lower() - if detail_mode != "spreadsheet": - raise ValueError("当前规则未配置 Excel 规则表。") - return asset - - def _resolve_spreadsheet_version_meta( - self, - asset: AgentAsset, - *, - version: str | None = None, - ) -> tuple[str, RuleSpreadsheetMeta]: - resolved_version = str(version or self._resolve_working_version(asset) or "").strip() - if not resolved_version: - raise ValueError("当前规则尚未配置表格版本。") - - 
version_row = self.repository.get_version(asset.id, resolved_version) - if version_row is None: - raise LookupError(f"版本 {resolved_version} 不存在") - - # 版本记录中的快照才是不变的事实来源。`/rules` 下的工作簿只是当前 - # 可编辑副本,后续写入不应该反向污染某个已存在版本的内容。 - metadata = self.spreadsheet_manager.parse_version_markdown(str(version_row.content or "")) - if metadata is None and self._resolve_working_version(asset) == resolved_version: - metadata = self._read_current_rule_document_meta(asset) - if metadata is None: - raise FileNotFoundError("规则表版本快照不存在。") - return resolved_version, metadata - - def _resolve_current_spreadsheet_meta( - self, - asset: AgentAsset, - ) -> tuple[str, RuleSpreadsheetMeta]: - config_json = dict(asset.config_json or {}) - current_meta = self._read_current_rule_document_meta(asset) - file_name = ( - current_meta.file_name - if current_meta is not None and current_meta.file_name - else self._resolve_default_spreadsheet_file_name(asset) - ) - library = self._resolve_spreadsheet_rule_library(asset) - storage_key = (Path("rules") / library / file_name).as_posix() - file_path = self.spreadsheet_manager.resolve_storage_path(storage_key) - - if not file_path.exists(): - content: bytes | None = None - if current_meta is not None and current_meta.storage_key: - try: - legacy_path = self.spreadsheet_manager.resolve_storage_path( - current_meta.storage_key - ) - except FileNotFoundError: - legacy_path = None - if legacy_path is not None and legacy_path.exists(): - content = legacy_path.read_bytes() - if content is None: - content = AgentAssetSpreadsheetManager.build_blank_rule_workbook( - Path(file_name).stem or "规则表" - ) - meta = self.spreadsheet_manager.store_rule_library_spreadsheet( - library=library, - file_name=file_name, - content=content, - actor_name=( - current_meta.updated_by - if current_meta is not None and current_meta.updated_by - else "system" - ), - source="current-rule", - ) - else: - content = file_path.read_bytes() - meta = RuleSpreadsheetMeta( - file_name=file_name, - 
storage_key=storage_key, - mime_type=( - current_meta.mime_type - if current_meta is not None and current_meta.mime_type - else SPREADSHEET_MIME_TYPE - ), - size_bytes=file_path.stat().st_size, - checksum=self._hash_bytes(content), - updated_at=datetime.fromtimestamp(file_path.stat().st_mtime, UTC).isoformat(), - updated_by=( - current_meta.updated_by - if current_meta is not None and current_meta.updated_by - else "system" - ), - source=( - current_meta.source - if current_meta is not None and current_meta.source - else "current-rule" - ), - ) - - expected_document = { - **self.spreadsheet_manager.build_rule_document_config( - meta, - asset_version="current", - ), - "storage_key": meta.storage_key, - } - if config_json.get("rule_document") != expected_document: - config_json["detail_mode"] = "spreadsheet" - config_json["tag"] = str(config_json.get("tag") or "财务规则").strip() or "财务规则" - config_json["rule_library"] = library - config_json["rule_document"] = expected_document - asset.config_json = config_json - self.repository.save_asset(asset) - - return "current", meta - - def _store_current_rule_spreadsheet( - self, - asset: AgentAsset, - *, - file_name: str, - content: bytes, - actor: str, - source: str, - ) -> RuleSpreadsheetMeta: - library = self._resolve_spreadsheet_rule_library(asset) - metadata = self.spreadsheet_manager.store_rule_library_spreadsheet( - library=library, - file_name=file_name, - content=content, - actor_name=actor, - source=source, - ) - config_json = dict(asset.config_json or {}) - config_json["detail_mode"] = "spreadsheet" - config_json["tag"] = str(config_json.get("tag") or "财务规则").strip() or "财务规则" - config_json["rule_library"] = library - config_json["rule_document"] = { - **self.spreadsheet_manager.build_rule_document_config( - metadata, - asset_version="current", - ), - "storage_key": metadata.storage_key, - } - asset.config_json = config_json - self.repository.save_asset(asset) - return metadata - - @staticmethod - def 
_resolve_spreadsheet_rule_library(asset: AgentAsset) -> str: - config_json = dict(asset.config_json or {}) - library = str(config_json.get("rule_library") or FINANCE_RULES_LIBRARY).strip() - if library not in RULE_LIBRARY_NAMES: - return FINANCE_RULES_LIBRARY - return library - - @staticmethod - def _resolve_default_spreadsheet_file_name(asset: AgentAsset) -> str: - if asset.code == COMPANY_TRAVEL_EXPENSE_RULE_CODE: - return COMPANY_TRAVEL_EXPENSE_RULE_FILENAME - if asset.code == COMPANY_COMMUNICATION_EXPENSE_RULE_CODE: - return COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME - fallback = Path(str(asset.name or "规则表").strip()).name - return fallback if fallback.lower().endswith(".xlsx") else f"{fallback}.xlsx" - - def _build_onlyoffice_spreadsheet_config( - self, - *, - asset_id: str, - current_user: CurrentUserContext, - metadata: RuleSpreadsheetMeta, - editable: bool, - ) -> AgentAssetOnlyOfficeConfigRead: - onlyoffice_settings = resolve_onlyoffice_settings() - settings = get_settings() - if not onlyoffice_settings.enabled: - raise ValueError("ONLYOFFICE 预览未启用。") - if not onlyoffice_settings.public_url or not onlyoffice_settings.backend_url: - raise ValueError("ONLYOFFICE 地址配置不完整。") - if not onlyoffice_settings.jwt_secret: - raise ValueError("ONLYOFFICE JWT 密钥未配置。") - - backend_base_url = onlyoffice_settings.backend_url.rstrip("/") - public_url = onlyoffice_settings.public_url.rstrip("/") - access_token = self._build_onlyoffice_access_token(asset_id) - document_url = ( - f"{backend_base_url}{settings.api_v1_prefix}/agent-assets/{asset_id}/spreadsheet/onlyoffice/content" - f"?access_token={access_token}" - ) - callback_url = ( - f"{backend_base_url}{settings.api_v1_prefix}/agent-assets/{asset_id}/spreadsheet/onlyoffice/callback" - f"?actor_name={quote(current_user.name)}" - ) - - config: dict[str, Any] = { - "documentType": "cell", - "document": { - "fileType": Path(metadata.file_name).suffix.lstrip(".").lower() or "xlsx", - "key": 
self._build_onlyoffice_document_key(asset_id, metadata), - "title": metadata.file_name, - "url": document_url, - "permissions": { - "download": True, - "edit": editable, - "print": True, - "copy": True, - }, - }, - "editorConfig": { - "mode": "edit" if editable else "view", - "lang": "zh-CN", - "callbackUrl": callback_url, - "user": { - "id": current_user.username, - "name": current_user.name, - }, - "customization": { - "compactHeader": True, - "compactToolbar": False, - "toolbarNoTabs": False, - "autosave": False, - "forcesave": editable, - }, - }, - "width": "100%", - "height": "100%", - } - config["token"] = jwt.encode(config, onlyoffice_settings.jwt_secret, algorithm="HS256") - return AgentAssetOnlyOfficeConfigRead(documentServerUrl=public_url, config=config) - - def _ensure_preview_rule_spreadsheet( - self, - *, - version: str | None = None, - ) -> tuple[str, RuleSpreadsheetMeta]: - resolved_version = str(version or PREVIEW_RULE_CURRENT_VERSION).strip() - if resolved_version not in PREVIEW_RULE_VERSION_FILENAMES: - raise LookupError(f"版本 {resolved_version} 不存在") - - file_name = PREVIEW_RULE_VERSION_FILENAMES[resolved_version] - storage_key = ( - Path("rules") - / FINANCE_RULES_LIBRARY - / ".versions" - / PREVIEW_RULE_ASSET_ID - / resolved_version - / file_name - ).as_posix() - try: - file_path = self.spreadsheet_manager.resolve_storage_path(storage_key) - except FileNotFoundError: - file_path = None - - if file_path is not None and file_path.exists(): - content = file_path.read_bytes() - updated_at = datetime.fromtimestamp(file_path.stat().st_mtime, UTC).isoformat() - return resolved_version, RuleSpreadsheetMeta( - file_name=file_name, - storage_key=storage_key, - mime_type=SPREADSHEET_MIME_TYPE, - size_bytes=file_path.stat().st_size, - checksum=self._hash_bytes(content), - updated_at=updated_at, - updated_by="ONLYOFFICE 预览", - source="preview", - ) - - metadata = self.spreadsheet_manager.store_rule_library_spreadsheet_snapshot( - 
library=FINANCE_RULES_LIBRARY, - asset_id=PREVIEW_RULE_ASSET_ID, - version=resolved_version, - file_name=file_name, - content=AgentAssetSpreadsheetManager.build_company_travel_rule_template(), - actor_name="ONLYOFFICE 预览", - source="preview", - ) - return resolved_version, metadata - - def _handle_preview_rule_spreadsheet_onlyoffice_callback( - self, - *, - version: str, - payload: dict[str, Any], - ) -> None: - callback = self._parse_onlyoffice_callback(payload) - if callback.status not in {2, 6} or not callback.download_url: - return - - resolved_version, metadata = self._ensure_preview_rule_spreadsheet(version=version) - request = Request( - callback.download_url, - headers={"User-Agent": "x-financial-onlyoffice-agent-asset-preview"}, - ) - with urlopen(request, timeout=30) as response: # noqa: S310 - content = response.read() - - if metadata.checksum and metadata.checksum == self._hash_bytes(content): - return - - actor_name = callback.users[0] if callback.users else "ONLYOFFICE" - self.spreadsheet_manager.store_rule_library_spreadsheet_snapshot( - library=FINANCE_RULES_LIBRARY, - asset_id=PREVIEW_RULE_ASSET_ID, - version=resolved_version, - file_name=metadata.file_name, - content=content, - actor_name=actor_name, - source="onlyoffice-preview", - ) - - @staticmethod - def _read_current_rule_document_meta(asset: AgentAsset) -> RuleSpreadsheetMeta | None: - payload = (asset.config_json or {}).get("rule_document") - if not isinstance(payload, dict): - return None - - return RuleSpreadsheetMeta( - file_name=str(payload.get("file_name") or "").strip(), - storage_key=str(payload.get("storage_key") or "").strip(), - mime_type=( - str(payload.get("mime_type") or "").strip() - or "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" - ), - size_bytes=int(payload.get("size_bytes") or 0), - checksum=str(payload.get("checksum") or "").strip(), - updated_at=str(payload.get("updated_at") or "").strip(), - updated_by=str(payload.get("updated_by") or 
"system").strip() or "system", - source=str(payload.get("source") or "upload").strip() or "upload", - ) - @staticmethod def _increment_version(version: str | None) -> str: normalized = str(version or "").strip().removeprefix("v") @@ -1377,40 +701,6 @@ class AgentAssetService: major, minor, patch = [int(item) for item in parts] return f"v{major}.{minor}.{patch + 1}" - @staticmethod - def _can_edit_current_spreadsheet(current_user: CurrentUserContext) -> bool: - role_codes = {str(item).strip() for item in current_user.role_codes} - return current_user.is_admin or "manager" in role_codes or "finance" in role_codes - - @staticmethod - def _build_onlyoffice_document_key( - asset_id: str, - metadata: RuleSpreadsheetMeta, - ) -> str: - fingerprint = metadata.checksum or metadata.updated_at or metadata.file_name - raw_key = f"{asset_id}-{fingerprint}" - return "".join( - character if character.isalnum() or character in {"-", "_", ".", "="} else "_" - for character in raw_key - ) - - @staticmethod - def _build_onlyoffice_access_token(asset_id: str) -> str: - onlyoffice_settings = resolve_onlyoffice_settings() - payload = { - "scope": "agent-asset-spreadsheet", - "asset_id": asset_id, - } - return jwt.encode(payload, onlyoffice_settings.jwt_secret, algorithm="HS256") - - @staticmethod - def _parse_onlyoffice_callback(payload: dict[str, Any]) -> OnlyOfficeCallbackPayload: - return OnlyOfficeCallbackPayload( - status=int(payload.get("status") or 0), - download_url=str(payload.get("url") or "").strip(), - users=[str(item).strip() for item in payload.get("users") or [] if str(item).strip()], - ) - @staticmethod def _hash_bytes(content: bytes) -> str: import hashlib @@ -1460,211 +750,8 @@ class AgentAssetService: return "rejected" return "draft" - def _load_spreadsheet_for_compare(self, metadata: RuleSpreadsheetMeta): - from io import BytesIO - - from openpyxl import load_workbook - - file_path = self.spreadsheet_manager.resolve_storage_path(metadata.storage_key) - if not 
file_path.exists(): - raise FileNotFoundError(metadata.file_name) - return load_workbook(BytesIO(file_path.read_bytes()), read_only=False, data_only=False) - - def _collect_workbook_changes_from_content( - self, - base_metadata: RuleSpreadsheetMeta, - target_content: bytes, - ) -> tuple[list[AgentAssetSpreadsheetDiffSheetRead], list[AgentAssetSpreadsheetDiffCellRead]]: - from io import BytesIO - - from openpyxl import load_workbook - - base_workbook = self._load_spreadsheet_for_compare(base_metadata) - target_workbook = load_workbook(BytesIO(target_content), read_only=False, data_only=False) - return self._collect_workbook_changes(base_workbook, target_workbook) - - def _collect_workbook_changes( - self, base_workbook, target_workbook - ) -> tuple[list[AgentAssetSpreadsheetDiffSheetRead], list[AgentAssetSpreadsheetDiffCellRead]]: - base_sheet_names = set(base_workbook.sheetnames) - target_sheet_names = set(target_workbook.sheetnames) - sheet_changes: list[AgentAssetSpreadsheetDiffSheetRead] = [] - for sheet_name in sorted(target_sheet_names - base_sheet_names): - sheet_changes.append( - AgentAssetSpreadsheetDiffSheetRead(sheet_name=sheet_name, change_type="added") - ) - for sheet_name in sorted(base_sheet_names - target_sheet_names): - sheet_changes.append( - AgentAssetSpreadsheetDiffSheetRead(sheet_name=sheet_name, change_type="removed") - ) - - cell_changes: list[AgentAssetSpreadsheetDiffCellRead] = [] - - for sheet_name in sorted(base_sheet_names & target_sheet_names): - base_sheet = base_workbook[sheet_name] - target_sheet = target_workbook[sheet_name] - max_row = max(base_sheet.max_row, target_sheet.max_row) - max_column = max(base_sheet.max_column, target_sheet.max_column) - for row_index in range(1, max_row + 1): - for column_index in range(1, max_column + 1): - before_value = base_sheet.cell(row=row_index, column=column_index).value - after_value = target_sheet.cell(row=row_index, column=column_index).value - if before_value == after_value: - continue - if 
before_value in (None, ""): - change_type = "added" - elif after_value in (None, ""): - change_type = "removed" - else: - change_type = "modified" - cell_changes.append( - AgentAssetSpreadsheetDiffCellRead( - sheet_name=sheet_name, - cell=target_sheet.cell(row=row_index, column=column_index).coordinate, - change_type=change_type, - before_value=before_value, - after_value=after_value, - ) - ) - - for sheet_name in sorted({item.sheet_name for item in cell_changes}): - sheet_changes.append( - AgentAssetSpreadsheetDiffSheetRead(sheet_name=sheet_name, change_type="modified") - ) - - return sheet_changes, cell_changes - - @staticmethod - def _count_changed_sheets( - sheet_changes: list[AgentAssetSpreadsheetDiffSheetRead], - cell_changes: list[AgentAssetSpreadsheetDiffCellRead], - ) -> int: - return len( - {item.sheet_name for item in sheet_changes} - | {item.sheet_name for item in cell_changes} - ) - - @staticmethod - def _build_spreadsheet_change_summary( - sheet_changes: list[AgentAssetSpreadsheetDiffSheetRead], - cell_changes: list[AgentAssetSpreadsheetDiffCellRead], - ) -> str: - sheet_names = sorted( - {item.sheet_name for item in sheet_changes} - | {item.sheet_name for item in cell_changes} - ) - if not sheet_names: - return "文件内容已保存,未发现单元格级差异。" - - preview = "、".join(sheet_names[:3]) - if len(sheet_names) > 3: - preview = f"{preview} 等" - sheet_text = f"涉及 {len(sheet_names)} 个工作表({preview})" - if cell_changes: - return f"{sheet_text},共 {len(cell_changes)} 处单元格改动。" - return f"{sheet_text},工作表结构发生变化。" - def _next_available_version(self, asset: AgentAsset) -> str: candidate = self._increment_version(self._resolve_working_version(asset)) while self.repository.get_version(asset.id, candidate) is not None: candidate = self._increment_version(candidate) return candidate - - @staticmethod - def _extract_restore_source_version(change_note: str | None) -> str | None: - normalized = str(change_note or "").strip() - prefix = "基于历史版本 " - suffix = " 恢复生成工作稿" - if not 
normalized.startswith(prefix) or suffix not in normalized: - return None - return normalized.removeprefix(prefix).split(suffix, 1)[0].strip() or None - - def _resolve_json_risk_rule_document(self, asset: AgentAsset) -> tuple[str, str]: - config_json = dict(asset.config_json or {}) - detail_mode = str(config_json.get("detail_mode") or "").strip().lower() - if detail_mode != "json_risk": - raise ValueError("当前资产不是 JSON 风险规则。") - - rule_library = str(config_json.get("rule_library") or RISK_RULES_LIBRARY).strip() - if rule_library not in RULE_LIBRARY_NAMES: - raise ValueError("规则库目录不合法。") - - rule_document = config_json.get("rule_document") - if not isinstance(rule_document, dict): - raise ValueError("规则资产缺少 rule_document 配置。") - - file_name = str(rule_document.get("file_name") or "").strip() - if not file_name: - raise ValueError("规则资产缺少 JSON 文件名。") - return rule_library, file_name - - def read_rule_json(self, asset_id: str) -> AgentAssetRuleJsonRead: - asset = self.repository.get(asset_id) - if asset is None: - raise LookupError("资产不存在。") - - rule_library, file_name = self._resolve_json_risk_rule_document(asset) - payload = self.rule_library_manager.read_rule_library_json( - library=rule_library, - file_name=file_name, - ) - return AgentAssetRuleJsonRead( - file_name=file_name, - rule_code=str(payload.get("rule_code") or asset.code or ""), - name=str(payload.get("name") or asset.name or ""), - description=str(payload.get("description") or asset.description or "").strip(), - evaluator=str(payload.get("evaluator") or ""), - ontology_signal=str(payload.get("ontology_signal") or "") or None, - inputs=payload.get("inputs") if isinstance(payload.get("inputs"), dict) else {}, - outcomes=payload.get("outcomes") if isinstance(payload.get("outcomes"), dict) else {}, - payload=payload, - ) - - def write_rule_json( - self, - asset_id: str, - *, - body: AgentAssetRuleJsonWrite, - actor: str, - request_id: str | None = None, - ) -> AgentAssetRuleJsonRead: - asset = 
self.repository.get(asset_id) - if asset is None: - raise LookupError("资产不存在。") - - rule_library, file_name = self._resolve_json_risk_rule_document(asset) - payload = dict(body.payload or {}) - asset_code = str(asset.code or "").strip() - if asset_code and str(payload.get("rule_code") or "").strip() not in {"", asset_code}: - raise ValueError("规则 JSON 的 rule_code 必须与资产编码一致。") - if asset_code and not str(payload.get("rule_code") or "").strip(): - payload["rule_code"] = asset_code - - saved = self.rule_library_manager.write_rule_library_json( - library=rule_library, - file_name=file_name, - payload=payload, - ) - rule_description = str(saved.get("description") or "").strip() - if rule_description: - asset.description = rule_description - rule_name = str(saved.get("name") or "").strip() - if rule_name: - asset.name = rule_name - risk_category = str(saved.get("risk_category") or "").strip() - if risk_category: - config_json = dict(asset.config_json or {}) - config_json["risk_category"] = risk_category - asset.config_json = config_json - asset.scenario_json = [risk_category] - self.audit_service.log_action( - actor=actor, - action="update_agent_asset_rule_json", - resource_type=asset.asset_type, - resource_id=asset.id, - before_json={"file_name": file_name}, - after_json={"file_name": file_name, "rule_code": saved.get("rule_code")}, - request_id=request_id, - ) - self.db.commit() - return self.read_rule_json(asset_id) diff --git a/server/src/app/services/agent_foundation.py b/server/src/app/services/agent_foundation.py index e2c9067..2e57296 100644 --- a/server/src/app/services/agent_foundation.py +++ b/server/src/app/services/agent_foundation.py @@ -1,2176 +1,76 @@ -from __future__ import annotations - -import hashlib -import json -from datetime import UTC, date, datetime -from decimal import Decimal -from pathlib import Path - -from sqlalchemy import inspect, select, text -from sqlalchemy.orm import Session - -from app.core.agent_enums import ( - 
AgentAssetContentType, - AgentAssetDomain, - AgentAssetStatus, - AgentAssetType, - AgentName, - AgentPermissionLevel, - AgentReviewStatus, - AgentRunSource, - AgentRunStatus, - AgentToolType, -) -from app.core.config import get_settings -from app.core.logging import get_logger -from app.db.base import Base -from app.db.session import get_session_factory -from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion -from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog -from app.models.audit_log import AuditLog -from app.models.financial_record import ( - AccountsPayableRecord, - AccountsReceivableRecord, - ExpenseClaim, - ExpenseClaimItem, -) -from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager -from app.services.agent_asset_spreadsheet import ( - AgentAssetSpreadsheetManager, - COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, - COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME, - COMPANY_TRAVEL_EXPENSE_RULE_CODE, - COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, - FINANCE_RULES_LIBRARY, - RISK_RULES_LIBRARY, - RuleSpreadsheetMeta, -) - -PLATFORM_DESTINATION_LOCATION_RULE_CODE = "risk.travel.destination_receipt_location" -PLATFORM_DESTINATION_LOCATION_RULE_FILENAME = "risk.travel.destination_receipt_location.json" -from app.services.expense_rule_runtime import ( - build_scene_submission_standard_markdown, - build_travel_risk_control_standard_markdown, -) - -logger = get_logger("app.services.agent_foundation") - -DEMO_EXPENSE_CLAIM_SIGNATURES = { - ( - "EXP-202605-001", - "张三", - "华南客户拜访差旅报销", - "3280.00", - "submitted", - ), - ( - "EXP-202605-002", - "李四", - "客户路演餐费", - "860.00", - "approved", - ), - ( - "EXP-202605-003", - "王五", - "市场活动会务差旅", - "3280.00", - "review", - ), -} -DEMO_RECEIVABLE_SIGNATURES = { - ("AR-202605-001", "客户A", "50000.00", "partial"), - ("AR-202605-002", "客户B", "78000.00", "overdue"), -} -DEMO_PAYABLE_SIGNATURES = { - ("AP-202605-001", "供应商A", "33000.00", "scheduled"), - ("AP-202605-002", "供应商B", 
"96000.00", "overdue"), -} - -LEGACY_RULE_CODES = ( - "rule.expense.duplicate_expense_check", - "rule.expense.travel_receipt_requirements", - "rule.ap.payment_dual_review", -) - -ATTACHMENT_RULE_ASSET_CODE = "rule.expense.attachment_submission_requirements" -COMPANY_TRAVEL_RULE_VERSION = "v1.0.0" -COMPANY_COMMUNICATION_RULE_VERSION = "v1.0.0" -COMPANY_TRAVEL_RULE_SCENARIO_JSON = ("差旅",) -COMPANY_COMMUNICATION_RULE_SCENARIO_JSON = ("费用科目",) - -ATTACHMENT_RULE_RUNTIME_CONFIG = { - "kind": "policy_rule_draft", - "version": 1, - "template_key": "attachment_requirement_v1", - "rule_name": "报销附件与单据完整性规则", - "scenario": "attachment_policy", - "source_document_name": "报销制度 / 单据与附件要求", - "review_required": True, - "target": { - "expense_types": [ - "travel", - "hotel", - "transport", - "meal", - "office", - "meeting", - "training", - "communication", - "welfare", - "other", - ], - "scene_codes": ["expense", "attachment_policy", "invoice_anomaly"], - }, - "attachment_requirements": { - "min_attachment_count": 1, - "items": [ - { - "document_type": "vat_invoice", - "required": True, - "min_count": 1, - "description": "金额类报销原则上必须提供合法票据。", - }, - { - "document_type": "receipt", - "required": False, - "min_count": 1, - "description": "特殊场景无发票时需补充收据与情况说明。", - }, - { - "document_type": "flight_itinerary", - "required": False, - "min_count": 1, - "description": "差旅交通报销需提供行程单或等效凭证。", - }, - { - "document_type": "hotel_invoice", - "required": False, - "min_count": 1, - "description": "住宿报销需提供酒店票据或等效住宿凭证。", - }, - ], - "manual_fill_required": False, - }, - "missing_attachment_action": "block", - "output": { - "risk_code": "invoice_anomaly", - "action": "block", - "message": "附件或单据不完整,需补件后再提交。", - }, -} - - -def prepare_agent_foundation() -> None: - settings = get_settings() - if not settings.setup_completed: - logger.info("Agent foundation bootstrap skipped because setup is incomplete") - return - - session_factory = get_session_factory() - with session_factory() as db: - 
AgentFoundationService(db).ensure_foundation_ready() - - -class AgentFoundationService: - def __init__(self, db: Session) -> None: - self.db = db - - def ensure_foundation_ready(self) -> None: - try: - Base.metadata.create_all(bind=self.db.get_bind()) - self._ensure_agent_asset_schema() - self._seed_agent_assets() - self._sync_demo_financial_records() - self._seed_runs_and_logs() - self.db.commit() - except Exception: - self.db.rollback() - logger.exception("Failed to prepare agent foundation") - raise - - def _sync_demo_financial_records(self) -> None: - if get_settings().seed_demo_financial_records: - self._seed_financial_records() - return - self._purge_demo_financial_records() - - def _seed_agent_assets(self) -> None: - existing_codes = set(self.db.scalars(select(AgentAsset.code)).all()) - if existing_codes: - self._top_up_agent_assets(existing_codes) - return - - attachment_rule = AgentAsset( - asset_type=AgentAssetType.RULE.value, - code=ATTACHMENT_RULE_ASSET_CODE, - name="报销附件与单据完整性规则", - description="统一定义报销提交时的附件数量、票据类型和补件处理口径,作为上线前待审核规则。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=["expense", "risk_check", "attachment_policy", "invoice_anomaly"], - owner="财务制度管理组", - reviewer="高嘉禾", - status=AgentAssetStatus.REVIEW.value, - current_version="v1.0.0", - published_version=None, - working_version="v1.0.0", - config_json={ - "severity": "high", - "enabled": False, - "runtime_kind": "policy_rule_draft", - "rule_template_key": "attachment_requirement_v1", - "rule_template_label": "附件要求模板", - "runtime_rule": ATTACHMENT_RULE_RUNTIME_CONFIG, - }, - ) - scene_submission_rule = AgentAsset( - asset_type=AgentAssetType.RULE.value, - code="rule.expense.scene_submission_standard", - name="报销场景提交与附件标准", - description="统一定义各报销场景的必填字段、附件类型要求和金额阈值。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=["expense", "risk_check", "scene_policy", "attachment_policy"], - owner="费用运营组", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - 
current_version="v1.0.0", - published_version="v1.0.0", - working_version="v1.0.0", - config_json={ - "severity": "high", - "enabled": True, - "runtime_kind": "scene_matrix", - "rule_template_label": "系统内置场景矩阵规则", - }, - ) - travel_policy_rule = AgentAsset( - asset_type=AgentAssetType.RULE.value, - code="rule.expense.travel_risk_control_standard", - name="差旅报销风险管控制度", - description="统一定义差旅报销的行程闭环、酒店地点一致性、职级差标和风险处置口径。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=["expense", "risk_check", "travel_policy", "travel_standard"], - owner="风控与审计部", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.1.0", - published_version="v1.1.0", - working_version="v1.1.0", - config_json={ - "severity": "high", - "enabled": True, - "block_on_high_risk": True, - "warning_on_medium_risk": True, - "source_doc": "document/development/risks/travel-risk-control-standard.md", - "runtime_kind": "travel_policy", - "rule_template_key": "travel_standard_v1", - "rule_template_label": "差旅标准模板", - }, - ) - company_travel_rule = AgentAsset( - asset_type=AgentAssetType.RULE.value, - code=COMPANY_TRAVEL_EXPENSE_RULE_CODE, - name="公司差旅费报销规则", - description="通过 Excel 明细表维护差旅费报销标准、票据要求和审批口径。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=list(COMPANY_TRAVEL_RULE_SCENARIO_JSON), - owner="财务制度管理组", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version=COMPANY_TRAVEL_RULE_VERSION, - published_version=COMPANY_TRAVEL_RULE_VERSION, - working_version=COMPANY_TRAVEL_RULE_VERSION, - config_json={ - "severity": "medium", - "enabled": True, - "tag": "财务规则", - "detail_mode": "spreadsheet", - "rule_library": FINANCE_RULES_LIBRARY, - "scenario_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], - "ai_review_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], - "rule_template_label": "差旅报销 Excel 模板", - }, - ) - platform_risk_assets = self._build_platform_risk_seed_assets() - company_communication_rule = AgentAsset( - 
asset_type=AgentAssetType.RULE.value, - code=COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, - name="公司通信费报销规则", - description="通过 Excel 明细表维护员工通信费报销标准、专项补充口径和审批要求。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=list(COMPANY_COMMUNICATION_RULE_SCENARIO_JSON), - owner="财务制度管理组", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version=COMPANY_COMMUNICATION_RULE_VERSION, - published_version=COMPANY_COMMUNICATION_RULE_VERSION, - working_version=COMPANY_COMMUNICATION_RULE_VERSION, - config_json={ - "severity": "medium", - "enabled": True, - "tag": "财务规则", - "detail_mode": "spreadsheet", - "rule_library": FINANCE_RULES_LIBRARY, - "scenario_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], - "ai_review_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], - "rule_template_label": "通信费报销 Excel 模板", - }, - ) - skill_expense_asset = AgentAsset( - asset_type=AgentAssetType.SKILL.value, - code="skill.expense.summary_lookup", - name="报销汇总查询技能", - description="根据时间、员工和部门汇总报销金额与单据数量。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=["expense", "query", "summary"], - owner="平台研发组", - reviewer="陈硕", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - published_version="v1.0.0", - working_version="v1.0.0", - config_json={"input_schema": ["time_range", "employee", "department"]}, - ) - skill_ar_asset = AgentAsset( - asset_type=AgentAssetType.SKILL.value, - code="skill.ar.aging_summary", - name="应收账龄汇总技能", - description="按客户、账龄和逾期状态汇总应收风险分布。", - domain=AgentAssetDomain.AR.value, - scenario_json=["accounts_receivable", "query", "aging_summary"], - owner="平台研发组", - reviewer="陈硕", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - published_version="v1.0.0", - working_version="v1.0.0", - config_json={"input_schema": ["customer", "aging_bucket", "status"]}, - ) - invoice_mcp_asset = AgentAsset( - asset_type=AgentAssetType.MCP.value, - code="mcp.invoice.verify_mock", - name="发票验真 Mock 服务", - 
description="模拟发票验真、发票状态查询和异常降级说明。", - domain=AgentAssetDomain.SYSTEM.value, - scenario_json=["expense", "invoice_validation"], - owner="平台研发组", - reviewer="周悦宁", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - published_version="v1.0.0", - working_version="v1.0.0", - config_json={"endpoint": "mock://invoice/verify", "timeout_ms": 1200}, - ) - ledger_mcp_asset = AgentAsset( - asset_type=AgentAssetType.MCP.value, - code="mcp.ledger.snapshot_mock", - name="总账快照 Mock 服务", - description="模拟返回应收、应付和费用汇总快照,供 Agent 查询和巡检。", - domain=AgentAssetDomain.SYSTEM.value, - scenario_json=["expense", "accounts_receivable", "accounts_payable"], - owner="平台研发组", - reviewer="周悦宁", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - published_version="v1.0.0", - working_version="v1.0.0", - config_json={"endpoint": "mock://ledger/snapshot", "timeout_ms": 1500}, - ) - task_asset = AgentAsset( - asset_type=AgentAssetType.TASK.value, - code="task.hermes.daily_risk_scan", - name="Hermes 每日风险巡检", - description="每天早上巡检重复报销、金额超标、逾期应收和异常付款。", - domain=AgentAssetDomain.SYSTEM.value, - scenario_json=["schedule", "risk_check"], - owner="风控与审计部", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - published_version="v1.0.0", - working_version="v1.0.0", - config_json={"cron": "0 9 * * *", "agent": AgentName.HERMES.value}, - ) - ar_summary_task = AgentAsset( - asset_type=AgentAssetType.TASK.value, - code="task.hermes.weekly_ar_summary", - name="Hermes 每周应收账龄汇总", - description="每周汇总逾期应收、账龄分布和客户风险变化。", - domain=AgentAssetDomain.SYSTEM.value, - scenario_json=["schedule", "accounts_receivable", "summary"], - owner="风控与审计部", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - published_version="v1.0.0", - working_version="v1.0.0", - config_json={"cron": "0 10 * * 1", "agent": AgentName.HERMES.value}, - ) - rule_digest_task = AgentAsset( - asset_type=AgentAssetType.TASK.value, - 
code="task.hermes.rule_review_digest", - name="Hermes 规则待审摘要", - description="每天汇总待审规则、待补样例和被拒规则修订建议。", - domain=AgentAssetDomain.SYSTEM.value, - scenario_json=["schedule", "rule_center", "review_digest"], - owner="风控与审计部", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - published_version="v1.0.0", - working_version="v1.0.0", - config_json={"cron": "0 18 * * *", "agent": AgentName.HERMES.value}, - ) - knowledge_index_task = AgentAsset( - asset_type=AgentAssetType.TASK.value, - code="task.hermes.knowledge_index_sync", - name="Hermes ??????", - description="?????????? LightRAG ???????", - domain=AgentAssetDomain.SYSTEM.value, - scenario_json=["schedule", "knowledge", "rule_center"], - owner="财务制度管理组", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - published_version="v1.0.0", - working_version="v1.0.0", - config_json={"cron": "0 0 * * *", "agent": AgentName.HERMES.value}, - ) - - self.db.add_all( - [ - attachment_rule, - scene_submission_rule, - travel_policy_rule, - *platform_risk_assets, - company_travel_rule, - company_communication_rule, - skill_expense_asset, - skill_ar_asset, - invoice_mcp_asset, - ledger_mcp_asset, - task_asset, - ar_summary_task, - rule_digest_task, - knowledge_index_task, - ] - ) - self.db.flush() - - company_travel_rule_meta = self._ensure_company_travel_rule_spreadsheet_seed( - company_travel_rule, - version=COMPANY_TRAVEL_RULE_VERSION, - actor_name="系统初始化", - ) - company_communication_rule_meta = self._ensure_company_communication_rule_spreadsheet_seed( - company_communication_rule, - version=COMPANY_COMMUNICATION_RULE_VERSION, - actor_name="系统初始化", - ) - - self.db.add_all( - [ - AgentAssetVersion( - asset=attachment_rule, - version="v0.9.0", - content=self._attachment_submission_requirement_markdown( - version_note="首版附件完整性规则草稿,覆盖基础票据与补件口径。", - include_review_note=True, - ), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="首版草稿。", - 
created_by="高嘉禾", - ), - AgentAssetVersion( - asset=attachment_rule, - version="v1.0.0", - content=self._attachment_submission_requirement_markdown( - version_note="补充票据缺失、收据替代和差旅等效凭证口径,待审核。", - include_review_note=True, - ), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="补充票据替代与差旅等效凭证口径,待审核。", - created_by="高嘉禾", - ), - AgentAssetVersion( - asset=scene_submission_rule, - version="v1.0.0", - content=self._scene_submission_standard_markdown(), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="首版报销场景提交标准,覆盖附件类型、必填字段和金额阈值。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=travel_policy_rule, - version="v1.0.0", - content=self._travel_risk_control_standard_markdown(version="v1.0.0"), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="首版差旅制度执行规则,覆盖行程闭环与基础差标校验。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=travel_policy_rule, - version="v1.1.0", - content=self._travel_risk_control_standard_markdown(version="v1.1.0"), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="补充可执行规则块,供审核引擎直接消费差旅制度标准。", - created_by="系统初始化", - ), - *[ - AgentAssetVersion( - asset=asset, - version="v1.0.0", - content=self._platform_risk_rule_markdown(asset), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note=f"平台通用风险规则:{asset.name}", - created_by="系统初始化", - ) - for asset in platform_risk_assets - ], - AgentAssetVersion( - asset=company_travel_rule, - version=COMPANY_TRAVEL_RULE_VERSION, - content=AgentAssetSpreadsheetManager.build_version_markdown( - rule_name=company_travel_rule.name, - version=COMPANY_TRAVEL_RULE_VERSION, - metadata=company_travel_rule_meta, - ), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="初始化差旅费报销 Excel 规则表。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=company_communication_rule, - version=COMPANY_COMMUNICATION_RULE_VERSION, - content=AgentAssetSpreadsheetManager.build_version_markdown( - rule_name=company_communication_rule.name, - 
version=COMPANY_COMMUNICATION_RULE_VERSION, - metadata=company_communication_rule_meta, - ), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="初始化通信费报销 Excel 规则表。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=skill_expense_asset, - version="v1.0.0", - content=self._json_content( - { - "inputs": ["time_range", "employee", "department"], - "outputs": ["total_amount", "claim_count"], - "dependencies": ["database.expense_claims"], - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化技能快照。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=skill_ar_asset, - version="v1.0.0", - content=self._json_content( - { - "inputs": ["customer", "aging_bucket", "status"], - "outputs": ["receivable_total", "overdue_total", "customer_count"], - "dependencies": ["database.accounts_receivable"], - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化应收账龄技能快照。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=invoice_mcp_asset, - version="v1.0.0", - content=self._json_content( - { - "service_type": "mock", - "auth_mode": "none", - "degrade_strategy": "return_stub_with_warning", - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化 MCP 快照。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=ledger_mcp_asset, - version="v1.0.0", - content=self._json_content( - { - "service_type": "mock", - "auth_mode": "service_account", - "degrade_strategy": "return_cached_snapshot_with_warning", - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化总账快照 MCP。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=task_asset, - version="v1.0.0", - content=self._json_content( - { - "task_type": "daily_risk_scan", - "schedule": "0 9 * * *", - "target_agent": AgentName.HERMES.value, - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化任务快照。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=ar_summary_task, - version="v1.0.0", - 
content=self._json_content( - { - "task_type": "weekly_ar_summary", - "schedule": "0 10 * * 1", - "target_agent": AgentName.HERMES.value, - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化应收账龄汇总任务。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=rule_digest_task, - version="v1.0.0", - content=self._json_content( - { - "task_type": "rule_review_digest", - "schedule": "0 18 * * *", - "target_agent": AgentName.HERMES.value, - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化规则待审摘要任务。", - created_by="系统初始化", - ), - AgentAssetVersion( - asset=knowledge_index_task, - version="v1.0.0", - content=self._json_content( - { - "task_type": "knowledge_index_sync", - "schedule": "0 0 * * *", - "target_agent": AgentName.HERMES.value, - "folder": "报销制度", - "changed_only": True, - "index_engine": "lightrag", - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化制度知识与规则草稿形成任务。", - created_by="系统初始化", - ), - ] - ) - self.db.add_all( - [ - AgentAssetReview( - asset=attachment_rule, - version="v1.0.0", - reviewer="高嘉禾", - review_status=AgentReviewStatus.PENDING.value, - review_note="等待制度管理员确认收据替代与补件时限口径。", - reviewed_at=None, - ), - AgentAssetReview( - asset=scene_submission_rule, - version="v1.0.0", - reviewer="顾承宇", - review_status=AgentReviewStatus.APPROVED.value, - review_note="可作为报销场景统一审核标准正式执行。", - reviewed_at=datetime.now(UTC), - ), - AgentAssetReview( - asset=travel_policy_rule, - version="v1.1.0", - reviewer="顾承宇", - review_status=AgentReviewStatus.APPROVED.value, - review_note="制度口径已确认,并已补充可执行配置供审核引擎读取。", - reviewed_at=datetime.now(UTC), - ), - AgentAssetReview( - asset=company_travel_rule, - version=COMPANY_TRAVEL_RULE_VERSION, - reviewer="顾承宇", - review_status=AgentReviewStatus.APPROVED.value, - review_note="首版 Excel 规则表已确认,可作为财务规则使用。", - reviewed_at=datetime.now(UTC), - ), - AgentAssetReview( - asset=company_communication_rule, - version=COMPANY_COMMUNICATION_RULE_VERSION, - reviewer="顾承宇", - 
review_status=AgentReviewStatus.APPROVED.value, - review_note="首版 Excel 规则表已确认,可作为财务规则使用。", - reviewed_at=datetime.now(UTC), - ), - ] - ) - - def _seed_financial_records(self) -> None: - if self.db.scalar(select(ExpenseClaim.id).limit(1)) is not None: - return - - claim_1 = ExpenseClaim( - claim_no="EXP-202605-001", - employee_name="张三", - department_name="财务共享中心", - project_code="PRJ-EXP-01", - expense_type="travel", - reason="华南客户拜访差旅报销", - location="深圳", - amount=Decimal("3280.00"), - currency="CNY", - invoice_count=3, - occurred_at=datetime(2026, 5, 6, 9, 0, tzinfo=UTC), - submitted_at=datetime(2026, 5, 7, 10, 20, tzinfo=UTC), - status="submitted", - approval_stage="finance_review", - risk_flags_json=["amount_over_limit"], - ) - claim_1.items = [ - ExpenseClaimItem( - item_date=date(2026, 5, 5), - item_type="hotel", - item_reason="客户拜访住宿", - item_location="深圳", - item_amount=Decimal("1880.00"), - invoice_id="INV-HOTEL-001", - ), - ExpenseClaimItem( - item_date=date(2026, 5, 6), - item_type="transport", - item_reason="往返交通", - item_location="深圳", - item_amount=Decimal("1400.00"), - invoice_id="INV-TRANS-009", - ), - ] - - claim_2 = ExpenseClaim( - claim_no="EXP-202605-002", - employee_name="李四", - department_name="华东销售部", - project_code="PRJ-SALES-02", - expense_type="meal", - reason="客户路演餐费", - location="上海", - amount=Decimal("860.00"), - currency="CNY", - invoice_count=1, - occurred_at=datetime(2026, 5, 8, 12, 0, tzinfo=UTC), - submitted_at=datetime(2026, 5, 8, 18, 30, tzinfo=UTC), - status="approved", - approval_stage="completed", - risk_flags_json=[], - ) - - claim_3 = ExpenseClaim( - claim_no="EXP-202605-003", - employee_name="王五", - department_name="市场品牌部", - project_code="PRJ-MKT-08", - expense_type="travel", - reason="市场活动会务差旅", - location="北京", - amount=Decimal("3280.00"), - currency="CNY", - invoice_count=2, - occurred_at=datetime(2026, 5, 6, 11, 30, tzinfo=UTC), - submitted_at=datetime(2026, 5, 8, 9, 10, tzinfo=UTC), - status="review", - 
approval_stage="risk_check", - risk_flags_json=["duplicate_expense"], - ) - - ar_records = [ - AccountsReceivableRecord( - receivable_no="AR-202605-001", - customer_id="CUS-A", - customer_name="客户A", - contract_no="CTR-AR-1001", - invoice_no="INV-AR-9001", - amount_receivable=Decimal("120000.00"), - amount_received=Decimal("70000.00"), - amount_outstanding=Decimal("50000.00"), - currency="CNY", - posting_date=date(2026, 4, 1), - due_date=date(2026, 4, 30), - aging_days=11, - status="partial", - risk_flags_json=[], - ), - AccountsReceivableRecord( - receivable_no="AR-202605-002", - customer_id="CUS-B", - customer_name="客户B", - contract_no="CTR-AR-1002", - invoice_no="INV-AR-9002", - amount_receivable=Decimal("88000.00"), - amount_received=Decimal("10000.00"), - amount_outstanding=Decimal("78000.00"), - currency="CNY", - posting_date=date(2026, 3, 15), - due_date=date(2026, 4, 15), - aging_days=26, - status="overdue", - risk_flags_json=["ar_overdue"], - ), - ] - - ap_records = [ - AccountsPayableRecord( - payable_no="AP-202605-001", - vendor_id="VEN-A", - vendor_name="供应商A", - invoice_no="INV-AP-5001", - amount_payable=Decimal("43000.00"), - amount_paid=Decimal("10000.00"), - amount_outstanding=Decimal("33000.00"), - currency="CNY", - posting_date=date(2026, 4, 20), - due_date=date(2026, 5, 12), - aging_days=0, - status="scheduled", - risk_flags_json=[], - ), - AccountsPayableRecord( - payable_no="AP-202605-002", - vendor_id="VEN-B", - vendor_name="供应商B", - invoice_no="INV-AP-5002", - amount_payable=Decimal("96000.00"), - amount_paid=Decimal("0.00"), - amount_outstanding=Decimal("96000.00"), - currency="CNY", - posting_date=date(2026, 4, 10), - due_date=date(2026, 5, 5), - aging_days=6, - status="overdue", - risk_flags_json=["ap_overdue"], - ), - ] - - self.db.add_all([claim_1, claim_2, claim_3, *ar_records, *ap_records]) - - def _purge_demo_financial_records(self) -> None: - demo_claims = list(self.db.scalars(select(ExpenseClaim)).all()) - for claim in demo_claims: 
- signature = ( - str(claim.claim_no or "").strip(), - str(claim.employee_name or "").strip(), - str(claim.reason or "").strip(), - f"{Decimal(claim.amount or 0):.2f}", - str(claim.status or "").strip(), - ) - if signature in DEMO_EXPENSE_CLAIM_SIGNATURES: - self.db.delete(claim) - - demo_receivables = list(self.db.scalars(select(AccountsReceivableRecord)).all()) - for record in demo_receivables: - signature = ( - str(record.receivable_no or "").strip(), - str(record.customer_name or "").strip(), - f"{Decimal(record.amount_outstanding or 0):.2f}", - str(record.status or "").strip(), - ) - if signature in DEMO_RECEIVABLE_SIGNATURES: - self.db.delete(record) - - demo_payables = list(self.db.scalars(select(AccountsPayableRecord)).all()) - for record in demo_payables: - signature = ( - str(record.payable_no or "").strip(), - str(record.vendor_name or "").strip(), - f"{Decimal(record.amount_outstanding or 0):.2f}", - str(record.status or "").strip(), - ) - if signature in DEMO_PAYABLE_SIGNATURES: - self.db.delete(record) - - def _seed_runs_and_logs(self) -> None: - if self.db.scalar(select(AgentRun.id).limit(1)) is not None: - return - - task_asset = self.db.scalar( - select(AgentAsset).where(AgentAsset.code == "task.hermes.daily_risk_scan") - ) - - user_run = AgentRun( - run_id="run_user_20260511_001", - agent=AgentName.USER_AGENT.value, - source=AgentRunSource.USER_MESSAGE.value, - user_id="emp_001", - task_id=None, - ontology_json={"scenario": "expense", "intent": "query"}, - route_json={"selected_agent": AgentName.USER_AGENT.value, "route_reason": "user query"}, - permission_level=AgentPermissionLevel.READ.value, - status=AgentRunStatus.SUCCEEDED.value, - result_summary="已返回本周报销金额和风险摘要。", - started_at=datetime(2026, 5, 11, 8, 35, tzinfo=UTC), - finished_at=datetime(2026, 5, 11, 8, 35, 2, tzinfo=UTC), - ) - hermes_run = AgentRun( - run_id="run_hermes_20260511_001", - agent=AgentName.HERMES.value, - source=AgentRunSource.SCHEDULE.value, - user_id=None, - 
task_id=task_asset.id if task_asset else None, - ontology_json={"scenario": "expense", "intent": "risk_check"}, - route_json={ - "selected_agent": AgentName.HERMES.value, - "route_reason": "scheduled risk scan", - }, - permission_level=AgentPermissionLevel.READ.value, - status=AgentRunStatus.SUCCEEDED.value, - result_summary="Hermes 已生成今日风险巡检摘要。", - started_at=datetime(2026, 5, 11, 9, 0, tzinfo=UTC), - finished_at=datetime(2026, 5, 11, 9, 0, 4, tzinfo=UTC), - ) - blocked_run = AgentRun( - run_id="run_user_20260511_002", - agent=AgentName.ORCHESTRATOR.value, - source=AgentRunSource.USER_MESSAGE.value, - user_id="emp_002", - task_id=None, - ontology_json={"scenario": "accounts_payable", "intent": "operate"}, - route_json={ - "selected_agent": AgentName.USER_AGENT.value, - "route_reason": "payment request", - }, - permission_level=AgentPermissionLevel.APPROVAL_REQUIRED.value, - status=AgentRunStatus.BLOCKED.value, - result_summary="动作需要人工确认。", - error_message="直接付款属于高风险动作,已阻断自动执行。", - started_at=datetime(2026, 5, 11, 10, 5, tzinfo=UTC), - finished_at=datetime(2026, 5, 11, 10, 5, 1, tzinfo=UTC), - ) - self.db.add_all([user_run, hermes_run, blocked_run]) - self.db.flush() - - self.db.add_all( - [ - AgentToolCall( - run_id=user_run.run_id, - tool_type=AgentToolType.DATABASE.value, - tool_name="expense_claims.lookup", - request_json={"time_range": "this_week", "employee": "all"}, - response_json={"claim_count": 3, "total_amount": "7420.00"}, - status="succeeded", - duration_ms=48, - ), - AgentToolCall( - run_id=hermes_run.run_id, - tool_type=AgentToolType.MCP.value, - tool_name="invoice.verify_mock", - request_json={"claim_no": "EXP-202605-003"}, - response_json={ - "warning": "external service degraded", - "fallback": "used mock response", - }, - status="failed", - duration_ms=132, - error_message="mock upstream timeout", - ), - AgentToolCall( - run_id=blocked_run.run_id, - tool_type=AgentToolType.RULE_ENGINE.value, - tool_name="permission.guard", - 
request_json={"action": "direct_payment"}, - response_json={"requires_confirmation": True}, - status="succeeded", - duration_ms=5, - ), - SemanticParseLog( - run_id=user_run.run_id, - user_id="emp_001", - raw_query="查一下本周报销超标风险", - scenario="expense", - intent="risk_check", - entities_json=[], - time_range_json={"start_date": "2026-05-11", "end_date": "2026-05-17"}, - metrics_json=["amount"], - constraints_json=[], - risk_flags_json=["amount_over_limit"], - permission_json={"level": AgentPermissionLevel.READ.value}, - confidence=0.93, - ), - SemanticParseLog( - run_id=blocked_run.run_id, - user_id="emp_002", - raw_query="帮我直接付款给供应商B", - scenario="accounts_payable", - intent="operate", - entities_json=[{"type": "vendor", "value": "供应商B"}], - time_range_json={}, - metrics_json=["amount"], - constraints_json=[], - risk_flags_json=["ap_overdue"], - permission_json={"level": AgentPermissionLevel.APPROVAL_REQUIRED.value}, - confidence=0.96, - ), - ] - ) - - if self.db.scalar(select(AuditLog.id).limit(1)) is None: - self.db.add_all( - [ - AuditLog( - actor="系统初始化", - action="save_rule_markdown", - resource_type="rule", - resource_id=ATTACHMENT_RULE_ASSET_CODE, - before_json=None, - after_json={"version": "v1.0.0"}, - request_id="seed-audit-001", - ), - AuditLog( - actor="高嘉禾", - action="review_rule", - resource_type="rule", - resource_id=ATTACHMENT_RULE_ASSET_CODE, - before_json={"review_status": "pending"}, - after_json={"review_status": "pending"}, - request_id="seed-audit-002", - ), - AuditLog( - actor="系统初始化", - action="activate_rule", - resource_type="rule", - resource_id="rule.expense.scene_submission_standard", - before_json={"status": "draft"}, - after_json={"status": "active"}, - request_id="seed-audit-003", - ), - AuditLog( - actor="Hermes", - action="update_task_status", - resource_type="task", - resource_id="task.hermes.daily_risk_scan", - before_json={"status": "idle"}, - after_json={"status": "succeeded"}, - request_id="seed-audit-004", - ), - ] - ) - - def 
_top_up_agent_assets(self, existing_codes: set[str]) -> None: - self._remove_legacy_rule_assets() - existing_codes = set(self.db.scalars(select(AgentAsset.code)).all()) - - attachment_rule = self.db.scalar( - select(AgentAsset).where(AgentAsset.code == ATTACHMENT_RULE_ASSET_CODE) - ) - scene_submission_rule = self.db.scalar( - select(AgentAsset).where(AgentAsset.code == "rule.expense.scene_submission_standard") - ) - travel_policy_rule = self.db.scalar( - select(AgentAsset).where(AgentAsset.code == "rule.expense.travel_risk_control_standard") - ) - company_travel_rule = self.db.scalar( - select(AgentAsset).where(AgentAsset.code == COMPANY_TRAVEL_EXPENSE_RULE_CODE) - ) - company_communication_rule = self.db.scalar( - select(AgentAsset).where(AgentAsset.code == COMPANY_COMMUNICATION_EXPENSE_RULE_CODE) - ) - - if ATTACHMENT_RULE_ASSET_CODE not in existing_codes: - attachment_rule = self._create_seed_asset( - asset_type=AgentAssetType.RULE.value, - code=ATTACHMENT_RULE_ASSET_CODE, - name="报销附件与单据完整性规则", - description="统一定义报销提交时的附件数量、票据类型和补件处理口径,作为上线前待审核规则。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=["expense", "risk_check", "attachment_policy", "invoice_anomaly"], - owner="财务制度管理组", - reviewer="高嘉禾", - status=AgentAssetStatus.REVIEW.value, - current_version="v1.0.0", - config_json={ - "severity": "high", - "enabled": False, - "runtime_kind": "policy_rule_draft", - "rule_template_key": "attachment_requirement_v1", - "rule_template_label": "附件要求模板", - "runtime_rule": ATTACHMENT_RULE_RUNTIME_CONFIG, - }, - ) - - if attachment_rule is not None: - if not str(attachment_rule.current_version or "").strip(): - attachment_rule.current_version = "v1.0.0" - if not str(attachment_rule.working_version or "").strip(): - attachment_rule.working_version = attachment_rule.current_version - attachment_rule.status = attachment_rule.status or AgentAssetStatus.REVIEW.value - attachment_rule.description = "统一定义报销提交时的附件数量、票据类型和补件处理口径,作为上线前待审核规则。" - attachment_rule.config_json 
= { - "severity": "high", - "enabled": False, - "runtime_kind": "policy_rule_draft", - "rule_template_key": "attachment_requirement_v1", - "rule_template_label": "附件要求模板", - "runtime_rule": ATTACHMENT_RULE_RUNTIME_CONFIG, - } - self._ensure_asset_version( - attachment_rule, - version="v0.9.0", - content=self._attachment_submission_requirement_markdown( - version_note="首版附件完整性规则草稿,覆盖基础票据与补件口径。", - include_review_note=True, - ), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="首版草稿。", - created_by="高嘉禾", - ) - self._ensure_asset_version( - attachment_rule, - version="v1.0.0", - content=self._attachment_submission_requirement_markdown( - version_note="补充票据缺失、收据替代和差旅等效凭证口径,待审核。", - include_review_note=True, - ), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="补充票据替代与差旅等效凭证口径,待审核。", - created_by="高嘉禾", - ) - self._ensure_asset_review( - attachment_rule, - version="v1.0.0", - reviewer="高嘉禾", - review_status=AgentReviewStatus.PENDING.value, - review_note="等待制度管理员确认收据替代与补件时限口径。", - reviewed_at=None, - ) - - if "rule.expense.scene_submission_standard" not in existing_codes: - scene_submission_rule = self._create_seed_asset( - asset_type=AgentAssetType.RULE.value, - code="rule.expense.scene_submission_standard", - name="报销场景提交与附件标准", - description="统一定义各报销场景的必填字段、附件类型要求和金额阈值。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=["expense", "risk_check", "scene_policy", "attachment_policy"], - owner="费用运营组", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - config_json={ - "severity": "high", - "enabled": True, - "runtime_kind": "scene_matrix", - "rule_template_label": "系统内置场景矩阵规则", - }, - ) - - if scene_submission_rule is not None: - if not str(scene_submission_rule.current_version or "").strip(): - scene_submission_rule.current_version = "v1.0.0" - if not str(scene_submission_rule.working_version or "").strip(): - scene_submission_rule.working_version = scene_submission_rule.current_version - 
if not str(scene_submission_rule.published_version or "").strip(): - scene_submission_rule.published_version = scene_submission_rule.current_version - scene_submission_rule.status = scene_submission_rule.status or AgentAssetStatus.ACTIVE.value - scene_submission_rule.description = "统一定义各报销场景的必填字段、附件类型要求和金额阈值。" - scene_submission_rule.config_json = { - "severity": "high", - "enabled": True, - "runtime_kind": "scene_matrix", - "rule_template_label": "系统内置场景矩阵规则", - } - self._ensure_asset_version( - scene_submission_rule, - version="v1.0.0", - content=self._scene_submission_standard_markdown(), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="首版报销场景提交标准,覆盖附件类型、必填字段和金额阈值。", - created_by="系统初始化", - ) - self._ensure_asset_review( - scene_submission_rule, - version="v1.0.0", - reviewer="顾承宇", - review_status=AgentReviewStatus.APPROVED.value, - review_note="可作为报销场景统一审核标准正式执行。", - reviewed_at=datetime.now(UTC), - ) - - if "rule.expense.travel_risk_control_standard" not in existing_codes: - travel_policy_rule = self._create_seed_asset( - asset_type=AgentAssetType.RULE.value, - code="rule.expense.travel_risk_control_standard", - name="差旅报销风险管控制度", - description="统一定义差旅报销的行程闭环、酒店地点一致性、职级差标和风险处置口径。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=["expense", "risk_check", "travel_policy", "travel_standard"], - owner="风控与审计部", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.1.0", - config_json={ - "severity": "high", - "enabled": True, - "block_on_high_risk": True, - "warning_on_medium_risk": True, - "source_doc": "document/development/risks/travel-risk-control-standard.md", - "runtime_kind": "travel_policy", - "rule_template_key": "travel_standard_v1", - "rule_template_label": "差旅标准模板", - }, - ) - - if travel_policy_rule is not None: - if not str(travel_policy_rule.current_version or "").strip(): - travel_policy_rule.current_version = "v1.1.0" - if not str(travel_policy_rule.working_version or "").strip(): - 
travel_policy_rule.working_version = travel_policy_rule.current_version - if not str(travel_policy_rule.published_version or "").strip(): - travel_policy_rule.published_version = travel_policy_rule.current_version - travel_policy_rule.status = travel_policy_rule.status or AgentAssetStatus.ACTIVE.value - travel_policy_rule.config_json = { - "severity": "high", - "enabled": True, - "block_on_high_risk": True, - "warning_on_medium_risk": True, - "source_doc": "document/development/risks/travel-risk-control-standard.md", - "runtime_kind": "travel_policy", - "rule_template_key": "travel_standard_v1", - "rule_template_label": "差旅标准模板", - } - self._ensure_asset_version( - travel_policy_rule, - version="v1.0.0", - content=self._travel_risk_control_standard_markdown(version="v1.0.0"), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="首版差旅制度执行规则,覆盖行程闭环与基础差标校验。", - created_by="系统初始化", - ) - self._ensure_asset_version( - travel_policy_rule, - version="v1.1.0", - content=self._travel_risk_control_standard_markdown(version="v1.1.0"), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="补充可执行规则块,供审核引擎直接消费差旅制度标准。", - created_by="系统初始化", - ) - self._ensure_asset_review( - travel_policy_rule, - version="v1.1.0", - reviewer="顾承宇", - review_status=AgentReviewStatus.APPROVED.value, - review_note="制度口径已确认,并已补充可执行配置供审核引擎读取。", - reviewed_at=datetime.now(UTC), - ) - - self.sync_platform_risk_rules_from_library() - - if COMPANY_TRAVEL_EXPENSE_RULE_CODE not in existing_codes: - company_travel_rule = self._create_seed_asset( - asset_type=AgentAssetType.RULE.value, - code=COMPANY_TRAVEL_EXPENSE_RULE_CODE, - name="公司差旅费报销规则", - description="通过 Excel 明细表维护差旅费报销标准、票据要求和审批口径。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=list(COMPANY_TRAVEL_RULE_SCENARIO_JSON), - owner="财务制度管理组", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version=COMPANY_TRAVEL_RULE_VERSION, - config_json={ - "severity": "medium", - "enabled": True, - "tag": 
"财务规则", - "detail_mode": "spreadsheet", - "scenario_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], - "ai_review_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], - "rule_template_label": "差旅报销 Excel 模板", - }, - ) - if COMPANY_COMMUNICATION_EXPENSE_RULE_CODE not in existing_codes: - company_communication_rule = self._create_seed_asset( - asset_type=AgentAssetType.RULE.value, - code=COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, - name="公司通信费报销规则", - description="通过 Excel 明细表维护员工通信费报销标准、专项补充口径和审批要求。", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=list(COMPANY_COMMUNICATION_RULE_SCENARIO_JSON), - owner="财务制度管理组", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version=COMPANY_COMMUNICATION_RULE_VERSION, - config_json={ - "severity": "medium", - "enabled": True, - "tag": "财务规则", - "detail_mode": "spreadsheet", - "scenario_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], - "ai_review_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], - "rule_template_label": "通信费报销 Excel 模板", - }, - ) +from __future__ import annotations - if company_travel_rule is not None: - company_travel_rule.scenario_json = list(COMPANY_TRAVEL_RULE_SCENARIO_JSON) - if not str(company_travel_rule.current_version or "").strip(): - company_travel_rule.current_version = COMPANY_TRAVEL_RULE_VERSION - if not str(company_travel_rule.working_version or "").strip(): - company_travel_rule.working_version = company_travel_rule.current_version - if not str(company_travel_rule.published_version or "").strip(): - company_travel_rule.published_version = company_travel_rule.current_version - if not str(company_travel_rule.status or "").strip(): - company_travel_rule.status = AgentAssetStatus.ACTIVE.value - company_travel_rule.description = "通过 Excel 明细表维护差旅费报销标准、票据要求和审批口径。" - company_travel_rule.config_json = { - **(company_travel_rule.config_json or {}), - "severity": "medium", - "enabled": True, - "tag": "财务规则", - "detail_mode": "spreadsheet", - "rule_library": 
FINANCE_RULES_LIBRARY, - "scenario_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], - "ai_review_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], - "rule_template_label": "差旅报销 Excel 模板", - } - company_travel_rule_meta = self._ensure_company_travel_rule_spreadsheet_seed( - company_travel_rule, - version=str(company_travel_rule.current_version or COMPANY_TRAVEL_RULE_VERSION), - actor_name="系统初始化", - ) - self._ensure_asset_version( - company_travel_rule, - version=str(company_travel_rule.current_version or COMPANY_TRAVEL_RULE_VERSION), - content=AgentAssetSpreadsheetManager.build_version_markdown( - rule_name=company_travel_rule.name, - version=str(company_travel_rule.current_version or COMPANY_TRAVEL_RULE_VERSION), - metadata=company_travel_rule_meta, - ), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="初始化差旅费报销 Excel 规则表。", - created_by="系统初始化", - ) - if str(company_travel_rule.current_version or "").strip() == COMPANY_TRAVEL_RULE_VERSION: - self._ensure_asset_review( - company_travel_rule, - version=COMPANY_TRAVEL_RULE_VERSION, - reviewer="顾承宇", - review_status=AgentReviewStatus.APPROVED.value, - review_note="首版 Excel 规则表已确认,可作为财务规则使用。", - reviewed_at=datetime.now(UTC), - ) - - if company_communication_rule is not None: - company_communication_rule.scenario_json = list(COMPANY_COMMUNICATION_RULE_SCENARIO_JSON) - if not str(company_communication_rule.current_version or "").strip(): - company_communication_rule.current_version = COMPANY_COMMUNICATION_RULE_VERSION - if not str(company_communication_rule.working_version or "").strip(): - company_communication_rule.working_version = company_communication_rule.current_version - if not str(company_communication_rule.published_version or "").strip(): - company_communication_rule.published_version = company_communication_rule.current_version - if not str(company_communication_rule.status or "").strip(): - company_communication_rule.status = AgentAssetStatus.ACTIVE.value - 
company_communication_rule.description = "通过 Excel 明细表维护员工通信费报销标准、专项补充口径和审批要求。" - company_communication_rule.config_json = { - **(company_communication_rule.config_json or {}), - "severity": "medium", - "enabled": True, - "tag": "财务规则", - "detail_mode": "spreadsheet", - "rule_library": FINANCE_RULES_LIBRARY, - "scenario_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], - "ai_review_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], - "rule_template_label": "通信费报销 Excel 模板", - } - company_communication_rule_meta = self._ensure_company_communication_rule_spreadsheet_seed( - company_communication_rule, - version=str(company_communication_rule.current_version or COMPANY_COMMUNICATION_RULE_VERSION), - actor_name="系统初始化", - ) - self._ensure_asset_version( - company_communication_rule, - version=str(company_communication_rule.current_version or COMPANY_COMMUNICATION_RULE_VERSION), - content=AgentAssetSpreadsheetManager.build_version_markdown( - rule_name=company_communication_rule.name, - version=str(company_communication_rule.current_version or COMPANY_COMMUNICATION_RULE_VERSION), - metadata=company_communication_rule_meta, - ), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note="初始化通信费报销 Excel 规则表。", - created_by="系统初始化", - ) - if str(company_communication_rule.current_version or "").strip() == COMPANY_COMMUNICATION_RULE_VERSION: - self._ensure_asset_review( - company_communication_rule, - version=COMPANY_COMMUNICATION_RULE_VERSION, - reviewer="顾承宇", - review_status=AgentReviewStatus.APPROVED.value, - review_note="首版 Excel 规则表已确认,可作为财务规则使用。", - reviewed_at=datetime.now(UTC), - ) - - if "skill.ar.aging_summary" not in existing_codes: - asset = self._create_seed_asset( - asset_type=AgentAssetType.SKILL.value, - code="skill.ar.aging_summary", - name="应收账龄汇总技能", - description="按客户、账龄和逾期状态汇总应收风险分布。", - domain=AgentAssetDomain.AR.value, - scenario_json=["accounts_receivable", "query", "aging_summary"], - owner="平台研发组", - reviewer="陈硕", - 
status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - config_json={"input_schema": ["customer", "aging_bucket", "status"]}, - ) - self._ensure_asset_version( - asset, - version="v1.0.0", - content=self._json_content( - { - "inputs": ["customer", "aging_bucket", "status"], - "outputs": ["receivable_total", "overdue_total", "customer_count"], - "dependencies": ["database.accounts_receivable"], - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化应收账龄技能快照。", - created_by="系统初始化", - ) - - if "mcp.ledger.snapshot_mock" not in existing_codes: - asset = self._create_seed_asset( - asset_type=AgentAssetType.MCP.value, - code="mcp.ledger.snapshot_mock", - name="总账快照 Mock 服务", - description="模拟返回应收、应付和费用汇总快照,供 Agent 查询和巡检。", - domain=AgentAssetDomain.SYSTEM.value, - scenario_json=["expense", "accounts_receivable", "accounts_payable"], - owner="平台研发组", - reviewer="周悦宁", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - config_json={"endpoint": "mock://ledger/snapshot", "timeout_ms": 1500}, - ) - self._ensure_asset_version( - asset, - version="v1.0.0", - content=self._json_content( - { - "service_type": "mock", - "auth_mode": "service_account", - "degrade_strategy": "return_cached_snapshot_with_warning", - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化总账快照 MCP。", - created_by="系统初始化", - ) - - if "task.hermes.weekly_ar_summary" not in existing_codes: - asset = self._create_seed_asset( - asset_type=AgentAssetType.TASK.value, - code="task.hermes.weekly_ar_summary", - name="Hermes 每周应收账龄汇总", - description="每周汇总逾期应收、账龄分布和客户风险变化。", - domain=AgentAssetDomain.SYSTEM.value, - scenario_json=["schedule", "accounts_receivable", "summary"], - owner="风控与审计部", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - config_json={"cron": "0 10 * * 1", "agent": AgentName.HERMES.value}, - ) - self._ensure_asset_version( - asset, - version="v1.0.0", - content=self._json_content( - { 
- "task_type": "weekly_ar_summary", - "schedule": "0 10 * * 1", - "target_agent": AgentName.HERMES.value, - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化应收账龄汇总任务。", - created_by="系统初始化", - ) - - if "task.hermes.rule_review_digest" not in existing_codes: - asset = self._create_seed_asset( - asset_type=AgentAssetType.TASK.value, - code="task.hermes.rule_review_digest", - name="Hermes 规则待审摘要", - description="每天汇总待审规则、待补样例和被拒规则修订建议。", - domain=AgentAssetDomain.SYSTEM.value, - scenario_json=["schedule", "rule_center", "review_digest"], - owner="风控与审计部", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - config_json={"cron": "0 18 * * *", "agent": AgentName.HERMES.value}, - ) - self._ensure_asset_version( - asset, - version="v1.0.0", - content=self._json_content( - { - "task_type": "rule_review_digest", - "schedule": "0 18 * * *", - "target_agent": AgentName.HERMES.value, - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化规则待审摘要任务。", - created_by="系统初始化", - ) - - if "task.hermes.knowledge_index_sync" not in existing_codes: - asset = self._create_seed_asset( - asset_type=AgentAssetType.TASK.value, - code="task.hermes.knowledge_index_sync", - name="Hermes ??????", - description="?????????? 
LightRAG ???????", - domain=AgentAssetDomain.SYSTEM.value, - scenario_json=["schedule", "knowledge", "rule_center"], - owner="财务制度管理组", - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - config_json={"cron": "0 0 * * *", "agent": AgentName.HERMES.value}, - ) - self._ensure_asset_version( - asset, - version="v1.0.0", - content=self._json_content( - { - "task_type": "knowledge_index_sync", - "schedule": "0 0 * * *", - "target_agent": AgentName.HERMES.value, - "folder": "报销制度", - "changed_only": True, - } - ), - content_type=AgentAssetContentType.JSON.value, - change_note="初始化制度知识与规则草稿形成任务。", - created_by="系统初始化", - ) - - def _ensure_company_travel_rule_spreadsheet_seed( - self, - asset: AgentAsset, - *, - version: str, - actor_name: str, - ): - manager = AgentAssetSpreadsheetManager() - manager.ensure_rule_library_dirs() - live_document = manager.store_rule_library_spreadsheet( - library=FINANCE_RULES_LIBRARY, - file_name=COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, - content=self._read_or_build_company_travel_rule_file(manager), - actor_name=actor_name, - source="rule-library", - ) - existing_document = ( - asset.config_json.get("rule_document") - if isinstance(asset.config_json, dict) - else None - ) - storage_key = ( - str(existing_document.get("storage_key") or "").strip() - if isinstance(existing_document, dict) - else "" - ) - if storage_key: - try: - existing_path = manager.resolve_storage_path(storage_key) - except FileNotFoundError: - existing_path = None - if existing_path is not None and existing_path.exists(): - asset.config_json = { - **(asset.config_json or {}), - "detail_mode": "spreadsheet", - "tag": "财务规则", - "rule_library": FINANCE_RULES_LIBRARY, - "rule_document": { - **AgentAssetSpreadsheetManager.build_rule_document_config( - live_document, - asset_version=version, - ), - "storage_key": live_document.storage_key, - }, - } - return live_document - - asset.config_json = { - **(asset.config_json or {}), - "detail_mode": 
"spreadsheet", - "tag": "财务规则", - "rule_library": FINANCE_RULES_LIBRARY, - "rule_document": { - **AgentAssetSpreadsheetManager.build_rule_document_config( - live_document, - asset_version=version, - ), - "storage_key": live_document.storage_key, - }, - } - return live_document - - def _ensure_company_communication_rule_spreadsheet_seed( - self, - asset: AgentAsset, - *, - version: str, - actor_name: str, - ): - return self._ensure_finance_rule_spreadsheet_seed( - asset, - version=version, - actor_name=actor_name, - file_name=COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME, - fallback_sheet_name="通信费报销规则", - ) - - @staticmethod - def _read_or_build_company_travel_rule_file( - manager: AgentAssetSpreadsheetManager, - ) -> bytes: - live_key = ( - Path("rules") - / FINANCE_RULES_LIBRARY - / COMPANY_TRAVEL_EXPENSE_RULE_FILENAME - ).as_posix() - live_path = manager.resolve_storage_path(live_key) - if live_path.exists(): - return live_path.read_bytes() - return AgentAssetSpreadsheetManager.build_blank_rule_workbook("差旅费报销规则") - - def _ensure_finance_rule_spreadsheet_seed( - self, - asset: AgentAsset, - *, - version: str, - actor_name: str, - file_name: str, - fallback_sheet_name: str, - ): - manager = AgentAssetSpreadsheetManager() - manager.ensure_rule_library_dirs() - live_document = manager.store_rule_library_spreadsheet( - library=FINANCE_RULES_LIBRARY, - file_name=file_name, - content=self._read_or_build_finance_rule_file( - manager, - file_name=file_name, - fallback_sheet_name=fallback_sheet_name, - ), - actor_name=actor_name, - source="rule-library", - ) - existing_document = ( - asset.config_json.get("rule_document") - if isinstance(asset.config_json, dict) - else None - ) - storage_key = ( - str(existing_document.get("storage_key") or "").strip() - if isinstance(existing_document, dict) - else "" - ) - if storage_key: - try: - existing_path = manager.resolve_storage_path(storage_key) - except FileNotFoundError: - existing_path = None - if existing_path is not None 
and existing_path.exists(): - asset.config_json = { - **(asset.config_json or {}), - "detail_mode": "spreadsheet", - "tag": "财务规则", - "rule_library": FINANCE_RULES_LIBRARY, - "rule_document": { - **AgentAssetSpreadsheetManager.build_rule_document_config( - live_document, - asset_version=version, - ), - "storage_key": live_document.storage_key, - }, - } - return live_document - - asset.config_json = { - **(asset.config_json or {}), - "detail_mode": "spreadsheet", - "tag": "财务规则", - "rule_library": FINANCE_RULES_LIBRARY, - "rule_document": { - **AgentAssetSpreadsheetManager.build_rule_document_config( - live_document, - asset_version=version, - ), - "storage_key": live_document.storage_key, - }, - } - return live_document - - @staticmethod - def _read_or_build_finance_rule_file( - manager: AgentAssetSpreadsheetManager, - *, - file_name: str, - fallback_sheet_name: str, - ) -> bytes: - live_key = ( - Path("rules") - / FINANCE_RULES_LIBRARY - / file_name - ).as_posix() - live_path = manager.resolve_storage_path(live_key) - if live_path.exists(): - return live_path.read_bytes() - return AgentAssetSpreadsheetManager.build_blank_rule_workbook(fallback_sheet_name) - - def _create_seed_asset( - self, - *, - asset_type: str, - code: str, - name: str, - description: str, - domain: str, - scenario_json: list[str], - owner: str, - reviewer: str, - status: str, - current_version: str, - config_json: dict[str, object], - ) -> AgentAsset: - asset = AgentAsset( - asset_type=asset_type, - code=code, - name=name, - description=description, - domain=domain, - scenario_json=scenario_json, - owner=owner, - reviewer=reviewer, - status=status, - current_version=current_version, - published_version=current_version if status == AgentAssetStatus.ACTIVE.value else None, - working_version=current_version, - config_json=config_json, - ) - self.db.add(asset) - self.db.flush() - return asset - - def _ensure_asset_version( - self, - asset: AgentAsset, - *, - version: str, - content: str, - 
content_type: str, - change_note: str, - created_by: str, - ) -> None: - existing = self.db.scalar( - select(AgentAssetVersion).where( - AgentAssetVersion.asset_id == asset.id, - AgentAssetVersion.version == version, - ) - ) - if existing is not None: - return - - self.db.add( - AgentAssetVersion( - asset_id=asset.id, - version=version, - content=content, - content_type=content_type, - change_note=change_note, - created_by=created_by, - ) - ) - - def _ensure_asset_review( - self, - asset: AgentAsset, - *, - version: str, - reviewer: str, - review_status: str, - review_note: str, - reviewed_at: datetime | None, - ) -> None: - existing = self.db.scalar( - select(AgentAssetReview).where( - AgentAssetReview.asset_id == asset.id, - AgentAssetReview.version == version, - AgentAssetReview.review_status == review_status, - ) - ) - if existing is not None: - return - - self.db.add( - AgentAssetReview( - asset_id=asset.id, - version=version, - reviewer=reviewer, - review_status=review_status, - review_note=review_note, - reviewed_at=reviewed_at, - ) - ) - - def _remove_legacy_rule_assets(self) -> None: - assets = list( - self.db.scalars( - select(AgentAsset).where(AgentAsset.code.in_(LEGACY_RULE_CODES)) - ).all() - ) - for asset in assets: - self.db.delete(asset) - - obsolete_logs = list( - self.db.scalars( - select(AuditLog).where(AuditLog.resource_id.in_(LEGACY_RULE_CODES)) - ).all() - ) - for log in obsolete_logs: - self.db.delete(log) - - def _ensure_agent_asset_schema(self) -> None: - bind = self.db.get_bind() - inspector = inspect(bind) - if "agent_assets" not in inspector.get_table_names(): - return - - column_names = {column["name"] for column in inspector.get_columns("agent_assets")} - migration_statements: list[str] = [] - if "published_version" not in column_names: - migration_statements.append("ALTER TABLE agent_assets ADD COLUMN published_version VARCHAR(30)") - if "working_version" not in column_names: - migration_statements.append("ALTER TABLE agent_assets ADD 
COLUMN working_version VARCHAR(30)") - - for statement in migration_statements: - self.db.execute(text(statement)) - - self.db.execute( - text( - "UPDATE agent_assets " - "SET working_version = COALESCE(working_version, current_version), " - "published_version = CASE " - "WHEN published_version IS NOT NULL THEN published_version " - "WHEN status = 'active' THEN current_version " - "ELSE published_version END" - ) - ) - - if migration_statements: - self.db.commit() - - def _attachment_submission_requirement_markdown( - self, - *, - version_note: str, - include_review_note: bool, - ) -> str: - sections = [ - "# 报销附件与单据完整性规则", - "", - "## 模板信息", - "", - "- 模板键:`attachment_requirement_v1`", - "- 来源文档:报销制度 / 单据与附件要求", - "- 审核状态:待审核", - "", - "## 目标", - "", - "统一约束报销提交时的票据、附件与替代凭证要求,避免缺件、错件和无依据流转。", - "", - "## 适用范围", - "", - "适用于员工报销提交场景,重点覆盖差旅、住宿、交通、餐费、办公和其他费用的附件校验。", - "", - "## 输入字段", - "", - "- expense_type", - "- attachments", - "- invoice_count", - "- reason", - "", - "## 判断规则", - "", - "- 报销提交前至少需要 1 份有效附件。", - "- 金额类报销原则上应提供合法票据;特殊场景无发票时,必须补充收据与情况说明。", - "- 差旅交通报销需提供行程单或等效凭证;住宿报销需提供酒店票据或等效住宿凭证。", - "- 缺少必要附件时直接拦截,并提示补件后重新提交。", - "", - "## 输出", - "", - "- 风险编码:`invoice_anomaly`", - "- 默认动作:`block`", - "- 处理说明:附件或单据不完整时退回补充。", - "", - "## 来源依据", - "", - "- 报销制度对票据、附件、替代凭证和补件要求的统一约束。", - "", - "## 审核约束", - "", - "- 当前规则属于真实业务规则,但仍处于待审核状态。", - "- 上线前需由制度管理员确认收据替代、补件时限和特殊场景豁免口径。", - f"- 当前版本说明:{version_note}", - "", - "## 管理员备注", - "", - "需要结合公司正式报销制度,补充各场景附件替代口径与例外审批要求。", - ] - if include_review_note: - sections.extend(["", "```expense-rule", json.dumps(ATTACHMENT_RULE_RUNTIME_CONFIG, ensure_ascii=False, indent=2), "```"]) - return "\n".join(sections) - - def _scene_submission_standard_markdown(self) -> str: - return self._markdown_content(build_scene_submission_standard_markdown()) - - def _travel_risk_control_standard_markdown(self, *, version: str = "v1.1.0") -> str: - return self._markdown_content(build_travel_risk_control_standard_markdown()) - - def 
_iter_platform_risk_manifests(self) -> list[tuple[str, dict[str, object]]]: - manager = AgentAssetRuleLibraryManager() - manifests: list[tuple[str, dict[str, object]]] = [] - for file_name in sorted(manager.list_rule_library_json_files(library=RISK_RULES_LIBRARY)): - payload = manager.read_rule_library_json(library=RISK_RULES_LIBRARY, file_name=file_name) - if payload.get("enabled") is False: - continue - manifests.append((file_name, payload)) - return manifests - - @staticmethod - def _resolve_platform_risk_category(manifest: dict[str, object]) -> str: - explicit = str(manifest.get("risk_category") or "").strip() - if explicit: - return explicit - - rule_code = str(manifest.get("rule_code") or "").strip().lower() - applies_to = manifest.get("applies_to") if isinstance(manifest.get("applies_to"), dict) else {} - domains = {str(item or "").strip().lower() for item in applies_to.get("domains") or []} - expense_types = { - str(item or "").strip().lower() for item in applies_to.get("expense_types") or [] - } - - if rule_code.startswith("risk.invoice."): - return "发票" - if "meal" in domains or "entertainment" in expense_types: - return "餐饮招待" - if "transport" in expense_types or "consecutive_transport" in rule_code: - return "交通出行" - if "office" in expense_types: - return "办公物料" - if "travel" in domains or rule_code.startswith("risk.travel."): - return "差旅" - if rule_code.startswith("risk.expense."): - return "费用科目" - return "通用" - - def _platform_risk_scenario_json(self, manifest: dict[str, object]) -> list[str]: - category = self._resolve_platform_risk_category(manifest) - return [category] if category else ["通用"] - - def _platform_risk_config_json(self, file_name: str, manifest: dict[str, object]) -> dict[str, object]: - outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {} - fail_outcome = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {} - risk_category = self._resolve_platform_risk_category(manifest) - 
return { - "severity": str(fail_outcome.get("severity") or "medium"), - "enabled": True, - "tag": "风险规则", - "detail_mode": "json_risk", - "risk_category": risk_category, - "rule_library": RISK_RULES_LIBRARY, - "rule_document": { - "file_name": file_name, - "storage_key": f"rules/{RISK_RULES_LIBRARY}/{file_name}", - }, - "ontology_signal": str(manifest.get("ontology_signal") or "").strip(), - "evaluator": str(manifest.get("evaluator") or "").strip(), - "source_ref": ( - (manifest.get("metadata") or {}).get("source_ref") - if isinstance(manifest.get("metadata"), dict) - else "" - ), - } - - def _build_platform_risk_seed_assets(self) -> list[AgentAsset]: - assets: list[AgentAsset] = [] - for file_name, manifest in self._iter_platform_risk_manifests(): - rule_code = str(manifest.get("rule_code") or "").strip() - if not rule_code: - continue - metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {} - source_ref = str(metadata.get("source_ref") or "").strip() - rule_description = str(manifest.get("description") or "").strip() - assets.append( - AgentAsset( - asset_type=AgentAssetType.RULE.value, - code=rule_code, - name=str(manifest.get("name") or rule_code), - description=rule_description - or f"平台通用风险规则:{source_ref or manifest.get('name') or rule_code}", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=self._platform_risk_scenario_json(manifest), - owner=str(metadata.get("owner") or "风控与审计部"), - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - published_version="v1.0.0", - working_version="v1.0.0", - config_json=self._platform_risk_config_json(file_name, manifest), - ) - ) - return assets - - def sync_platform_risk_rules_from_library(self) -> int: - existing_codes = set(self.db.scalars(select(AgentAsset.code)).all()) - before_count = len(existing_codes) - self._ensure_platform_risk_rules_from_library(existing_codes) - self.db.flush() - after_codes = 
set(self.db.scalars(select(AgentAsset.code)).all()) - synced = max(len(after_codes) - before_count, 0) - manifest_count = len(self._iter_platform_risk_manifests()) - logger.info( - "Platform risk rules synced from library", - extra={"manifest_count": manifest_count, "created_count": synced, "total": len(after_codes)}, - ) - return manifest_count - - def _ensure_platform_risk_rules_from_library(self, existing_codes: set[str]) -> None: - for file_name, manifest in self._iter_platform_risk_manifests(): - rule_code = str(manifest.get("rule_code") or "").strip() - if not rule_code: - continue - - metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {} - source_ref = str(metadata.get("source_ref") or "").strip() - rule_description = str(manifest.get("description") or "").strip() - config_json = self._platform_risk_config_json(file_name, manifest) - scenario_json = self._platform_risk_scenario_json(manifest) - - asset = self.db.scalar(select(AgentAsset).where(AgentAsset.code == rule_code)) - if asset is None and rule_code not in existing_codes: - asset = self._create_seed_asset( - asset_type=AgentAssetType.RULE.value, - code=rule_code, - name=str(manifest.get("name") or rule_code), - description=rule_description - or f"平台通用风险规则:{source_ref or manifest.get('name') or rule_code}", - domain=AgentAssetDomain.EXPENSE.value, - scenario_json=scenario_json, - owner=str(metadata.get("owner") or "风控与审计部"), - reviewer="顾承宇", - status=AgentAssetStatus.ACTIVE.value, - current_version="v1.0.0", - config_json=config_json, - ) - - if asset is None: - continue - - if not str(asset.current_version or "").strip(): - asset.current_version = "v1.0.0" - if not str(asset.working_version or "").strip(): - asset.working_version = asset.current_version - if not str(asset.published_version or "").strip(): - asset.published_version = asset.current_version - asset.status = asset.status or AgentAssetStatus.ACTIVE.value - asset.name = str(manifest.get("name") or 
asset.name or rule_code) - if rule_description: - asset.description = rule_description - asset.config_json = config_json - asset.scenario_json = scenario_json - - self._ensure_asset_version( - asset, - version="v1.0.0", - content=self._platform_risk_rule_markdown(asset, manifest=manifest, file_name=file_name), - content_type=AgentAssetContentType.MARKDOWN.value, - change_note=f"平台通用风险规则:{asset.name}", - created_by="系统初始化", - ) - self._ensure_asset_review( - asset, - version="v1.0.0", - reviewer="顾承宇", - review_status=AgentReviewStatus.APPROVED.value, - review_note="平台内置风险规则,供提交验审与风险问答共用。", - reviewed_at=datetime.now(UTC), - ) - - @staticmethod - def _platform_risk_rule_markdown( - asset: AgentAsset, - *, - manifest: dict[str, object] | None = None, - file_name: str = "", - ) -> str: - config = asset.config_json if isinstance(asset.config_json, dict) else {} - rule_document = config.get("rule_document") if isinstance(config.get("rule_document"), dict) else {} - resolved_file_name = file_name or str(rule_document.get("file_name") or "").strip() - evaluator = str(config.get("evaluator") or (manifest or {}).get("evaluator") or "").strip() - ontology_signal = str(config.get("ontology_signal") or (manifest or {}).get("ontology_signal") or "").strip() - source_ref = str(config.get("source_ref") or "").strip() - if not source_ref and isinstance(manifest, dict): - metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {} - source_ref = str(metadata.get("source_ref") or "").strip() - - lines = [ - f"# {asset.name}", - "", - "## 规则类型", - "", - "- 平台内置通用风险规则(`json_risk`)", - ] - if evaluator: - lines.append(f"- 检查器:`{evaluator}`") - if ontology_signal: - lines.append(f"- 本体信号:`{ontology_signal}`") - if source_ref: - lines.extend(["", "## 来源", "", f"- {source_ref}"]) - if resolved_file_name: - lines.extend( - [ - "", - "## 配置文件", - "", - f"- `rules/{RISK_RULES_LIBRARY}/{resolved_file_name}`", - ] - ) - return "\n".join(lines) - - @staticmethod - 
def _platform_destination_location_risk_markdown() -> str: - return AgentFoundationService._platform_risk_rule_markdown( - AgentAsset(name="申报地点与票据地点一致", config_json={"evaluator": "location_consistency"}), - manifest={ - "evaluator": "location_consistency", - "ontology_signal": "location_mismatch", - "metadata": {"source_ref": "常用risk.txt / 一、出差类 / 行程不符"}, - }, - file_name=PLATFORM_DESTINATION_LOCATION_RULE_FILENAME, - ) - - @staticmethod - def _markdown_content(content: str) -> str: - return content - - @staticmethod - def _json_content(content: dict[str, object]) -> str: - return json.dumps(content, ensure_ascii=False, sort_keys=True, indent=2) +from sqlalchemy import select +from sqlalchemy.orm import Session + +from app.core.config import get_settings +from app.core.logging import get_logger +from app.db.base import Base +from app.db.session import get_session_factory +from app.models.agent_asset import AgentAsset +from app.services.agent_foundation_asset_helpers import AgentFoundationAssetHelperMixin +from app.services.agent_foundation_asset_seed import AgentFoundationAssetSeedMixin +from app.services.agent_foundation_asset_topup import AgentFoundationAssetTopUpMixin +from app.services.agent_foundation_constants import ( + ATTACHMENT_RULE_ASSET_CODE, + ATTACHMENT_RULE_RUNTIME_CONFIG, + COMPANY_COMMUNICATION_RULE_SCENARIO_JSON, + COMPANY_COMMUNICATION_RULE_VERSION, + COMPANY_TRAVEL_RULE_SCENARIO_JSON, + COMPANY_TRAVEL_RULE_VERSION, + DEMO_EXPENSE_CLAIM_SIGNATURES, + DEMO_PAYABLE_SIGNATURES, + DEMO_RECEIVABLE_SIGNATURES, + LEGACY_RULE_CODES, + PLATFORM_DESTINATION_LOCATION_RULE_CODE, + PLATFORM_DESTINATION_LOCATION_RULE_FILENAME, +) +from app.services.agent_foundation_financial_seed import AgentFoundationFinancialSeedMixin +from app.services.agent_foundation_markdown import AgentFoundationMarkdownMixin +from app.services.agent_foundation_risk_rules import AgentFoundationRiskRuleMixin +from app.services.agent_foundation_spreadsheets import 
AgentFoundationSpreadsheetMixin + +logger = get_logger("app.services.agent_foundation") + + +def prepare_agent_foundation() -> None: + settings = get_settings() + if not settings.setup_completed: + logger.info("Agent foundation bootstrap skipped because setup is incomplete") + return + + session_factory = get_session_factory() + with session_factory() as db: + AgentFoundationService(db).ensure_foundation_ready() + + +class AgentFoundationService( + AgentFoundationAssetSeedMixin, + AgentFoundationFinancialSeedMixin, + AgentFoundationAssetTopUpMixin, + AgentFoundationSpreadsheetMixin, + AgentFoundationAssetHelperMixin, + AgentFoundationMarkdownMixin, + AgentFoundationRiskRuleMixin, +): + def __init__(self, db: Session) -> None: + self.db = db + + def ensure_foundation_ready(self) -> None: + try: + Base.metadata.create_all(bind=self.db.get_bind()) + self._ensure_agent_asset_schema() + self._seed_agent_assets() + self._sync_demo_financial_records() + self._seed_runs_and_logs() + self.db.commit() + except Exception: + self.db.rollback() + logger.exception("Failed to prepare agent foundation") + raise + + def _sync_demo_financial_records(self) -> None: + if get_settings().seed_demo_financial_records: + self._seed_financial_records() + return + self._purge_demo_financial_records() diff --git a/server/src/app/services/agent_foundation_asset_helpers.py b/server/src/app/services/agent_foundation_asset_helpers.py new file mode 100644 index 0000000..f95e317 --- /dev/null +++ b/server/src/app/services/agent_foundation_asset_helpers.py @@ -0,0 +1,322 @@ +from __future__ import annotations + +import hashlib +import json +from datetime import UTC, date, datetime +from decimal import Decimal +from pathlib import Path + +from sqlalchemy import inspect, select, text + +from app.core.agent_enums import ( + AgentAssetContentType, + AgentAssetDomain, + AgentAssetStatus, + AgentAssetType, + AgentName, + AgentPermissionLevel, + AgentReviewStatus, + AgentRunSource, + AgentRunStatus, + 
AgentToolType, +) +from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion +from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog +from app.models.audit_log import AuditLog +from app.models.financial_record import ( + AccountsPayableRecord, + AccountsReceivableRecord, + ExpenseClaim, + ExpenseClaimItem, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import ( + AgentAssetSpreadsheetManager, + COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, + COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME, + COMPANY_TRAVEL_EXPENSE_RULE_CODE, + COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, + FINANCE_RULES_LIBRARY, + RISK_RULES_LIBRARY, +) +from app.services.expense_rule_runtime import ( + build_scene_submission_standard_markdown, + build_travel_risk_control_standard_markdown, +) +from app.services.agent_foundation_constants import ( + ATTACHMENT_RULE_ASSET_CODE, + ATTACHMENT_RULE_RUNTIME_CONFIG, + COMPANY_COMMUNICATION_RULE_SCENARIO_JSON, + COMPANY_COMMUNICATION_RULE_VERSION, + COMPANY_TRAVEL_RULE_SCENARIO_JSON, + COMPANY_TRAVEL_RULE_VERSION, + DEMO_EXPENSE_CLAIM_SIGNATURES, + DEMO_PAYABLE_SIGNATURES, + DEMO_RECEIVABLE_SIGNATURES, + LEGACY_RULE_CODES, + PLATFORM_DESTINATION_LOCATION_RULE_FILENAME, +) +from app.core.logging import get_logger + +logger = get_logger("app.services.agent_foundation") + +class AgentFoundationAssetHelperMixin: + def _create_seed_asset( + + self, + + *, + + asset_type: str, + + code: str, + + name: str, + + description: str, + + domain: str, + + scenario_json: list[str], + + owner: str, + + reviewer: str, + + status: str, + + current_version: str, + + config_json: dict[str, object], + + ) -> AgentAsset: + + asset = AgentAsset( + + asset_type=asset_type, + + code=code, + + name=name, + + description=description, + + domain=domain, + + scenario_json=scenario_json, + + owner=owner, + + reviewer=reviewer, + + status=status, + + 
current_version=current_version, + + published_version=current_version if status == AgentAssetStatus.ACTIVE.value else None, + + working_version=current_version, + + config_json=config_json, + + ) + + self.db.add(asset) + + self.db.flush() + + return asset + + def _ensure_asset_version( + + self, + + asset: AgentAsset, + + *, + + version: str, + + content: str, + + content_type: str, + + change_note: str, + + created_by: str, + + ) -> None: + + existing = self.db.scalar( + + select(AgentAssetVersion).where( + + AgentAssetVersion.asset_id == asset.id, + + AgentAssetVersion.version == version, + + ) + + ) + + if existing is not None: + + return + + self.db.add( + + AgentAssetVersion( + + asset_id=asset.id, + + version=version, + + content=content, + + content_type=content_type, + + change_note=change_note, + + created_by=created_by, + + ) + + ) + + def _ensure_asset_review( + + self, + + asset: AgentAsset, + + *, + + version: str, + + reviewer: str, + + review_status: str, + + review_note: str, + + reviewed_at: datetime | None, + + ) -> None: + + existing = self.db.scalar( + + select(AgentAssetReview).where( + + AgentAssetReview.asset_id == asset.id, + + AgentAssetReview.version == version, + + AgentAssetReview.review_status == review_status, + + ) + + ) + + if existing is not None: + + return + + self.db.add( + + AgentAssetReview( + + asset_id=asset.id, + + version=version, + + reviewer=reviewer, + + review_status=review_status, + + review_note=review_note, + + reviewed_at=reviewed_at, + + ) + + ) + + def _remove_legacy_rule_assets(self) -> None: + + assets = list( + + self.db.scalars( + + select(AgentAsset).where(AgentAsset.code.in_(LEGACY_RULE_CODES)) + + ).all() + + ) + + for asset in assets: + + self.db.delete(asset) + + obsolete_logs = list( + + self.db.scalars( + + select(AuditLog).where(AuditLog.resource_id.in_(LEGACY_RULE_CODES)) + + ).all() + + ) + + for log in obsolete_logs: + + self.db.delete(log) + + def _ensure_agent_asset_schema(self) -> None: + + bind 
= self.db.get_bind() + + inspector = inspect(bind) + + if "agent_assets" not in inspector.get_table_names(): + + return + + column_names = {column["name"] for column in inspector.get_columns("agent_assets")} + + migration_statements: list[str] = [] + + if "published_version" not in column_names: + + migration_statements.append("ALTER TABLE agent_assets ADD COLUMN published_version VARCHAR(30)") + + if "working_version" not in column_names: + + migration_statements.append("ALTER TABLE agent_assets ADD COLUMN working_version VARCHAR(30)") + + for statement in migration_statements: + + self.db.execute(text(statement)) + + self.db.execute( + + text( + + "UPDATE agent_assets " + + "SET working_version = COALESCE(working_version, current_version), " + + "published_version = CASE " + + "WHEN published_version IS NOT NULL THEN published_version " + + "WHEN status = 'active' THEN current_version " + + "ELSE published_version END" + + ) + + ) + + if migration_statements: + + self.db.commit() diff --git a/server/src/app/services/agent_foundation_asset_seed.py b/server/src/app/services/agent_foundation_asset_seed.py new file mode 100644 index 0000000..5708d75 --- /dev/null +++ b/server/src/app/services/agent_foundation_asset_seed.py @@ -0,0 +1,599 @@ +from __future__ import annotations + +import hashlib +import json +from datetime import UTC, date, datetime +from decimal import Decimal +from pathlib import Path + +from sqlalchemy import inspect, select, text + +from app.core.agent_enums import ( + AgentAssetContentType, + AgentAssetDomain, + AgentAssetStatus, + AgentAssetType, + AgentName, + AgentPermissionLevel, + AgentReviewStatus, + AgentRunSource, + AgentRunStatus, + AgentToolType, +) +from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion +from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog +from app.models.audit_log import AuditLog +from app.models.financial_record import ( + AccountsPayableRecord, + 
AccountsReceivableRecord, + ExpenseClaim, + ExpenseClaimItem, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import ( + AgentAssetSpreadsheetManager, + COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, + COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME, + COMPANY_TRAVEL_EXPENSE_RULE_CODE, + COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, + FINANCE_RULES_LIBRARY, + RISK_RULES_LIBRARY, +) +from app.services.expense_rule_runtime import ( + build_scene_submission_standard_markdown, + build_travel_risk_control_standard_markdown, +) +from app.services.agent_foundation_constants import ( + ATTACHMENT_RULE_ASSET_CODE, + ATTACHMENT_RULE_RUNTIME_CONFIG, + COMPANY_COMMUNICATION_RULE_SCENARIO_JSON, + COMPANY_COMMUNICATION_RULE_VERSION, + COMPANY_TRAVEL_RULE_SCENARIO_JSON, + COMPANY_TRAVEL_RULE_VERSION, + DEMO_EXPENSE_CLAIM_SIGNATURES, + DEMO_PAYABLE_SIGNATURES, + DEMO_RECEIVABLE_SIGNATURES, + LEGACY_RULE_CODES, + PLATFORM_DESTINATION_LOCATION_RULE_FILENAME, +) +from app.core.logging import get_logger + +logger = get_logger("app.services.agent_foundation") + + +class AgentFoundationAssetSeedMixin: + def _seed_agent_assets(self) -> None: + + existing_codes = set(self.db.scalars(select(AgentAsset.code)).all()) + + if existing_codes: + + self._top_up_agent_assets(existing_codes) + + return + + attachment_rule = AgentAsset( + asset_type=AgentAssetType.RULE.value, + code=ATTACHMENT_RULE_ASSET_CODE, + name="报销附件与单据完整性规则", + description="统一定义报销提交时的附件数量、票据类型和补件处理口径,作为上线前待审核规则。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=["expense", "risk_check", "attachment_policy", "invoice_anomaly"], + owner="财务制度管理组", + reviewer="高嘉禾", + status=AgentAssetStatus.REVIEW.value, + current_version="v1.0.0", + published_version=None, + working_version="v1.0.0", + config_json={ + "severity": "high", + "enabled": False, + "runtime_kind": "policy_rule_draft", + "rule_template_key": "attachment_requirement_v1", + "rule_template_label": 
"附件要求模板", + "runtime_rule": ATTACHMENT_RULE_RUNTIME_CONFIG, + }, + ) + + scene_submission_rule = AgentAsset( + asset_type=AgentAssetType.RULE.value, + code="rule.expense.scene_submission_standard", + name="报销场景提交与附件标准", + description="统一定义各报销场景的必填字段、附件类型要求和金额阈值。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=["expense", "risk_check", "scene_policy", "attachment_policy"], + owner="费用运营组", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + published_version="v1.0.0", + working_version="v1.0.0", + config_json={ + "severity": "high", + "enabled": True, + "runtime_kind": "scene_matrix", + "rule_template_label": "系统内置场景矩阵规则", + }, + ) + + travel_policy_rule = AgentAsset( + asset_type=AgentAssetType.RULE.value, + code="rule.expense.travel_risk_control_standard", + name="差旅报销风险管控制度", + description="统一定义差旅报销的行程闭环、酒店地点一致性、职级差标和风险处置口径。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=["expense", "risk_check", "travel_policy", "travel_standard"], + owner="风控与审计部", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.1.0", + published_version="v1.1.0", + working_version="v1.1.0", + config_json={ + "severity": "high", + "enabled": True, + "block_on_high_risk": True, + "warning_on_medium_risk": True, + "source_doc": "document/development/risks/travel-risk-control-standard.md", + "runtime_kind": "travel_policy", + "rule_template_key": "travel_standard_v1", + "rule_template_label": "差旅标准模板", + }, + ) + + company_travel_rule = AgentAsset( + asset_type=AgentAssetType.RULE.value, + code=COMPANY_TRAVEL_EXPENSE_RULE_CODE, + name="公司差旅费报销规则", + description="通过 Excel 明细表维护差旅费报销标准、票据要求和审批口径。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=list(COMPANY_TRAVEL_RULE_SCENARIO_JSON), + owner="财务制度管理组", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version=COMPANY_TRAVEL_RULE_VERSION, + published_version=COMPANY_TRAVEL_RULE_VERSION, + working_version=COMPANY_TRAVEL_RULE_VERSION, 
+ config_json={ + "severity": "medium", + "enabled": True, + "tag": "财务规则", + "detail_mode": "spreadsheet", + "rule_library": FINANCE_RULES_LIBRARY, + "scenario_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], + "ai_review_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], + "rule_template_label": "差旅报销 Excel 模板", + }, + ) + platform_risk_assets = self._build_platform_risk_seed_assets() + + company_communication_rule = AgentAsset( + asset_type=AgentAssetType.RULE.value, + code=COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, + name="公司通信费报销规则", + description="通过 Excel 明细表维护员工通信费报销标准、专项补充口径和审批要求。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=list(COMPANY_COMMUNICATION_RULE_SCENARIO_JSON), + owner="财务制度管理组", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version=COMPANY_COMMUNICATION_RULE_VERSION, + published_version=COMPANY_COMMUNICATION_RULE_VERSION, + working_version=COMPANY_COMMUNICATION_RULE_VERSION, + config_json={ + "severity": "medium", + "enabled": True, + "tag": "财务规则", + "detail_mode": "spreadsheet", + "rule_library": FINANCE_RULES_LIBRARY, + "scenario_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], + "ai_review_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], + "rule_template_label": "通信费报销 Excel 模板", + }, + ) + skill_expense_asset = AgentAsset( + asset_type=AgentAssetType.SKILL.value, + code="skill.expense.summary_lookup", + name="报销汇总查询技能", + description="根据时间、员工和部门汇总报销金额与单据数量。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=["expense", "query", "summary"], + owner="平台研发组", + reviewer="陈硕", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + published_version="v1.0.0", + working_version="v1.0.0", + config_json={"input_schema": ["time_range", "employee", "department"]}, + ) + + skill_ar_asset = AgentAsset( + asset_type=AgentAssetType.SKILL.value, + code="skill.ar.aging_summary", + name="应收账龄汇总技能", + description="按客户、账龄和逾期状态汇总应收风险分布。", + domain=AgentAssetDomain.AR.value, + 
scenario_json=["accounts_receivable", "query", "aging_summary"], + owner="平台研发组", + reviewer="陈硕", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + published_version="v1.0.0", + working_version="v1.0.0", + config_json={"input_schema": ["customer", "aging_bucket", "status"]}, + ) + + invoice_mcp_asset = AgentAsset( + asset_type=AgentAssetType.MCP.value, + code="mcp.invoice.verify_mock", + name="发票验真 Mock 服务", + description="模拟发票验真、发票状态查询和异常降级说明。", + domain=AgentAssetDomain.SYSTEM.value, + scenario_json=["expense", "invoice_validation"], + owner="平台研发组", + reviewer="周悦宁", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + published_version="v1.0.0", + working_version="v1.0.0", + config_json={"endpoint": "mock://invoice/verify", "timeout_ms": 1200}, + ) + + ledger_mcp_asset = AgentAsset( + asset_type=AgentAssetType.MCP.value, + code="mcp.ledger.snapshot_mock", + name="总账快照 Mock 服务", + description="模拟返回应收、应付和费用汇总快照,供 Agent 查询和巡检。", + domain=AgentAssetDomain.SYSTEM.value, + scenario_json=["expense", "accounts_receivable", "accounts_payable"], + owner="平台研发组", + reviewer="周悦宁", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + published_version="v1.0.0", + working_version="v1.0.0", + config_json={"endpoint": "mock://ledger/snapshot", "timeout_ms": 1500}, + ) + + task_asset = AgentAsset( + asset_type=AgentAssetType.TASK.value, + code="task.hermes.daily_risk_scan", + name="Hermes 每日风险巡检", + description="每天早上巡检重复报销、金额超标、逾期应收和异常付款。", + domain=AgentAssetDomain.SYSTEM.value, + scenario_json=["schedule", "risk_check"], + owner="风控与审计部", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + published_version="v1.0.0", + working_version="v1.0.0", + config_json={"cron": "0 9 * * *", "agent": AgentName.HERMES.value}, + ) + + ar_summary_task = AgentAsset( + asset_type=AgentAssetType.TASK.value, + code="task.hermes.weekly_ar_summary", + name="Hermes 每周应收账龄汇总", + 
description="每周汇总逾期应收、账龄分布和客户风险变化。", + domain=AgentAssetDomain.SYSTEM.value, + scenario_json=["schedule", "accounts_receivable", "summary"], + owner="风控与审计部", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + published_version="v1.0.0", + working_version="v1.0.0", + config_json={"cron": "0 10 * * 1", "agent": AgentName.HERMES.value}, + ) + + rule_digest_task = AgentAsset( + asset_type=AgentAssetType.TASK.value, + code="task.hermes.rule_review_digest", + name="Hermes 规则待审摘要", + description="每天汇总待审规则、待补样例和被拒规则修订建议。", + domain=AgentAssetDomain.SYSTEM.value, + scenario_json=["schedule", "rule_center", "review_digest"], + owner="风控与审计部", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + published_version="v1.0.0", + working_version="v1.0.0", + config_json={"cron": "0 18 * * *", "agent": AgentName.HERMES.value}, + ) + + knowledge_index_task = AgentAsset( + asset_type=AgentAssetType.TASK.value, + code="task.hermes.knowledge_index_sync", + name="Hermes ??????", + description="?????????? 
LightRAG ???????", + domain=AgentAssetDomain.SYSTEM.value, + scenario_json=["schedule", "knowledge", "rule_center"], + owner="财务制度管理组", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + published_version="v1.0.0", + working_version="v1.0.0", + config_json={"cron": "0 0 * * *", "agent": AgentName.HERMES.value}, + ) + + self.db.add_all( + [ + attachment_rule, + scene_submission_rule, + travel_policy_rule, + *platform_risk_assets, + company_travel_rule, + company_communication_rule, + skill_expense_asset, + skill_ar_asset, + invoice_mcp_asset, + ledger_mcp_asset, + task_asset, + ar_summary_task, + rule_digest_task, + knowledge_index_task, + ] + ) + + self.db.flush() + + company_travel_rule_meta = self._ensure_company_travel_rule_spreadsheet_seed( + company_travel_rule, + version=COMPANY_TRAVEL_RULE_VERSION, + actor_name="系统初始化", + ) + + company_communication_rule_meta = self._ensure_company_communication_rule_spreadsheet_seed( + company_communication_rule, + version=COMPANY_COMMUNICATION_RULE_VERSION, + actor_name="系统初始化", + ) + + self.db.add_all( + [ + AgentAssetVersion( + asset=attachment_rule, + version="v0.9.0", + content=self._attachment_submission_requirement_markdown( + version_note="首版附件完整性规则草稿,覆盖基础票据与补件口径。", + include_review_note=True, + ), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="首版草稿。", + created_by="高嘉禾", + ), + AgentAssetVersion( + asset=attachment_rule, + version="v1.0.0", + content=self._attachment_submission_requirement_markdown( + version_note="补充票据缺失、收据替代和差旅等效凭证口径,待审核。", + include_review_note=True, + ), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="补充票据替代与差旅等效凭证口径,待审核。", + created_by="高嘉禾", + ), + AgentAssetVersion( + asset=scene_submission_rule, + version="v1.0.0", + content=self._scene_submission_standard_markdown(), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="首版报销场景提交标准,覆盖附件类型、必填字段和金额阈值。", + created_by="系统初始化", + ), + AgentAssetVersion( + 
asset=travel_policy_rule, + version="v1.0.0", + content=self._travel_risk_control_standard_markdown(version="v1.0.0"), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="首版差旅制度执行规则,覆盖行程闭环与基础差标校验。", + created_by="系统初始化", + ), + AgentAssetVersion( + asset=travel_policy_rule, + version="v1.1.0", + content=self._travel_risk_control_standard_markdown(version="v1.1.0"), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="补充可执行规则块,供审核引擎直接消费差旅制度标准。", + created_by="系统初始化", + ), + *[ + AgentAssetVersion( + asset=asset, + version="v1.0.0", + content=self._platform_risk_rule_markdown(asset), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note=f"平台通用风险规则:{asset.name}", + created_by="系统初始化", + ) + for asset in platform_risk_assets + ], + AgentAssetVersion( + asset=company_travel_rule, + version=COMPANY_TRAVEL_RULE_VERSION, + content=AgentAssetSpreadsheetManager.build_version_markdown( + rule_name=company_travel_rule.name, + version=COMPANY_TRAVEL_RULE_VERSION, + metadata=company_travel_rule_meta, + ), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="初始化差旅费报销 Excel 规则表。", + created_by="系统初始化", + ), + AgentAssetVersion( + asset=company_communication_rule, + version=COMPANY_COMMUNICATION_RULE_VERSION, + content=AgentAssetSpreadsheetManager.build_version_markdown( + rule_name=company_communication_rule.name, + version=COMPANY_COMMUNICATION_RULE_VERSION, + metadata=company_communication_rule_meta, + ), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="初始化通信费报销 Excel 规则表。", + created_by="系统初始化", + ), + AgentAssetVersion( + asset=skill_expense_asset, + version="v1.0.0", + content=self._json_content( + { + "inputs": ["time_range", "employee", "department"], + "outputs": ["total_amount", "claim_count"], + "dependencies": ["database.expense_claims"], + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化技能快照。", + created_by="系统初始化", + ), + AgentAssetVersion( + asset=skill_ar_asset, + 
version="v1.0.0", + content=self._json_content( + { + "inputs": ["customer", "aging_bucket", "status"], + "outputs": ["receivable_total", "overdue_total", "customer_count"], + "dependencies": ["database.accounts_receivable"], + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化应收账龄技能快照。", + created_by="系统初始化", + ), + AgentAssetVersion( + asset=invoice_mcp_asset, + version="v1.0.0", + content=self._json_content( + { + "service_type": "mock", + "auth_mode": "none", + "degrade_strategy": "return_stub_with_warning", + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化 MCP 快照。", + created_by="系统初始化", + ), + AgentAssetVersion( + asset=ledger_mcp_asset, + version="v1.0.0", + content=self._json_content( + { + "service_type": "mock", + "auth_mode": "service_account", + "degrade_strategy": "return_cached_snapshot_with_warning", + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化总账快照 MCP。", + created_by="系统初始化", + ), + AgentAssetVersion( + asset=task_asset, + version="v1.0.0", + content=self._json_content( + { + "task_type": "daily_risk_scan", + "schedule": "0 9 * * *", + "target_agent": AgentName.HERMES.value, + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化任务快照。", + created_by="系统初始化", + ), + AgentAssetVersion( + asset=ar_summary_task, + version="v1.0.0", + content=self._json_content( + { + "task_type": "weekly_ar_summary", + "schedule": "0 10 * * 1", + "target_agent": AgentName.HERMES.value, + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化应收账龄汇总任务。", + created_by="系统初始化", + ), + AgentAssetVersion( + asset=rule_digest_task, + version="v1.0.0", + content=self._json_content( + { + "task_type": "rule_review_digest", + "schedule": "0 18 * * *", + "target_agent": AgentName.HERMES.value, + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化规则待审摘要任务。", + created_by="系统初始化", + ), + AgentAssetVersion( + asset=knowledge_index_task, + 
version="v1.0.0", + content=self._json_content( + { + "task_type": "knowledge_index_sync", + "schedule": "0 0 * * *", + "target_agent": AgentName.HERMES.value, + "folder": "报销制度", + "changed_only": True, + "index_engine": "lightrag", + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化制度知识与规则草稿形成任务。", + created_by="系统初始化", + ), + ] + ) + + self.db.add_all( + [ + AgentAssetReview( + asset=attachment_rule, + version="v1.0.0", + reviewer="高嘉禾", + review_status=AgentReviewStatus.PENDING.value, + review_note="等待制度管理员确认收据替代与补件时限口径。", + reviewed_at=None, + ), + AgentAssetReview( + asset=scene_submission_rule, + version="v1.0.0", + reviewer="顾承宇", + review_status=AgentReviewStatus.APPROVED.value, + review_note="可作为报销场景统一审核标准正式执行。", + reviewed_at=datetime.now(UTC), + ), + AgentAssetReview( + asset=travel_policy_rule, + version="v1.1.0", + reviewer="顾承宇", + review_status=AgentReviewStatus.APPROVED.value, + review_note="制度口径已确认,并已补充可执行配置供审核引擎读取。", + reviewed_at=datetime.now(UTC), + ), + AgentAssetReview( + asset=company_travel_rule, + version=COMPANY_TRAVEL_RULE_VERSION, + reviewer="顾承宇", + review_status=AgentReviewStatus.APPROVED.value, + review_note="首版 Excel 规则表已确认,可作为财务规则使用。", + reviewed_at=datetime.now(UTC), + ), + AgentAssetReview( + asset=company_communication_rule, + version=COMPANY_COMMUNICATION_RULE_VERSION, + reviewer="顾承宇", + review_status=AgentReviewStatus.APPROVED.value, + review_note="首版 Excel 规则表已确认,可作为财务规则使用。", + reviewed_at=datetime.now(UTC), + ), + ] + ) diff --git a/server/src/app/services/agent_foundation_asset_topup.py b/server/src/app/services/agent_foundation_asset_topup.py new file mode 100644 index 0000000..9694ca5 --- /dev/null +++ b/server/src/app/services/agent_foundation_asset_topup.py @@ -0,0 +1,667 @@ +from __future__ import annotations + +import hashlib +import json +from datetime import UTC, date, datetime +from decimal import Decimal +from pathlib import Path + +from sqlalchemy import inspect, select, text + +from 
app.core.agent_enums import ( + AgentAssetContentType, + AgentAssetDomain, + AgentAssetStatus, + AgentAssetType, + AgentName, + AgentPermissionLevel, + AgentReviewStatus, + AgentRunSource, + AgentRunStatus, + AgentToolType, +) +from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion +from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog +from app.models.audit_log import AuditLog +from app.models.financial_record import ( + AccountsPayableRecord, + AccountsReceivableRecord, + ExpenseClaim, + ExpenseClaimItem, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import ( + AgentAssetSpreadsheetManager, + COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, + COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME, + COMPANY_TRAVEL_EXPENSE_RULE_CODE, + COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, + FINANCE_RULES_LIBRARY, + RISK_RULES_LIBRARY, +) +from app.services.expense_rule_runtime import ( + build_scene_submission_standard_markdown, + build_travel_risk_control_standard_markdown, +) +from app.services.agent_foundation_constants import ( + ATTACHMENT_RULE_ASSET_CODE, + ATTACHMENT_RULE_RUNTIME_CONFIG, + COMPANY_COMMUNICATION_RULE_SCENARIO_JSON, + COMPANY_COMMUNICATION_RULE_VERSION, + COMPANY_TRAVEL_RULE_SCENARIO_JSON, + COMPANY_TRAVEL_RULE_VERSION, + DEMO_EXPENSE_CLAIM_SIGNATURES, + DEMO_PAYABLE_SIGNATURES, + DEMO_RECEIVABLE_SIGNATURES, + LEGACY_RULE_CODES, + PLATFORM_DESTINATION_LOCATION_RULE_FILENAME, +) +from app.core.logging import get_logger + +logger = get_logger("app.services.agent_foundation") + + +class AgentFoundationAssetTopUpMixin: + def _top_up_agent_assets(self, existing_codes: set[str]) -> None: + + self._remove_legacy_rule_assets() + + existing_codes = set(self.db.scalars(select(AgentAsset.code)).all()) + + attachment_rule = self.db.scalar( + select(AgentAsset).where(AgentAsset.code == ATTACHMENT_RULE_ASSET_CODE) + ) + + scene_submission_rule = self.db.scalar( + 
select(AgentAsset).where(AgentAsset.code == "rule.expense.scene_submission_standard") + ) + + travel_policy_rule = self.db.scalar( + select(AgentAsset).where(AgentAsset.code == "rule.expense.travel_risk_control_standard") + ) + + company_travel_rule = self.db.scalar( + select(AgentAsset).where(AgentAsset.code == COMPANY_TRAVEL_EXPENSE_RULE_CODE) + ) + + company_communication_rule = self.db.scalar( + select(AgentAsset).where(AgentAsset.code == COMPANY_COMMUNICATION_EXPENSE_RULE_CODE) + ) + + if ATTACHMENT_RULE_ASSET_CODE not in existing_codes: + + attachment_rule = self._create_seed_asset( + asset_type=AgentAssetType.RULE.value, + code=ATTACHMENT_RULE_ASSET_CODE, + name="报销附件与单据完整性规则", + description="统一定义报销提交时的附件数量、票据类型和补件处理口径,作为上线前待审核规则。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=["expense", "risk_check", "attachment_policy", "invoice_anomaly"], + owner="财务制度管理组", + reviewer="高嘉禾", + status=AgentAssetStatus.REVIEW.value, + current_version="v1.0.0", + config_json={ + "severity": "high", + "enabled": False, + "runtime_kind": "policy_rule_draft", + "rule_template_key": "attachment_requirement_v1", + "rule_template_label": "附件要求模板", + "runtime_rule": ATTACHMENT_RULE_RUNTIME_CONFIG, + }, + ) + + if attachment_rule is not None: + + if not str(attachment_rule.current_version or "").strip(): + + attachment_rule.current_version = "v1.0.0" + + if not str(attachment_rule.working_version or "").strip(): + + attachment_rule.working_version = attachment_rule.current_version + + attachment_rule.status = attachment_rule.status or AgentAssetStatus.REVIEW.value + + attachment_rule.description = ( + "统一定义报销提交时的附件数量、票据类型和补件处理口径,作为上线前待审核规则。" + ) + + attachment_rule.config_json = { + "severity": "high", + "enabled": False, + "runtime_kind": "policy_rule_draft", + "rule_template_key": "attachment_requirement_v1", + "rule_template_label": "附件要求模板", + "runtime_rule": ATTACHMENT_RULE_RUNTIME_CONFIG, + } + + self._ensure_asset_version( + attachment_rule, + version="v0.9.0", + 
content=self._attachment_submission_requirement_markdown( + version_note="首版附件完整性规则草稿,覆盖基础票据与补件口径。", + include_review_note=True, + ), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="首版草稿。", + created_by="高嘉禾", + ) + + self._ensure_asset_version( + attachment_rule, + version="v1.0.0", + content=self._attachment_submission_requirement_markdown( + version_note="补充票据缺失、收据替代和差旅等效凭证口径,待审核。", + include_review_note=True, + ), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="补充票据替代与差旅等效凭证口径,待审核。", + created_by="高嘉禾", + ) + + self._ensure_asset_review( + attachment_rule, + version="v1.0.0", + reviewer="高嘉禾", + review_status=AgentReviewStatus.PENDING.value, + review_note="等待制度管理员确认收据替代与补件时限口径。", + reviewed_at=None, + ) + + if "rule.expense.scene_submission_standard" not in existing_codes: + + scene_submission_rule = self._create_seed_asset( + asset_type=AgentAssetType.RULE.value, + code="rule.expense.scene_submission_standard", + name="报销场景提交与附件标准", + description="统一定义各报销场景的必填字段、附件类型要求和金额阈值。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=["expense", "risk_check", "scene_policy", "attachment_policy"], + owner="费用运营组", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + config_json={ + "severity": "high", + "enabled": True, + "runtime_kind": "scene_matrix", + "rule_template_label": "系统内置场景矩阵规则", + }, + ) + + if scene_submission_rule is not None: + + if not str(scene_submission_rule.current_version or "").strip(): + + scene_submission_rule.current_version = "v1.0.0" + + if not str(scene_submission_rule.working_version or "").strip(): + + scene_submission_rule.working_version = scene_submission_rule.current_version + + if not str(scene_submission_rule.published_version or "").strip(): + + scene_submission_rule.published_version = scene_submission_rule.current_version + + scene_submission_rule.status = ( + scene_submission_rule.status or AgentAssetStatus.ACTIVE.value + ) + + 
scene_submission_rule.description = ( + "统一定义各报销场景的必填字段、附件类型要求和金额阈值。" + ) + + scene_submission_rule.config_json = { + "severity": "high", + "enabled": True, + "runtime_kind": "scene_matrix", + "rule_template_label": "系统内置场景矩阵规则", + } + + self._ensure_asset_version( + scene_submission_rule, + version="v1.0.0", + content=self._scene_submission_standard_markdown(), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="首版报销场景提交标准,覆盖附件类型、必填字段和金额阈值。", + created_by="系统初始化", + ) + + self._ensure_asset_review( + scene_submission_rule, + version="v1.0.0", + reviewer="顾承宇", + review_status=AgentReviewStatus.APPROVED.value, + review_note="可作为报销场景统一审核标准正式执行。", + reviewed_at=datetime.now(UTC), + ) + + if "rule.expense.travel_risk_control_standard" not in existing_codes: + + travel_policy_rule = self._create_seed_asset( + asset_type=AgentAssetType.RULE.value, + code="rule.expense.travel_risk_control_standard", + name="差旅报销风险管控制度", + description="统一定义差旅报销的行程闭环、酒店地点一致性、职级差标和风险处置口径。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=["expense", "risk_check", "travel_policy", "travel_standard"], + owner="风控与审计部", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.1.0", + config_json={ + "severity": "high", + "enabled": True, + "block_on_high_risk": True, + "warning_on_medium_risk": True, + "source_doc": "document/development/risks/travel-risk-control-standard.md", + "runtime_kind": "travel_policy", + "rule_template_key": "travel_standard_v1", + "rule_template_label": "差旅标准模板", + }, + ) + + if travel_policy_rule is not None: + + if not str(travel_policy_rule.current_version or "").strip(): + + travel_policy_rule.current_version = "v1.1.0" + + if not str(travel_policy_rule.working_version or "").strip(): + + travel_policy_rule.working_version = travel_policy_rule.current_version + + if not str(travel_policy_rule.published_version or "").strip(): + + travel_policy_rule.published_version = travel_policy_rule.current_version + + 
travel_policy_rule.status = travel_policy_rule.status or AgentAssetStatus.ACTIVE.value + + travel_policy_rule.config_json = { + "severity": "high", + "enabled": True, + "block_on_high_risk": True, + "warning_on_medium_risk": True, + "source_doc": "document/development/risks/travel-risk-control-standard.md", + "runtime_kind": "travel_policy", + "rule_template_key": "travel_standard_v1", + "rule_template_label": "差旅标准模板", + } + + self._ensure_asset_version( + travel_policy_rule, + version="v1.0.0", + content=self._travel_risk_control_standard_markdown(version="v1.0.0"), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="首版差旅制度执行规则,覆盖行程闭环与基础差标校验。", + created_by="系统初始化", + ) + + self._ensure_asset_version( + travel_policy_rule, + version="v1.1.0", + content=self._travel_risk_control_standard_markdown(version="v1.1.0"), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="补充可执行规则块,供审核引擎直接消费差旅制度标准。", + created_by="系统初始化", + ) + + self._ensure_asset_review( + travel_policy_rule, + version="v1.1.0", + reviewer="顾承宇", + review_status=AgentReviewStatus.APPROVED.value, + review_note="制度口径已确认,并已补充可执行配置供审核引擎读取。", + reviewed_at=datetime.now(UTC), + ) + + self.sync_platform_risk_rules_from_library() + + if COMPANY_TRAVEL_EXPENSE_RULE_CODE not in existing_codes: + + company_travel_rule = self._create_seed_asset( + asset_type=AgentAssetType.RULE.value, + code=COMPANY_TRAVEL_EXPENSE_RULE_CODE, + name="公司差旅费报销规则", + description="通过 Excel 明细表维护差旅费报销标准、票据要求和审批口径。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=list(COMPANY_TRAVEL_RULE_SCENARIO_JSON), + owner="财务制度管理组", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version=COMPANY_TRAVEL_RULE_VERSION, + config_json={ + "severity": "medium", + "enabled": True, + "tag": "财务规则", + "detail_mode": "spreadsheet", + "scenario_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], + "ai_review_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], + "rule_template_label": "差旅报销 Excel 模板", + 
}, + ) + if COMPANY_COMMUNICATION_EXPENSE_RULE_CODE not in existing_codes: + + company_communication_rule = self._create_seed_asset( + asset_type=AgentAssetType.RULE.value, + code=COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, + name="公司通信费报销规则", + description="通过 Excel 明细表维护员工通信费报销标准、专项补充口径和审批要求。", + domain=AgentAssetDomain.EXPENSE.value, + scenario_json=list(COMPANY_COMMUNICATION_RULE_SCENARIO_JSON), + owner="财务制度管理组", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version=COMPANY_COMMUNICATION_RULE_VERSION, + config_json={ + "severity": "medium", + "enabled": True, + "tag": "财务规则", + "detail_mode": "spreadsheet", + "scenario_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], + "ai_review_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], + "rule_template_label": "通信费报销 Excel 模板", + }, + ) + + if company_travel_rule is not None: + company_travel_rule.scenario_json = list(COMPANY_TRAVEL_RULE_SCENARIO_JSON) + if not str(company_travel_rule.current_version or "").strip(): + company_travel_rule.current_version = COMPANY_TRAVEL_RULE_VERSION + if not str(company_travel_rule.working_version or "").strip(): + + company_travel_rule.working_version = company_travel_rule.current_version + + if not str(company_travel_rule.published_version or "").strip(): + + company_travel_rule.published_version = company_travel_rule.current_version + + if not str(company_travel_rule.status or "").strip(): + + company_travel_rule.status = AgentAssetStatus.ACTIVE.value + + company_travel_rule.description = ( + "通过 Excel 明细表维护差旅费报销标准、票据要求和审批口径。" + ) + + company_travel_rule.config_json = { + **(company_travel_rule.config_json or {}), + "severity": "medium", + "enabled": True, + "tag": "财务规则", + "detail_mode": "spreadsheet", + "rule_library": FINANCE_RULES_LIBRARY, + "scenario_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], + "ai_review_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0], + "rule_template_label": "差旅报销 Excel 模板", + } + company_travel_rule_meta = 
self._ensure_company_travel_rule_spreadsheet_seed( + company_travel_rule, + version=str(company_travel_rule.current_version or COMPANY_TRAVEL_RULE_VERSION), + actor_name="系统初始化", + ) + + self._ensure_asset_version( + company_travel_rule, + version=str(company_travel_rule.current_version or COMPANY_TRAVEL_RULE_VERSION), + content=AgentAssetSpreadsheetManager.build_version_markdown( + rule_name=company_travel_rule.name, + version=str(company_travel_rule.current_version or COMPANY_TRAVEL_RULE_VERSION), + metadata=company_travel_rule_meta, + ), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="初始化差旅费报销 Excel 规则表。", + created_by="系统初始化", + ) + + if ( + str(company_travel_rule.current_version or "").strip() + == COMPANY_TRAVEL_RULE_VERSION + ): + + self._ensure_asset_review( + company_travel_rule, + version=COMPANY_TRAVEL_RULE_VERSION, + reviewer="顾承宇", + review_status=AgentReviewStatus.APPROVED.value, + review_note="首版 Excel 规则表已确认,可作为财务规则使用。", + reviewed_at=datetime.now(UTC), + ) + + if company_communication_rule is not None: + company_communication_rule.scenario_json = list( + COMPANY_COMMUNICATION_RULE_SCENARIO_JSON + ) + if not str(company_communication_rule.current_version or "").strip(): + company_communication_rule.current_version = COMPANY_COMMUNICATION_RULE_VERSION + if not str(company_communication_rule.working_version or "").strip(): + + company_communication_rule.working_version = ( + company_communication_rule.current_version + ) + + if not str(company_communication_rule.published_version or "").strip(): + + company_communication_rule.published_version = ( + company_communication_rule.current_version + ) + + if not str(company_communication_rule.status or "").strip(): + + company_communication_rule.status = AgentAssetStatus.ACTIVE.value + + company_communication_rule.description = ( + "通过 Excel 明细表维护员工通信费报销标准、专项补充口径和审批要求。" + ) + + company_communication_rule.config_json = { + **(company_communication_rule.config_json or {}), + "severity": 
"medium", + "enabled": True, + "tag": "财务规则", + "detail_mode": "spreadsheet", + "rule_library": FINANCE_RULES_LIBRARY, + "scenario_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], + "ai_review_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0], + "rule_template_label": "通信费报销 Excel 模板", + } + company_communication_rule_meta = ( + self._ensure_company_communication_rule_spreadsheet_seed( + company_communication_rule, + version=str( + company_communication_rule.current_version + or COMPANY_COMMUNICATION_RULE_VERSION + ), + actor_name="系统初始化", + ) + ) + + self._ensure_asset_version( + company_communication_rule, + version=str( + company_communication_rule.current_version or COMPANY_COMMUNICATION_RULE_VERSION + ), + content=AgentAssetSpreadsheetManager.build_version_markdown( + rule_name=company_communication_rule.name, + version=str( + company_communication_rule.current_version + or COMPANY_COMMUNICATION_RULE_VERSION + ), + metadata=company_communication_rule_meta, + ), + content_type=AgentAssetContentType.MARKDOWN.value, + change_note="初始化通信费报销 Excel 规则表。", + created_by="系统初始化", + ) + + if ( + str(company_communication_rule.current_version or "").strip() + == COMPANY_COMMUNICATION_RULE_VERSION + ): + + self._ensure_asset_review( + company_communication_rule, + version=COMPANY_COMMUNICATION_RULE_VERSION, + reviewer="顾承宇", + review_status=AgentReviewStatus.APPROVED.value, + review_note="首版 Excel 规则表已确认,可作为财务规则使用。", + reviewed_at=datetime.now(UTC), + ) + + if "skill.ar.aging_summary" not in existing_codes: + + asset = self._create_seed_asset( + asset_type=AgentAssetType.SKILL.value, + code="skill.ar.aging_summary", + name="应收账龄汇总技能", + description="按客户、账龄和逾期状态汇总应收风险分布。", + domain=AgentAssetDomain.AR.value, + scenario_json=["accounts_receivable", "query", "aging_summary"], + owner="平台研发组", + reviewer="陈硕", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + config_json={"input_schema": ["customer", "aging_bucket", "status"]}, + ) + + 
self._ensure_asset_version( + asset, + version="v1.0.0", + content=self._json_content( + { + "inputs": ["customer", "aging_bucket", "status"], + "outputs": ["receivable_total", "overdue_total", "customer_count"], + "dependencies": ["database.accounts_receivable"], + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化应收账龄技能快照。", + created_by="系统初始化", + ) + + if "mcp.ledger.snapshot_mock" not in existing_codes: + + asset = self._create_seed_asset( + asset_type=AgentAssetType.MCP.value, + code="mcp.ledger.snapshot_mock", + name="总账快照 Mock 服务", + description="模拟返回应收、应付和费用汇总快照,供 Agent 查询和巡检。", + domain=AgentAssetDomain.SYSTEM.value, + scenario_json=["expense", "accounts_receivable", "accounts_payable"], + owner="平台研发组", + reviewer="周悦宁", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + config_json={"endpoint": "mock://ledger/snapshot", "timeout_ms": 1500}, + ) + + self._ensure_asset_version( + asset, + version="v1.0.0", + content=self._json_content( + { + "service_type": "mock", + "auth_mode": "service_account", + "degrade_strategy": "return_cached_snapshot_with_warning", + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化总账快照 MCP。", + created_by="系统初始化", + ) + + if "task.hermes.weekly_ar_summary" not in existing_codes: + + asset = self._create_seed_asset( + asset_type=AgentAssetType.TASK.value, + code="task.hermes.weekly_ar_summary", + name="Hermes 每周应收账龄汇总", + description="每周汇总逾期应收、账龄分布和客户风险变化。", + domain=AgentAssetDomain.SYSTEM.value, + scenario_json=["schedule", "accounts_receivable", "summary"], + owner="风控与审计部", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + config_json={"cron": "0 10 * * 1", "agent": AgentName.HERMES.value}, + ) + + self._ensure_asset_version( + asset, + version="v1.0.0", + content=self._json_content( + { + "task_type": "weekly_ar_summary", + "schedule": "0 10 * * 1", + "target_agent": AgentName.HERMES.value, + } + ), + 
content_type=AgentAssetContentType.JSON.value, + change_note="初始化应收账龄汇总任务。", + created_by="系统初始化", + ) + + if "task.hermes.rule_review_digest" not in existing_codes: + + asset = self._create_seed_asset( + asset_type=AgentAssetType.TASK.value, + code="task.hermes.rule_review_digest", + name="Hermes 规则待审摘要", + description="每天汇总待审规则、待补样例和被拒规则修订建议。", + domain=AgentAssetDomain.SYSTEM.value, + scenario_json=["schedule", "rule_center", "review_digest"], + owner="风控与审计部", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + config_json={"cron": "0 18 * * *", "agent": AgentName.HERMES.value}, + ) + + self._ensure_asset_version( + asset, + version="v1.0.0", + content=self._json_content( + { + "task_type": "rule_review_digest", + "schedule": "0 18 * * *", + "target_agent": AgentName.HERMES.value, + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化规则待审摘要任务。", + created_by="系统初始化", + ) + + if "task.hermes.knowledge_index_sync" not in existing_codes: + + asset = self._create_seed_asset( + asset_type=AgentAssetType.TASK.value, + code="task.hermes.knowledge_index_sync", + name="Hermes ??????", + description="?????????? 
LightRAG ???????", + domain=AgentAssetDomain.SYSTEM.value, + scenario_json=["schedule", "knowledge", "rule_center"], + owner="财务制度管理组", + reviewer="顾承宇", + status=AgentAssetStatus.ACTIVE.value, + current_version="v1.0.0", + config_json={"cron": "0 0 * * *", "agent": AgentName.HERMES.value}, + ) + + self._ensure_asset_version( + asset, + version="v1.0.0", + content=self._json_content( + { + "task_type": "knowledge_index_sync", + "schedule": "0 0 * * *", + "target_agent": AgentName.HERMES.value, + "folder": "报销制度", + "changed_only": True, + } + ), + content_type=AgentAssetContentType.JSON.value, + change_note="初始化制度知识与规则草稿形成任务。", + created_by="系统初始化", + ) diff --git a/server/src/app/services/agent_foundation_constants.py b/server/src/app/services/agent_foundation_constants.py new file mode 100644 index 0000000..9eb1663 --- /dev/null +++ b/server/src/app/services/agent_foundation_constants.py @@ -0,0 +1,207 @@ +from __future__ import annotations + +PLATFORM_DESTINATION_LOCATION_RULE_CODE = "risk.travel.destination_receipt_location" + +PLATFORM_DESTINATION_LOCATION_RULE_FILENAME = "risk.travel.destination_receipt_location.json" + +DEMO_EXPENSE_CLAIM_SIGNATURES = { + + ( + + "EXP-202605-001", + + "张三", + + "华南客户拜访差旅报销", + + "3280.00", + + "submitted", + + ), + + ( + + "EXP-202605-002", + + "李四", + + "客户路演餐费", + + "860.00", + + "approved", + + ), + + ( + + "EXP-202605-003", + + "王五", + + "市场活动会务差旅", + + "3280.00", + + "review", + + ), + +} + +DEMO_RECEIVABLE_SIGNATURES = { + + ("AR-202605-001", "客户A", "50000.00", "partial"), + + ("AR-202605-002", "客户B", "78000.00", "overdue"), + +} + +DEMO_PAYABLE_SIGNATURES = { + + ("AP-202605-001", "供应商A", "33000.00", "scheduled"), + + ("AP-202605-002", "供应商B", "96000.00", "overdue"), + +} + +LEGACY_RULE_CODES = ( + + "rule.expense.duplicate_expense_check", + + "rule.expense.travel_receipt_requirements", + + "rule.ap.payment_dual_review", + +) + +ATTACHMENT_RULE_ASSET_CODE = "rule.expense.attachment_submission_requirements" + 
+COMPANY_TRAVEL_RULE_VERSION = "v1.0.0" + +COMPANY_COMMUNICATION_RULE_VERSION = "v1.0.0" + +COMPANY_TRAVEL_RULE_SCENARIO_JSON = ("差旅",) + +COMPANY_COMMUNICATION_RULE_SCENARIO_JSON = ("费用科目",) + +ATTACHMENT_RULE_RUNTIME_CONFIG = { + + "kind": "policy_rule_draft", + + "version": 1, + + "template_key": "attachment_requirement_v1", + + "rule_name": "报销附件与单据完整性规则", + + "scenario": "attachment_policy", + + "source_document_name": "报销制度 / 单据与附件要求", + + "review_required": True, + + "target": { + + "expense_types": [ + + "travel", + + "hotel", + + "transport", + + "meal", + + "office", + + "meeting", + + "training", + + "communication", + + "welfare", + + "other", + + ], + + "scene_codes": ["expense", "attachment_policy", "invoice_anomaly"], + + }, + + "attachment_requirements": { + + "min_attachment_count": 1, + + "items": [ + + { + + "document_type": "vat_invoice", + + "required": True, + + "min_count": 1, + + "description": "金额类报销原则上必须提供合法票据。", + + }, + + { + + "document_type": "receipt", + + "required": False, + + "min_count": 1, + + "description": "特殊场景无发票时需补充收据与情况说明。", + + }, + + { + + "document_type": "flight_itinerary", + + "required": False, + + "min_count": 1, + + "description": "差旅交通报销需提供行程单或等效凭证。", + + }, + + { + + "document_type": "hotel_invoice", + + "required": False, + + "min_count": 1, + + "description": "住宿报销需提供酒店票据或等效住宿凭证。", + + }, + + ], + + "manual_fill_required": False, + + }, + + "missing_attachment_action": "block", + + "output": { + + "risk_code": "invoice_anomaly", + + "action": "block", + + "message": "附件或单据不完整,需补件后再提交。", + + }, + +} diff --git a/server/src/app/services/agent_foundation_financial_seed.py b/server/src/app/services/agent_foundation_financial_seed.py new file mode 100644 index 0000000..557d24b --- /dev/null +++ b/server/src/app/services/agent_foundation_financial_seed.py @@ -0,0 +1,726 @@ +from __future__ import annotations + +import hashlib +import json +from datetime import UTC, date, datetime +from decimal import Decimal +from 
pathlib import Path + +from sqlalchemy import inspect, select, text + +from app.core.agent_enums import ( + AgentAssetContentType, + AgentAssetDomain, + AgentAssetStatus, + AgentAssetType, + AgentName, + AgentPermissionLevel, + AgentReviewStatus, + AgentRunSource, + AgentRunStatus, + AgentToolType, +) +from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion +from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog +from app.models.audit_log import AuditLog +from app.models.financial_record import ( + AccountsPayableRecord, + AccountsReceivableRecord, + ExpenseClaim, + ExpenseClaimItem, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import ( + AgentAssetSpreadsheetManager, + COMPANY_COMMUNICATION_EXPENSE_RULE_CODE, + COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME, + COMPANY_TRAVEL_EXPENSE_RULE_CODE, + COMPANY_TRAVEL_EXPENSE_RULE_FILENAME, + FINANCE_RULES_LIBRARY, + RISK_RULES_LIBRARY, +) +from app.services.expense_rule_runtime import ( + build_scene_submission_standard_markdown, + build_travel_risk_control_standard_markdown, +) +from app.services.agent_foundation_constants import ( + ATTACHMENT_RULE_ASSET_CODE, + ATTACHMENT_RULE_RUNTIME_CONFIG, + COMPANY_COMMUNICATION_RULE_SCENARIO_JSON, + COMPANY_COMMUNICATION_RULE_VERSION, + COMPANY_TRAVEL_RULE_SCENARIO_JSON, + COMPANY_TRAVEL_RULE_VERSION, + DEMO_EXPENSE_CLAIM_SIGNATURES, + DEMO_PAYABLE_SIGNATURES, + DEMO_RECEIVABLE_SIGNATURES, + LEGACY_RULE_CODES, + PLATFORM_DESTINATION_LOCATION_RULE_FILENAME, +) +from app.core.logging import get_logger + +logger = get_logger("app.services.agent_foundation") + +class AgentFoundationFinancialSeedMixin: + def _seed_financial_records(self) -> None: + + if self.db.scalar(select(ExpenseClaim.id).limit(1)) is not None: + + return + + claim_1 = ExpenseClaim( + + claim_no="EXP-202605-001", + + employee_name="张三", + + department_name="财务共享中心", + + 
project_code="PRJ-EXP-01", + + expense_type="travel", + + reason="华南客户拜访差旅报销", + + location="深圳", + + amount=Decimal("3280.00"), + + currency="CNY", + + invoice_count=3, + + occurred_at=datetime(2026, 5, 6, 9, 0, tzinfo=UTC), + + submitted_at=datetime(2026, 5, 7, 10, 20, tzinfo=UTC), + + status="submitted", + + approval_stage="finance_review", + + risk_flags_json=["amount_over_limit"], + + ) + + claim_1.items = [ + + ExpenseClaimItem( + + item_date=date(2026, 5, 5), + + item_type="hotel", + + item_reason="客户拜访住宿", + + item_location="深圳", + + item_amount=Decimal("1880.00"), + + invoice_id="INV-HOTEL-001", + + ), + + ExpenseClaimItem( + + item_date=date(2026, 5, 6), + + item_type="transport", + + item_reason="往返交通", + + item_location="深圳", + + item_amount=Decimal("1400.00"), + + invoice_id="INV-TRANS-009", + + ), + + ] + + claim_2 = ExpenseClaim( + + claim_no="EXP-202605-002", + + employee_name="李四", + + department_name="华东销售部", + + project_code="PRJ-SALES-02", + + expense_type="meal", + + reason="客户路演餐费", + + location="上海", + + amount=Decimal("860.00"), + + currency="CNY", + + invoice_count=1, + + occurred_at=datetime(2026, 5, 8, 12, 0, tzinfo=UTC), + + submitted_at=datetime(2026, 5, 8, 18, 30, tzinfo=UTC), + + status="approved", + + approval_stage="completed", + + risk_flags_json=[], + + ) + + claim_3 = ExpenseClaim( + + claim_no="EXP-202605-003", + + employee_name="王五", + + department_name="市场品牌部", + + project_code="PRJ-MKT-08", + + expense_type="travel", + + reason="市场活动会务差旅", + + location="北京", + + amount=Decimal("3280.00"), + + currency="CNY", + + invoice_count=2, + + occurred_at=datetime(2026, 5, 6, 11, 30, tzinfo=UTC), + + submitted_at=datetime(2026, 5, 8, 9, 10, tzinfo=UTC), + + status="review", + + approval_stage="risk_check", + + risk_flags_json=["duplicate_expense"], + + ) + + ar_records = [ + + AccountsReceivableRecord( + + receivable_no="AR-202605-001", + + customer_id="CUS-A", + + customer_name="客户A", + + contract_no="CTR-AR-1001", + + 
invoice_no="INV-AR-9001", + + amount_receivable=Decimal("120000.00"), + + amount_received=Decimal("70000.00"), + + amount_outstanding=Decimal("50000.00"), + + currency="CNY", + + posting_date=date(2026, 4, 1), + + due_date=date(2026, 4, 30), + + aging_days=11, + + status="partial", + + risk_flags_json=[], + + ), + + AccountsReceivableRecord( + + receivable_no="AR-202605-002", + + customer_id="CUS-B", + + customer_name="客户B", + + contract_no="CTR-AR-1002", + + invoice_no="INV-AR-9002", + + amount_receivable=Decimal("88000.00"), + + amount_received=Decimal("10000.00"), + + amount_outstanding=Decimal("78000.00"), + + currency="CNY", + + posting_date=date(2026, 3, 15), + + due_date=date(2026, 4, 15), + + aging_days=26, + + status="overdue", + + risk_flags_json=["ar_overdue"], + + ), + + ] + + ap_records = [ + + AccountsPayableRecord( + + payable_no="AP-202605-001", + + vendor_id="VEN-A", + + vendor_name="供应商A", + + invoice_no="INV-AP-5001", + + amount_payable=Decimal("43000.00"), + + amount_paid=Decimal("10000.00"), + + amount_outstanding=Decimal("33000.00"), + + currency="CNY", + + posting_date=date(2026, 4, 20), + + due_date=date(2026, 5, 12), + + aging_days=0, + + status="scheduled", + + risk_flags_json=[], + + ), + + AccountsPayableRecord( + + payable_no="AP-202605-002", + + vendor_id="VEN-B", + + vendor_name="供应商B", + + invoice_no="INV-AP-5002", + + amount_payable=Decimal("96000.00"), + + amount_paid=Decimal("0.00"), + + amount_outstanding=Decimal("96000.00"), + + currency="CNY", + + posting_date=date(2026, 4, 10), + + due_date=date(2026, 5, 5), + + aging_days=6, + + status="overdue", + + risk_flags_json=["ap_overdue"], + + ), + + ] + + self.db.add_all([claim_1, claim_2, claim_3, *ar_records, *ap_records]) + + def _purge_demo_financial_records(self) -> None: + + demo_claims = list(self.db.scalars(select(ExpenseClaim)).all()) + + for claim in demo_claims: + + signature = ( + + str(claim.claim_no or "").strip(), + + str(claim.employee_name or "").strip(), + + 
    def _seed_runs_and_logs(self) -> None:
        """Seed demo agent runs, tool calls, semantic parse logs and audit logs.

        Does nothing when at least one ``AgentRun`` row already exists.
        Otherwise it stages three fixed runs (a user-agent query, a scheduled
        Hermes scan and a blocked orchestrator request), flushes them, then
        stages three tool calls and two semantic parse logs that reference
        those runs by ``run_id``. Finally, four audit-log rows are staged when
        the audit-log table is empty.

        Nothing is committed here; rows are only added to ``self.db``.
        """
        # Idempotency guard: any existing run means the demo data was seeded
        # (or real data exists), so leave the tables untouched.
        # NOTE(review): this early return also skips the audit-log seeding
        # below, even when the audit-log table is empty — confirm intended.
        if self.db.scalar(select(AgentRun.id).limit(1)) is not None:
            return
        # The scheduled Hermes run is linked to the daily risk-scan task asset
        # when that asset is present; otherwise its task_id stays None.
        task_asset = self.db.scalar(
            select(AgentAsset).where(AgentAsset.code == "task.hermes.daily_risk_scan")
        )
        # Run 1: successful read-only user query.
        user_run = AgentRun(
            run_id="run_user_20260511_001",
            agent=AgentName.USER_AGENT.value,
            source=AgentRunSource.USER_MESSAGE.value,
            user_id="emp_001",
            task_id=None,
            ontology_json={"scenario": "expense", "intent": "query"},
            route_json={"selected_agent": AgentName.USER_AGENT.value, "route_reason": "user query"},
            permission_level=AgentPermissionLevel.READ.value,
            status=AgentRunStatus.SUCCEEDED.value,
            result_summary="已返回本周报销金额和风险摘要。",
            started_at=datetime(2026, 5, 11, 8, 35, tzinfo=UTC),
            finished_at=datetime(2026, 5, 11, 8, 35, 2, tzinfo=UTC),
        )
        # Run 2: successful scheduled Hermes risk scan (no user).
        hermes_run = AgentRun(
            run_id="run_hermes_20260511_001",
            agent=AgentName.HERMES.value,
            source=AgentRunSource.SCHEDULE.value,
            user_id=None,
            task_id=task_asset.id if task_asset else None,
            ontology_json={"scenario": "expense", "intent": "risk_check"},
            route_json={
                "selected_agent": AgentName.HERMES.value,
                "route_reason": "scheduled risk scan",
            },
            permission_level=AgentPermissionLevel.READ.value,
            status=AgentRunStatus.SUCCEEDED.value,
            result_summary="Hermes 已生成今日风险巡检摘要。",
            started_at=datetime(2026, 5, 11, 9, 0, tzinfo=UTC),
            finished_at=datetime(2026, 5, 11, 9, 0, 4, tzinfo=UTC),
        )
        # Run 3: payment request that was blocked pending approval.
        blocked_run = AgentRun(
            run_id="run_user_20260511_002",
            agent=AgentName.ORCHESTRATOR.value,
            source=AgentRunSource.USER_MESSAGE.value,
            user_id="emp_002",
            task_id=None,
            ontology_json={"scenario": "accounts_payable", "intent": "operate"},
            route_json={
                "selected_agent": AgentName.USER_AGENT.value,
                "route_reason": "payment request",
            },
            permission_level=AgentPermissionLevel.APPROVAL_REQUIRED.value,
            status=AgentRunStatus.BLOCKED.value,
            result_summary="动作需要人工确认。",
            error_message="直接付款属于高风险动作,已阻断自动执行。",
            started_at=datetime(2026, 5, 11, 10, 5, tzinfo=UTC),
            finished_at=datetime(2026, 5, 11, 10, 5, 1, tzinfo=UTC),
        )
        self.db.add_all([user_run, hermes_run, blocked_run])
        # Flush before adding the rows that reference run_id.
        # NOTE(review): presumably so the run rows exist before their
        # dependents are written — confirm against the model constraints.
        self.db.flush()
        self.db.add_all(
            [
                AgentToolCall(
                    run_id=user_run.run_id,
                    tool_type=AgentToolType.DATABASE.value,
                    tool_name="expense_claims.lookup",
                    request_json={"time_range": "this_week", "employee": "all"},
                    response_json={"claim_count": 3, "total_amount": "7420.00"},
                    status="succeeded",
                    duration_ms=48,
                ),
                # Deliberately a failed call: the sample carries an error message
                # and a fallback note in its response payload.
                AgentToolCall(
                    run_id=hermes_run.run_id,
                    tool_type=AgentToolType.MCP.value,
                    tool_name="invoice.verify_mock",
                    request_json={"claim_no": "EXP-202605-003"},
                    response_json={
                        "warning": "external service degraded",
                        "fallback": "used mock response",
                    },
                    status="failed",
                    duration_ms=132,
                    error_message="mock upstream timeout",
                ),
                AgentToolCall(
                    run_id=blocked_run.run_id,
                    tool_type=AgentToolType.RULE_ENGINE.value,
                    tool_name="permission.guard",
                    request_json={"action": "direct_payment"},
                    response_json={"requires_confirmation": True},
                    status="succeeded",
                    duration_ms=5,
                ),
                # NOTE(review): this log records intent "risk_check" while the
                # matching run's ontology_json above says "query" — confirm
                # whether the mismatch in the demo data is intentional.
                SemanticParseLog(
                    run_id=user_run.run_id,
                    user_id="emp_001",
                    raw_query="查一下本周报销超标风险",
                    scenario="expense",
                    intent="risk_check",
                    entities_json=[],
                    time_range_json={"start_date": "2026-05-11", "end_date": "2026-05-17"},
                    metrics_json=["amount"],
                    constraints_json=[],
                    risk_flags_json=["amount_over_limit"],
                    permission_json={"level": AgentPermissionLevel.READ.value},
                    confidence=0.93,
                ),
                SemanticParseLog(
                    run_id=blocked_run.run_id,
                    user_id="emp_002",
                    raw_query="帮我直接付款给供应商B",
                    scenario="accounts_payable",
                    intent="operate",
                    entities_json=[{"type": "vendor", "value": "供应商B"}],
                    time_range_json={},
                    metrics_json=["amount"],
                    constraints_json=[],
                    risk_flags_json=["ap_overdue"],
                    permission_json={"level": AgentPermissionLevel.APPROVAL_REQUIRED.value},
                    confidence=0.96,
                ),
            ]
        )
        # Audit logs have their own emptiness guard, so existing audit history
        # is never mixed with the demo rows.
        if self.db.scalar(select(AuditLog.id).limit(1)) is None:
            self.db.add_all(
                [
                    AuditLog(
                        actor="系统初始化",
                        action="save_rule_markdown",
                        resource_type="rule",
                        resource_id=ATTACHMENT_RULE_ASSET_CODE,
                        before_json=None,
                        after_json={"version": "v1.0.0"},
                        request_id="seed-audit-001",
                    ),
                    # before/after are both "pending" in this sample row.
                    AuditLog(
                        actor="高嘉禾",
                        action="review_rule",
                        resource_type="rule",
                        resource_id=ATTACHMENT_RULE_ASSET_CODE,
                        before_json={"review_status": "pending"},
                        after_json={"review_status": "pending"},
                        request_id="seed-audit-002",
                    ),
                    AuditLog(
                        actor="系统初始化",
                        action="activate_rule",
                        resource_type="rule",
                        resource_id="rule.expense.scene_submission_standard",
                        before_json={"status": "draft"},
                        after_json={"status": "active"},
                        request_id="seed-audit-003",
                    ),
                    AuditLog(
                        actor="Hermes",
                        action="update_task_status",
                        resource_type="task",
                        resource_id="task.hermes.daily_risk_scan",
                        before_json={"status": "idle"},
                        after_json={"status": "succeeded"},
                        request_id="seed-audit-004",
                    ),
                ]
            )
class AgentFoundationMarkdownMixin:
    """Builders for the Markdown / JSON text stored as seed asset content."""

    def _attachment_submission_requirement_markdown(
        self,
        *,
        version_note: str,
        include_review_note: bool,
    ) -> str:
        """Render the attachment-completeness rule document as Markdown.

        Args:
            version_note: Free text interpolated into the version bullet of
                the review-constraints section.
            include_review_note: When true, a fenced ``expense-rule`` block
                holding ``ATTACHMENT_RULE_RUNTIME_CONFIG`` as JSON is appended
                after the fixed sections.

        Returns:
            The document as one newline-joined string, with no trailing
            newline.
        """
        body = (
            "# 报销附件与单据完整性规则",
            "",
            "## 模板信息",
            "",
            "- 模板键:`attachment_requirement_v1`",
            "- 来源文档:报销制度 / 单据与附件要求",
            "- 审核状态:待审核",
            "",
            "## 目标",
            "",
            "统一约束报销提交时的票据、附件与替代凭证要求,避免缺件、错件和无依据流转。",
            "",
            "## 适用范围",
            "",
            "适用于员工报销提交场景,重点覆盖差旅、住宿、交通、餐费、办公和其他费用的附件校验。",
            "",
            "## 输入字段",
            "",
            "- expense_type",
            "- attachments",
            "- invoice_count",
            "- reason",
            "",
            "## 判断规则",
            "",
            "- 报销提交前至少需要 1 份有效附件。",
            "- 金额类报销原则上应提供合法票据;特殊场景无发票时,必须补充收据与情况说明。",
            "- 差旅交通报销需提供行程单或等效凭证;住宿报销需提供酒店票据或等效住宿凭证。",
            "- 缺少必要附件时直接拦截,并提示补件后重新提交。",
            "",
            "## 输出",
            "",
            "- 风险编码:`invoice_anomaly`",
            "- 默认动作:`block`",
            "- 处理说明:附件或单据不完整时退回补充。",
            "",
            "## 来源依据",
            "",
            "- 报销制度对票据、附件、替代凭证和补件要求的统一约束。",
            "",
            "## 审核约束",
            "",
            "- 当前规则属于真实业务规则,但仍处于待审核状态。",
            "- 上线前需由制度管理员确认收据替代、补件时限和特殊场景豁免口径。",
            f"- 当前版本说明:{version_note}",
            "",
            "## 管理员备注",
            "",
            "需要结合公司正式报销制度,补充各场景附件替代口径与例外审批要求。",
        )
        if not include_review_note:
            return "\n".join(body)

        # The machine-readable runtime config travels inside the document as
        # a fenced block, separated from the prose by one blank line.
        runtime_config = json.dumps(
            ATTACHMENT_RULE_RUNTIME_CONFIG,
            ensure_ascii=False,
            indent=2,
        )
        fenced_config = ("", "```expense-rule", runtime_config, "```")
        return "\n".join(body + fenced_config)

    def _scene_submission_standard_markdown(self) -> str:
        """Return the scene submission standard built by the rule runtime."""
        generated = build_scene_submission_standard_markdown()
        return self._markdown_content(generated)

    def _travel_risk_control_standard_markdown(self, *, version: str = "v1.1.0") -> str:
        """Return the travel risk-control standard built by the rule runtime.

        ``version`` is accepted for callers but is not read by this method.
        """
        generated = build_travel_risk_control_standard_markdown()
        return self._markdown_content(generated)

    @staticmethod
    def _markdown_content(content: str) -> str:
        """Return Markdown content unchanged (identity pass-through)."""
        return content

    @staticmethod
    def _json_content(content: dict[str, object]) -> str:
        """Serialise ``content`` as key-sorted, 2-space-indented JSON text.

        Non-ASCII characters are written as-is rather than escaped.
        """
        return json.dumps(content, indent=2, sort_keys=True, ensure_ascii=False)
COMPANY_COMMUNICATION_RULE_SCENARIO_JSON, + COMPANY_COMMUNICATION_RULE_VERSION, + COMPANY_TRAVEL_RULE_SCENARIO_JSON, + COMPANY_TRAVEL_RULE_VERSION, + DEMO_EXPENSE_CLAIM_SIGNATURES, + DEMO_PAYABLE_SIGNATURES, + DEMO_RECEIVABLE_SIGNATURES, + LEGACY_RULE_CODES, + PLATFORM_DESTINATION_LOCATION_RULE_FILENAME, +) +from app.core.logging import get_logger + +logger = get_logger("app.services.agent_foundation") + +class AgentFoundationRiskRuleMixin: + def _iter_platform_risk_manifests(self) -> list[tuple[str, dict[str, object]]]: + + manager = AgentAssetRuleLibraryManager() + + manifests: list[tuple[str, dict[str, object]]] = [] + + for file_name in sorted(manager.list_rule_library_json_files(library=RISK_RULES_LIBRARY)): + + payload = manager.read_rule_library_json(library=RISK_RULES_LIBRARY, file_name=file_name) + + if payload.get("enabled") is False: + + continue + + manifests.append((file_name, payload)) + + return manifests + + @staticmethod + + def _resolve_platform_risk_category(manifest: dict[str, object]) -> str: + + explicit = str(manifest.get("risk_category") or "").strip() + + if explicit: + + return explicit + + rule_code = str(manifest.get("rule_code") or "").strip().lower() + + applies_to = manifest.get("applies_to") if isinstance(manifest.get("applies_to"), dict) else {} + + domains = {str(item or "").strip().lower() for item in applies_to.get("domains") or []} + + expense_types = { + + str(item or "").strip().lower() for item in applies_to.get("expense_types") or [] + + } + + if rule_code.startswith("risk.invoice."): + + return "发票" + + if "meal" in domains or "entertainment" in expense_types: + + return "餐饮招待" + + if "transport" in expense_types or "consecutive_transport" in rule_code: + + return "交通出行" + + if "office" in expense_types: + + return "办公物料" + + if "travel" in domains or rule_code.startswith("risk.travel."): + + return "差旅" + + if rule_code.startswith("risk.expense."): + + return "费用科目" + + return "通用" + + def 
_platform_risk_scenario_json(self, manifest: dict[str, object]) -> list[str]: + + category = self._resolve_platform_risk_category(manifest) + + return [category] if category else ["通用"] + + def _platform_risk_config_json(self, file_name: str, manifest: dict[str, object]) -> dict[str, object]: + + outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {} + + fail_outcome = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {} + + risk_category = self._resolve_platform_risk_category(manifest) + + return { + + "severity": str(fail_outcome.get("severity") or "medium"), + + "enabled": True, + + "tag": "风险规则", + + "detail_mode": "json_risk", + + "risk_category": risk_category, + + "rule_library": RISK_RULES_LIBRARY, + + "rule_document": { + + "file_name": file_name, + + "storage_key": f"rules/{RISK_RULES_LIBRARY}/{file_name}", + + }, + + "ontology_signal": str(manifest.get("ontology_signal") or "").strip(), + + "evaluator": str(manifest.get("evaluator") or "").strip(), + + "source_ref": ( + + (manifest.get("metadata") or {}).get("source_ref") + + if isinstance(manifest.get("metadata"), dict) + + else "" + + ), + + } + + def _build_platform_risk_seed_assets(self) -> list[AgentAsset]: + + assets: list[AgentAsset] = [] + + for file_name, manifest in self._iter_platform_risk_manifests(): + + rule_code = str(manifest.get("rule_code") or "").strip() + + if not rule_code: + + continue + + metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {} + + source_ref = str(metadata.get("source_ref") or "").strip() + + rule_description = str(manifest.get("description") or "").strip() + + assets.append( + + AgentAsset( + + asset_type=AgentAssetType.RULE.value, + + code=rule_code, + + name=str(manifest.get("name") or rule_code), + + description=rule_description + + or f"平台通用风险规则:{source_ref or manifest.get('name') or rule_code}", + + domain=AgentAssetDomain.EXPENSE.value, + + 
scenario_json=self._platform_risk_scenario_json(manifest), + + owner=str(metadata.get("owner") or "风控与审计部"), + + reviewer="顾承宇", + + status=AgentAssetStatus.ACTIVE.value, + + current_version="v1.0.0", + + published_version="v1.0.0", + + working_version="v1.0.0", + + config_json=self._platform_risk_config_json(file_name, manifest), + + ) + + ) + + return assets + + def sync_platform_risk_rules_from_library(self) -> int: + + existing_codes = set(self.db.scalars(select(AgentAsset.code)).all()) + + before_count = len(existing_codes) + + self._ensure_platform_risk_rules_from_library(existing_codes) + + self.db.flush() + + after_codes = set(self.db.scalars(select(AgentAsset.code)).all()) + + synced = max(len(after_codes) - before_count, 0) + + manifest_count = len(self._iter_platform_risk_manifests()) + + logger.info( + + "Platform risk rules synced from library", + + extra={"manifest_count": manifest_count, "created_count": synced, "total": len(after_codes)}, + + ) + + return manifest_count + + def _ensure_platform_risk_rules_from_library(self, existing_codes: set[str]) -> None: + + for file_name, manifest in self._iter_platform_risk_manifests(): + + rule_code = str(manifest.get("rule_code") or "").strip() + + if not rule_code: + + continue + + metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {} + + source_ref = str(metadata.get("source_ref") or "").strip() + + rule_description = str(manifest.get("description") or "").strip() + + config_json = self._platform_risk_config_json(file_name, manifest) + + scenario_json = self._platform_risk_scenario_json(manifest) + + asset = self.db.scalar(select(AgentAsset).where(AgentAsset.code == rule_code)) + + if asset is None and rule_code not in existing_codes: + + asset = self._create_seed_asset( + + asset_type=AgentAssetType.RULE.value, + + code=rule_code, + + name=str(manifest.get("name") or rule_code), + + description=rule_description + + or f"平台通用风险规则:{source_ref or manifest.get('name') or 
rule_code}", + + domain=AgentAssetDomain.EXPENSE.value, + + scenario_json=scenario_json, + + owner=str(metadata.get("owner") or "风控与审计部"), + + reviewer="顾承宇", + + status=AgentAssetStatus.ACTIVE.value, + + current_version="v1.0.0", + + config_json=config_json, + + ) + + if asset is None: + + continue + + if not str(asset.current_version or "").strip(): + + asset.current_version = "v1.0.0" + + if not str(asset.working_version or "").strip(): + + asset.working_version = asset.current_version + + if not str(asset.published_version or "").strip(): + + asset.published_version = asset.current_version + + asset.status = asset.status or AgentAssetStatus.ACTIVE.value + + asset.name = str(manifest.get("name") or asset.name or rule_code) + + if rule_description: + + asset.description = rule_description + + asset.config_json = config_json + + asset.scenario_json = scenario_json + + self._ensure_asset_version( + + asset, + + version="v1.0.0", + + content=self._platform_risk_rule_markdown(asset, manifest=manifest, file_name=file_name), + + content_type=AgentAssetContentType.MARKDOWN.value, + + change_note=f"平台通用风险规则:{asset.name}", + + created_by="系统初始化", + + ) + + self._ensure_asset_review( + + asset, + + version="v1.0.0", + + reviewer="顾承宇", + + review_status=AgentReviewStatus.APPROVED.value, + + review_note="平台内置风险规则,供提交验审与风险问答共用。", + + reviewed_at=datetime.now(UTC), + + ) + + @staticmethod + + def _platform_risk_rule_markdown( + + asset: AgentAsset, + + *, + + manifest: dict[str, object] | None = None, + + file_name: str = "", + + ) -> str: + + config = asset.config_json if isinstance(asset.config_json, dict) else {} + + rule_document = config.get("rule_document") if isinstance(config.get("rule_document"), dict) else {} + + resolved_file_name = file_name or str(rule_document.get("file_name") or "").strip() + + evaluator = str(config.get("evaluator") or (manifest or {}).get("evaluator") or "").strip() + + ontology_signal = str(config.get("ontology_signal") or (manifest or 
{}).get("ontology_signal") or "").strip() + + source_ref = str(config.get("source_ref") or "").strip() + + if not source_ref and isinstance(manifest, dict): + + metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {} + + source_ref = str(metadata.get("source_ref") or "").strip() + + lines = [ + + f"# {asset.name}", + + "", + + "## 规则类型", + + "", + + "- 平台内置通用风险规则(`json_risk`)", + + ] + + if evaluator: + + lines.append(f"- 检查器:`{evaluator}`") + + if ontology_signal: + + lines.append(f"- 本体信号:`{ontology_signal}`") + + if source_ref: + + lines.extend(["", "## 来源", "", f"- {source_ref}"]) + + if resolved_file_name: + + lines.extend( + + [ + + "", + + "## 配置文件", + + "", + + f"- `rules/{RISK_RULES_LIBRARY}/{resolved_file_name}`", + + ] + + ) + + return "\n".join(lines) + + @staticmethod + + def _platform_destination_location_risk_markdown() -> str: + + return AgentFoundationRiskRuleMixin._platform_risk_rule_markdown( + + AgentAsset(name="申报地点与票据地点一致", config_json={"evaluator": "location_consistency"}), + + manifest={ + + "evaluator": "location_consistency", + + "ontology_signal": "location_mismatch", + + "metadata": {"source_ref": "常用risk.txt / 一、出差类 / 行程不符"}, + + }, + + file_name=PLATFORM_DESTINATION_LOCATION_RULE_FILENAME, + + ) diff --git a/server/src/app/services/agent_foundation_spreadsheets.py b/server/src/app/services/agent_foundation_spreadsheets.py new file mode 100644 index 0000000..8701695 --- /dev/null +++ b/server/src/app/services/agent_foundation_spreadsheets.py @@ -0,0 +1,400 @@ +from __future__ import annotations + +import hashlib +import json +from datetime import UTC, date, datetime +from decimal import Decimal +from pathlib import Path + +from sqlalchemy import inspect, select, text + +from app.core.agent_enums import ( + AgentAssetContentType, + AgentAssetDomain, + AgentAssetStatus, + AgentAssetType, + AgentName, + AgentPermissionLevel, + AgentReviewStatus, + AgentRunSource, + AgentRunStatus, + AgentToolType, +) +from 
class AgentFoundationSpreadsheetMixin:
    """Helpers that publish finance-rule spreadsheets and link them to assets."""

    def _ensure_company_travel_rule_spreadsheet_seed(
        self,
        asset: "AgentAsset",
        *,
        version: str,
        actor_name: str,
    ):
        """Publish the company travel-expense rule workbook and link it to ``asset``.

        Thin wrapper over ``_ensure_finance_rule_spreadsheet_seed`` with the
        travel file name and fallback sheet title.
        """
        return self._ensure_finance_rule_spreadsheet_seed(
            asset,
            version=version,
            actor_name=actor_name,
            file_name=COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
            fallback_sheet_name="差旅费报销规则",
        )

    def _ensure_company_communication_rule_spreadsheet_seed(
        self,
        asset: "AgentAsset",
        *,
        version: str,
        actor_name: str,
    ):
        """Publish the company communication-expense rule workbook and link it to ``asset``."""
        return self._ensure_finance_rule_spreadsheet_seed(
            asset,
            version=version,
            actor_name=actor_name,
            file_name=COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME,
            fallback_sheet_name="通信费报销规则",
        )

    @staticmethod
    def _read_or_build_company_travel_rule_file(
        manager: "AgentAssetSpreadsheetManager",
    ) -> bytes:
        """Return the travel rule workbook bytes (existing file, else a blank workbook).

        Kept as a separate entry point for existing callers; it delegates to
        ``_read_or_build_finance_rule_file``.
        """
        return AgentFoundationSpreadsheetMixin._read_or_build_finance_rule_file(
            manager,
            file_name=COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
            fallback_sheet_name="差旅费报销规则",
        )

    def _ensure_finance_rule_spreadsheet_seed(
        self,
        asset: "AgentAsset",
        *,
        version: str,
        actor_name: str,
        file_name: str,
        fallback_sheet_name: str,
    ):
        """Store a finance-rule workbook in the rule library and point ``asset`` at it.

        The workbook content is whatever already sits at the library path, or
        a blank workbook when the file is missing. ``asset.config_json`` is
        then replaced by a copy whose spreadsheet keys (``detail_mode``,
        ``tag``, ``rule_library``, ``rule_document``) describe the stored
        document; other existing keys are preserved.

        Args:
            asset: Asset whose ``config_json`` is updated in place.
            version: Asset version recorded in the rule-document config.
            actor_name: Name passed to the spreadsheet manager as the actor.
            file_name: Workbook file name inside the finance rule library.
            fallback_sheet_name: Sheet title for the blank fallback workbook.

        Returns:
            The stored document object returned by the spreadsheet manager.
        """
        manager = AgentAssetSpreadsheetManager()
        manager.ensure_rule_library_dirs()
        live_document = manager.store_rule_library_spreadsheet(
            library=FINANCE_RULES_LIBRARY,
            file_name=file_name,
            content=self._read_or_build_finance_rule_file(
                manager,
                file_name=file_name,
                fallback_sheet_name=fallback_sheet_name,
            ),
            actor_name=actor_name,
            source="rule-library",
        )

        # NOTE(review): the earlier version resolved the previously stored
        # rule_document.storage_key and checked that the file existed, but
        # both outcomes wrote the same config and returned the same document.
        # That probe is dropped here; if it was meant to keep an existing
        # document instead of overwriting it, that logic never took effect.
        # A non-dict config_json is treated as empty rather than raising.
        current_config = asset.config_json if isinstance(asset.config_json, dict) else {}
        asset.config_json = {
            **current_config,
            "detail_mode": "spreadsheet",
            "tag": "财务规则",
            "rule_library": FINANCE_RULES_LIBRARY,
            "rule_document": {
                **AgentAssetSpreadsheetManager.build_rule_document_config(
                    live_document,
                    asset_version=version,
                ),
                "storage_key": live_document.storage_key,
            },
        }
        return live_document

    @staticmethod
    def _read_or_build_finance_rule_file(
        manager: "AgentAssetSpreadsheetManager",
        *,
        file_name: str,
        fallback_sheet_name: str,
    ) -> bytes:
        """Return the bytes of a finance-rule workbook.

        Reads ``rules/<FINANCE_RULES_LIBRARY>/<file_name>`` through the
        manager's storage resolution when that file exists; otherwise builds
        a blank rule workbook with ``fallback_sheet_name`` as its sheet name.
        """
        live_key = (Path("rules") / FINANCE_RULES_LIBRARY / file_name).as_posix()
        live_path = manager.resolve_storage_path(live_key)
        if live_path.exists():
            return live_path.read_bytes()
        return AgentAssetSpreadsheetManager.build_blank_rule_workbook(fallback_sheet_name)
- score: float - - -class LlmDocumentClassification(BaseModel): - document_type: str = Field(default="other") - scene_code: str = Field(default="other") - scene_label: str = Field(default="其他票据") - expense_type: str = Field(default="other") - confidence: float = Field(default=0.0, ge=0.0, le=1.0) - evidence: list[str] = Field(default_factory=list) - fields: list[DocumentField] = Field(default_factory=list) - - -DEFAULT_RULE = DocumentRule( - document_type="other", - document_type_label="其他单据", - scene_code="other", - scene_label="其他票据", - expense_type="other", - keywords=(), - score_bias=0.0, +from app.services.document_intelligence_rules import DEFAULT_RULE, DOCUMENT_RULES, DOCUMENT_TYPE_RULE_MAP, SUPPORTED_DOCUMENT_TYPES +from app.services.document_intelligence_types import ( + DocumentField, + DocumentInsight, + LlmDocumentClassification, + RuleMatch, ) -DOCUMENT_RULES: tuple[DocumentRule, ...] = ( - DocumentRule( - document_type="flight_itinerary", - document_type_label="机票/航班行程单", - scene_code="travel", - scene_label="差旅票据", - expense_type="travel", - keywords=("电子行程单", "航班号", "航班", "机票", "登机", "航空", "客票"), - score_bias=0.34, - ), - DocumentRule( - document_type="train_ticket", - document_type_label="火车/高铁票", - scene_code="travel", - scene_label="差旅票据", - expense_type="travel", - keywords=("铁路电子客票", "电子客票", "高铁", "火车", "动车", "铁路", "车次", "检票", "二等座", "一等座", "票价"), - score_bias=0.32, - ), - DocumentRule( - document_type="hotel_invoice", - document_type_label="酒店住宿票据", - scene_code="hotel", - scene_label="住宿票据", - expense_type="hotel", - keywords=("住宿", "房费", "客房", "入住", "离店", "酒店", "宾馆", "间夜"), - score_bias=0.16, - ), - DocumentRule( - document_type="taxi_receipt", - document_type_label="出租车/网约车票据", - scene_code="transport", - scene_label="交通票据", - expense_type="transport", - keywords=("滴滴出行", "滴滴", "网约车", "出租车", "打车", "乘车", "用车", "叫车", "车费", "车资", "的士", "快车", "专车", "订单号", "上车", "下车", "起点", "终点", "里程", "司机"), - score_bias=0.38, - ), - DocumentRule( - 
document_type="parking_toll_receipt", - document_type_label="停车/通行费票据", - scene_code="transport", - scene_label="交通票据", - expense_type="transport", - keywords=("停车费", "通行费", "过路费", "收费站", "停车场", "停车"), - score_bias=0.28, - ), - DocumentRule( - document_type="meal_receipt", - document_type_label="餐饮票据", - scene_code="meal", - scene_label="餐饮票据", - expense_type="meal", - keywords=("餐饮", "餐费", "用餐", "饭店", "酒楼", "餐厅", "食品", "外卖", "咖啡"), - score_bias=0.14, - ), - DocumentRule( - document_type="office_invoice", - document_type_label="办公用品票据", - scene_code="office", - scene_label="办公用品票据", - expense_type="office", - keywords=("办公用品", "文具", "耗材", "打印纸", "墨盒", "硒鼓", "键盘", "鼠标"), - score_bias=0.14, - ), - DocumentRule( - document_type="meeting_invoice", - document_type_label="会议/会务票据", - scene_code="meeting", - scene_label="会务票据", - expense_type="meeting", - keywords=("会议", "会务", "会展", "论坛", "会议室", "会场"), - score_bias=0.12, - ), - DocumentRule( - document_type="training_invoice", - document_type_label="培训票据", - scene_code="training", - scene_label="培训票据", - expense_type="training", - keywords=("培训", "课程", "讲师", "教材", "学费", "认证"), - score_bias=0.12, - ), - DocumentRule( - document_type="vat_invoice", - document_type_label="增值税发票", - scene_code="other", - scene_label="通用发票", - expense_type="other", - keywords=("发票代码", "发票号码", "价税合计", "增值税", "电子发票"), - score_bias=-0.08, - ), - DocumentRule( - document_type="receipt", - document_type_label="一般收据/凭证", - scene_code="other", - scene_label="其他票据", - expense_type="other", - keywords=("收据", "凭证", "票据"), - score_bias=-0.18, - ), -) - -DOCUMENT_TYPE_RULE_MAP = {rule.document_type: rule for rule in DOCUMENT_RULES} -SUPPORTED_DOCUMENT_TYPES = tuple(DOCUMENT_TYPE_RULE_MAP.keys()) + ("other",) AMOUNT_PATTERNS = ( re.compile( diff --git a/server/src/app/services/document_intelligence_rules.py b/server/src/app/services/document_intelligence_rules.py new file mode 100644 index 0000000..82b3e1f --- /dev/null +++ 
b/server/src/app/services/document_intelligence_rules.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +from app.services.document_intelligence_types import DocumentRule + + +DEFAULT_RULE = DocumentRule( + document_type="other", + document_type_label="其他单据", + scene_code="other", + scene_label="其他票据", + expense_type="other", + keywords=(), + score_bias=0.0, +) + +DOCUMENT_RULES: tuple[DocumentRule, ...] = ( + DocumentRule( + document_type="flight_itinerary", + document_type_label="机票/航班行程单", + scene_code="travel", + scene_label="差旅票据", + expense_type="travel", + keywords=("电子行程单", "航班号", "航班", "机票", "登机", "航空", "客票"), + score_bias=0.34, + ), + DocumentRule( + document_type="train_ticket", + document_type_label="火车/高铁票", + scene_code="travel", + scene_label="差旅票据", + expense_type="travel", + keywords=("铁路电子客票", "电子客票", "高铁", "火车", "动车", "铁路", "车次", "检票", "二等座", "一等座", "票价"), + score_bias=0.32, + ), + DocumentRule( + document_type="hotel_invoice", + document_type_label="酒店住宿票据", + scene_code="hotel", + scene_label="住宿票据", + expense_type="hotel", + keywords=("住宿", "房费", "客房", "入住", "离店", "酒店", "宾馆", "间夜"), + score_bias=0.16, + ), + DocumentRule( + document_type="taxi_receipt", + document_type_label="出租车/网约车票据", + scene_code="transport", + scene_label="交通票据", + expense_type="transport", + keywords=("滴滴出行", "滴滴", "网约车", "出租车", "打车", "乘车", "用车", "叫车", "车费", "车资", "的士", "快车", "专车", "订单号", "上车", "下车", "起点", "终点", "里程", "司机"), + score_bias=0.38, + ), + DocumentRule( + document_type="parking_toll_receipt", + document_type_label="停车/通行费票据", + scene_code="transport", + scene_label="交通票据", + expense_type="transport", + keywords=("停车费", "通行费", "过路费", "收费站", "停车场", "停车"), + score_bias=0.28, + ), + DocumentRule( + document_type="meal_receipt", + document_type_label="餐饮票据", + scene_code="meal", + scene_label="餐饮票据", + expense_type="meal", + keywords=("餐饮", "餐费", "用餐", "饭店", "酒楼", "餐厅", "食品", "外卖", "咖啡"), + score_bias=0.14, + ), + DocumentRule( + 
document_type="office_invoice", + document_type_label="办公用品票据", + scene_code="office", + scene_label="办公用品票据", + expense_type="office", + keywords=("办公用品", "文具", "耗材", "打印纸", "墨盒", "硒鼓", "键盘", "鼠标"), + score_bias=0.14, + ), + DocumentRule( + document_type="meeting_invoice", + document_type_label="会议/会务票据", + scene_code="meeting", + scene_label="会务票据", + expense_type="meeting", + keywords=("会议", "会务", "会展", "论坛", "会议室", "会场"), + score_bias=0.12, + ), + DocumentRule( + document_type="training_invoice", + document_type_label="培训票据", + scene_code="training", + scene_label="培训票据", + expense_type="training", + keywords=("培训", "课程", "讲师", "教材", "学费", "认证"), + score_bias=0.12, + ), + DocumentRule( + document_type="vat_invoice", + document_type_label="增值税发票", + scene_code="other", + scene_label="通用发票", + expense_type="other", + keywords=("发票代码", "发票号码", "价税合计", "增值税", "电子发票"), + score_bias=-0.08, + ), + DocumentRule( + document_type="receipt", + document_type_label="一般收据/凭证", + scene_code="other", + scene_label="其他票据", + expense_type="other", + keywords=("收据", "凭证", "票据"), + score_bias=-0.18, + ), +) + +DOCUMENT_TYPE_RULE_MAP = {rule.document_type: rule for rule in DOCUMENT_RULES} + +SUPPORTED_DOCUMENT_TYPES = tuple(DOCUMENT_TYPE_RULE_MAP.keys()) + ("other",) diff --git a/server/src/app/services/document_intelligence_types.py b/server/src/app/services/document_intelligence_types.py new file mode 100644 index 0000000..2a0e89f --- /dev/null +++ b/server/src/app/services/document_intelligence_types.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from dataclasses import dataclass + +from pydantic import BaseModel, ConfigDict, Field + + +@dataclass(frozen=True, slots=True) +class DocumentField: + key: str + label: str + value: str + +@dataclass(frozen=True, slots=True) +class DocumentInsight: + document_type: str + document_type_label: str + scene_code: str + scene_label: str + expense_type: str + fields: tuple[DocumentField, ...] 
= () + classification_source: str = "rule" + classification_confidence: float = 0.0 + evidence: tuple[str, ...] = () + warnings: tuple[str, ...] = () + +@dataclass(frozen=True, slots=True) +class DocumentRule: + document_type: str + document_type_label: str + scene_code: str + scene_label: str + expense_type: str + keywords: tuple[str, ...] + score_bias: float = 0.0 + +@dataclass(frozen=True, slots=True) +class RuleMatch: + rule: DocumentRule | None + confidence: float + evidence: tuple[str, ...] + score: float + +class LlmDocumentClassification(BaseModel): + model_config = ConfigDict(arbitrary_types_allowed=True) + + document_type: str = Field(default="other") + scene_code: str = Field(default="other") + scene_label: str = Field(default="其他票据") + expense_type: str = Field(default="other") + confidence: float = Field(default=0.0, ge=0.0, le=1.0) + evidence: list[str] = Field(default_factory=list) + fields: list[DocumentField] = Field(default_factory=list) diff --git a/server/src/app/services/employee.py b/server/src/app/services/employee.py index 7c356f8..c313416 100644 --- a/server/src/app/services/employee.py +++ b/server/src/app/services/employee.py @@ -20,10 +20,7 @@ from app.models.role import Role from app.repositories.employee import EmployeeRepository from app.schemas.employee import ( EmployeeCreate, - EmployeeHistoryRead, - EmployeeImportErrorRead, EmployeeImportResultRead, - EmployeeImportSummaryRead, EmployeeMetaRead, EmployeeOrganizationRead, EmployeeRead, @@ -31,13 +28,12 @@ from app.schemas.employee import ( EmployeeStatusSummaryRead, EmployeeUpdate, ) -from app.services.employee_spreadsheet import ( - EmployeeImportRow, - EmployeeSpreadsheetError, - build_export_workbook_bytes, - build_import_template_bytes, - parse_employee_workbook, +from app.services.employee_import import EmployeeImportCoordinator +from app.services.employee_serialization import ( + format_history_datetime as serialize_history_datetime, + serialize_employee, ) +from 
app.services.employee_spreadsheet import build_import_template_bytes from app.services.employee_seed import ( EMPLOYEE_DEFINITIONS, EMPLOYEE_PROFILE_REPAIRS, @@ -440,288 +436,21 @@ class EmployeeService: def export_employees(self, status: str | None = None, keyword: str | None = None) -> bytes: self.ensure_directory_ready() - employees = self.repository.list(status=status, keyword=keyword) - rows: list[list[str]] = [] - - for employee in employees: - organization = employee.organization_unit - role_codes = ",".join(role.role_code for role in self._sorted_roles(list(employee.roles))) - rows.append( - [ - employee.employee_no, - employee.name, - employee.email, - employee.gender or "", - self._format_date(employee.birth_date) or "", - employee.phone or "", - self._format_date(employee.join_date) or "", - employee.location or "", - employee.position, - employee.grade, - organization.unit_code if organization else "", - employee.manager.employee_no if employee.manager else "", - employee.finance_owner_name or "", - employee.cost_center or "", - employee.employment_status, - role_codes, - ] - ) - - return build_export_workbook_bytes(rows) + return self._import_coordinator().export_employees(status=status, keyword=keyword) def import_employees(self, content: bytes, actor: str = "系统管理员") -> EmployeeImportResultRead: self.ensure_directory_ready() - parsed_rows, parse_errors = parse_employee_workbook(content) - if parse_errors: - return self._build_import_failure(parse_errors, total_rows=len(parsed_rows)) + return self._import_coordinator().import_employees(content, actor=actor) - validation_errors = self._validate_import_rows(parsed_rows) - if validation_errors: - return self._build_import_failure(validation_errors, total_rows=len(parsed_rows)) - - try: - summary = self._apply_import_rows(parsed_rows, actor=actor) - except Exception: - self.db.rollback() - logger.exception("Employee import failed during database write") - raise - - imported_at = 
self._format_datetime(datetime.now(UTC)) or "" - message = f"导入成功:新增 {summary['created']} 人,更新 {summary['updated']} 人。" - logger.info( - "Imported employees created=%d updated=%d total=%d", - summary["created"], - summary["updated"], - len(parsed_rows), - ) - return EmployeeImportResultRead( - success=True, - message=message, - summary=EmployeeImportSummaryRead( - totalRows=len(parsed_rows), - created=summary["created"], - updated=summary["updated"], - errorCount=0, - ), - errors=[], - importedAt=imported_at, - ) - - def _validate_import_rows( - self, rows: list[EmployeeImportRow] - ) -> list[EmployeeSpreadsheetError]: - errors: list[EmployeeSpreadsheetError] = [] - employee_nos_in_file: dict[str, int] = {} - emails_in_file: dict[str, int] = {} - - roles_by_code = {role.role_code: role for role in self.repository.list_roles()} - organizations_by_code = { - unit.unit_code: unit for unit in self.repository.list_organization_units() - } - employees_by_no = { - employee.employee_no: employee for employee in self.repository.list() - } - import_employee_nos = {row.employee_no for row in rows} - - for row in rows: - if row.employee_no in employee_nos_in_file: - errors.append( - EmployeeSpreadsheetError( - row=row.row_number, - column="员工编号*", - employee_no=row.employee_no, - message=f"员工编号 {row.employee_no} 在文件中重复。", - ) - ) - else: - employee_nos_in_file[row.employee_no] = row.row_number - - if row.email in emails_in_file: - errors.append( - EmployeeSpreadsheetError( - row=row.row_number, - column="邮箱*", - employee_no=row.employee_no, - message=f"邮箱 {row.email} 在文件中重复。", - ) - ) - else: - emails_in_file[row.email] = row.row_number - - existing_by_email = self.repository.get_by_email(row.email) - if existing_by_email is not None and existing_by_email.employee_no != row.employee_no: - errors.append( - EmployeeSpreadsheetError( - row=row.row_number, - column="邮箱*", - employee_no=row.employee_no, - message=( - f"邮箱 {row.email} 已被员工 " - f"{existing_by_email.employee_no} 使用。" 
- ), - ) - ) - - if row.organization_unit_code and row.organization_unit_code not in organizations_by_code: - errors.append( - EmployeeSpreadsheetError( - row=row.row_number, - column="部门编码", - employee_no=row.employee_no, - message=f"部门编码 {row.organization_unit_code} 不存在。", - ) - ) - - if row.manager_employee_no: - manager_exists = ( - row.manager_employee_no in employees_by_no - or row.manager_employee_no in import_employee_nos - ) - if not manager_exists: - errors.append( - EmployeeSpreadsheetError( - row=row.row_number, - column="直属上级工号", - employee_no=row.employee_no, - message=f"直属上级工号 {row.manager_employee_no} 不存在。", - ) - ) - if row.manager_employee_no == row.employee_no: - errors.append( - EmployeeSpreadsheetError( - row=row.row_number, - column="直属上级工号", - employee_no=row.employee_no, - message="直属上级不能是员工本人。", - ) - ) - - invalid_role_codes = [ - code for code in row.role_codes if code not in roles_by_code - ] - if invalid_role_codes: - errors.append( - EmployeeSpreadsheetError( - row=row.row_number, - column="角色编码", - employee_no=row.employee_no, - message=f"角色不存在:{'、'.join(invalid_role_codes)}。", - ) - ) - - return errors - - def _apply_import_rows( - self, - rows: list[EmployeeImportRow], - *, - actor: str, - ) -> dict[str, int]: - roles_by_code = {role.role_code: role for role in self.repository.list_roles()} - organizations_by_code = { - unit.unit_code: unit for unit in self.repository.list_organization_units() - } - employees_by_no = { - employee.employee_no: employee for employee in self.repository.list() - } - created = 0 - updated = 0 - now = datetime.now(UTC) - - try: - for row in rows: - employee = employees_by_no.get(row.employee_no) - is_new = employee is None - - if is_new: - employee = Employee( - employee_no=row.employee_no, - name=row.name, - email=row.email, - password_hash=hash_password(DEFAULT_EMPLOYEE_PASSWORD), - ) - self.db.add(employee) - employees_by_no[row.employee_no] = employee - created += 1 - else: - updated += 1 - - 
employee.name = row.name - employee.email = row.email - employee.gender = row.gender - employee.birth_date = row.birth_date - employee.phone = row.phone - employee.join_date = row.join_date - employee.location = row.location - employee.position = row.position - employee.grade = row.grade - employee.finance_owner_name = row.finance_owner_name - employee.cost_center = row.cost_center - employee.employment_status = row.employment_status - employee.sync_state = "已同步" - employee.last_sync_at = now - - if row.organization_unit_code: - employee.organization_unit = organizations_by_code[row.organization_unit_code] - else: - employee.organization_unit = None - - employee.roles = self._sorted_roles( - [roles_by_code[code] for code in row.role_codes if code in roles_by_code] - ) - - action = ( - "通过 Excel 导入新建员工档案" - if is_new - else "通过 Excel 导入更新员工档案" - ) - self._append_change_log(employee, action=action, owner=actor, occurred_at=now) - - self.db.flush() - - for row in rows: - employee = employees_by_no[row.employee_no] - if row.manager_employee_no: - employee.manager = employees_by_no.get(row.manager_employee_no) - else: - employee.manager = None - - self.db.commit() - except Exception: - self.db.rollback() - raise - - return {"created": created, "updated": updated} - - def _build_import_failure( - self, - errors: list[EmployeeSpreadsheetError], - *, - total_rows: int, - ) -> EmployeeImportResultRead: - error_reads = [ - EmployeeImportErrorRead( - row=item.row, - column=item.column, - employeeNo=item.employee_no, - message=item.message, - ) - for item in errors - ] - return EmployeeImportResultRead( - success=False, - message=( - f"导入未执行:共发现 {len(error_reads)} 处错误,请修正后重新导入。" - "原有员工数据未变更。" - ), - summary=EmployeeImportSummaryRead( - totalRows=total_rows, - created=0, - updated=0, - errorCount=len(error_reads), - ), - errors=error_reads, - importedAt=None, + def _import_coordinator(self) -> EmployeeImportCoordinator: + return EmployeeImportCoordinator( + self.db, + 
self.repository, + sorted_roles=self._sorted_roles, + append_change_log=self._append_change_log, + format_date=self._format_date, + format_datetime=self._format_datetime, + default_password=DEFAULT_EMPLOYEE_PASSWORD, ) def _seed_roles(self) -> None: @@ -1006,78 +735,18 @@ class EmployeeService: self.db.delete(stale) def _serialize_employee(self, employee: Employee) -> EmployeeRead: - organization = employee.organization_unit - roles = self._sorted_roles(list(employee.roles)) - role_labels = [role.name for role in roles] - role_codes = [role.role_code for role in roles] - - history = [ - EmployeeHistoryRead( - action=item.action, - owner=item.owner, - time=self._format_history_datetime(item.occurred_at), - occurredAt=self._format_history_datetime(item.occurred_at), - ) - for item in self._sorted_change_logs(employee)[:MAX_EMPLOYEE_CHANGE_LOGS] - ] - - return EmployeeRead( - id=employee.id, - avatar=(employee.name or "?")[:1], - name=employee.name, - employeeNo=employee.employee_no, - department=organization.name if organization else "", - position=employee.position, - grade=employee.grade, - manager=employee.manager.name if employee.manager else "CEO", - managerEmployeeNo=employee.manager.employee_no if employee.manager else None, - financeOwner=employee.finance_owner_name or "", - roles=role_labels, - roleCodes=role_codes, - status=employee.employment_status, - statusTone=STATUS_TONE_MAP.get(employee.employment_status, "neutral"), - gender=employee.gender, - age=self._calculate_age(employee.birth_date), - birthDate=self._format_date(employee.birth_date), - email=employee.email, - phone=employee.phone, - joinDate=self._format_date(employee.join_date), - location=employee.location, - costCenter=employee.cost_center, - updatedAt=self._format_datetime(employee.updated_at or employee.created_at), - lastSync=self._format_datetime(employee.last_sync_at), - syncState=employee.sync_state, - spotlight=employee.spotlight, - permissions=self._collect_permissions(role_codes), - 
history=history, - organization=( - EmployeeOrganizationRead( - id=organization.id, - code=organization.unit_code, - name=organization.name, - unitType=organization.unit_type, - costCenter=organization.cost_center, - location=organization.location, - managerName=organization.manager_name, - ) - if organization - else None - ), + return serialize_employee( + employee, + sorted_roles=self._sorted_roles(list(employee.roles)), + sorted_change_logs=self._sorted_change_logs(employee), + format_date=self._format_date, + format_datetime=self._format_datetime, + format_history_datetime=self._format_history_datetime, + role_permission_map=ROLE_PERMISSION_MAP, + status_tone_map=STATUS_TONE_MAP, + max_change_logs=MAX_EMPLOYEE_CHANGE_LOGS, ) - def _collect_permissions(self, role_codes: list[str]) -> list[str]: - permissions: list[str] = [] - seen: set[str] = set() - - for role_code in role_codes: - for permission in ROLE_PERMISSION_MAP.get(role_code, []): - if permission in seen: - continue - permissions.append(permission) - seen.add(permission) - - return permissions - def _sorted_roles(self, roles: list[Role]) -> list[Role]: return sorted(roles, key=lambda item: (ROLE_DISPLAY_ORDER.get(item.role_code, 999), item.name)) @@ -1125,21 +794,7 @@ class EmployeeService: @staticmethod def _format_history_datetime(value: datetime | None) -> str: - if value is None: - return "" - local = EmployeeService._to_display_datetime(value) - return ( - f"{local.year}年{local.month}月{local.day}日" - f"{local.hour}时{local.minute}分" + return serialize_history_datetime( + value, + to_display_datetime=EmployeeService._to_display_datetime, ) - - @staticmethod - def _calculate_age(birth_date: date | None) -> int | None: - if birth_date is None: - return None - - today = date.today() - age = today.year - birth_date.year - if (today.month, today.day) < (birth_date.month, birth_date.day): - age -= 1 - return age diff --git a/server/src/app/services/employee_import.py 
b/server/src/app/services/employee_import.py new file mode 100644 index 0000000..2d3f61b --- /dev/null +++ b/server/src/app/services/employee_import.py @@ -0,0 +1,331 @@ +from __future__ import annotations + +from collections.abc import Callable +from datetime import UTC, date, datetime + +from sqlalchemy.orm import Session + +from app.core.logging import get_logger +from app.core.security import hash_password +from app.models.employee import Employee +from app.models.role import Role +from app.repositories.employee import EmployeeRepository +from app.schemas.employee import ( + EmployeeImportErrorRead, + EmployeeImportResultRead, + EmployeeImportSummaryRead, +) +from app.services.employee_spreadsheet import ( + EmployeeImportRow, + EmployeeSpreadsheetError, + build_export_workbook_bytes, + parse_employee_workbook, +) + +logger = get_logger("app.services.employee") + + +class EmployeeImportCoordinator: + def __init__( + self, + db: Session, + repository: EmployeeRepository, + *, + sorted_roles: Callable[[list[Role]], list[Role]], + append_change_log: Callable[[Employee, str, str, datetime | None], None], + format_date: Callable[[date | None], str | None], + format_datetime: Callable[[datetime | None], str | None], + default_password: str, + ) -> None: + self.db = db + self.repository = repository + self.sorted_roles = sorted_roles + self.append_change_log = append_change_log + self.format_date = format_date + self.format_datetime = format_datetime + self.default_password = default_password + + def export_employees(self, status: str | None = None, keyword: str | None = None) -> bytes: + employees = self.repository.list(status=status, keyword=keyword) + rows: list[list[str]] = [] + + for employee in employees: + organization = employee.organization_unit + role_codes = ",".join(role.role_code for role in self.sorted_roles(list(employee.roles))) + rows.append( + [ + employee.employee_no, + employee.name, + employee.email, + employee.gender or "", + 
self.format_date(employee.birth_date) or "", + employee.phone or "", + self.format_date(employee.join_date) or "", + employee.location or "", + employee.position, + employee.grade, + organization.unit_code if organization else "", + employee.manager.employee_no if employee.manager else "", + employee.finance_owner_name or "", + employee.cost_center or "", + employee.employment_status, + role_codes, + ] + ) + + return build_export_workbook_bytes(rows) + + def import_employees(self, content: bytes, actor: str = "系统管理员") -> EmployeeImportResultRead: + parsed_rows, parse_errors = parse_employee_workbook(content) + if parse_errors: + return self._build_import_failure(parse_errors, total_rows=len(parsed_rows)) + + validation_errors = self._validate_import_rows(parsed_rows) + if validation_errors: + return self._build_import_failure(validation_errors, total_rows=len(parsed_rows)) + + try: + summary = self._apply_import_rows(parsed_rows, actor=actor) + except Exception: + self.db.rollback() + logger.exception("Employee import failed during database write") + raise + + imported_at = self.format_datetime(datetime.now(UTC)) or "" + message = f"导入成功:新增 {summary['created']} 人,更新 {summary['updated']} 人。" + logger.info( + "Imported employees created=%d updated=%d total=%d", + summary["created"], + summary["updated"], + len(parsed_rows), + ) + return EmployeeImportResultRead( + success=True, + message=message, + summary=EmployeeImportSummaryRead( + totalRows=len(parsed_rows), + created=summary["created"], + updated=summary["updated"], + errorCount=0, + ), + errors=[], + importedAt=imported_at, + ) + + def _validate_import_rows( + self, rows: list[EmployeeImportRow] + ) -> list[EmployeeSpreadsheetError]: + errors: list[EmployeeSpreadsheetError] = [] + employee_nos_in_file: dict[str, int] = {} + emails_in_file: dict[str, int] = {} + + roles_by_code = {role.role_code: role for role in self.repository.list_roles()} + organizations_by_code = { + unit.unit_code: unit for unit in 
self.repository.list_organization_units() + } + employees_by_no = { + employee.employee_no: employee for employee in self.repository.list() + } + import_employee_nos = {row.employee_no for row in rows} + + for row in rows: + if row.employee_no in employee_nos_in_file: + errors.append( + EmployeeSpreadsheetError( + row=row.row_number, + column="员工编号*", + employee_no=row.employee_no, + message=f"员工编号 {row.employee_no} 在文件中重复。", + ) + ) + else: + employee_nos_in_file[row.employee_no] = row.row_number + + if row.email in emails_in_file: + errors.append( + EmployeeSpreadsheetError( + row=row.row_number, + column="邮箱*", + employee_no=row.employee_no, + message=f"邮箱 {row.email} 在文件中重复。", + ) + ) + else: + emails_in_file[row.email] = row.row_number + + existing_by_email = self.repository.get_by_email(row.email) + if existing_by_email is not None and existing_by_email.employee_no != row.employee_no: + errors.append( + EmployeeSpreadsheetError( + row=row.row_number, + column="邮箱*", + employee_no=row.employee_no, + message=( + f"邮箱 {row.email} 已被员工 " + f"{existing_by_email.employee_no} 使用。" + ), + ) + ) + + if row.organization_unit_code and row.organization_unit_code not in organizations_by_code: + errors.append( + EmployeeSpreadsheetError( + row=row.row_number, + column="部门编码", + employee_no=row.employee_no, + message=f"部门编码 {row.organization_unit_code} 不存在。", + ) + ) + + if row.manager_employee_no: + manager_exists = ( + row.manager_employee_no in employees_by_no + or row.manager_employee_no in import_employee_nos + ) + if not manager_exists: + errors.append( + EmployeeSpreadsheetError( + row=row.row_number, + column="直属上级工号", + employee_no=row.employee_no, + message=f"直属上级工号 {row.manager_employee_no} 不存在。", + ) + ) + if row.manager_employee_no == row.employee_no: + errors.append( + EmployeeSpreadsheetError( + row=row.row_number, + column="直属上级工号", + employee_no=row.employee_no, + message="直属上级不能是员工本人。", + ) + ) + + invalid_role_codes = [ + code for code in row.role_codes 
if code not in roles_by_code + ] + if invalid_role_codes: + errors.append( + EmployeeSpreadsheetError( + row=row.row_number, + column="角色编码", + employee_no=row.employee_no, + message=f"角色不存在:{'、'.join(invalid_role_codes)}。", + ) + ) + + return errors + + def _apply_import_rows( + self, + rows: list[EmployeeImportRow], + *, + actor: str, + ) -> dict[str, int]: + roles_by_code = {role.role_code: role for role in self.repository.list_roles()} + organizations_by_code = { + unit.unit_code: unit for unit in self.repository.list_organization_units() + } + employees_by_no = { + employee.employee_no: employee for employee in self.repository.list() + } + created = 0 + updated = 0 + now = datetime.now(UTC) + + try: + for row in rows: + employee = employees_by_no.get(row.employee_no) + is_new = employee is None + + if is_new: + employee = Employee( + employee_no=row.employee_no, + name=row.name, + email=row.email, + password_hash=hash_password(self.default_password), + ) + self.db.add(employee) + employees_by_no[row.employee_no] = employee + created += 1 + else: + updated += 1 + + employee.name = row.name + employee.email = row.email + employee.gender = row.gender + employee.birth_date = row.birth_date + employee.phone = row.phone + employee.join_date = row.join_date + employee.location = row.location + employee.position = row.position + employee.grade = row.grade + employee.finance_owner_name = row.finance_owner_name + employee.cost_center = row.cost_center + employee.employment_status = row.employment_status + employee.sync_state = "已同步" + employee.last_sync_at = now + + if row.organization_unit_code: + employee.organization_unit = organizations_by_code[row.organization_unit_code] + else: + employee.organization_unit = None + + employee.roles = self.sorted_roles( + [roles_by_code[code] for code in row.role_codes if code in roles_by_code] + ) + + action = ( + "通过 Excel 导入新建员工档案" + if is_new + else "通过 Excel 导入更新员工档案" + ) + self.append_change_log(employee, action=action, 
owner=actor, occurred_at=now) + + self.db.flush() + + for row in rows: + employee = employees_by_no[row.employee_no] + if row.manager_employee_no: + employee.manager = employees_by_no.get(row.manager_employee_no) + else: + employee.manager = None + + self.db.commit() + except Exception: + self.db.rollback() + raise + + return {"created": created, "updated": updated} + + def _build_import_failure( + self, + errors: list[EmployeeSpreadsheetError], + *, + total_rows: int, + ) -> EmployeeImportResultRead: + error_reads = [ + EmployeeImportErrorRead( + row=item.row, + column=item.column, + employeeNo=item.employee_no, + message=item.message, + ) + for item in errors + ] + return EmployeeImportResultRead( + success=False, + message=( + f"导入未执行:共发现 {len(error_reads)} 处错误,请修正后重新导入。" + "原有员工数据未变更。" + ), + summary=EmployeeImportSummaryRead( + totalRows=total_rows, + created=0, + updated=0, + errorCount=len(error_reads), + ), + errors=error_reads, + importedAt=None, + ) + diff --git a/server/src/app/services/employee_seed.py b/server/src/app/services/employee_seed.py index 817608d..7164e63 100644 --- a/server/src/app/services/employee_seed.py +++ b/server/src/app/services/employee_seed.py @@ -1,1004 +1,17 @@ from __future__ import annotations -ROLE_DISPLAY_ORDER = { - "manager": 1, - "finance": 2, - "approver": 3, - "executive": 4, - "auditor": 5, - "user": 6, -} +from app.services.employee_seed_roles import ROLE_DEFINITIONS, ROLE_DISPLAY_ORDER, ROLE_PERMISSION_MAP +from app.services.employee_seed_organizations import EMPLOYEE_PROFILE_REPAIRS, ORGANIZATION_DEFINITIONS +from app.services.employee_seed_part1 import EMPLOYEE_DEFINITIONS_PART_1 +from app.services.employee_seed_part2 import EMPLOYEE_DEFINITIONS_PART_2 -ROLE_DEFINITIONS = [ - { - "role_code": "user", - "name": "使用者", - "description": "可以发起报销、查看个人单据和使用 AI 助手。", - }, - { - "role_code": "finance", - "name": "财务人员", - "description": "可以处理复核、查看财务知识与风险校验结果。", - }, - { - "role_code": "manager", - "name": "管理员", - 
"description": "可以维护员工档案、组织结构和角色权限。", - }, - { - "role_code": "executive", - "name": "高级管理人员", - "description": "可以查看跨部门数据看板与关键审批结果。", - }, - { - "role_code": "approver", - "name": "审批负责人", - "description": "可以处理审批中心中的待审单据。", - }, - { - "role_code": "auditor", - "name": "审计观察员", - "description": "可以查看变更记录和权限调整历史。", - }, -] - -ROLE_PERMISSION_MAP = { - "user": ["可发起差旅申请与报销", "可查看个人单据与票据识别结果"], - "finance": ["可处理财务复核任务", "可查看风险校验与财务知识库"], - "manager": ["可维护员工档案与组织结构", "可配置系统角色与访问边界"], - "executive": ["可查看跨部门经营看板", "可处理高金额报销最终审批"], - "approver": ["可处理本部门待审单据", "可查看审批链路与 SLA 状态"], - "auditor": ["可查看权限变更与审计留痕", "可导出员工权限观察记录"], -} - -ORGANIZATION_DEFINITIONS = [ - { - "unit_code": "ORG-ROOT", - "name": "星海科技", - "unit_type": "company", - "parent_code": None, - "cost_center": "CC-0000", - "location": "上海", - "manager_name": "李文静", - }, - { - "unit_code": "EXEC-OFFICE", - "name": "总经办", - "unit_type": "department", - "parent_code": "ORG-ROOT", - "cost_center": "CC-1001", - "location": "上海", - "manager_name": "李文静", - }, - { - "unit_code": "FIN-SSC", - "name": "财务共享中心", - "unit_type": "department", - "parent_code": "ORG-ROOT", - "cost_center": "CC-2108", - "location": "上海", - "manager_name": "张晓晴", - }, - { - "unit_code": "HR-OD", - "name": "人力与组织", - "unit_type": "department", - "parent_code": "ORG-ROOT", - "cost_center": "CC-3206", - "location": "杭州", - "manager_name": "陈硕", - }, - { - "unit_code": "SALES-SOUTH", - "name": "华南销售部", - "unit_type": "department", - "parent_code": "ORG-ROOT", - "cost_center": "CC-4102", - "location": "深圳", - "manager_name": "陈嘉", - }, - { - "unit_code": "SALES-EAST", - "name": "华东销售部", - "unit_type": "department", - "parent_code": "ORG-ROOT", - "cost_center": "CC-4108", - "location": "上海", - "manager_name": "秦墨然", - }, - { - "unit_code": "MKT-BRAND", - "name": "市场品牌部", - "unit_type": "department", - "parent_code": "ORG-ROOT", - "cost_center": "CC-5203", - "location": "北京", - "manager_name": "刘思雨", - }, - { - "unit_code": "RND-CENTER", - 
"name": "产品研发中心", - "unit_type": "department", - "parent_code": "ORG-ROOT", - "cost_center": "CC-6105", - "location": "北京", - "manager_name": "吴磊", - }, - { - "unit_code": "OPS-ADMIN", - "name": "行政采购部", - "unit_type": "department", - "parent_code": "ORG-ROOT", - "cost_center": "CC-7204", - "location": "南京", - "manager_name": "梁雨辰", - }, - { - "unit_code": "AUDIT-RISK", - "name": "风控与审计部", - "unit_type": "department", - "parent_code": "ORG-ROOT", - "cost_center": "CC-8102", - "location": "上海", - "manager_name": "顾承宇", - }, -] - -EMPLOYEE_PROFILE_REPAIRS = [ - { - "employee_no": "E90919", - "name": "曹笑竹", - "email": "caoxiaozhu@xf.com", - "location": "武汉", - "position": "财务智能化产品经理", - "grade": "P5", - "organization_unit_code": "RND-CENTER", - "manager_employee_no": "E11745", - "finance_owner_name": "研发财务BP", - "cost_center": "CC-6112", - "employment_status": "在职", - "sync_state": "已同步", - "role_codes": ["user"], - }, -] - -EMPLOYEE_DEFINITIONS = [ - { - "employee_no": "E10018", - "name": "李文静", - "gender": "女", - "birth_date": "1987-03-26", - "phone": "13900187688", - "email": "wenjing.li@xfinance.com", - "join_date": "2018-06-21", - "location": "上海", - "position": "高级财务总监", - "grade": "D2", - "organization_unit_code": "EXEC-OFFICE", - "manager_employee_no": None, - "finance_owner_name": "集团财务", - "cost_center": "CC-1001", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-05 16:20", - "last_sync_at": "2026-05-05 16:20", - "role_codes": ["executive", "approver"], - }, - { - "employee_no": "E10234", - "name": "张晓晴", - "gender": "女", - "birth_date": "1994-08-12", - "phone": "13810234567", - "email": "xiaoqing.zhang@xfinance.com", - "join_date": "2021-03-15", - "location": "上海", - "position": "费用运营经理", - "grade": "M3", - "organization_unit_code": "FIN-SSC", - "manager_employee_no": "E10018", - "finance_owner_name": "华东财务组", - "cost_center": "CC-2108", - "employment_status": "在职", - "sync_state": "待生效", - "spotlight": 
True, - "updated_at": "2026-05-06 10:24", - "last_sync_at": "2026-05-06 10:24", - "role_codes": ["manager", "finance", "approver"], - "history": [ - { - "action": "新增“审批负责人”角色", - "owner": "系统管理员 · 王敏", - "occurred_at": "2026-05-06 10:24", - }, - { - "action": "调整财务归口为华东财务组", - "owner": "组织管理员 · 陈硕", - "occurred_at": "2026-05-05 18:10", - }, - ], - }, - { - "employee_no": "E10258", - "name": "孙楠", - "gender": "男", - "birth_date": "1992-09-17", - "phone": "13722580312", - "email": "nan.sun@xfinance.com", - "join_date": "2020-11-09", - "location": "上海", - "position": "财务分析师", - "grade": "P5", - "organization_unit_code": "FIN-SSC", - "manager_employee_no": "E10234", - "finance_owner_name": "华东财务组", - "cost_center": "CC-2111", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-04 15:18", - "last_sync_at": "2026-05-04 15:18", - "role_codes": ["finance"], - }, - { - "employee_no": "E10271", - "name": "周悦宁", - "gender": "女", - "birth_date": "1993-04-21", - "phone": "13622711986", - "email": "yuening.zhou@xfinance.com", - "join_date": "2021-07-05", - "location": "上海", - "position": "财务系统专员", - "grade": "P5", - "organization_unit_code": "FIN-SSC", - "manager_employee_no": "E10234", - "finance_owner_name": "华东财务组", - "cost_center": "CC-2112", - "employment_status": "在职", - "sync_state": "同步中", - "spotlight": False, - "updated_at": "2026-05-07 09:35", - "last_sync_at": "2026-05-07 09:10", - "role_codes": ["finance", "auditor"], - }, - { - "employee_no": "E10289", - "name": "高嘉禾", - "gender": "女", - "birth_date": "1996-02-14", - "phone": "13522895642", - "email": "jiahe.gao@xfinance.com", - "join_date": "2023-01-10", - "location": "上海", - "position": "差旅合规专员", - "grade": "P4", - "organization_unit_code": "FIN-SSC", - "manager_employee_no": "E10234", - "finance_owner_name": "华东财务组", - "cost_center": "CC-2115", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-03 11:42", - 
"last_sync_at": "2026-05-03 11:42", - "role_codes": ["finance"], - }, - { - "employee_no": "E10867", - "name": "王敏", - "gender": "女", - "birth_date": "1996-11-05", - "phone": "13688671200", - "email": "min.wang@xfinance.com", - "join_date": "2022-08-08", - "location": "杭州", - "position": "组织发展主管", - "grade": "P6", - "organization_unit_code": "HR-OD", - "manager_employee_no": "E11618", - "finance_owner_name": "总部财务BP", - "cost_center": "CC-3206", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-05 09:18", - "last_sync_at": "2026-05-05 09:18", - "role_codes": ["manager", "auditor"], - }, - { - "employee_no": "E11618", - "name": "陈硕", - "gender": "男", - "birth_date": "1990-05-09", - "phone": "13816186540", - "email": "shuo.chen@xfinance.com", - "join_date": "2019-09-16", - "location": "杭州", - "position": "人力资源经理", - "grade": "M2", - "organization_unit_code": "HR-OD", - "manager_employee_no": "E10018", - "finance_owner_name": "总部财务BP", - "cost_center": "CC-3201", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-04 17:08", - "last_sync_at": "2026-05-04 17:08", - "role_codes": ["manager", "approver"], - }, - { - "employee_no": "E12311", - "name": "何思成", - "gender": "男", - "birth_date": "1998-07-19", - "phone": "13723117654", - "email": "sicheng.he@xfinance.com", - "join_date": "2026-02-17", - "location": "杭州", - "position": "HRBP", - "grade": "P4", - "organization_unit_code": "HR-OD", - "manager_employee_no": "E11618", - "finance_owner_name": "总部财务BP", - "cost_center": "CC-3208", - "employment_status": "试用中", - "sync_state": "待生效", - "spotlight": False, - "updated_at": "2026-05-07 08:42", - "last_sync_at": "2026-05-07 08:42", - "role_codes": ["user"], - }, - { - "employee_no": "E11026", - "name": "刘思雨", - "gender": "女", - "birth_date": "1991-12-03", - "phone": "13921036540", - "email": "siyu.liu@xfinance.com", - "join_date": "2020-04-13", - "location": "北京", - "position": 
"品牌市场经理", - "grade": "M2", - "organization_unit_code": "MKT-BRAND", - "manager_employee_no": "E10018", - "finance_owner_name": "市场财务BP", - "cost_center": "CC-5203", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-06 14:36", - "last_sync_at": "2026-05-06 14:36", - "role_codes": ["user", "approver"], - }, - { - "employee_no": "E12408", - "name": "冯可欣", - "gender": "女", - "birth_date": "1997-10-28", - "phone": "13624085542", - "email": "kexin.feng@xfinance.com", - "join_date": "2024-03-11", - "location": "北京", - "position": "品牌策划", - "grade": "P4", - "organization_unit_code": "MKT-BRAND", - "manager_employee_no": "E11026", - "finance_owner_name": "市场财务BP", - "cost_center": "CC-5207", - "employment_status": "在职", - "sync_state": "同步中", - "spotlight": False, - "updated_at": "2026-05-07 10:02", - "last_sync_at": "2026-05-07 09:48", - "role_codes": ["user"], - }, - { - "employee_no": "E12419", - "name": "许泽航", - "gender": "男", - "birth_date": "1995-05-15", - "phone": "13524199508", - "email": "zehang.xu@xfinance.com", - "join_date": "2023-06-19", - "location": "北京", - "position": "数字营销专员", - "grade": "P4", - "organization_unit_code": "MKT-BRAND", - "manager_employee_no": "E11026", - "finance_owner_name": "市场财务BP", - "cost_center": "CC-5209", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-03 16:52", - "last_sync_at": "2026-05-03 16:52", - "role_codes": ["user"], - }, - { - "employee_no": "E11602", - "name": "陈嘉", - "gender": "男", - "birth_date": "1997-02-18", - "phone": "13716029901", - "email": "jia.chen@xfinance.com", - "join_date": "2026-03-01", - "location": "深圳", - "position": "区域销售经理", - "grade": "M2", - "organization_unit_code": "SALES-SOUTH", - "manager_employee_no": "E10018", - "finance_owner_name": "华南财务组", - "cost_center": "CC-4102", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-04 14:12", - "last_sync_at": 
"2026-05-04 14:12", - "role_codes": ["user", "approver"], - }, - { - "employee_no": "E12476", - "name": "马骁然", - "gender": "男", - "birth_date": "1994-01-08", - "phone": "13824760139", - "email": "xiaoran.ma@xfinance.com", - "join_date": "2022-09-05", - "location": "深圳", - "position": "销售运营专家", - "grade": "P5", - "organization_unit_code": "SALES-SOUTH", - "manager_employee_no": "E11602", - "finance_owner_name": "华南财务组", - "cost_center": "CC-4106", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-06 18:15", - "last_sync_at": "2026-05-06 18:15", - "role_codes": ["user"], - }, - { - "employee_no": "E12508", - "name": "唐子墨", - "gender": "男", - "birth_date": "1996-06-11", - "phone": "13925088761", - "email": "zimo.tang@xfinance.com", - "join_date": "2024-02-26", - "location": "深圳", - "position": "大客户代表", - "grade": "P4", - "organization_unit_code": "SALES-SOUTH", - "manager_employee_no": "E11602", - "finance_owner_name": "华南财务组", - "cost_center": "CC-4109", - "employment_status": "停用", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-01 11:06", - "last_sync_at": "2026-05-01 11:06", - "role_codes": ["user"], - }, - { - "employee_no": "E12514", - "name": "罗欣怡", - "gender": "女", - "birth_date": "2000-03-02", - "phone": "13625141227", - "email": "xinyi.luo@xfinance.com", - "join_date": "2026-02-24", - "location": "深圳", - "position": "销售协调专员", - "grade": "P3", - "organization_unit_code": "SALES-SOUTH", - "manager_employee_no": "E11602", - "finance_owner_name": "华南财务组", - "cost_center": "CC-4112", - "employment_status": "试用中", - "sync_state": "待生效", - "spotlight": False, - "updated_at": "2026-05-05 15:42", - "last_sync_at": "2026-05-05 15:42", - "role_codes": ["user"], - }, - { - "employee_no": "E11745", - "name": "吴磊", - "gender": "男", - "birth_date": "1989-09-27", - "phone": "13817459812", - "email": "lei.wu@xfinance.com", - "join_date": "2019-12-09", - "location": "北京", - "position": "研发平台主管", - "grade": 
"M3", - "organization_unit_code": "RND-CENTER", - "manager_employee_no": "E10018", - "finance_owner_name": "研发财务BP", - "cost_center": "CC-6105", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-06 13:08", - "last_sync_at": "2026-05-06 13:08", - "role_codes": ["user", "approver", "auditor"], - }, - { - "employee_no": "E11991", - "name": "赵明", - "gender": "男", - "birth_date": "1994-06-09", - "phone": "13519913300", - "email": "ming.zhao@xfinance.com", - "join_date": "2023-11-18", - "location": "北京", - "position": "产品经理", - "grade": "P5", - "organization_unit_code": "RND-CENTER", - "manager_employee_no": "E11745", - "finance_owner_name": "研发财务BP", - "cost_center": "CC-6112", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-02 11:32", - "last_sync_at": "2026-05-02 11:32", - "role_codes": ["user"], - }, - { - "employee_no": "E12611", - "name": "彭一凡", - "gender": "男", - "birth_date": "1995-02-03", - "phone": "13726114588", - "email": "yifan.peng@xfinance.com", - "join_date": "2022-04-18", - "location": "北京", - "position": "后端工程师", - "grade": "P5", - "organization_unit_code": "RND-CENTER", - "manager_employee_no": "E11745", - "finance_owner_name": "研发财务BP", - "cost_center": "CC-6114", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-06 09:44", - "last_sync_at": "2026-05-06 09:44", - "role_codes": ["user"], - }, - { - "employee_no": "E12618", - "name": "苏清禾", - "gender": "女", - "birth_date": "1994-12-25", - "phone": "13626188763", - "email": "qinghe.su@xfinance.com", - "join_date": "2022-05-16", - "location": "北京", - "position": "数据工程师", - "grade": "P5", - "organization_unit_code": "RND-CENTER", - "manager_employee_no": "E11745", - "finance_owner_name": "研发财务BP", - "cost_center": "CC-6116", - "employment_status": "在职", - "sync_state": "同步中", - "spotlight": False, - "updated_at": "2026-05-07 10:26", - "last_sync_at": 
"2026-05-07 10:18", - "role_codes": ["user"], - }, - { - "employee_no": "E12624", - "name": "沈知远", - "gender": "男", - "birth_date": "1992-11-06", - "phone": "13926241855", - "email": "zhiyuan.shen@xfinance.com", - "join_date": "2021-11-22", - "location": "北京", - "position": "测试负责人", - "grade": "P6", - "organization_unit_code": "RND-CENTER", - "manager_employee_no": "E11745", - "finance_owner_name": "研发财务BP", - "cost_center": "CC-6119", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-05 13:12", - "last_sync_at": "2026-05-05 13:12", - "role_codes": ["user"], - }, - { - "employee_no": "E11852", - "name": "周晓彤", - "gender": "女", - "birth_date": "1997-05-27", - "phone": "13818529954", - "email": "xiaotong.zhou@xfinance.com", - "join_date": "2022-06-30", - "location": "南京", - "position": "行政采购专员", - "grade": "P4", - "organization_unit_code": "OPS-ADMIN", - "manager_employee_no": "E12653", - "finance_owner_name": "行政财务BP", - "cost_center": "CC-7204", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-05 11:22", - "last_sync_at": "2026-05-05 11:22", - "role_codes": ["user"], - }, - { - "employee_no": "E12653", - "name": "梁雨辰", - "gender": "男", - "birth_date": "1991-08-30", - "phone": "13726539876", - "email": "yuchen.liang@xfinance.com", - "join_date": "2021-01-04", - "location": "南京", - "position": "行政运营经理", - "grade": "M1", - "organization_unit_code": "OPS-ADMIN", - "manager_employee_no": "E10018", - "finance_owner_name": "行政财务BP", - "cost_center": "CC-7201", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-06 17:44", - "last_sync_at": "2026-05-06 17:44", - "role_codes": ["user", "approver"], - }, - { - "employee_no": "E12661", - "name": "顾承宇", - "gender": "男", - "birth_date": "1988-04-16", - "phone": "13926614528", - "email": "chengyu.gu@xfinance.com", - "join_date": "2020-02-03", - "location": "上海", - "position": "风控审计经理", 
- "grade": "M2", - "organization_unit_code": "AUDIT-RISK", - "manager_employee_no": "E10018", - "finance_owner_name": "集团财务", - "cost_center": "CC-8102", - "employment_status": "在职", - "sync_state": "待生效", - "spotlight": True, - "updated_at": "2026-05-07 09:52", - "last_sync_at": "2026-05-07 09:52", - "role_codes": ["auditor", "finance"], - "history": [ - { - "action": "更新审计观察范围", - "owner": "系统管理员 · 张晓晴", - "occurred_at": "2026-05-07 09:52", - }, - { - "action": "补充高风险费用抽样规则", - "owner": "审计管理员 · 王敏", - "occurred_at": "2026-05-06 18:30", - }, - ], - }, - { - "employee_no": "E12679", - "name": "郑若彤", - "gender": "女", - "birth_date": "1997-09-13", - "phone": "13626794520", - "email": "ruotong.zheng@xfinance.com", - "join_date": "2024-01-08", - "location": "上海", - "position": "审计专员", - "grade": "P4", - "organization_unit_code": "AUDIT-RISK", - "manager_employee_no": "E12661", - "finance_owner_name": "集团财务", - "cost_center": "CC-8105", - "employment_status": "在职", - "sync_state": "同步中", - "spotlight": False, - "updated_at": "2026-05-07 08:58", - "last_sync_at": "2026-05-07 08:40", - "role_codes": ["auditor"], - }, - { - "employee_no": "E12688", - "name": "方逸晨", - "gender": "男", - "birth_date": "1995-01-20", - "phone": "13526881142", - "email": "yichen.fang@xfinance.com", - "join_date": "2023-08-14", - "location": "南京", - "position": "采购合规分析师", - "grade": "P4", - "organization_unit_code": "OPS-ADMIN", - "manager_employee_no": "E12653", - "finance_owner_name": "行政财务BP", - "cost_center": "CC-7208", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-03 14:16", - "last_sync_at": "2026-05-03 14:16", - "role_codes": ["user", "finance"], - }, - { - "employee_no": "E12067", - "name": "秦墨然", - "gender": "男", - "birth_date": "1990-10-10", - "phone": "13820674519", - "email": "moran.qin@xfinance.com", - "join_date": "2020-07-20", - "location": "上海", - "position": "华东销售总监", - "grade": "M2", - "organization_unit_code": "SALES-EAST", - 
"manager_employee_no": "E10018", - "finance_owner_name": "华东财务组", - "cost_center": "CC-4108", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-06 12:40", - "last_sync_at": "2026-05-06 12:40", - "role_codes": ["user", "approver"], - }, - { - "employee_no": "E12703", - "name": "宋知夏", - "gender": "女", - "birth_date": "1994-07-07", - "phone": "13727031129", - "email": "zhixia.song@xfinance.com", - "join_date": "2022-12-12", - "location": "上海", - "position": "重点客户经理", - "grade": "P5", - "organization_unit_code": "SALES-EAST", - "manager_employee_no": "E12067", - "finance_owner_name": "华东财务组", - "cost_center": "CC-4111", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-04 10:58", - "last_sync_at": "2026-05-04 10:58", - "role_codes": ["user"], - }, - { - "employee_no": "E12716", - "name": "杜嘉宁", - "gender": "男", - "birth_date": "1999-11-16", - "phone": "13627161248", - "email": "jianing.du@xfinance.com", - "join_date": "2026-01-19", - "location": "上海", - "position": "销售代表", - "grade": "P3", - "organization_unit_code": "SALES-EAST", - "manager_employee_no": "E12067", - "finance_owner_name": "华东财务组", - "cost_center": "CC-4114", - "employment_status": "试用中", - "sync_state": "待生效", - "spotlight": False, - "updated_at": "2026-05-05 12:26", - "last_sync_at": "2026-05-05 12:26", - "role_codes": ["user"], - }, - { - "employee_no": "E12722", - "name": "邵宁远", - "gender": "男", - "birth_date": "1998-12-01", - "phone": "13527221506", - "email": "ningyuan.shao@xfinance.com", - "join_date": "2026-02-08", - "location": "北京", - "position": "数据分析师", - "grade": "P4", - "organization_unit_code": "RND-CENTER", - "manager_employee_no": "E11745", - "finance_owner_name": "研发财务BP", - "cost_center": "CC-6122", - "employment_status": "试用中", - "sync_state": "同步中", - "spotlight": False, - "updated_at": "2026-05-07 09:06", - "last_sync_at": "2026-05-07 08:55", - "role_codes": ["user"], - }, - { - 
"employee_no": "E12739", - "name": "林可昕", - "gender": "女", - "birth_date": "1996-10-23", - "phone": "13827394510", - "email": "kexin.lin@xfinance.com", - "join_date": "2023-04-17", - "location": "上海", - "position": "费用核算专员", - "grade": "P4", - "organization_unit_code": "FIN-SSC", - "manager_employee_no": "E10234", - "finance_owner_name": "华东财务组", - "cost_center": "CC-2118", - "employment_status": "停用", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-04-30 18:05", - "last_sync_at": "2026-04-30 18:05", - "role_codes": ["finance"], - }, - { - "employee_no": "E12744", - "name": "赵予安", - "gender": "男", - "birth_date": "1993-01-30", - "phone": "13727442139", - "email": "yuan.zhao@xfinance.com", - "join_date": "2021-10-11", - "location": "上海", - "position": "预算控制经理", - "grade": "M1", - "organization_unit_code": "FIN-SSC", - "manager_employee_no": "E10234", - "finance_owner_name": "集团财务", - "cost_center": "CC-2120", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-06 15:34", - "last_sync_at": "2026-05-06 15:34", - "role_codes": ["finance", "approver"], - }, - { - "employee_no": "E12750", - "name": "谢知行", - "gender": "男", - "birth_date": "1995-09-14", - "phone": "13627501386", - "email": "zhixing.xie@xfinance.com", - "join_date": "2022-07-25", - "location": "深圳", - "position": "渠道销售经理", - "grade": "P5", - "organization_unit_code": "SALES-SOUTH", - "manager_employee_no": "E11602", - "finance_owner_name": "华南财务组", - "cost_center": "CC-4116", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-04 09:48", - "last_sync_at": "2026-05-04 09:48", - "role_codes": ["user"], - }, - { - "employee_no": "E12758", - "name": "顾南枝", - "gender": "女", - "birth_date": "1994-04-12", - "phone": "13827584522", - "email": "nanzhi.gu@xfinance.com", - "join_date": "2022-05-09", - "location": "北京", - "position": "内容运营经理", - "grade": "P5", - "organization_unit_code": "MKT-BRAND", - 
"manager_employee_no": "E11026", - "finance_owner_name": "市场财务BP", - "cost_center": "CC-5211", - "employment_status": "在职", - "sync_state": "同步中", - "spotlight": False, - "updated_at": "2026-05-07 11:08", - "last_sync_at": "2026-05-07 10:50", - "role_codes": ["user"], - }, - { - "employee_no": "E12763", - "name": "孟书言", - "gender": "男", - "birth_date": "1992-02-09", - "phone": "13527633148", - "email": "shuyan.meng@xfinance.com", - "join_date": "2021-06-28", - "location": "北京", - "position": "架构工程师", - "grade": "P6", - "organization_unit_code": "RND-CENTER", - "manager_employee_no": "E11745", - "finance_owner_name": "研发财务BP", - "cost_center": "CC-6125", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-06 19:05", - "last_sync_at": "2026-05-06 19:05", - "role_codes": ["user"], - }, - { - "employee_no": "E12771", - "name": "孔令谦", - "gender": "男", - "birth_date": "1993-07-18", - "phone": "13627711572", - "email": "lingqian.kong@xfinance.com", - "join_date": "2021-09-13", - "location": "南京", - "position": "供应商管理专员", - "grade": "P4", - "organization_unit_code": "OPS-ADMIN", - "manager_employee_no": "E12653", - "finance_owner_name": "行政财务BP", - "cost_center": "CC-7210", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-02 17:22", - "last_sync_at": "2026-05-02 17:22", - "role_codes": ["user"], - }, - { - "employee_no": "E12782", - "name": "乔语岚", - "gender": "女", - "birth_date": "1996-05-06", - "phone": "13727823045", - "email": "yulan.qiao@xfinance.com", - "join_date": "2023-03-06", - "location": "上海", - "position": "风控策略分析师", - "grade": "P4", - "organization_unit_code": "AUDIT-RISK", - "manager_employee_no": "E12661", - "finance_owner_name": "集团财务", - "cost_center": "CC-8108", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-03 13:18", - "last_sync_at": "2026-05-03 13:18", - "role_codes": ["auditor"], - }, - { - 
"employee_no": "E12790", - "name": "邹闻韬", - "gender": "男", - "birth_date": "1991-03-11", - "phone": "13827903167", - "email": "wentao.zou@xfinance.com", - "join_date": "2020-10-26", - "location": "上海", - "position": "合规产品负责人", - "grade": "P7", - "organization_unit_code": "RND-CENTER", - "manager_employee_no": "E11745", - "finance_owner_name": "研发财务BP", - "cost_center": "CC-6128", - "employment_status": "在职", - "sync_state": "已同步", - "spotlight": False, - "updated_at": "2026-05-06 08:56", - "last_sync_at": "2026-05-06 08:56", - "role_codes": ["user", "auditor"], - }, +EMPLOYEE_DEFINITIONS = EMPLOYEE_DEFINITIONS_PART_1 + EMPLOYEE_DEFINITIONS_PART_2 + +__all__ = [ + "ROLE_DISPLAY_ORDER", + "ROLE_DEFINITIONS", + "ROLE_PERMISSION_MAP", + "ORGANIZATION_DEFINITIONS", + "EMPLOYEE_PROFILE_REPAIRS", + "EMPLOYEE_DEFINITIONS", ] diff --git a/server/src/app/services/employee_seed_organizations.py b/server/src/app/services/employee_seed_organizations.py new file mode 100644 index 0000000..497f658 --- /dev/null +++ b/server/src/app/services/employee_seed_organizations.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +ORGANIZATION_DEFINITIONS = [ + { + "unit_code": "ORG-ROOT", + "name": "星海科技", + "unit_type": "company", + "parent_code": None, + "cost_center": "CC-0000", + "location": "上海", + "manager_name": "李文静", + }, + { + "unit_code": "EXEC-OFFICE", + "name": "总经办", + "unit_type": "department", + "parent_code": "ORG-ROOT", + "cost_center": "CC-1001", + "location": "上海", + "manager_name": "李文静", + }, + { + "unit_code": "FIN-SSC", + "name": "财务共享中心", + "unit_type": "department", + "parent_code": "ORG-ROOT", + "cost_center": "CC-2108", + "location": "上海", + "manager_name": "张晓晴", + }, + { + "unit_code": "HR-OD", + "name": "人力与组织", + "unit_type": "department", + "parent_code": "ORG-ROOT", + "cost_center": "CC-3206", + "location": "杭州", + "manager_name": "陈硕", + }, + { + "unit_code": "SALES-SOUTH", + "name": "华南销售部", + "unit_type": "department", + "parent_code": 
"ORG-ROOT", + "cost_center": "CC-4102", + "location": "深圳", + "manager_name": "陈嘉", + }, + { + "unit_code": "SALES-EAST", + "name": "华东销售部", + "unit_type": "department", + "parent_code": "ORG-ROOT", + "cost_center": "CC-4108", + "location": "上海", + "manager_name": "秦墨然", + }, + { + "unit_code": "MKT-BRAND", + "name": "市场品牌部", + "unit_type": "department", + "parent_code": "ORG-ROOT", + "cost_center": "CC-5203", + "location": "北京", + "manager_name": "刘思雨", + }, + { + "unit_code": "RND-CENTER", + "name": "产品研发中心", + "unit_type": "department", + "parent_code": "ORG-ROOT", + "cost_center": "CC-6105", + "location": "北京", + "manager_name": "吴磊", + }, + { + "unit_code": "OPS-ADMIN", + "name": "行政采购部", + "unit_type": "department", + "parent_code": "ORG-ROOT", + "cost_center": "CC-7204", + "location": "南京", + "manager_name": "梁雨辰", + }, + { + "unit_code": "AUDIT-RISK", + "name": "风控与审计部", + "unit_type": "department", + "parent_code": "ORG-ROOT", + "cost_center": "CC-8102", + "location": "上海", + "manager_name": "顾承宇", + }, +] + +EMPLOYEE_PROFILE_REPAIRS = [ + { + "employee_no": "E90919", + "name": "曹笑竹", + "email": "caoxiaozhu@xf.com", + "location": "武汉", + "position": "财务智能化产品经理", + "grade": "P5", + "organization_unit_code": "RND-CENTER", + "manager_employee_no": "E11745", + "finance_owner_name": "研发财务BP", + "cost_center": "CC-6112", + "employment_status": "在职", + "sync_state": "已同步", + "role_codes": ["user"], + }, +] diff --git a/server/src/app/services/employee_seed_part1.py b/server/src/app/services/employee_seed_part1.py new file mode 100644 index 0000000..ee29c54 --- /dev/null +++ b/server/src/app/services/employee_seed_part1.py @@ -0,0 +1,434 @@ +from __future__ import annotations + +EMPLOYEE_DEFINITIONS_PART_1 = [ + { + "employee_no": "E10018", + "name": "李文静", + "gender": "女", + "birth_date": "1987-03-26", + "phone": "13900187688", + "email": "wenjing.li@xfinance.com", + "join_date": "2018-06-21", + "location": "上海", + "position": "高级财务总监", + "grade": "D2", + 
"organization_unit_code": "EXEC-OFFICE", + "manager_employee_no": None, + "finance_owner_name": "集团财务", + "cost_center": "CC-1001", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-05 16:20", + "last_sync_at": "2026-05-05 16:20", + "role_codes": ["executive", "approver"], + }, + { + "employee_no": "E10234", + "name": "张晓晴", + "gender": "女", + "birth_date": "1994-08-12", + "phone": "13810234567", + "email": "xiaoqing.zhang@xfinance.com", + "join_date": "2021-03-15", + "location": "上海", + "position": "费用运营经理", + "grade": "M3", + "organization_unit_code": "FIN-SSC", + "manager_employee_no": "E10018", + "finance_owner_name": "华东财务组", + "cost_center": "CC-2108", + "employment_status": "在职", + "sync_state": "待生效", + "spotlight": True, + "updated_at": "2026-05-06 10:24", + "last_sync_at": "2026-05-06 10:24", + "role_codes": ["manager", "finance", "approver"], + "history": [ + { + "action": "新增“审批负责人”角色", + "owner": "系统管理员 · 王敏", + "occurred_at": "2026-05-06 10:24", + }, + { + "action": "调整财务归口为华东财务组", + "owner": "组织管理员 · 陈硕", + "occurred_at": "2026-05-05 18:10", + }, + ], + }, + { + "employee_no": "E10258", + "name": "孙楠", + "gender": "男", + "birth_date": "1992-09-17", + "phone": "13722580312", + "email": "nan.sun@xfinance.com", + "join_date": "2020-11-09", + "location": "上海", + "position": "财务分析师", + "grade": "P5", + "organization_unit_code": "FIN-SSC", + "manager_employee_no": "E10234", + "finance_owner_name": "华东财务组", + "cost_center": "CC-2111", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-04 15:18", + "last_sync_at": "2026-05-04 15:18", + "role_codes": ["finance"], + }, + { + "employee_no": "E10271", + "name": "周悦宁", + "gender": "女", + "birth_date": "1993-04-21", + "phone": "13622711986", + "email": "yuening.zhou@xfinance.com", + "join_date": "2021-07-05", + "location": "上海", + "position": "财务系统专员", + "grade": "P5", + "organization_unit_code": "FIN-SSC", + 
"manager_employee_no": "E10234", + "finance_owner_name": "华东财务组", + "cost_center": "CC-2112", + "employment_status": "在职", + "sync_state": "同步中", + "spotlight": False, + "updated_at": "2026-05-07 09:35", + "last_sync_at": "2026-05-07 09:10", + "role_codes": ["finance", "auditor"], + }, + { + "employee_no": "E10289", + "name": "高嘉禾", + "gender": "女", + "birth_date": "1996-02-14", + "phone": "13522895642", + "email": "jiahe.gao@xfinance.com", + "join_date": "2023-01-10", + "location": "上海", + "position": "差旅合规专员", + "grade": "P4", + "organization_unit_code": "FIN-SSC", + "manager_employee_no": "E10234", + "finance_owner_name": "华东财务组", + "cost_center": "CC-2115", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-03 11:42", + "last_sync_at": "2026-05-03 11:42", + "role_codes": ["finance"], + }, + { + "employee_no": "E10867", + "name": "王敏", + "gender": "女", + "birth_date": "1996-11-05", + "phone": "13688671200", + "email": "min.wang@xfinance.com", + "join_date": "2022-08-08", + "location": "杭州", + "position": "组织发展主管", + "grade": "P6", + "organization_unit_code": "HR-OD", + "manager_employee_no": "E11618", + "finance_owner_name": "总部财务BP", + "cost_center": "CC-3206", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-05 09:18", + "last_sync_at": "2026-05-05 09:18", + "role_codes": ["manager", "auditor"], + }, + { + "employee_no": "E11618", + "name": "陈硕", + "gender": "男", + "birth_date": "1990-05-09", + "phone": "13816186540", + "email": "shuo.chen@xfinance.com", + "join_date": "2019-09-16", + "location": "杭州", + "position": "人力资源经理", + "grade": "M2", + "organization_unit_code": "HR-OD", + "manager_employee_no": "E10018", + "finance_owner_name": "总部财务BP", + "cost_center": "CC-3201", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-04 17:08", + "last_sync_at": "2026-05-04 17:08", + "role_codes": ["manager", "approver"], + }, 
+ { + "employee_no": "E12311", + "name": "何思成", + "gender": "男", + "birth_date": "1998-07-19", + "phone": "13723117654", + "email": "sicheng.he@xfinance.com", + "join_date": "2026-02-17", + "location": "杭州", + "position": "HRBP", + "grade": "P4", + "organization_unit_code": "HR-OD", + "manager_employee_no": "E11618", + "finance_owner_name": "总部财务BP", + "cost_center": "CC-3208", + "employment_status": "试用中", + "sync_state": "待生效", + "spotlight": False, + "updated_at": "2026-05-07 08:42", + "last_sync_at": "2026-05-07 08:42", + "role_codes": ["user"], + }, + { + "employee_no": "E11026", + "name": "刘思雨", + "gender": "女", + "birth_date": "1991-12-03", + "phone": "13921036540", + "email": "siyu.liu@xfinance.com", + "join_date": "2020-04-13", + "location": "北京", + "position": "品牌市场经理", + "grade": "M2", + "organization_unit_code": "MKT-BRAND", + "manager_employee_no": "E10018", + "finance_owner_name": "市场财务BP", + "cost_center": "CC-5203", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-06 14:36", + "last_sync_at": "2026-05-06 14:36", + "role_codes": ["user", "approver"], + }, + { + "employee_no": "E12408", + "name": "冯可欣", + "gender": "女", + "birth_date": "1997-10-28", + "phone": "13624085542", + "email": "kexin.feng@xfinance.com", + "join_date": "2024-03-11", + "location": "北京", + "position": "品牌策划", + "grade": "P4", + "organization_unit_code": "MKT-BRAND", + "manager_employee_no": "E11026", + "finance_owner_name": "市场财务BP", + "cost_center": "CC-5207", + "employment_status": "在职", + "sync_state": "同步中", + "spotlight": False, + "updated_at": "2026-05-07 10:02", + "last_sync_at": "2026-05-07 09:48", + "role_codes": ["user"], + }, + { + "employee_no": "E12419", + "name": "许泽航", + "gender": "男", + "birth_date": "1995-05-15", + "phone": "13524199508", + "email": "zehang.xu@xfinance.com", + "join_date": "2023-06-19", + "location": "北京", + "position": "数字营销专员", + "grade": "P4", + "organization_unit_code": "MKT-BRAND", + 
"manager_employee_no": "E11026", + "finance_owner_name": "市场财务BP", + "cost_center": "CC-5209", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-03 16:52", + "last_sync_at": "2026-05-03 16:52", + "role_codes": ["user"], + }, + { + "employee_no": "E11602", + "name": "陈嘉", + "gender": "男", + "birth_date": "1997-02-18", + "phone": "13716029901", + "email": "jia.chen@xfinance.com", + "join_date": "2026-03-01", + "location": "深圳", + "position": "区域销售经理", + "grade": "M2", + "organization_unit_code": "SALES-SOUTH", + "manager_employee_no": "E10018", + "finance_owner_name": "华南财务组", + "cost_center": "CC-4102", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-04 14:12", + "last_sync_at": "2026-05-04 14:12", + "role_codes": ["user", "approver"], + }, + { + "employee_no": "E12476", + "name": "马骁然", + "gender": "男", + "birth_date": "1994-01-08", + "phone": "13824760139", + "email": "xiaoran.ma@xfinance.com", + "join_date": "2022-09-05", + "location": "深圳", + "position": "销售运营专家", + "grade": "P5", + "organization_unit_code": "SALES-SOUTH", + "manager_employee_no": "E11602", + "finance_owner_name": "华南财务组", + "cost_center": "CC-4106", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-06 18:15", + "last_sync_at": "2026-05-06 18:15", + "role_codes": ["user"], + }, + { + "employee_no": "E12508", + "name": "唐子墨", + "gender": "男", + "birth_date": "1996-06-11", + "phone": "13925088761", + "email": "zimo.tang@xfinance.com", + "join_date": "2024-02-26", + "location": "深圳", + "position": "大客户代表", + "grade": "P4", + "organization_unit_code": "SALES-SOUTH", + "manager_employee_no": "E11602", + "finance_owner_name": "华南财务组", + "cost_center": "CC-4109", + "employment_status": "停用", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-01 11:06", + "last_sync_at": "2026-05-01 11:06", + "role_codes": ["user"], + }, + { + 
"employee_no": "E12514", + "name": "罗欣怡", + "gender": "女", + "birth_date": "2000-03-02", + "phone": "13625141227", + "email": "xinyi.luo@xfinance.com", + "join_date": "2026-02-24", + "location": "深圳", + "position": "销售协调专员", + "grade": "P3", + "organization_unit_code": "SALES-SOUTH", + "manager_employee_no": "E11602", + "finance_owner_name": "华南财务组", + "cost_center": "CC-4112", + "employment_status": "试用中", + "sync_state": "待生效", + "spotlight": False, + "updated_at": "2026-05-05 15:42", + "last_sync_at": "2026-05-05 15:42", + "role_codes": ["user"], + }, + { + "employee_no": "E11745", + "name": "吴磊", + "gender": "男", + "birth_date": "1989-09-27", + "phone": "13817459812", + "email": "lei.wu@xfinance.com", + "join_date": "2019-12-09", + "location": "北京", + "position": "研发平台主管", + "grade": "M3", + "organization_unit_code": "RND-CENTER", + "manager_employee_no": "E10018", + "finance_owner_name": "研发财务BP", + "cost_center": "CC-6105", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-06 13:08", + "last_sync_at": "2026-05-06 13:08", + "role_codes": ["user", "approver", "auditor"], + }, + { + "employee_no": "E11991", + "name": "赵明", + "gender": "男", + "birth_date": "1994-06-09", + "phone": "13519913300", + "email": "ming.zhao@xfinance.com", + "join_date": "2023-11-18", + "location": "北京", + "position": "产品经理", + "grade": "P5", + "organization_unit_code": "RND-CENTER", + "manager_employee_no": "E11745", + "finance_owner_name": "研发财务BP", + "cost_center": "CC-6112", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-02 11:32", + "last_sync_at": "2026-05-02 11:32", + "role_codes": ["user"], + }, + { + "employee_no": "E12611", + "name": "彭一凡", + "gender": "男", + "birth_date": "1995-02-03", + "phone": "13726114588", + "email": "yifan.peng@xfinance.com", + "join_date": "2022-04-18", + "location": "北京", + "position": "后端工程师", + "grade": "P5", + "organization_unit_code": "RND-CENTER", 
+ "manager_employee_no": "E11745", + "finance_owner_name": "研发财务BP", + "cost_center": "CC-6114", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-06 09:44", + "last_sync_at": "2026-05-06 09:44", + "role_codes": ["user"], + }, + { + "employee_no": "E12618", + "name": "苏清禾", + "gender": "女", + "birth_date": "1994-12-25", + "phone": "13626188763", + "email": "qinghe.su@xfinance.com", + "join_date": "2022-05-16", + "location": "北京", + "position": "数据工程师", + "grade": "P5", + "organization_unit_code": "RND-CENTER", + "manager_employee_no": "E11745", + "finance_owner_name": "研发财务BP", + "cost_center": "CC-6116", + "employment_status": "在职", + "sync_state": "同步中", + "spotlight": False, + "updated_at": "2026-05-07 10:26", + "last_sync_at": "2026-05-07 10:18", + "role_codes": ["user"], + }, +] diff --git a/server/src/app/services/employee_seed_part2.py b/server/src/app/services/employee_seed_part2.py new file mode 100644 index 0000000..27fa80f --- /dev/null +++ b/server/src/app/services/employee_seed_part2.py @@ -0,0 +1,412 @@ +from __future__ import annotations + +EMPLOYEE_DEFINITIONS_PART_2 = [ + { + "employee_no": "E12624", + "name": "沈知远", + "gender": "男", + "birth_date": "1992-11-06", + "phone": "13926241855", + "email": "zhiyuan.shen@xfinance.com", + "join_date": "2021-11-22", + "location": "北京", + "position": "测试负责人", + "grade": "P6", + "organization_unit_code": "RND-CENTER", + "manager_employee_no": "E11745", + "finance_owner_name": "研发财务BP", + "cost_center": "CC-6119", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-05 13:12", + "last_sync_at": "2026-05-05 13:12", + "role_codes": ["user"], + }, + { + "employee_no": "E11852", + "name": "周晓彤", + "gender": "女", + "birth_date": "1997-05-27", + "phone": "13818529954", + "email": "xiaotong.zhou@xfinance.com", + "join_date": "2022-06-30", + "location": "南京", + "position": "行政采购专员", + "grade": "P4", + 
"organization_unit_code": "OPS-ADMIN", + "manager_employee_no": "E12653", + "finance_owner_name": "行政财务BP", + "cost_center": "CC-7204", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-05 11:22", + "last_sync_at": "2026-05-05 11:22", + "role_codes": ["user"], + }, + { + "employee_no": "E12653", + "name": "梁雨辰", + "gender": "男", + "birth_date": "1991-08-30", + "phone": "13726539876", + "email": "yuchen.liang@xfinance.com", + "join_date": "2021-01-04", + "location": "南京", + "position": "行政运营经理", + "grade": "M1", + "organization_unit_code": "OPS-ADMIN", + "manager_employee_no": "E10018", + "finance_owner_name": "行政财务BP", + "cost_center": "CC-7201", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-06 17:44", + "last_sync_at": "2026-05-06 17:44", + "role_codes": ["user", "approver"], + }, + { + "employee_no": "E12661", + "name": "顾承宇", + "gender": "男", + "birth_date": "1988-04-16", + "phone": "13926614528", + "email": "chengyu.gu@xfinance.com", + "join_date": "2020-02-03", + "location": "上海", + "position": "风控审计经理", + "grade": "M2", + "organization_unit_code": "AUDIT-RISK", + "manager_employee_no": "E10018", + "finance_owner_name": "集团财务", + "cost_center": "CC-8102", + "employment_status": "在职", + "sync_state": "待生效", + "spotlight": True, + "updated_at": "2026-05-07 09:52", + "last_sync_at": "2026-05-07 09:52", + "role_codes": ["auditor", "finance"], + "history": [ + { + "action": "更新审计观察范围", + "owner": "系统管理员 · 张晓晴", + "occurred_at": "2026-05-07 09:52", + }, + { + "action": "补充高风险费用抽样规则", + "owner": "审计管理员 · 王敏", + "occurred_at": "2026-05-06 18:30", + }, + ], + }, + { + "employee_no": "E12679", + "name": "郑若彤", + "gender": "女", + "birth_date": "1997-09-13", + "phone": "13626794520", + "email": "ruotong.zheng@xfinance.com", + "join_date": "2024-01-08", + "location": "上海", + "position": "审计专员", + "grade": "P4", + "organization_unit_code": "AUDIT-RISK", + 
"manager_employee_no": "E12661", + "finance_owner_name": "集团财务", + "cost_center": "CC-8105", + "employment_status": "在职", + "sync_state": "同步中", + "spotlight": False, + "updated_at": "2026-05-07 08:58", + "last_sync_at": "2026-05-07 08:40", + "role_codes": ["auditor"], + }, + { + "employee_no": "E12688", + "name": "方逸晨", + "gender": "男", + "birth_date": "1995-01-20", + "phone": "13526881142", + "email": "yichen.fang@xfinance.com", + "join_date": "2023-08-14", + "location": "南京", + "position": "采购合规分析师", + "grade": "P4", + "organization_unit_code": "OPS-ADMIN", + "manager_employee_no": "E12653", + "finance_owner_name": "行政财务BP", + "cost_center": "CC-7208", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-03 14:16", + "last_sync_at": "2026-05-03 14:16", + "role_codes": ["user", "finance"], + }, + { + "employee_no": "E12067", + "name": "秦墨然", + "gender": "男", + "birth_date": "1990-10-10", + "phone": "13820674519", + "email": "moran.qin@xfinance.com", + "join_date": "2020-07-20", + "location": "上海", + "position": "华东销售总监", + "grade": "M2", + "organization_unit_code": "SALES-EAST", + "manager_employee_no": "E10018", + "finance_owner_name": "华东财务组", + "cost_center": "CC-4108", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-06 12:40", + "last_sync_at": "2026-05-06 12:40", + "role_codes": ["user", "approver"], + }, + { + "employee_no": "E12703", + "name": "宋知夏", + "gender": "女", + "birth_date": "1994-07-07", + "phone": "13727031129", + "email": "zhixia.song@xfinance.com", + "join_date": "2022-12-12", + "location": "上海", + "position": "重点客户经理", + "grade": "P5", + "organization_unit_code": "SALES-EAST", + "manager_employee_no": "E12067", + "finance_owner_name": "华东财务组", + "cost_center": "CC-4111", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-04 10:58", + "last_sync_at": "2026-05-04 10:58", + "role_codes": ["user"], + }, + 
{ + "employee_no": "E12716", + "name": "杜嘉宁", + "gender": "男", + "birth_date": "1999-11-16", + "phone": "13627161248", + "email": "jianing.du@xfinance.com", + "join_date": "2026-01-19", + "location": "上海", + "position": "销售代表", + "grade": "P3", + "organization_unit_code": "SALES-EAST", + "manager_employee_no": "E12067", + "finance_owner_name": "华东财务组", + "cost_center": "CC-4114", + "employment_status": "试用中", + "sync_state": "待生效", + "spotlight": False, + "updated_at": "2026-05-05 12:26", + "last_sync_at": "2026-05-05 12:26", + "role_codes": ["user"], + }, + { + "employee_no": "E12722", + "name": "邵宁远", + "gender": "男", + "birth_date": "1998-12-01", + "phone": "13527221506", + "email": "ningyuan.shao@xfinance.com", + "join_date": "2026-02-08", + "location": "北京", + "position": "数据分析师", + "grade": "P4", + "organization_unit_code": "RND-CENTER", + "manager_employee_no": "E11745", + "finance_owner_name": "研发财务BP", + "cost_center": "CC-6122", + "employment_status": "试用中", + "sync_state": "同步中", + "spotlight": False, + "updated_at": "2026-05-07 09:06", + "last_sync_at": "2026-05-07 08:55", + "role_codes": ["user"], + }, + { + "employee_no": "E12739", + "name": "林可昕", + "gender": "女", + "birth_date": "1996-10-23", + "phone": "13827394510", + "email": "kexin.lin@xfinance.com", + "join_date": "2023-04-17", + "location": "上海", + "position": "费用核算专员", + "grade": "P4", + "organization_unit_code": "FIN-SSC", + "manager_employee_no": "E10234", + "finance_owner_name": "华东财务组", + "cost_center": "CC-2118", + "employment_status": "停用", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-04-30 18:05", + "last_sync_at": "2026-04-30 18:05", + "role_codes": ["finance"], + }, + { + "employee_no": "E12744", + "name": "赵予安", + "gender": "男", + "birth_date": "1993-01-30", + "phone": "13727442139", + "email": "yuan.zhao@xfinance.com", + "join_date": "2021-10-11", + "location": "上海", + "position": "预算控制经理", + "grade": "M1", + "organization_unit_code": "FIN-SSC", + 
"manager_employee_no": "E10234", + "finance_owner_name": "集团财务", + "cost_center": "CC-2120", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-06 15:34", + "last_sync_at": "2026-05-06 15:34", + "role_codes": ["finance", "approver"], + }, + { + "employee_no": "E12750", + "name": "谢知行", + "gender": "男", + "birth_date": "1995-09-14", + "phone": "13627501386", + "email": "zhixing.xie@xfinance.com", + "join_date": "2022-07-25", + "location": "深圳", + "position": "渠道销售经理", + "grade": "P5", + "organization_unit_code": "SALES-SOUTH", + "manager_employee_no": "E11602", + "finance_owner_name": "华南财务组", + "cost_center": "CC-4116", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-04 09:48", + "last_sync_at": "2026-05-04 09:48", + "role_codes": ["user"], + }, + { + "employee_no": "E12758", + "name": "顾南枝", + "gender": "女", + "birth_date": "1994-04-12", + "phone": "13827584522", + "email": "nanzhi.gu@xfinance.com", + "join_date": "2022-05-09", + "location": "北京", + "position": "内容运营经理", + "grade": "P5", + "organization_unit_code": "MKT-BRAND", + "manager_employee_no": "E11026", + "finance_owner_name": "市场财务BP", + "cost_center": "CC-5211", + "employment_status": "在职", + "sync_state": "同步中", + "spotlight": False, + "updated_at": "2026-05-07 11:08", + "last_sync_at": "2026-05-07 10:50", + "role_codes": ["user"], + }, + { + "employee_no": "E12763", + "name": "孟书言", + "gender": "男", + "birth_date": "1992-02-09", + "phone": "13527633148", + "email": "shuyan.meng@xfinance.com", + "join_date": "2021-06-28", + "location": "北京", + "position": "架构工程师", + "grade": "P6", + "organization_unit_code": "RND-CENTER", + "manager_employee_no": "E11745", + "finance_owner_name": "研发财务BP", + "cost_center": "CC-6125", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-06 19:05", + "last_sync_at": "2026-05-06 19:05", + "role_codes": ["user"], + }, + { + 
"employee_no": "E12771", + "name": "孔令谦", + "gender": "男", + "birth_date": "1993-07-18", + "phone": "13627711572", + "email": "lingqian.kong@xfinance.com", + "join_date": "2021-09-13", + "location": "南京", + "position": "供应商管理专员", + "grade": "P4", + "organization_unit_code": "OPS-ADMIN", + "manager_employee_no": "E12653", + "finance_owner_name": "行政财务BP", + "cost_center": "CC-7210", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-02 17:22", + "last_sync_at": "2026-05-02 17:22", + "role_codes": ["user"], + }, + { + "employee_no": "E12782", + "name": "乔语岚", + "gender": "女", + "birth_date": "1996-05-06", + "phone": "13727823045", + "email": "yulan.qiao@xfinance.com", + "join_date": "2023-03-06", + "location": "上海", + "position": "风控策略分析师", + "grade": "P4", + "organization_unit_code": "AUDIT-RISK", + "manager_employee_no": "E12661", + "finance_owner_name": "集团财务", + "cost_center": "CC-8108", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-03 13:18", + "last_sync_at": "2026-05-03 13:18", + "role_codes": ["auditor"], + }, + { + "employee_no": "E12790", + "name": "邹闻韬", + "gender": "男", + "birth_date": "1991-03-11", + "phone": "13827903167", + "email": "wentao.zou@xfinance.com", + "join_date": "2020-10-26", + "location": "上海", + "position": "合规产品负责人", + "grade": "P7", + "organization_unit_code": "RND-CENTER", + "manager_employee_no": "E11745", + "finance_owner_name": "研发财务BP", + "cost_center": "CC-6128", + "employment_status": "在职", + "sync_state": "已同步", + "spotlight": False, + "updated_at": "2026-05-06 08:56", + "last_sync_at": "2026-05-06 08:56", + "role_codes": ["user", "auditor"], + }, +] diff --git a/server/src/app/services/employee_seed_roles.py b/server/src/app/services/employee_seed_roles.py new file mode 100644 index 0000000..80765a4 --- /dev/null +++ b/server/src/app/services/employee_seed_roles.py @@ -0,0 +1,52 @@ +from __future__ import annotations + 
+ROLE_DISPLAY_ORDER = { + "manager": 1, + "finance": 2, + "approver": 3, + "executive": 4, + "auditor": 5, + "user": 6, +} + +ROLE_DEFINITIONS = [ + { + "role_code": "user", + "name": "使用者", + "description": "可以发起报销、查看个人单据和使用 AI 助手。", + }, + { + "role_code": "finance", + "name": "财务人员", + "description": "可以处理复核、查看财务知识与风险校验结果。", + }, + { + "role_code": "manager", + "name": "管理员", + "description": "可以维护员工档案、组织结构和角色权限。", + }, + { + "role_code": "executive", + "name": "高级管理人员", + "description": "可以查看跨部门数据看板与关键审批结果。", + }, + { + "role_code": "approver", + "name": "审批负责人", + "description": "可以处理审批中心中的待审单据。", + }, + { + "role_code": "auditor", + "name": "审计观察员", + "description": "可以查看变更记录和权限调整历史。", + }, +] + +ROLE_PERMISSION_MAP = { + "user": ["可发起差旅申请与报销", "可查看个人单据与票据识别结果"], + "finance": ["可处理财务复核任务", "可查看风险校验与财务知识库"], + "manager": ["可维护员工档案与组织结构", "可配置系统角色与访问边界"], + "executive": ["可查看跨部门经营看板", "可处理高金额报销最终审批"], + "approver": ["可处理本部门待审单据", "可查看审批链路与 SLA 状态"], + "auditor": ["可查看权限变更与审计留痕", "可导出员工权限观察记录"], +} diff --git a/server/src/app/services/employee_serialization.py b/server/src/app/services/employee_serialization.py new file mode 100644 index 0000000..b6be6a1 --- /dev/null +++ b/server/src/app/services/employee_serialization.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +from collections.abc import Callable +from datetime import date, datetime + +from app.models.employee import Employee +from app.models.employee_change_log import EmployeeChangeLog +from app.models.role import Role +from app.schemas.employee import ( + EmployeeHistoryRead, + EmployeeOrganizationRead, + EmployeeRead, +) + + +def serialize_employee( + employee: Employee, + *, + sorted_roles: list[Role], + sorted_change_logs: list[EmployeeChangeLog], + format_date: Callable[[date | None], str | None], + format_datetime: Callable[[datetime | None], str | None], + format_history_datetime: Callable[[datetime | None], str], + role_permission_map: dict[str, list[str]], + status_tone_map: 
dict[str, str], + max_change_logs: int, +) -> EmployeeRead: + organization = employee.organization_unit + role_labels = [role.name for role in sorted_roles] + role_codes = [role.role_code for role in sorted_roles] + + history = [ + EmployeeHistoryRead( + action=item.action, + owner=item.owner, + time=format_history_datetime(item.occurred_at), + occurredAt=format_history_datetime(item.occurred_at), + ) + for item in sorted_change_logs[:max_change_logs] + ] + + return EmployeeRead( + id=employee.id, + avatar=(employee.name or "?")[:1], + name=employee.name, + employeeNo=employee.employee_no, + department=organization.name if organization else "", + position=employee.position, + grade=employee.grade, + manager=employee.manager.name if employee.manager else "CEO", + managerEmployeeNo=employee.manager.employee_no if employee.manager else None, + financeOwner=employee.finance_owner_name or "", + roles=role_labels, + roleCodes=role_codes, + status=employee.employment_status, + statusTone=status_tone_map.get(employee.employment_status, "neutral"), + gender=employee.gender, + age=calculate_age(employee.birth_date), + birthDate=format_date(employee.birth_date), + email=employee.email, + phone=employee.phone, + joinDate=format_date(employee.join_date), + location=employee.location, + costCenter=employee.cost_center, + updatedAt=format_datetime(employee.updated_at or employee.created_at), + lastSync=format_datetime(employee.last_sync_at), + syncState=employee.sync_state, + spotlight=employee.spotlight, + permissions=collect_permissions(role_codes, role_permission_map), + history=history, + organization=( + EmployeeOrganizationRead( + id=organization.id, + code=organization.unit_code, + name=organization.name, + unitType=organization.unit_type, + costCenter=organization.cost_center, + location=organization.location, + managerName=organization.manager_name, + ) + if organization + else None + ), + ) + + +def collect_permissions( + role_codes: list[str], + role_permission_map: 
dict[str, list[str]], +) -> list[str]: + permissions: list[str] = [] + seen: set[str] = set() + + for role_code in role_codes: + for permission in role_permission_map.get(role_code, []): + if permission in seen: + continue + permissions.append(permission) + seen.add(permission) + + return permissions + + +def format_history_datetime( + value: datetime | None, + *, + to_display_datetime: Callable[[datetime], datetime], +) -> str: + if value is None: + return "" + local = to_display_datetime(value) + return ( + f"{local.year}年{local.month}月{local.day}日" + f"{local.hour}时{local.minute}分" + ) + + +def calculate_age(birth_date: date | None) -> int | None: + if birth_date is None: + return None + + today = date.today() + age = today.year - birth_date.year + if (today.month, today.day) < (birth_date.month, birth_date.day): + age -= 1 + return age diff --git a/server/src/app/services/expense_claim_access_policy.py b/server/src/app/services/expense_claim_access_policy.py new file mode 100644 index 0000000..5f7b113 --- /dev/null +++ b/server/src/app/services/expense_claim_access_policy.py @@ -0,0 +1,401 @@ +from __future__ import annotations + +import re +from typing import Any + +from sqlalchemy import and_, func, or_, select +from sqlalchemy.orm import Session, selectinload + +from app.api.deps import CurrentUserContext +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim +from app.models.organization import OrganizationUnit + + +PRIVILEGED_CLAIM_ROLE_CODES = {"finance", "executive"} +APPROVAL_VISIBLE_CLAIM_ROLE_CODES = {"manager", "approver"} +CLAIM_DELETE_ROLE_CODES = {"executive"} + + +class ExpenseClaimAccessPolicy: + def __init__(self, db: Session) -> None: + self.db = db + + @staticmethod + def has_privileged_claim_access(current_user: CurrentUserContext) -> bool: + if current_user.is_admin: + return True + return bool(ExpenseClaimAccessPolicy.normalize_role_codes(current_user) & PRIVILEGED_CLAIM_ROLE_CODES) + + 
@staticmethod + def has_claim_delete_access(current_user: CurrentUserContext) -> bool: + if current_user.is_admin: + return True + return bool(ExpenseClaimAccessPolicy.normalize_role_codes(current_user) & CLAIM_DELETE_ROLE_CODES) + + def can_return_claim(self, current_user: CurrentUserContext, claim: ExpenseClaim) -> bool: + if self.has_privileged_claim_access(current_user): + return True + + role_codes = self.normalize_role_codes(current_user) + if not (role_codes & APPROVAL_VISIBLE_CLAIM_ROLE_CODES): + return False + if str(claim.status or "").strip().lower() != "submitted": + return False + if str(claim.approval_stage or "").strip() != "直属领导审批": + return False + + current_employee = self.resolve_current_employee(current_user) + if current_employee is not None and str(claim.employee_id or "").strip() == current_employee.id: + return False + + claim_employee = claim.employee + if current_employee is not None and claim_employee is not None: + if claim_employee.manager_id == current_employee.id: + return True + if claim_employee.manager is not None and claim_employee.manager.id == current_employee.id: + return True + + approver_name = str( + current_employee.name if current_employee is not None and current_employee.name else current_user.name or "" + ).strip() + if not approver_name: + return False + + return self.resolve_claim_manager_name(claim) == approver_name + + def can_approve_claim(self, current_user: CurrentUserContext, claim: ExpenseClaim) -> bool: + stage = str(claim.approval_stage or "").strip() + if stage == "直属领导审批": + return self.is_current_direct_manager_approver(current_user, claim) + if stage == "财务审批": + role_codes = self.normalize_role_codes(current_user) + return current_user.is_admin or "finance" in role_codes + return False + + def is_current_direct_manager_approver(self, current_user: CurrentUserContext, claim: ExpenseClaim) -> bool: + role_codes = self.normalize_role_codes(current_user) + if not (role_codes & 
APPROVAL_VISIBLE_CLAIM_ROLE_CODES): + return False + if str(claim.status or "").strip().lower() != "submitted": + return False + if str(claim.approval_stage or "").strip() != "直属领导审批": + return False + + current_employee = self.resolve_current_employee(current_user) + if current_employee is not None and str(claim.employee_id or "").strip() == current_employee.id: + return False + + claim_employee = claim.employee + if current_employee is not None and claim_employee is not None: + if claim_employee.manager_id == current_employee.id: + return True + if claim_employee.manager is not None and claim_employee.manager.id == current_employee.id: + return True + + approver_name = str( + current_employee.name if current_employee is not None and current_employee.name else current_user.name or "" + ).strip() + if not approver_name: + return False + + return self.resolve_claim_manager_name(claim) == approver_name + + @staticmethod + def normalize_role_codes(current_user: CurrentUserContext) -> set[str]: + return { + str(item).strip().lower() + for item in current_user.role_codes + if str(item).strip() + } + + def resolve_current_employee(self, current_user: CurrentUserContext) -> Employee | None: + return self.resolve_employee_by_identity_candidates( + [ + str(current_user.username or "").strip(), + str(current_user.name or "").strip(), + ] + ) + + def resolve_current_user_display_name(self, current_user: CurrentUserContext) -> str: + current_employee = self.resolve_current_employee(current_user) + if current_employee is not None and str(current_employee.name or "").strip(): + return str(current_employee.name).strip() + + for candidate in (current_user.name, current_user.username): + normalized = str(candidate or "").strip() + if normalized and not self.is_email_like(normalized): + return normalized + + return str(current_user.username or current_user.name or "anonymous").strip() or "anonymous" + + def is_claim_owned_by_current_user(self, claim: ExpenseClaim, current_user: 
CurrentUserContext) -> bool: + current_employee = self.resolve_current_employee(current_user) + if current_employee is not None: + if str(claim.employee_id or "").strip() == current_employee.id: + return True + identity_values = { + str(current_employee.name or "").strip(), + str(current_employee.email or "").strip(), + str(current_employee.employee_no or "").strip(), + } + else: + identity_values = set() + + identity_values.update( + { + str(current_user.username or "").strip(), + str(current_user.name or "").strip(), + } + ) + identity_values.discard("") + return str(claim.employee_name or "").strip() in identity_values + + @staticmethod + def is_email_like(value: str) -> bool: + return bool(re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", str(value or "").strip())) + + def resolve_claim_employee_for_backfill(self, claim: ExpenseClaim) -> Employee | None: + if claim.employee is not None: + employee = self.db.scalar( + select(Employee) + .options( + selectinload(Employee.organization_unit), + selectinload(Employee.manager), + selectinload(Employee.roles), + ) + .where(Employee.id == claim.employee.id) + .limit(1) + ) + return employee or claim.employee + + employee_id = str(claim.employee_id or "").strip() + if employee_id: + employee = self.db.scalar( + select(Employee) + .options( + selectinload(Employee.organization_unit), + selectinload(Employee.manager), + selectinload(Employee.roles), + ) + .where(Employee.id == employee_id) + .limit(1) + ) + if employee is not None: + return employee + + return self.resolve_employee_by_identity_candidates([str(claim.employee_name or "").strip()]) + + def resolve_employee_by_identity_candidates(self, candidates: list[str]) -> Employee | None: + normalized_candidates = [ + item + for item in dict.fromkeys(str(candidate or "").strip() for candidate in candidates) + if item + ] + if not normalized_candidates: + return None + + load_options = ( + selectinload(Employee.organization_unit), + selectinload(Employee.manager), + 
selectinload(Employee.roles), + ) + + for candidate in normalized_candidates: + employee = self.db.scalar( + select(Employee) + .options(*load_options) + .where( + or_( + func.lower(Employee.email) == candidate.lower(), + func.lower(Employee.employee_no) == candidate.lower(), + ) + ) + .limit(1) + ) + if employee is not None: + return employee + + for candidate in normalized_candidates: + matches = list( + self.db.scalars( + select(Employee) + .options(*load_options) + .where(Employee.name == candidate) + .limit(2) + ).all() + ) + if len(matches) == 1: + return matches[0] + + return None + + def backfill_claim_identity_from_current_user( + self, + claim: ExpenseClaim, + current_user: CurrentUserContext, + ) -> None: + employee = self.resolve_claim_employee_for_backfill(claim) or self.resolve_current_employee(current_user) + + if employee is not None: + claim_employee_id = str(claim.employee_id or "").strip() + claim_employee_name = str(claim.employee_name or "").strip() + employee_names = { + str(employee.name or "").strip(), + str(employee.email or "").strip(), + str(employee.employee_no or "").strip(), + } + employee_names.discard("") + + can_apply_employee = ( + not claim_employee_id + or claim_employee_id == employee.id + or self.is_missing_value(claim_employee_name) + or claim_employee_name in employee_names + ) + + if can_apply_employee: + claim.employee = employee + claim.employee_id = employee.id + if employee.name: + claim.employee_name = employee.name + if employee.organization_unit is not None: + claim.department_id = employee.organization_unit_id + claim.department_name = employee.organization_unit.name + return + + context_department = str( + getattr(current_user, "department_name", "") + or getattr(current_user, "department", "") + or getattr(current_user, "departmentName", "") + or "" + ).strip() + if context_department and self.is_missing_value(claim.department_name): + claim.department_name = context_department + + context_name = 
str(current_user.name or current_user.username or "").strip() + if context_name and self.is_missing_value(claim.employee_name): + claim.employee_name = context_name + + def employee_name_is_unique(self, employee: Employee) -> bool: + normalized_name = str(employee.name or "").strip() + if not normalized_name: + return False + + same_name_count = int( + self.db.scalar( + select(func.count()).select_from(Employee).where(Employee.name == normalized_name) + ) + or 0 + ) + return same_name_count == 1 + + def build_personal_claim_conditions(self, current_user: CurrentUserContext) -> list[Any]: + conditions = [] + username = str(current_user.username or "").strip() + employee = self.resolve_current_employee(current_user) + + def add_condition(field_name: str, value: str | None) -> None: + normalized = str(value or "").strip() + if not normalized: + return + if field_name == "employee_id": + conditions.append(ExpenseClaim.employee_id == normalized) + return + conditions.append(ExpenseClaim.employee_name == normalized) + + if employee is not None: + add_condition("employee_id", employee.id) + add_condition("employee_name", employee.email) + if self.employee_name_is_unique(employee): + add_condition("employee_name", employee.name) + else: + add_condition("employee_id", username) + add_condition("employee_name", username) + + return conditions + + def build_approval_claim_conditions(self, current_user: CurrentUserContext) -> list[Any]: + role_codes = self.normalize_role_codes(current_user) + if not (role_codes & APPROVAL_VISIBLE_CLAIM_ROLE_CODES): + return [] + + employee = self.resolve_current_employee(current_user) + manager_name = str( + employee.name if employee is not None and employee.name else current_user.name or "" + ).strip() + pending_leader_approval_parts = [ + ExpenseClaim.status == "submitted", + ExpenseClaim.approval_stage == "直属领导审批", + ] + if employee is not None: + pending_leader_approval_parts.append( + or_(ExpenseClaim.employee_id.is_(None), 
ExpenseClaim.employee_id != employee.id) + ) + if manager_name: + pending_leader_approval_parts.append(ExpenseClaim.employee_name != manager_name) + + pending_leader_approval = and_(*pending_leader_approval_parts) + conditions = [] + + if employee is not None: + subordinate_ids = select(Employee.id).where(Employee.manager_id == employee.id) + conditions.append(and_(pending_leader_approval, ExpenseClaim.employee_id.in_(subordinate_ids))) + + if manager_name: + managed_department_ids = select(OrganizationUnit.id).where(OrganizationUnit.manager_name == manager_name) + managed_department_names = select(OrganizationUnit.name).where(OrganizationUnit.manager_name == manager_name) + conditions.append(and_(pending_leader_approval, ExpenseClaim.department_id.in_(managed_department_ids))) + conditions.append(and_(pending_leader_approval, ExpenseClaim.department_name.in_(managed_department_names))) + + return conditions + + def apply_approval_claim_scope(self, stmt: Any, current_user: CurrentUserContext) -> Any: + role_codes = self.normalize_role_codes(current_user) + if current_user.is_admin or "executive" in role_codes: + return stmt.where(ExpenseClaim.status == "submitted") + if "finance" in role_codes: + return stmt.where( + ExpenseClaim.status == "submitted", + ExpenseClaim.approval_stage == "财务审批", + ) + + conditions = self.build_approval_claim_conditions(current_user) + if not conditions: + return stmt.where(ExpenseClaim.id == "__no_visible_claim__") + + return stmt.where(or_(*conditions)) + + def apply_claim_scope( + self, + stmt: Any, + current_user: CurrentUserContext, + *, + include_approval_scope: bool = False, + ) -> Any: + if self.has_privileged_claim_access(current_user): + return stmt + + conditions = self.build_personal_claim_conditions(current_user) + + if not conditions: + return stmt.where(ExpenseClaim.id == "__no_visible_claim__") + + if include_approval_scope: + conditions.extend(self.build_approval_claim_conditions(current_user)) + + return 
stmt.where(or_(*conditions)) + + @staticmethod + def resolve_claim_manager_name(claim: ExpenseClaim) -> str: + if claim.employee is not None: + if claim.employee.manager is not None and claim.employee.manager.name: + return str(claim.employee.manager.name).strip() + if claim.employee.organization_unit is not None and claim.employee.organization_unit.manager_name: + return str(claim.employee.organization_unit.manager_name).strip() + return "" + + @staticmethod + def is_missing_value(value: Any) -> bool: + normalized = str(value or "").strip() + return not normalized or normalized in {"待补充", "待确认", "N/A", "n/a", "无"} diff --git a/server/src/app/services/expense_claim_attachment_analysis.py b/server/src/app/services/expense_claim_attachment_analysis.py new file mode 100644 index 0000000..282f17f --- /dev/null +++ b/server/src/app/services/expense_claim_attachment_analysis.py @@ -0,0 +1,668 @@ +from __future__ import annotations + +import json +import re +import shutil +import uuid +from collections import defaultdict +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal, InvalidOperation +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import func, or_, select +from sqlalchemy import inspect as sqlalchemy_inspect +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session, selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.ontology import OntologyEntity, OntologyParseResult +from app.schemas.reimbursement import ( + ExpenseClaimItemCreate, + ExpenseClaimItemUpdate, + ExpenseClaimUpdate, + TravelReimbursementCalculatorRequest, +) +from app.services.agent_asset_rule_library import 
AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.agent_foundation import AgentFoundationService +from app.services.audit import AuditLogService +from app.services.document_intelligence import build_document_insight +from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + 
TRAVEL_POLICY_HOTEL_LIMITS, + TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin +from app.services.expense_amounts import ( + extract_amount_candidates, + format_decimal_amount, + is_amount_match_date_fragment, + is_date_like_amount_candidate, + is_probable_year_amount, + parse_document_amount_value, + parse_plain_document_amount_value, + resolve_document_field_amount, + resolve_document_item_amount, + resolve_document_text_amount, +) +from app.services.expense_rule_runtime import ( + DEFAULT_SCENE_RULE_ASSET_CODE, + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, + resolve_document_type_label, +) +from app.services.ocr import OcrService + + +class ExpenseClaimAttachmentAnalysisMixin: + def _build_attachment_expense_audit_points( + self, + *, + document: Any, + item: ExpenseClaimItem, + document_info: dict[str, Any], + ) -> list[str]: + text = " ".join( + [ + str(getattr(document, "summary", "") or "").strip(), + str(getattr(document, "text", "") or "").strip(), + ] + ).strip() + document_payload = { + "document_fields": document_info.get("fields") or [], + "summary": str(getattr(document, "summary", "") or ""), + "text": str(getattr(document, "text", "") or ""), + } + field_amount = self._resolve_document_field_amount(document_payload) + audited_amount = self._resolve_document_item_amount(document_payload) + item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) + + points: list[str] = [] + if ( + field_amount is not None + and audited_amount is not None + and self._is_date_like_amount_candidate(field_amount, text) + and abs(field_amount - audited_amount) > Decimal("1.00") + ): + points.append( + "费用核算:OCR 金额疑似误取日期" + f" 
{self._format_decimal_amount(field_amount)}," + f"已按票据文本中的总费用 {self._format_decimal_amount(audited_amount)} 元回填," + "请核对酒店或票据原文总额。" + ) + + if ( + audited_amount is not None + and item_amount > Decimal("0.00") + and abs(audited_amount - item_amount) > Decimal("1.00") + ): + points.append( + f"费用核算:票据文本复核金额为 {self._format_decimal_amount(audited_amount)} 元," + f"当前明细金额为 {self._format_decimal_amount(item_amount)} 元,请确认是否需要调整。" + ) + + return points + + def _build_attachment_travel_policy_audit( + self, + *, + document: Any, + item: ExpenseClaimItem, + document_info: dict[str, Any], + claim: ExpenseClaim | None = None, + ) -> dict[str, Any]: + policy = self._get_expense_rule_catalog().travel_policy + if policy is None: + return {"points": [], "rule_basis": [], "has_high_risk": False} + + item_type = str(item.item_type or "").strip().lower() + document_type = str(document_info.get("document_type") or "").strip().lower() + scene_code = str(document_info.get("scene_code") or "").strip().lower() + if not ( + item_type in {"hotel", "hotel_ticket"} + or document_type == "hotel_invoice" + or scene_code == "hotel" + ): + return {"points": [], "rule_basis": [], "has_high_risk": False} + + item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) + if item_amount <= Decimal("0.00"): + return {"points": [], "rule_basis": [], "has_high_risk": False} + + claim = claim or getattr(item, "claim", None) + grade_band = self._resolve_travel_policy_band(getattr(claim, "employee_grade", None)) + rule_name = str(policy.standard_rule_name or policy.rule_name or "公司差旅费报销规则").strip() + rule_version = str(policy.standard_rule_version or policy.rule_version or "").strip() + version_text = f"({rule_version})" if rule_version else "" + rule_basis = [ + f"依据《{rule_name}》{version_text},住宿费按员工职级、出差城市和每晚金额进行差标核算。" + ] + if grade_band is None: + return { + "points": ["住宿标准:当前员工职级缺失,无法匹配规则中心的住宿报销标准。"], + "rule_basis": rule_basis, + "has_high_risk": False, + } + + text = " 
".join( + [ + str(getattr(document, "summary", "") or "").strip(), + str(getattr(document, "text", "") or "").strip(), + ] + ).strip() + context = { + "item": item, + "document_info": document_info, + "ocr_summary": str(getattr(document, "summary", "") or "").strip(), + "ocr_text": str(getattr(document, "text", "") or "").strip(), + } + hotel_city = self._extract_hotel_city(context, policy) + claim_city = self._extract_city_from_text(str(getattr(claim, "location", "") or ""), policy) if claim else "" + reason_city = self._extract_city_from_text(str(getattr(claim, "reason", "") or ""), policy) if claim else "" + baseline_city = hotel_city or claim_city or reason_city + if not baseline_city: + baseline_city = self._extract_city_from_text(text, policy) + if not baseline_city: + return { + "points": ["住宿标准:未能从酒店名称、出差地点或票据内容匹配到规则中心城市,无法核算住宿差标。"], + "rule_basis": rule_basis, + "has_high_risk": False, + } + + standard = self._resolve_travel_policy_hotel_standard( + policy=policy, + grade_band=grade_band, + city=baseline_city, + ) + if standard is None: + return {"points": [], "rule_basis": rule_basis, "has_high_risk": False} + + cap, standard_label = standard + night_count = self._extract_hotel_night_count(context) + nightly_amount = (item_amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01")) + if nightly_amount <= cap: + return {"points": [], "rule_basis": rule_basis, "has_high_risk": False} + + band_label = policy.band_labels.get(grade_band, str(getattr(claim, "employee_grade", "") or "当前职级").strip()) + over_amount = (nightly_amount - cap).quantize(Decimal("0.01")) + return { + "points": [ + ( + f"住宿标准:{band_label}在{standard_label}的住宿标准为 " + f"{self._format_decimal_amount(cap)} 元/晚,票据识别金额 " + f"{self._format_decimal_amount(item_amount)} 元 / {night_count} 晚," + f"约 {self._format_decimal_amount(nightly_amount)} 元/晚," + f"超出 {self._format_decimal_amount(over_amount)} 元/晚。" + ) + ], + "rule_basis": rule_basis, + "has_high_risk": True, + } + + def 
def _build_attachment_requirement_check(
    self,
    *,
    item: ExpenseClaimItem,
    document_info: dict[str, Any],
) -> dict[str, Any]:
    """Decide whether the recognised document fits the item's expense scene.

    The scene policy for the item's expense type lists the allowed scene codes
    and document types. The attachment matches when the policy imposes no
    restriction, or when either the recognised scene code or the recognised
    document type is allowed.

    Args:
        item: Expense item; only ``item_type`` is read.
        document_info: Normalised document metadata (``scene_code``,
            ``scene_label``, ``document_type``, ``document_type_label``).

    Returns:
        A dict describing the match result, the expected and recognised
        labels, the rule identity and a user-facing ``message``.
    """
    expense_type = str(item.item_type or "").strip().lower() or "other"
    policy = self._get_expense_scene_policy(expense_type)
    expense_label = policy.label if policy is not None else self._resolve_expense_type_label(expense_type)
    allowed_scenes = set(policy.allowed_scene_codes) if policy is not None else set()
    allowed_document_types = set(policy.allowed_document_types) if policy is not None else set()
    allowed_scene_labels = [self._resolve_document_scene_label(code) for code in sorted(allowed_scenes)]
    allowed_document_type_labels = [
        resolve_document_type_label(document_type)
        for document_type in sorted(allowed_document_types)
    ]
    recognized_scene_code = str(document_info.get("scene_code") or "other").strip() or "other"
    recognized_scene_label = str(
        document_info.get("scene_label") or self._resolve_document_scene_label(recognized_scene_code)
    ).strip()
    recognized_document_type = str(document_info.get("document_type") or "other").strip() or "other"
    recognized_document_type_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"

    # Compare case-insensitively: the sibling analysers lower-case scene codes
    # and document types before using them, so a casing difference must not be
    # reported as a scene mismatch. The returned codes keep their original form.
    normalized_allowed_scenes = {str(code).strip().lower() for code in allowed_scenes}
    normalized_allowed_document_types = {
        str(document_type).strip().lower() for document_type in allowed_document_types
    }
    matches = (
        (not allowed_scenes and not allowed_document_types)
        or recognized_scene_code.lower() in normalized_allowed_scenes
        or recognized_document_type.lower() in normalized_allowed_document_types
    )

    if matches:
        if allowed_scene_labels or allowed_document_type_labels:
            message = (
                f"当前费用项目为{expense_label},已识别为{recognized_document_type_label},"
                f"符合当前{expense_label}场景的附件要求。"
            )
        else:
            message = f"当前费用项目为{expense_label},已识别为{recognized_document_type_label}。"
    else:
        expected_parts = [label + "相关票据" for label in allowed_scene_labels]
        expected_parts.extend(allowed_document_type_labels)
        # dict.fromkeys de-duplicates while keeping first-seen order.
        expected_text = "、".join(dict.fromkeys(part for part in expected_parts if part)) or "对应场景票据"
        message = (
            f"当前费用项目为{expense_label},要求上传{expected_text};"
            f"当前识别为{recognized_document_type_label},不符合当前场景,建议过滤或更换附件。"
        )

    return {
        "matches": matches,
        "current_expense_type": expense_type,
        "current_expense_type_label": expense_label,
        "allowed_scene_labels": allowed_scene_labels,
        "allowed_document_type_labels": allowed_document_type_labels,
        "recognized_scene_code": recognized_scene_code,
        "recognized_scene_label": recognized_scene_label,
        "recognized_document_type": recognized_document_type,
        "recognized_document_type_label": recognized_document_type_label,
        "mismatch_severity": policy.attachment_mismatch_severity if policy is not None else "high",
        "rule_code": policy.rule_code if policy is not None else DEFAULT_SCENE_RULE_ASSET_CODE,
        "rule_name": policy.rule_name if policy is not None else "报销场景提交与附件标准",
        "message": message,
    }
def _format_scene_labels(self, scene_codes: set[str]) -> str:
    """Render scene codes as a "、"-joined list of unique expense labels.

    Codes are visited in sorted order so the resulting text is stable between
    runs; iterating the set directly made the label order depend on hash
    ordering. Empty labels are dropped and duplicates collapse to their first
    occurrence.

    Args:
        scene_codes: Scene/expense-type codes detected for a document.

    Returns:
        The joined labels, or "其他" when no label could be resolved.
    """
    labels = [self._resolve_expense_type_label(code) for code in sorted(scene_codes, key=str)]
    unique_labels = list(dict.fromkeys(label for label in labels if label))
    return "、".join(unique_labels) if unique_labels else "其他"
def _build_fallback_attachment_analysis(
    self,
    *,
    media_type: str | None,
    item: ExpenseClaimItem,
) -> dict[str, Any]:
    """Build the placeholder analysis used before any OCR result exists.

    Args:
        media_type: Media type reported by the uploader; passed as the
            fallback when resolving the displayed attachment format.
        item: Expense item the attachment belongs to; only ``item_amount``
            is read.

    Returns:
        A medium-severity analysis dict with ``severity``, ``label``,
        ``headline``, ``summary``, ``points`` and ``suggestion``.
    """
    # Normalise the amount the same way _build_attachment_analysis does so an
    # unset amount is shown as "0.00" rather than "None".
    item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01"))
    return {
        "severity": "medium",
        "label": "中风险",
        "headline": "AI提示:附件已上传,待识别结果",
        "summary": "附件已成功保存,但当前尚未拿到有效识别结果,建议人工先核对票据内容。",
        "points": [
            f"附件格式:{self._attachment_presentation.resolve_media_type('attachment', fallback=media_type)}",
            f"费用金额:当前明细金额为 {item_amount} 元",
        ],
        "suggestion": "建议打开附件确认金额、日期和票据类型是否完整,再继续提交审批。",
    }
def _build_attachment_analysis(
    self,
    *,
    document: Any,
    item: ExpenseClaimItem,
    claim: ExpenseClaim | None = None,
    document_info: dict[str, Any] | None = None,
    requirement_check: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Combine all attachment checks into one risk-graded analysis result.

    Collects findings ("points") from OCR warnings, text presence, ticket
    keywords, amount and date detection, the scene requirement check, the
    expense audit, the travel-policy audit, purpose mismatch, route format and
    OCR confidence, then grades them as pass / low / medium / high.

    Args:
        document: OCR result object; attributes are read with ``getattr`` so
            any of them may be missing.
        item: Expense item the attachment belongs to.
        claim: Owning claim, forwarded to the travel-policy audit.
        document_info: Pre-computed document metadata; built from
            ``document`` when falsy.
        requirement_check: Pre-computed requirement check; built when falsy.

    Returns:
        A dict with ``severity``, ``label``, ``headline``, ``summary``,
        ``points``, ``rule_basis`` and ``suggestion``.
    """
    warnings = [str(value).strip() for value in list(getattr(document, "warnings", []) or []) if str(value).strip()]
    text = " ".join(
        [
            str(getattr(document, "summary", "") or "").strip(),
            str(getattr(document, "text", "") or "").strip(),
        ]
    ).strip()
    # Only ASCII spaces are removed here; other whitespace stays in place.
    compact_text = text.replace(" ", "")
    avg_score = float(getattr(document, "avg_score", 0.0) or 0.0)
    line_count = int(getattr(document, "line_count", 0) or 0)
    document_info = document_info or self._build_attachment_document_info(document)
    requirement_check = requirement_check or self._build_attachment_requirement_check(
        item=item,
        document_info=document_info,
    )
    document_scenes = self._resolve_document_analysis_scenes(document_info, text)
    purpose_mismatch_point = self._build_purpose_mismatch_point(
        item=item,
        document_scenes=document_scenes,
    )
    route_format_point = self._build_route_format_point(
        item=item,
        document_info=document_info,
    )
    expense_audit_points = self._build_attachment_expense_audit_points(
        document=document,
        item=item,
        document_info=document_info,
    )
    travel_policy_audit = self._build_attachment_travel_policy_audit(
        document=document,
        item=item,
        claim=claim,
        document_info=document_info,
    )
    # Drop blank entries so later length comparisons count real findings only.
    travel_policy_points = [
        str(point).strip()
        for point in list(travel_policy_audit.get("points") or [])
        if str(point).strip()
    ]
    travel_policy_rule_basis = [
        str(point).strip()
        for point in list(travel_policy_audit.get("rule_basis") or [])
        if str(point).strip()
    ]
    travel_policy_high_risk = bool(travel_policy_audit.get("has_high_risk"))
    recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other"
    recognized_document_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"
    requirement_matches = bool(requirement_check.get("matches"))
    mismatch_severity = str(requirement_check.get("mismatch_severity") or "high").strip().lower() or "high"

    # Heuristic: does the text contain any invoice/receipt vocabulary at all?
    has_ticket_keyword = any(
        keyword in compact_text
        for keyword in (
            "发票",
            "票据",
            "增值税",
            "电子行程单",
            "购买方",
            "销售方",
            "税额",
            "价税",
            "票号",
            "发票代码",
            "凭证",
        )
    )
    amount_candidates = self._extract_amount_candidates(text)
    item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01"))
    # A candidate within 1.00 of the item amount counts as a match.
    has_matching_amount = any(abs(candidate - item_amount) <= Decimal("1.00") for candidate in amount_candidates)
    has_date_text = self._has_date_like_text(text)
    # A mismatch is only reported when the item has a positive amount.
    amount_mismatch = bool(amount_candidates) and item_amount > Decimal("0.00") and not has_matching_amount

    # Findings are appended in a fixed order; that order is what users see.
    points: list[str] = []
    if warnings:
        points.append(f"识别提示:{warnings[0]}")
    if line_count == 0 or not compact_text:
        points.append("附件内容:未识别到有效文字,当前附件更像普通图片或内容过于模糊。")
    if recognized_document_type == "other" and not has_ticket_keyword:
        points.append("票据类型:未识别到发票、票据、电子行程单等关键字,暂无法判断票据类型。")
    if not amount_candidates:
        points.append("金额字段:未识别到可用于核对的金额。")
    elif amount_mismatch:
        candidate_text = "、".join(str(candidate) for candidate in amount_candidates[:3])
        points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。")
    if not has_date_text:
        date_requirement = DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS.get(
            recognized_document_type,
            "开票日期或业务发生日期",
        )
        points.append(f"日期字段:未识别到{date_requirement}。")
    if not requirement_matches:
        points.append(f"附件类型要求:{requirement_check.get('message')}")
    points.extend(expense_audit_points)
    points.extend(travel_policy_points)
    if purpose_mismatch_point:
        points.append(purpose_mismatch_point)
    if route_format_point:
        points.append(route_format_point)
    # An avg_score of 0 is treated as "no score" and is not flagged.
    if avg_score and avg_score < 0.72:
        points.append(f"识别质量:OCR 置信度偏低({avg_score:.0%}),可能影响票据核验准确性。")

    issue_count = len(points)
    if issue_count == 0:
        # NOTE(review): when item_amount is 0.00 and candidates exist, no
        # amount finding is added above, yet the third point below still says
        # a close amount was recognised. Confirm whether that is intended.
        return {
            "severity": "pass",
            "label": "AI提示符合条件",
            "headline": "AI提示:附件符合基础校验条件",
            "summary": "已识别到票据类型和关键字段,且符合当前费用场景的附件要求。",
            "points": [
                f"票据类型:已识别为{recognized_document_label}。",
                f"附件类型要求:{requirement_check.get('message')}",
                f"金额字段:已识别到与当前明细接近的金额 {item_amount} 元。",
            ],
            "rule_basis": travel_policy_rule_basis,
            "suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。",
        }

    # Default grade; it survives only when exactly one finding exists and none
    # of the medium/high conditions below apply.
    severity = "low"
    label = "低风险"
    headline = "AI提示:附件存在轻微待核对项"
    summary = "当前附件已识别出部分票据要素,但仍建议人工继续复核。"

    # Grading ladder, first match wins: an over-cap hotel stay, then hard
    # failures, then anything that still needs correction.
    if travel_policy_high_risk:
        severity = "high"
        label = "高风险"
        headline = "AI提示:住宿金额超出报销标准"
        summary = "当前住宿票据金额超过规则中心差旅住宿标准,强行提交前需补充超标原因。"
    elif (
        line_count == 0
        or not compact_text
        or (recognized_document_type == "other" and not has_ticket_keyword and issue_count >= 2)
        or (not requirement_matches and mismatch_severity == "high")
        or (purpose_mismatch_point and amount_mismatch)
    ):
        severity = "high"
        label = "高风险"
        headline = "AI提示:附件不符合票据校验条件"
        summary = "当前附件存在明显异常,票据类型与当前费用场景不匹配,或无法作为有效报销材料。"
    elif (
        purpose_mismatch_point
        or route_format_point
        or expense_audit_points
        or travel_policy_points
        or amount_mismatch
        or issue_count >= 2
        or warnings
        or (avg_score and avg_score < 0.72)
        or (not requirement_matches and mismatch_severity in {"medium", "low"})
    ):
        severity = "medium"
        label = "中风险"
        headline = "AI提示:附件存在明显待整改项"
        summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。"
        # Use a more specific summary when every finding comes from a single
        # source (route format, expense audit, or travel-policy audit).
        if route_format_point and issue_count == 1:
            summary = "票据行程已识别,但费用明细说明未按“起始地-目的地”格式填写。"
        elif expense_audit_points and issue_count == len(expense_audit_points):
            summary = "OCR 金额已完成二次核算,请按票据原文总额复核。"
        elif travel_policy_points and issue_count == len(travel_policy_points):
            summary = "住宿票据已识别,但当前缺少职级或城市信息,无法完成差旅住宿标准核算。"

    suggestion = {
        "high": "建议过滤当前不匹配的票据,重新上传符合当前费用场景的清晰原件。",
        "medium": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。",
        "low": "建议人工再次核对金额和业务说明,确认后可继续流转。",
    }[severity]
    # The over-cap case overrides the generic high-severity suggestion.
    if travel_policy_high_risk:
        suggestion = "请核对住宿发票金额、晚数和出差城市;如确需超标,需在附加说明中补充超标说明并提交审批重点复核。"

    return {
        "severity": severity,
        "label": label,
        "headline": headline,
        "summary": summary,
        "points": points,
        # De-duplicate rule citations while keeping their order.
        "rule_basis": list(dict.fromkeys(travel_policy_rule_basis)),
        "suggestion": suggestion,
    }
app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + TRAVEL_POLICY_HOTEL_LIMITS, + TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from 
def _build_attachment_payload(self, item: ExpenseClaimItem) -> dict[str, Any]:
    """Assemble the client-facing description of an item's attachment.

    Reads the stored attachment and its sidecar metadata, lets the PDF
    text-layer repair hook adjust that metadata, and falls back to values
    derived from the file itself wherever the metadata is silent.

    Args:
        item: Expense item whose attachment is described.

    Returns:
        A dict with file identity, size, upload time, preview information and
        the stored ``analysis`` / ``document_info`` / ``requirement_check``
        sections (each ``None`` unless stored as a dict).
    """
    file_path, media_type, filename = self._resolve_item_attachment_content(item)
    metadata = self._repair_pdf_text_layer_metadata_if_needed(
        file_path=file_path,
        metadata=self._attachment_storage.read_meta(file_path),
        item=item,
    )
    presentation = self._attachment_presentation

    def _dict_or_none(key: str) -> dict[str, Any] | None:
        # Stored sections are only trusted when they are real dicts.
        candidate = metadata.get(key)
        return candidate if isinstance(candidate, dict) else None

    uploaded_at = None
    raw_uploaded_at = metadata.get("uploaded_at")
    if isinstance(raw_uploaded_at, str) and raw_uploaded_at.strip():
        try:
            uploaded_at = datetime.fromisoformat(raw_uploaded_at)
        except ValueError:
            # An unparseable timestamp is reported as "unknown".
            uploaded_at = None

    analysis = _dict_or_none("analysis")
    document_info = _dict_or_none("document_info")
    requirement_check = _dict_or_none("requirement_check")

    stored_preview_kind = str(metadata.get("preview_kind") or "").strip()
    # The media-type based answer is only the default; a stored flag wins.
    default_previewable = presentation.is_previewable_media_type(media_type, filename)
    previewable = bool(metadata.get("previewable", default_previewable))
    if previewable:
        preview_url = presentation.build_preview_client_path(item.claim_id, item.id)
    else:
        preview_url = ""

    size_bytes = int(metadata.get("size_bytes") or file_path.stat().st_size)
    if stored_preview_kind:
        preview_kind = stored_preview_kind
    else:
        preview_kind = presentation.resolve_preview_kind(media_type, filename)

    return {
        "file_name": str(metadata.get("file_name") or filename),
        "storage_key": str(item.invoice_id or ""),
        "media_type": str(metadata.get("media_type") or media_type),
        "size_bytes": size_bytes,
        "uploaded_at": uploaded_at,
        "previewable": previewable,
        "preview_kind": preview_kind,
        "preview_url": preview_url,
        "analysis": analysis,
        "document_info": document_info,
        "requirement_check": requirement_check,
    }
def _backfill_item_type_from_attachment(
    self,
    *,
    item: ExpenseClaimItem,
    document_info: dict[str, Any],
) -> None:
    """Replace a generic item type with the one implied by the document.

    Only items whose current type is listed in
    ``GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES`` are touched; an item type the
    user already chose explicitly is left alone.

    Args:
        item: Expense item updated in place.
        document_info: Normalised document metadata; ``document_type`` is read.
    """
    current_type = str(item.item_type or "").strip().lower()
    if current_type not in GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES:
        return

    document_type = str(document_info.get("document_type") or "").strip()
    # Try the exact key first, then the lower-cased form: sibling lookups such
    # as the DOCUMENT_TYPE_SCENE_MAP one lower-case the document type, so a
    # casing difference should not silently skip the backfill.
    mapped_type = DOCUMENT_TYPE_ITEM_TYPE_MAP.get(document_type) or DOCUMENT_TYPE_ITEM_TYPE_MAP.get(
        document_type.lower()
    )
    if mapped_type:
        item.item_type = mapped_type
def _backfill_item_date_from_attachment(
    self,
    *,
    item: ExpenseClaimItem,
    document: Any,
    document_info: dict[str, Any],
) -> None:
    """Set the item date from the attachment when a date can be resolved.

    Args:
        item: Expense item updated in place.
        document: OCR result object; ``summary`` and ``text`` are read with
            ``getattr``.
        document_info: Normalised document metadata (``document_type``,
            ``scene_code``, ``fields``).
    """

    def _info_text(key: str) -> str:
        return str(document_info.get(key) or "").strip()

    def _ocr_text(attribute: str) -> str:
        return str(getattr(document, attribute, "") or "").strip()

    candidate = self._resolve_document_item_date_candidate(
        {
            "document_type": _info_text("document_type"),
            "scene_code": _info_text("scene_code"),
            "summary": _ocr_text("summary"),
            "text": _ocr_text("text"),
            "document_fields": list(document_info.get("fields") or []),
        }
    )
    if candidate is None:
        # Nothing usable was found; keep whatever date the item already has.
        return
    item.item_date = candidate
b/server/src/app/services/expense_claim_attachment_operations.py @@ -0,0 +1,495 @@ +from __future__ import annotations + +import json +import re +import shutil +import uuid +from collections import defaultdict +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal, InvalidOperation +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import func, or_, select +from sqlalchemy import inspect as sqlalchemy_inspect +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session, selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.ontology import OntologyEntity, OntologyParseResult +from app.schemas.reimbursement import ( + ExpenseClaimItemCreate, + ExpenseClaimItemUpdate, + ExpenseClaimUpdate, + TravelReimbursementCalculatorRequest, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.agent_foundation import AgentFoundationService +from app.services.audit import AuditLogService +from app.services.document_intelligence import build_document_insight +from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + 
DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + TRAVEL_POLICY_HOTEL_LIMITS, + TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin +from app.services.expense_amounts import ( + extract_amount_candidates, + format_decimal_amount, + is_amount_match_date_fragment, + is_date_like_amount_candidate, + is_probable_year_amount, + parse_document_amount_value, + parse_plain_document_amount_value, + resolve_document_field_amount, + resolve_document_item_amount, + resolve_document_text_amount, +) +from app.services.expense_rule_runtime import ( + DEFAULT_SCENE_RULE_ASSET_CODE, + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + 
def upload_claim_item_attachment(
    self,
    *,
    claim_id: str,
    item_id: str,
    filename: str,
    content: bytes,
    media_type: str | None,
    current_user: CurrentUserContext,
) -> dict[str, Any] | None:
    """Store an uploaded attachment for a claim item and analyse it via OCR.

    The previous attachment directory of the item is removed, the new file is
    written, OCR is attempted, the item is backfilled from the recognised
    document, a metadata sidecar is written, and the claim is re-synced,
    committed and audit-logged.

    Args:
        claim_id: Identifier of the claim that owns the item.
        item_id: Identifier of the item receiving the attachment.
        filename: Client-supplied file name; normalised before use.
        content: Raw file bytes; must not be empty.
        media_type: Client-supplied media type, used as a fallback.
        current_user: Caller context used for claim lookup and audit.

    Returns:
        A summary dict for the client, or ``None`` when the claim lookup
        returns no claim.

    Raises:
        ValueError: If ``content`` is empty.
        LookupError: Propagated from ``_get_claim_item_or_raise`` when the
            item is not part of the claim.
    """
    claim, item = self._get_claim_item_or_raise(
        claim_id=claim_id,
        item_id=item_id,
        current_user=current_user,
    )
    if claim is None:
        return None

    self._ensure_draft_claim(claim)
    self._ensure_mutable_claim_item(item)
    normalized_name = self._attachment_storage.normalize_filename(filename)
    if not content:
        raise ValueError("上传文件不能为空。")

    # Snapshot the claim before any mutation for the audit log.
    before_json = self._serialize_claim(claim)
    attachment_dir = self._attachment_storage.build_item_dir(claim.id, item.id)
    # The item's directory is wiped first, so any previous attachment is gone
    # before the new file (and its OCR) is processed.
    shutil.rmtree(attachment_dir, ignore_errors=True)
    attachment_dir.mkdir(parents=True, exist_ok=True)

    file_path = attachment_dir / normalized_name
    file_path.write_bytes(content)
    resolved_media_type = self._attachment_presentation.resolve_media_type(
        normalized_name,
        fallback=media_type,
    )

    # Start from the placeholder analysis; it is replaced below when OCR
    # yields a document or fails.
    attachment_analysis = self._build_fallback_attachment_analysis(
        media_type=media_type,
        item=item,
    )
    ocr_document = None
    document_info = None
    requirement_check = None
    ocr_status = "empty"
    ocr_error = ""
    try:
        ocr_result = OcrService(self.db).recognize_files(
            [(normalized_name, content, media_type or "application/octet-stream")]
        )
        documents = list(ocr_result.documents or [])
        if documents:
            # Only the first recognised document is used.
            ocr_document = documents[0]
            ocr_status = "recognized"
            document_info = self._build_attachment_document_info(ocr_document)
            # Backfills run before the requirement check and analysis, so both
            # see the item as updated from the document.
            self._backfill_item_type_from_attachment(
                item=item,
                document_info=document_info,
            )
            self._backfill_item_amount_from_attachment(
                item=item,
                document=ocr_document,
                document_info=document_info,
            )
            self._backfill_item_date_from_attachment(
                item=item,
                document=ocr_document,
                document_info=document_info,
            )
            self._backfill_item_reason_from_attachment(
                item=item,
                document=ocr_document,
                document_info=document_info,
            )
            requirement_check = self._build_attachment_requirement_check(
                item=item,
                document_info=document_info,
            )
            attachment_analysis = self._build_attachment_analysis(
                document=ocr_document,
                item=item,
                claim=claim,
                document_info=document_info,
                requirement_check=requirement_check,
            )
    # Best-effort by design: any failure in the block above (OCR, backfill or
    # analysis) keeps the upload and is recorded as a failed recognition.
    except Exception as exc:  # pragma: no cover - fallback path depends on OCR runtime
        ocr_status = "failed"
        ocr_error = str(exc)
        attachment_analysis = self._build_failed_ocr_attachment_analysis(
            media_type=media_type,
            error_message=ocr_error,
            item=item,
        )

    item.invoice_id = self._attachment_storage.to_storage_key(file_path)
    preview_meta = self._attachment_presentation.build_preview_meta(
        file_path=file_path,
        media_type=resolved_media_type,
        ocr_document=ocr_document,
    )
    # Sidecar metadata; ocr_document may be None here, in which case the
    # getattr defaults below supply empty values.
    meta = {
        "file_name": normalized_name,
        "storage_key": item.invoice_id,
        "media_type": resolved_media_type,
        "size_bytes": len(content),
        "uploaded_at": datetime.now(UTC).isoformat(),
        "previewable": bool(preview_meta["previewable"]),
        "preview_kind": str(preview_meta["preview_kind"]),
        "preview_storage_key": str(preview_meta["preview_storage_key"]),
        "preview_media_type": str(preview_meta["preview_media_type"]),
        "preview_file_name": str(preview_meta["preview_file_name"]),
        "analysis": attachment_analysis,
        "document_info": document_info,
        "requirement_check": requirement_check,
        "ocr_status": ocr_status,
        "ocr_error": ocr_error,
        "ocr_text": str(getattr(ocr_document, "text", "") or ""),
        "ocr_summary": str(getattr(ocr_document, "summary", "") or ""),
        "ocr_avg_score": float(getattr(ocr_document, "avg_score", 0.0) or 0.0),
        "ocr_line_count": int(getattr(ocr_document, "line_count", 0) or 0),
        "ocr_classification_source": str(getattr(ocr_document, "classification_source", "") or ""),
        "ocr_classification_confidence": float(getattr(ocr_document, "classification_confidence", 0.0) or 0.0),
        # The comprehension variable is named ``item`` but is scoped to the
        # comprehension; the outer ``item`` is not rebound.
        "ocr_classification_evidence": [
            str(item)
            for item in getattr(ocr_document, "classification_evidence", []) or []
            if str(item).strip()
        ],
        "ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []],
    }
    self._attachment_storage.write_meta(file_path, meta)

    self._sync_claim_from_items(claim)
    self.db.commit()
    self.db.refresh(claim)

    self.audit_service.log_action(
        actor=current_user.name or current_user.username,
        action="expense_claim.attachment_upload",
        resource_type="expense_claim",
        resource_id=claim.id,
        before_json=before_json,
        after_json=self._serialize_claim(claim),
    )

    return {
        "message": f"{normalized_name} 已上传并关联到当前费用明细。",
        "claim_id": claim.id,
        "item_id": item.id,
        "invoice_id": item.invoice_id,
        "item_date": item.item_date.isoformat() if item.item_date else None,
        "item_type": item.item_type,
        "item_reason": item.item_reason,
        "item_location": item.item_location,
        "item_amount": item.item_amount,
        "claim_amount": claim.amount,
        "attachment": self._build_attachment_payload(item),
    }
item_id: str, + current_user: CurrentUserContext, + ) -> tuple[Path, str, str] | None: + claim, item = self._get_claim_item_or_raise( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + if claim is None: + return None + + return self._resolve_item_attachment_preview_content(item) + + def delete_claim_item_attachment( + self, + *, + claim_id: str, + item_id: str, + current_user: CurrentUserContext, + ) -> dict[str, Any] | None: + claim, item = self._get_claim_item_or_raise( + claim_id=claim_id, + item_id=item_id, + current_user=current_user, + ) + if claim is None: + return None + + self._ensure_draft_claim(claim) + self._ensure_mutable_claim_item(item) + before_json = self._serialize_claim(claim) + previous_name = self._attachment_presentation.resolve_display_name(item.invoice_id) + self._attachment_storage.delete_item_files(item) + item.invoice_id = None + + self._sync_claim_from_items(claim) + self.db.commit() + self.db.refresh(claim) + + self.audit_service.log_action( + actor=current_user.name or current_user.username, + action="expense_claim.attachment_delete", + resource_type="expense_claim", + resource_id=claim.id, + before_json=before_json, + after_json=self._serialize_claim(claim), + ) + + return { + "message": f"{previous_name or '附件'} 已删除。", + "claim_id": claim.id, + "item_id": item.id, + "invoice_id": item.invoice_id, + "attachment": None, + } + + def _get_claim_item_or_raise( + self, + *, + claim_id: str, + item_id: str, + current_user: CurrentUserContext, + ) -> tuple[ExpenseClaim | None, ExpenseClaimItem]: + claim = self.get_claim(claim_id, current_user) + if claim is None: + return None, None # type: ignore[return-value] + + item = next((entry for entry in claim.items if entry.id == item_id), None) + if item is None: + raise LookupError("Item not found") + return claim, item + + def _resolve_item_attachment_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]: + file_path = 
self._attachment_storage.resolve_item_path(item) + if file_path is None or not file_path.exists(): + raise FileNotFoundError("Attachment not found") + + metadata = self._attachment_storage.read_meta(file_path) + filename = str(metadata.get("file_name") or file_path.name) + media_type = self._attachment_presentation.resolve_media_type( + filename, + fallback=str(metadata.get("media_type") or ""), + ) + return file_path, media_type, filename + + def _repair_pdf_text_layer_metadata_if_needed( + self, + *, + file_path: Path, + metadata: dict[str, Any], + item: ExpenseClaimItem | None = None, + ) -> dict[str, Any]: + if not metadata: + return metadata + + media_type = str(metadata.get("media_type") or self._attachment_presentation.resolve_media_type(file_path.name)).strip() + if media_type != "application/pdf": + return metadata + + ocr_text = str(metadata.get("ocr_text") or "") + ocr_summary = str(metadata.get("ocr_summary") or "") + if OcrService._placeholder_ratio(f"{ocr_summary}\n{ocr_text}") < 0.12: + return metadata + + text_layer = OcrService(self.db)._extract_pdf_text_layer(file_path) + repaired_text, used_text_layer = OcrService._choose_document_text( + ocr_text=ocr_text, + text_layer=text_layer, + ) + if not used_text_layer or not repaired_text: + return metadata + + repaired_summary = OcrService._summarize_text(repaired_text) + document = SimpleNamespace( + filename=str(metadata.get("file_name") or file_path.name), + text=repaired_text, + summary=repaired_summary, + avg_score=float(metadata.get("ocr_avg_score") or 0.0), + line_count=int(metadata.get("ocr_line_count") or 0), + document_type="", + document_type_label="", + scene_code="", + scene_label="", + document_fields=[], + warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()], + ) + document_info = self._build_attachment_document_info(document) + document.document_type = document_info.get("document_type", "") + document.document_type_label = 
document_info.get("document_type_label", "") + document.scene_code = document_info.get("scene_code", "") + document.scene_label = document_info.get("scene_label", "") + document.document_fields = list(document_info.get("fields") or []) + + metadata["ocr_text"] = repaired_text + metadata["ocr_summary"] = repaired_summary + metadata["document_info"] = document_info + metadata["previewable"] = True + metadata["preview_kind"] = "pdf" + metadata["preview_storage_key"] = str( + metadata.get("storage_key") or self._attachment_storage.to_storage_key(file_path) + ) + metadata["preview_media_type"] = "application/pdf" + metadata["preview_file_name"] = str(metadata.get("file_name") or file_path.name) + + if item is not None: + requirement_check = self._build_attachment_requirement_check( + item=item, + document_info=document_info, + ) + metadata["requirement_check"] = requirement_check + metadata["analysis"] = self._build_attachment_analysis( + document=document, + item=item, + claim=getattr(item, "claim", None), + document_info=document_info, + requirement_check=requirement_check, + ) + + self._attachment_storage.write_meta(file_path, metadata) + return metadata + + def _resolve_item_attachment_preview_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]: + file_path, media_type, filename = self._resolve_item_attachment_content(item) + metadata = self._attachment_storage.read_meta(file_path) + metadata = self._repair_pdf_text_layer_metadata_if_needed( + file_path=file_path, + metadata=metadata, + item=item, + ) + preview_storage_key = str(metadata.get("preview_storage_key") or "").strip() + preview_file_name = str(metadata.get("preview_file_name") or "").strip() + preview_media_type = str(metadata.get("preview_media_type") or "").strip() + + if preview_storage_key: + preview_path = self._attachment_storage.resolve_path(preview_storage_key) + if preview_path is not None and preview_path.exists(): + resolved_name = preview_file_name or preview_path.name + 
resolved_media_type = self._attachment_presentation.resolve_media_type( + resolved_name, + fallback=preview_media_type, + ) + return preview_path, resolved_media_type, resolved_name + + if self._attachment_presentation.is_previewable_media_type(media_type, filename): + return file_path, media_type, filename + + raise FileNotFoundError("Attachment preview not found") diff --git a/server/src/app/services/expense_claim_attachment_presentation.py b/server/src/app/services/expense_claim_attachment_presentation.py new file mode 100644 index 0000000..75ac294 --- /dev/null +++ b/server/src/app/services/expense_claim_attachment_presentation.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +import base64 +import binascii +import mimetypes +import re +from pathlib import Path +from typing import Any +from urllib.parse import quote + +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage + + +class ExpenseClaimAttachmentPresentation: + def __init__(self, storage: ExpenseClaimAttachmentStorage) -> None: + self.storage = storage + + def build_preview_meta( + self, + *, + file_path: Path, + media_type: str, + ocr_document: Any | None, + ) -> dict[str, Any]: + filename = file_path.name + storage_key = self.storage.to_storage_key(file_path) + preview_kind = self.resolve_preview_kind(media_type, filename) + + preview_data_url = str(getattr(ocr_document, "preview_data_url", "") or "").strip() + preview_source_kind = str(getattr(ocr_document, "preview_kind", "") or "").strip() + if preview_source_kind == "image" and preview_data_url: + preview_asset = self._write_preview_asset_from_data_url( + attachment_dir=file_path.parent, + original_filename=filename, + preview_data_url=preview_data_url, + ) + if preview_asset is not None: + preview_path, preview_media_type, preview_file_name = preview_asset + return { + "previewable": True, + "preview_kind": "image", + "preview_storage_key": self.storage.to_storage_key(preview_path), + 
"preview_media_type": preview_media_type, + "preview_file_name": preview_file_name, + } + + if preview_kind: + return { + "previewable": True, + "preview_kind": preview_kind, + "preview_storage_key": storage_key, + "preview_media_type": media_type, + "preview_file_name": filename, + } + + return { + "previewable": False, + "preview_kind": "", + "preview_storage_key": "", + "preview_media_type": "", + "preview_file_name": "", + } + + @staticmethod + def resolve_preview_kind(media_type: str | None, filename: str) -> str: + resolved = str(media_type or "").strip() or (mimetypes.guess_type(filename)[0] or "") + if resolved.startswith("image/"): + return "image" + if resolved == "application/pdf": + return "pdf" + return "" + + @staticmethod + def decode_data_url(payload: str) -> tuple[str, bytes] | None: + normalized = str(payload or "").strip() + matched = re.match(r"^data:(?P[\w.+-]+/[\w.+-]+);base64,(?P.+)$", normalized, flags=re.DOTALL) + if not matched: + return None + try: + content = base64.b64decode(matched.group("body"), validate=True) + except (binascii.Error, ValueError): + return None + return matched.group("media"), content + + def _write_preview_asset_from_data_url( + self, + *, + attachment_dir: Path, + original_filename: str, + preview_data_url: str, + ) -> tuple[Path, str, str] | None: + decoded = self.decode_data_url(preview_data_url) + if decoded is None: + return None + + preview_media_type, preview_content = decoded + suffix = mimetypes.guess_extension(preview_media_type) or ".bin" + preview_name = f"{Path(original_filename).stem}.preview{suffix}" + preview_path = attachment_dir / preview_name + preview_path.write_bytes(preview_content) + return preview_path, preview_media_type, preview_name + + @staticmethod + def build_preview_client_path(claim_id: str, item_id: str) -> str: + return ( + "/reimbursements/claims/" + f"{quote(str(claim_id or '').strip(), safe='')}" + f"/items/{quote(str(item_id or '').strip(), safe='')}/attachment/preview" + ) + + 
@staticmethod + def resolve_media_type(filename: str, *, fallback: str | None = None) -> str: + guessed = mimetypes.guess_type(filename)[0] + return str(guessed or fallback or "application/octet-stream") + + @staticmethod + def is_previewable_media_type(media_type: str | None, filename: str) -> bool: + resolved = str(media_type or "").strip() or (mimetypes.guess_type(filename)[0] or "") + return resolved.startswith("image/") or resolved == "application/pdf" + + @staticmethod + def resolve_display_name(storage_key: str | None) -> str: + return Path(str(storage_key or "").strip()).name + + @classmethod + def merge_reference(cls, current_invoice_id: str | None, next_invoice_id: str | None) -> str | None: + normalized_next = str(next_invoice_id or "").strip() + if not normalized_next: + return None + + normalized_current = str(current_invoice_id or "").strip() + if ( + normalized_current + and cls.resolve_display_name(normalized_current) == cls.resolve_display_name(normalized_next) + ): + return normalized_current + + return normalized_next diff --git a/server/src/app/services/expense_claim_attachment_storage.py b/server/src/app/services/expense_claim_attachment_storage.py new file mode 100644 index 0000000..b6168e9 --- /dev/null +++ b/server/src/app/services/expense_claim_attachment_storage.py @@ -0,0 +1,129 @@ +from __future__ import annotations + +import json +import re +import shutil +from pathlib import Path + +from app.core.config import get_settings +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem + + +class ExpenseClaimAttachmentStorage: + """Centralizes filesystem operations for expense claim attachments.""" + + def root(self) -> Path: + return (get_settings().resolved_storage_root_dir / "expense_claims").resolve() + + def build_item_dir(self, claim_id: str, item_id: str) -> Path: + return (self.root() / claim_id / item_id).resolve() + + def delete_claim_files(self, claim: ExpenseClaim) -> None: + for item in list(claim.items or []): + 
self.delete_item_files(item) + self.delete_claim_root(claim.id) + + def delete_claim_root(self, claim_id: str) -> None: + claim_root = self._assert_child(self.root() / claim_id) + self._delete_path(claim_root) + + @staticmethod + def normalize_filename(filename: str | None) -> str: + normalized = Path(str(filename or "").strip()).name + normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._") + suffix = Path(normalized).suffix + if normalized: + return normalized + return f"attachment{suffix or '.bin'}" + + def resolve_path(self, storage_key: str | None) -> Path | None: + normalized = str(storage_key or "").strip() + if not normalized: + return None + + root = self.root() + path = (root / normalized).resolve() + try: + path.relative_to(root) + except ValueError as exc: + raise FileNotFoundError("Attachment path is invalid") from exc + return path + + def resolve_item_path(self, item: ExpenseClaimItem) -> Path | None: + if not str(item.invoice_id or "").strip(): + return None + + file_path = self.resolve_path(item.invoice_id) + if file_path is not None and file_path.exists(): + return file_path + + filename = self.normalize_filename(item.invoice_id) + if not filename: + return file_path + + fallback_path = (self.build_item_dir(item.claim_id, item.id) / filename).resolve() + try: + fallback_path.relative_to(self.root()) + except ValueError as exc: + raise FileNotFoundError("Attachment path is invalid") from exc + return fallback_path + + def to_storage_key(self, file_path: Path) -> str: + return file_path.resolve().relative_to(self.root()).as_posix() + + def delete_item_files(self, item: ExpenseClaimItem) -> None: + file_path = self.resolve_item_path(item) + if file_path is None: + return + + root = self.root() + if file_path.parent == root: + self._delete_path(file_path) + self._delete_path(self.meta_path(file_path)) + return + + self._delete_path(file_path.parent) + + @staticmethod + def meta_path(file_path: Path) -> Path: + return 
file_path.with_name(f"{file_path.name}.meta.json") + + def write_meta(self, file_path: Path, payload: dict) -> None: + meta_path = self.meta_path(file_path) + meta_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") + + def read_meta(self, file_path: Path) -> dict: + meta_path = self.meta_path(file_path) + if not meta_path.exists(): + return {} + + try: + payload = json.loads(meta_path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return {} + return payload if isinstance(payload, dict) else {} + + def _assert_child(self, path: Path) -> Path: + root = self.root() + resolved = path.resolve() + try: + resolved.relative_to(root) + except ValueError as exc: + raise FileNotFoundError("Attachment path is invalid") from exc + return resolved + + def _delete_path(self, path: Path | None) -> None: + if path is None: + return + + target = self._assert_child(path) + if not target.exists(): + return + + if target.is_dir(): + shutil.rmtree(target) + else: + target.unlink() + + if target.exists(): + raise OSError(f"Attachment path was not deleted: {target}") diff --git a/server/src/app/services/expense_claim_constants.py b/server/src/app/services/expense_claim_constants.py new file mode 100644 index 0000000..51039e0 --- /dev/null +++ b/server/src/app/services/expense_claim_constants.py @@ -0,0 +1,361 @@ +from __future__ import annotations + +import re +from decimal import Decimal + +EXPENSE_TYPE_LABELS = { + "travel": "差旅", + "train_ticket": "火车票", + "flight_ticket": "机票", + "hotel_ticket": "住宿票", + "ride_ticket": "乘车", + "travel_allowance": "出差补贴", + "hotel": "住宿", + "transport": "交通", + "meal": "餐费", + "meeting": "会务", + "entertainment": "招待", + "office": "办公", + "training": "培训", + "communication": "通讯", + "welfare": "福利", +} +MAX_DRAFT_CLAIMS_PER_USER = 3 +EDITABLE_CLAIM_STATUSES = ("draft", "supplement", "returned") +SYSTEM_GENERATED_ITEM_TYPES = {"travel_allowance"} +TRAVEL_DETAIL_ITEM_TYPES = { + "train_ticket", + 
"flight_ticket", + "hotel_ticket", + "ride_ticket", + "travel_allowance", +} +TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES = {"train_ticket", "flight_ticket"} +DOCUMENT_TYPE_ITEM_TYPE_MAP = { + "train_ticket": "train_ticket", + "flight_itinerary": "flight_ticket", + "hotel_invoice": "hotel_ticket", + "taxi_receipt": "ride_ticket", + "transport_receipt": "ride_ticket", +} +DOCUMENT_TYPE_SCENE_MAP = { + "train_ticket": "travel", + "flight_itinerary": "travel", + "hotel_invoice": "hotel", + "taxi_receipt": "transport", + "transport_receipt": "transport", + "parking_toll_receipt": "transport", + "meal_receipt": "meal", + "office_invoice": "office", + "meeting_invoice": "meeting", + "training_invoice": "training", +} +DOCUMENT_FACT_ITEM_TYPES = {"train_ticket", "flight_ticket", "hotel_ticket", "ride_ticket", "ship_ticket", "ferry_ticket"} +ROUTE_DESCRIPTION_ITEM_TYPES = {"train_ticket", "flight_ticket", "ship_ticket", "ferry_ticket", "ride_ticket"} +DOCUMENT_TRIP_DATE_LABELS = { + "train_ticket": "列车出发时间", + "flight_itinerary": "起飞日期", + "taxi_receipt": "乘车时间", + "transport_receipt": "乘车时间", + "parking_toll_receipt": "通行日期", +} +DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS = { + "train_ticket": "列车出发时间或乘车日期", + "flight_itinerary": "起飞日期或航班日期", + "taxi_receipt": "乘车时间", + "transport_receipt": "乘车时间", + "parking_toll_receipt": "通行日期", + "hotel_invoice": "入住或离店日期", +} +DOCUMENT_TRIP_DATE_KEYS = { + "traveldate", + "tripdate", + "journeydate", + "departuredate", + "departuretime", + "departdate", + "departtime", + "boardingdate", + "boardingtime", + "traindate", + "traintime", + "traindeparturetime", + "scheduleddeparturetime", + "flightdate", + "flighttime", + "ridedate", + "ridetime", + "pickuptime", + "starttime", +} +DOCUMENT_GENERIC_DATE_KEYS = {"date", "time", "occurredat", "occurreddate", "businessdate"} +DOCUMENT_INVOICE_DATE_KEYS = {"issuedat", "issuedate", "invoicedate", "billingdate"} +DOCUMENT_TRIP_DATE_LABEL_TOKENS = ( + "出发日期", + "出发时间", + "列车出发时间", + "发车日期", + "发车时间", + 
"开车时间", + "乘车日期", + "乘车时间", + "起飞日期", + "航班日期", + "行程日期", + "上车时间", + "用车时间", + "通行日期", +) +DOCUMENT_GENERIC_DATE_LABEL_TOKENS = ("日期", "时间", "发生时间", "业务发生日期") +DOCUMENT_INVOICE_DATE_LABEL_TOKENS = ("开票日期", "发票日期") +DOCUMENT_ROUTE_FORMAT_PATTERN = re.compile( + r"^[A-Za-z0-9\u4e00-\u9fa5()()·]{2,40}\s*-\s*" + r"[A-Za-z0-9\u4e00-\u9fa5()()·]{2,40}$" +) +DOCUMENT_ROUTE_TEXT_PATTERN = re.compile( + r"([A-Za-z0-9\u4e00-\u9fa5()()·]{2,40})\s*(?:至|到|→|->|—|–|-)\s*" + r"([A-Za-z0-9\u4e00-\u9fa5()()·]{2,40})" +) +DOCUMENT_ROUTE_ORIGIN_LABELS = {"起点", "上车", "上车地点", "上车地址", "出发", "出发地", "出发站", "始发站", "乘车起点"} +DOCUMENT_ROUTE_DESTINATION_LABELS = { + "终点", + "下车", + "下车地点", + "下车地址", + "到达", + "到达地", + "到达站", + "目的地", + "乘车终点", +} +GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES = {"", "other", "travel", "transport", "hotel"} +LOCATION_REQUIRED_EXPENSE_TYPES = {"travel", "meeting", "entertainment"} +EXPENSE_SCENE_KEYWORDS = { + "travel": ("差旅", "出差", "行程"), + "hotel": ("酒店", "住宿", "房费", "客房", "入住", "离店"), + "transport": ( + "交通", + "打车", + "出租车", + "网约车", + "滴滴", + "出行", + "乘车", + "用车", + "叫车", + "车费", + "车资", + "的士", + "高铁", + "动车", + "火车", + "机票", + "航班", + "行程单", + "登机", + "客票", + "公交", + "地铁", + "过路费", + "通行费", + "停车", + ), + "meal": ("餐饮", "餐费", "用餐", "外卖", "快餐", "酒楼", "饭店", "饭馆", "食品", "咖啡"), + "entertainment": ("招待", "宴请", "接待", "客户餐", "商务餐", "业务招待"), + "office": ("办公", "办公用品", "文具", "耗材", "打印", "纸张", "硒鼓", "墨盒", "鼠标", "键盘", "电脑"), + "meeting": ("会议", "会务", "会展", "会议室", "会场", "场地费", "论坛"), + "training": ("培训", "课程", "讲师", "教材", "学费", "认证"), +} +EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = { + "travel": {"travel", "hotel", "transport", "meal"}, + "train_ticket": {"travel"}, + "flight_ticket": {"travel"}, + "hotel_ticket": {"hotel"}, + "ride_ticket": {"transport"}, + "travel_allowance": set(), + "hotel": {"hotel"}, + "transport": {"transport", "travel"}, + "meal": {"meal", "entertainment"}, + "entertainment": {"entertainment", "meal"}, + "office": {"office"}, + "meeting": {"meeting"}, + 
"training": {"training"}, +} +DOCUMENT_SCENE_LABELS = { + "travel": "差旅", + "hotel": "住宿", + "transport": "交通", + "meal": "餐饮", + "entertainment": "业务招待", + "office": "办公用品", + "meeting": "会务", + "training": "培训", + "other": "其他票据", +} +DOCUMENT_ASSOCIATION_REVIEW_ACTIONS = { + "link_to_existing_draft", + "create_new_claim_from_documents", +} +PERSISTENT_EXPENSE_REVIEW_ACTIONS = { + "save_draft", + "next_step", + *DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, +} +RETURN_REASON_OPTIONS = { + "missing_attachment": "附件缺失或不清晰", + "invoice_mismatch": "票据类型/金额与明细不一致", + "over_policy": "超出制度标准或缺少超标说明", + "business_explanation": "业务事由/地点/人员信息不完整", + "duplicate_or_abnormal": "疑似重复或异常票据", + "approval_question": "审批人需要补充说明", +} +MAX_CLAIM_NO_RETRY_ATTEMPTS = 3 +DOCUMENT_DATE_PATTERN = re.compile(r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)") +SYSTEM_GENERATED_REASON_PREFIXES = ( + "我上传了", + "请按当前已识别信息", + "请把当前上传的票据", + "请基于当前上传的多张票据", + "我已核对右侧识别结果", + "请同步修正逐票据识别结果", + "我已修改识别信息", + "查看报销草稿", + "请解释一下当前这笔报销的合规风险和待补充项", +) +LEADING_REASON_TIME_PATTERNS = ( + re.compile( + r"^\s*(?:识别事项(?:有)?[::]\s*)?" + r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*" + r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" + r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" + r"\s*[,,。;;、]?\s*" + ), + re.compile( + r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" + r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" 
+ r"\s*[,,。;;、]\s*" + ), +) +AI_REVIEW_LOOKBACK_DAYS = 90 +AI_REVIEW_REPEAT_RISK_WARNING_COUNT = 1 +AI_REVIEW_REPEAT_RISK_BLOCK_COUNT = 2 +TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES = {"travel", "hotel", "transport"} +TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES = {"flight_itinerary", "train_ticket"} +TRAVEL_POLICY_CITY_TIERS = { + "北京": "tier_1", + "上海": "tier_1", + "广州": "tier_1", + "深圳": "tier_1", + "杭州": "tier_2", + "南京": "tier_2", + "苏州": "tier_2", + "武汉": "tier_2", + "成都": "tier_2", + "重庆": "tier_2", + "西安": "tier_2", + "天津": "tier_2", + "宁波": "tier_2", + "厦门": "tier_2", + "青岛": "tier_2", + "长沙": "tier_2", + "郑州": "tier_2", + "合肥": "tier_2", + "济南": "tier_2", + "沈阳": "tier_2", + "大连": "tier_2", + "福州": "tier_2", + "昆明": "tier_2", + "海口": "tier_2", + "三亚": "tier_2", + "无锡": "tier_2", + "东莞": "tier_2", + "佛山": "tier_2", +} +TRAVEL_POLICY_CITY_MATCH_ORDER = tuple( + sorted(TRAVEL_POLICY_CITY_TIERS.keys(), key=lambda item: len(item), reverse=True) +) +TRAVEL_POLICY_BAND_LABELS = { + "junior": "P1-P3", + "mid": "P4-P5", + "senior": "P6-P7", + "manager": "M1-M2", + "executive": "M3及以上 / D序列", +} +TRAVEL_POLICY_HOTEL_LIMITS = { + "junior": { + "tier_1": Decimal("450.00"), + "tier_2": Decimal("380.00"), + "tier_3": Decimal("320.00"), + }, + "mid": { + "tier_1": Decimal("550.00"), + "tier_2": Decimal("480.00"), + "tier_3": Decimal("380.00"), + }, + "senior": { + "tier_1": Decimal("700.00"), + "tier_2": Decimal("620.00"), + "tier_3": Decimal("520.00"), + }, + "manager": { + "tier_1": Decimal("900.00"), + "tier_2": Decimal("820.00"), + "tier_3": Decimal("720.00"), + }, + "executive": { + "tier_1": Decimal("1200.00"), + "tier_2": Decimal("1000.00"), + "tier_3": Decimal("900.00"), + }, +} +TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS = { + "junior": {"flight": 1, "train": 1}, + "mid": {"flight": 1, "train": 1}, + "senior": {"flight": 2, "train": 2}, + "manager": {"flight": 3, "train": 3}, + "executive": {"flight": 4, "train": 3}, +} +TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS = ( + "中转", + 
"转机", + "经停", + "改签", + "多地出差", + "多城市", + "多站", + "异地返程", + "异地结束", + "临时变更", + "继续前往", + "第二站", +) +TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS = ( + "超标说明", + "无直达", + "展会高峰", + "会议高峰", + "协议酒店满房", + "客户指定", + "临时改签", + "行程变更", + "红眼航班", + "晚到店", +) +TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS = ( + ("头等舱", 4), + ("公务舱", 3), + ("商务舱", 3), + ("超级经济舱", 2), + ("高端经济舱", 2), + ("明珠经济舱", 2), + ("经济舱", 1), +) +TRAVEL_POLICY_TRAIN_CLASS_PATTERNS = ( + ("商务座", 3), + ("一等座", 2), + ("软卧", 2), + ("二等座", 1), + ("二等卧", 1), + ("硬卧", 1), +) +TRAVEL_POLICY_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)") diff --git a/server/src/app/services/expense_claim_document_item_builder.py b/server/src/app/services/expense_claim_document_item_builder.py new file mode 100644 index 0000000..70e5bee --- /dev/null +++ b/server/src/app/services/expense_claim_document_item_builder.py @@ -0,0 +1,560 @@ +from __future__ import annotations + +import json +import re +import shutil +import uuid +from collections import defaultdict +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal, InvalidOperation +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import func, or_, select +from sqlalchemy import inspect as sqlalchemy_inspect +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session, selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.ontology import OntologyEntity, OntologyParseResult +from app.schemas.reimbursement import ( + ExpenseClaimItemCreate, + ExpenseClaimItemUpdate, + ExpenseClaimUpdate, + TravelReimbursementCalculatorRequest, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from 
app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.agent_foundation import AgentFoundationService +from app.services.audit import AuditLogService +from app.services.document_intelligence import build_document_insight +from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + TRAVEL_POLICY_HOTEL_LIMITS, + 
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin +from app.services.expense_amounts import ( + extract_amount_candidates, + format_decimal_amount, + is_amount_match_date_fragment, + is_date_like_amount_candidate, + is_probable_year_amount, + parse_document_amount_value, + parse_plain_document_amount_value, + resolve_document_field_amount, + resolve_document_item_amount, + resolve_document_text_amount, +) +from app.services.expense_rule_runtime import ( + DEFAULT_SCENE_RULE_ASSET_CODE, + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, + resolve_document_type_label, +) +from app.services.ocr import OcrService + + +class ExpenseClaimDocumentItemBuilderMixin: + def _resolve_context_documents(self, context_json: dict[str, Any]) -> list[dict[str, Any]]: + documents = context_json.get("ocr_documents") + if not isinstance(documents, list): + documents = [] + + normalized: list[dict[str, Any]] = [] + for index, item in enumerate(documents[:10], start=1): + if not isinstance(item, dict): + continue + normalized.append( + { + "index": index, + "filename": str(item.get("filename") or "").strip(), + "summary": str(item.get("summary") or "").strip(), + "text": str(item.get("text") or "").strip(), + "document_type": str(item.get("document_type") or "").strip(), + "scene_code": str(item.get("scene_code") or "").strip(), + "scene_label": str(item.get("scene_label") or "").strip(), + "document_fields": self._normalize_document_fields(item.get("document_fields")), + } + ) + + overrides = context_json.get("review_document_form_values") + if not isinstance(overrides, list) or not normalized: + return normalized + + override_map: dict[tuple[int, str], dict[str, Any]] = {} + for 
item in overrides: + if not isinstance(item, dict): + continue + filename = str(item.get("filename") or "").strip() + index = int(item.get("index") or 0) + if not filename and index <= 0: + continue + override_map[(index, filename)] = item + + for item in normalized: + override = override_map.get((int(item["index"]), str(item["filename"]))) + if override is None: + override = override_map.get((int(item["index"]), "")) + if override is None: + continue + summary = str(override.get("summary") or "").strip() + scene_label = str(override.get("scene_label") or "").strip() + fields = override.get("fields") + if summary: + item["summary"] = summary + if scene_label: + item["scene_label"] = scene_label + if isinstance(fields, list): + item["document_fields"] = self._normalize_document_fields(fields) + + return normalized + + @staticmethod + def _normalize_document_fields(raw_fields: Any) -> list[dict[str, str]]: + if not isinstance(raw_fields, list): + return [] + normalized: list[dict[str, str]] = [] + for field in raw_fields: + if not isinstance(field, dict): + continue + label = str(field.get("label") or "").strip() + value = str(field.get("value") or "").strip() + key = str(field.get("key") or label or "").strip() + if not label or not value: + continue + normalized.append( + { + "key": key, + "label": label, + "value": value, + } + ) + return normalized + + def _build_context_item_specs( + self, + *, + context_documents: list[dict[str, Any]], + attachment_names: list[str], + occurred_at: datetime, + expense_type: str, + amount: Decimal, + reason: str, + location: str, + context_json: dict[str, Any], + employee_grade: str | None = None, + user_id: str = "", + ) -> list[dict[str, Any]]: + specs: list[dict[str, Any]] = [] + if context_documents: + for document in context_documents: + specs.append( + { + "item_date": self._resolve_document_item_date(document, fallback=occurred_at.date()), + "item_type": self._resolve_document_item_type(document, fallback=expense_type), + 
"item_reason": self._resolve_document_item_reason(document, fallback=reason), + "item_location": location, + "item_amount": self._resolve_document_item_amount(document), + "invoice_id": str(document.get("filename") or "").strip() or None, + } + ) + elif attachment_names: + for attachment_name in attachment_names: + specs.append( + { + "item_date": occurred_at.date(), + "item_type": expense_type, + "item_reason": reason, + "item_location": location, + "item_amount": None, + "invoice_id": attachment_name, + } + ) + + if not specs: + return [] + + total_recognized = sum( + spec["item_amount"] for spec in specs if isinstance(spec.get("item_amount"), Decimal) + ) + missing_specs = [spec for spec in specs if spec.get("item_amount") is None] + if missing_specs: + remaining = (amount - total_recognized).quantize(Decimal("0.01")) + if remaining > Decimal("0.00"): + missing_specs[0]["item_amount"] = remaining + + for spec in specs: + if spec.get("item_amount") is None: + spec["item_amount"] = Decimal("0.00") + + allowance_spec = self._build_travel_allowance_item_spec( + context_documents=context_documents, + specs=specs, + occurred_at=occurred_at, + expense_type=expense_type, + location=location, + context_json=context_json, + employee_grade=employee_grade, + user_id=user_id, + ) + if allowance_spec is not None: + specs = [spec for spec in specs if str(spec.get("item_type") or "").strip() != "travel_allowance"] + specs.append(allowance_spec) + + return specs + + def _build_travel_allowance_item_spec( + self, + *, + context_documents: list[dict[str, Any]], + specs: list[dict[str, Any]], + occurred_at: datetime, + expense_type: str, + location: str, + context_json: dict[str, Any], + employee_grade: str | None, + user_id: str, + ) -> dict[str, Any] | None: + if not self._should_add_travel_allowance_item( + expense_type=expense_type, + context_documents=context_documents, + context_json=context_json, + ): + return None + + grade = str(employee_grade or context_json.get("grade") 
or "").strip() + if not grade: + return None + + days, _, end_date = self._resolve_travel_allowance_days( + context_json=context_json, + occurred_at=occurred_at, + ) + allowance_location = self._resolve_travel_allowance_location( + location=location, + context_documents=context_documents, + ) + if days < 1 or not allowance_location: + return None + + try: + from app.services.travel_reimbursement_calculator import ( + TravelReimbursementCalculatorService, + ) + + result = TravelReimbursementCalculatorService(self.db).calculate( + TravelReimbursementCalculatorRequest( + days=days, + location=allowance_location, + grade=grade, + ), + CurrentUserContext( + username=user_id, + name="", + role_codes=[], + is_admin=False, + ), + ) + except ValueError: + return None + + allowance_amount = Decimal(result.allowance_amount or Decimal("0.00")).quantize(Decimal("0.01")) + allowance_rate = Decimal(result.total_allowance_rate or Decimal("0.00")).quantize(Decimal("0.01")) + if allowance_amount <= Decimal("0.00") or allowance_rate <= Decimal("0.00"): + return None + + return { + "item_date": end_date, + "item_type": "travel_allowance", + "item_reason": ( + f"系统自动计算出差补贴:{result.matched_city},{days}天," + f"{allowance_rate:.2f}元/天" + ), + "item_location": str(result.allowance_region or allowance_location).strip(), + "item_amount": allowance_amount, + "invoice_id": None, + } + + @staticmethod + def _should_add_travel_allowance_item( + *, + expense_type: str, + context_documents: list[dict[str, Any]], + context_json: dict[str, Any], + ) -> bool: + normalized_expense_type = str(expense_type or "").strip().lower() + if normalized_expense_type == "travel": + return True + + review_form_values = context_json.get("review_form_values") + if isinstance(review_form_values, dict): + review_type = str( + review_form_values.get("expense_type") + or review_form_values.get("scene_label") + or review_form_values.get("reason_value") + or "" + ) + if any(keyword in review_type for keyword in ("差旅", 
"出差")): + return True + + for document in context_documents: + document_type = str(document.get("document_type") or "").strip() + scene_code = str(document.get("scene_code") or "").strip() + if document_type in {"train_ticket", "flight_itinerary"} or scene_code == "travel": + return True + return False + + def _resolve_travel_allowance_days( + self, + *, + context_json: dict[str, Any], + occurred_at: datetime, + ) -> tuple[int, date, date]: + start_date = occurred_at.date() + end_date = start_date + explicit_days = self._extract_travel_allowance_days_from_context(context_json) + + business_time_context = context_json.get("business_time_context") + if isinstance(business_time_context, dict): + start_date = self._parse_iso_date_or_default(business_time_context.get("start_date"), start_date) + end_date = self._parse_iso_date_or_default(business_time_context.get("end_date"), start_date) + else: + review_form_values = context_json.get("review_form_values") + if isinstance(review_form_values, dict): + time_text = str( + review_form_values.get("time_range") + or review_form_values.get("business_time") + or review_form_values.get("occurred_date") + or "" + ).strip() + matched_dates = re.findall(r"\d{4}-\d{2}-\d{2}", time_text) + if matched_dates: + start_date = self._parse_iso_date_or_default(matched_dates[0], start_date) + end_date = self._parse_iso_date_or_default(matched_dates[-1], start_date) + + if end_date < start_date: + end_date = start_date + if explicit_days > 0: + return explicit_days, start_date, start_date + timedelta(days=explicit_days - 1) + days = (end_date - start_date).days + 1 + return max(1, days), start_date, end_date + + @staticmethod + def _extract_travel_allowance_days_from_context(context_json: dict[str, Any]) -> int: + review_form_values = context_json.get("review_form_values") + text_parts: list[str] = [] + if isinstance(review_form_values, dict): + text_parts.extend( + str(review_form_values.get(key) or "") + for key in ( + "reason", + 
"business_reason", + "reason_value", + "scene_label", + "time_range", + "business_time", + ) + ) + text_parts.extend( + str(context_json.get(key) or "") + for key in ("user_input_text", "message", "raw_text", "ocr_summary") + ) + return ExpenseClaimDocumentItemBuilderMixin._extract_travel_day_count(" ".join(text_parts)) + + @staticmethod + def _extract_travel_day_count(text: str) -> int: + normalized = str(text or "").replace(" ", "") + if not normalized: + return 0 + patterns = ( + r"(?:出差|差旅|行程|支撑|支持|部署|项目|业务)\D{0,12}?(\d{1,2})天", + r"(\d{1,2})天(?:出差|差旅|行程)", + ) + for pattern in patterns: + match = re.search(pattern, normalized) + if not match: + continue + try: + return max(1, int(match.group(1))) + except ValueError: + continue + return 0 + + @staticmethod + def _parse_iso_date_or_default(value: Any, fallback: date) -> date: + try: + return date.fromisoformat(str(value or "").strip()) + except ValueError: + return fallback + + @staticmethod + def _resolve_travel_allowance_location( + *, + location: str, + context_documents: list[dict[str, Any]], + ) -> str: + normalized_location = str(location or "").strip() + if normalized_location and normalized_location not in {"待补充", "未知", "暂无"}: + return normalized_location + + for document in context_documents: + for field in list(document.get("document_fields") or []): + if not isinstance(field, dict): + continue + key = str(field.get("key") or "").strip().lower() + label = str(field.get("label") or "").strip() + value = str(field.get("value") or "").strip() + if key == "route" or "行程" in label: + separators = ("-", "至", "→", "->") + for separator in separators: + if separator in value: + return value.split(separator)[-1].strip() + if key in {"destination", "arrival_city"} or label in {"目的地", "到达城市"}: + return value + return "" + + def _collect_invoice_keys_from_incoming_document(self, document: dict[str, Any]) -> list[str]: + document_info = dict(document or {}) + if "fields" not in document_info and 
isinstance(document_info.get("document_fields"), list): + document_info["fields"] = document_info.get("document_fields") + return self._collect_invoice_keys_from_document_info(document_info) + + def _resolve_document_item_type(self, document: dict[str, Any], *, fallback: str) -> str: + document_type = str(document.get("document_type") or "").strip() + mapped_type = DOCUMENT_TYPE_ITEM_TYPE_MAP.get(document_type) + if mapped_type: + return mapped_type + + scene_code = str(document.get("scene_code") or "").strip() + if scene_code in {"travel", "hotel", "transport", "meal", "office", "meeting", "training"}: + return scene_code + + if document_type in {"flight_itinerary", "train_ticket"}: + return "travel" + if document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"}: + return "transport" + if document_type == "hotel_invoice": + return "hotel" + if document_type == "meal_receipt": + return "meal" + if document_type == "office_invoice": + return "office" + if document_type == "meeting_invoice": + return "meeting" + if document_type == "training_invoice": + return "training" + + scene_label = str(document.get("scene_label") or "").strip() + if "交通" in scene_label: + return "transport" + if "住宿" in scene_label: + return "hotel" + if "餐" in scene_label: + return "meal" + if "会务" in scene_label or "会议" in scene_label: + return "meeting" + if "培训" in scene_label: + return "training" + return fallback or "other" + + def _resolve_document_item_reason(self, document: dict[str, Any], *, fallback: str) -> str: + document_type = str(document.get("document_type") or "").strip().lower() + item_type = self._resolve_document_item_type(document, fallback="") + + if document_type in {"train_ticket", "flight_itinerary"} or item_type in {"train_ticket", "flight_ticket"}: + route = self._resolve_document_route_value(document) + trip_no = self._resolve_document_fact_field( + document, + keys={"trip_no", "flight_no", "train_no"}, + labels={"车次", "航班"}, + ) + if route and 
trip_no: + return f"{self._format_document_route(route)}({trip_no})" + if route: + return self._format_document_route(route) + + if document_type in {"taxi_receipt", "transport_receipt"} or item_type == "ride_ticket": + route = self._resolve_document_route_value(document) + if route: + return self._format_document_route(route) + + if document_type == "hotel_invoice" or item_type == "hotel_ticket": + merchant = self._resolve_document_fact_field( + document, + keys={"merchant_name", "merchant", "seller_name", "vendor_name", "hotel_name"}, + labels={"商户", "酒店", "宾馆", "销售方", "开票方"}, + ) + stay_range = self._resolve_document_stay_range(document) + if merchant and stay_range: + return f"{merchant},{stay_range}" + if merchant: + return merchant + if stay_range: + return stay_range + + merchant = self._resolve_document_fact_field( + document, + keys={"merchant_name", "merchant", "seller_name", "vendor_name"}, + labels={"商户", "销售方", "开票方", "收款方"}, + ) + if merchant: + return merchant + + summary = str(document.get("summary") or "").strip() + return summary or fallback or "" diff --git a/server/src/app/services/expense_claim_document_parsing.py b/server/src/app/services/expense_claim_document_parsing.py new file mode 100644 index 0000000..8dc51dd --- /dev/null +++ b/server/src/app/services/expense_claim_document_parsing.py @@ -0,0 +1,396 @@ +from __future__ import annotations + +import json +import re +import shutil +import uuid +from collections import defaultdict +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal, InvalidOperation +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import func, or_, select +from sqlalchemy import inspect as sqlalchemy_inspect +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session, selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from 
app.models.agent_asset import AgentAsset +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.ontology import OntologyEntity, OntologyParseResult +from app.schemas.reimbursement import ( + ExpenseClaimItemCreate, + ExpenseClaimItemUpdate, + ExpenseClaimUpdate, + TravelReimbursementCalculatorRequest, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.agent_foundation import AgentFoundationService +from app.services.audit import AuditLogService +from app.services.document_intelligence import build_document_insight +from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + 
RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + TRAVEL_POLICY_HOTEL_LIMITS, + TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin +from app.services.expense_amounts import ( + extract_amount_candidates, + format_decimal_amount, + is_amount_match_date_fragment, + is_date_like_amount_candidate, + is_probable_year_amount, + parse_document_amount_value, + parse_plain_document_amount_value, + resolve_document_field_amount, + resolve_document_item_amount, + resolve_document_text_amount, +) +from app.services.expense_rule_runtime import ( + DEFAULT_SCENE_RULE_ASSET_CODE, + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, + resolve_document_type_label, +) +from app.services.ocr import OcrService + + +class ExpenseClaimDocumentParsingMixin: + def _resolve_document_route_value(self, document: dict[str, Any]) -> str: + route = self._resolve_document_fact_field( + document, + keys={"route", "trip_route"}, + labels={"行程", "路线"}, + ) + if route: + return route + + origin = self._resolve_document_fact_field( + document, + keys={ + "origin", + "from", + "from_city", + "departure", + "departure_city", + "start", + "start_location", + "start_address", + "pickup_location", + "pickup_address", + "boarding_station", + }, + labels=DOCUMENT_ROUTE_ORIGIN_LABELS, + ) + destination = 
self._resolve_document_fact_field( + document, + keys={ + "destination", + "to", + "to_city", + "arrival", + "arrival_city", + "end", + "end_location", + "end_address", + "dropoff_location", + "dropoff_address", + "alighting_station", + }, + labels=DOCUMENT_ROUTE_DESTINATION_LABELS, + ) + if origin and destination: + return f"{origin}-{destination}" + + text = " ".join( + [ + str(document.get("summary") or "").strip(), + str(document.get("text") or "").strip(), + ] + ).strip() + text_route = self._extract_document_route_from_text(text) + if text_route: + return text_route + + text_origin = self._extract_document_labeled_text_value(text, DOCUMENT_ROUTE_ORIGIN_LABELS) + text_destination = self._extract_document_labeled_text_value(text, DOCUMENT_ROUTE_DESTINATION_LABELS) + if text_origin and text_destination: + return f"{text_origin}-{text_destination}" + return "" + + @staticmethod + def _resolve_document_fact_field( + document: dict[str, Any], + *, + keys: set[str], + labels: set[str], + ) -> str: + raw_fields = document.get("document_fields") + if not isinstance(raw_fields, list): + raw_fields = document.get("fields") + if not isinstance(raw_fields, list): + return "" + + normalized_keys = {str(key or "").strip().lower().replace("_", "") for key in keys} + for field in raw_fields: + if not isinstance(field, dict): + continue + field_key = str(field.get("key") or "").strip().lower().replace("_", "") + label = str(field.get("label") or "").replace(" ", "") + value = str(field.get("value") or "").strip() + if not value: + continue + if field_key in normalized_keys or any(token in label for token in labels): + return value + return "" + + @staticmethod + def _format_document_route(route: str) -> str: + normalized = ( + str(route or "") + .strip() + .replace("->", "-") + .replace("→", "-") + .replace("—", "-") + .replace("–", "-") + .replace("至", "-") + .replace("到", "-") + ) + if "-" not in normalized: + return str(route or "").strip() + origin, destination = 
[part.strip() for part in normalized.split("-", 1)] + origin = origin.removeprefix("从").strip() + destination = destination.removeprefix("至").removeprefix("到").strip() + if not origin or not destination or origin == destination: + return str(route or "").strip() + return f"{origin}-{destination}" + + @staticmethod + def _extract_document_route_from_text(text: str) -> str: + for match in DOCUMENT_ROUTE_TEXT_PATTERN.finditer(str(text or "")): + origin = str(match.group(1) or "").strip() + destination = str(match.group(2) or "").strip() + if not origin or not destination or origin == destination: + continue + if origin.isdigit() and destination.isdigit(): + continue + if DOCUMENT_DATE_PATTERN.search(f"{origin}-{destination}"): + continue + return f"{origin}-{destination}" + return "" + + @staticmethod + def _extract_document_labeled_text_value(text: str, labels: set[str]) -> str: + for label in sorted(labels, key=len, reverse=True): + pattern = re.compile( + rf"{re.escape(label)}[::\s]*" + r"([A-Za-z0-9\u4e00-\u9fa5()()·\-路街道号弄区县市省园桥站机场中心]{2,50})" + ) + match = pattern.search(str(text or "")) + if match: + return str(match.group(1) or "").strip() + return "" + + def _resolve_document_stay_range(self, document: dict[str, Any]) -> str: + check_in = self._resolve_document_fact_field( + document, + keys={"check_in", "checkin", "arrival_date", "start_date"}, + labels={"入住", "入住日期", "到店", "开始日期"}, + ) + check_out = self._resolve_document_fact_field( + document, + keys={"check_out", "checkout", "departure_date", "end_date"}, + labels={"离店", "退房", "离店日期", "结束日期"}, + ) + if check_in and check_out: + return f"{check_in}至{check_out}" + nights = self._resolve_document_fact_field( + document, + keys={"nights", "night_count", "room_nights"}, + labels={"间夜", "晚数", "入住天数"}, + ) + if nights: + return f"{nights}晚" + return "" + + def _resolve_document_item_amount(self, document: dict[str, Any]) -> Decimal | None: + return resolve_document_item_amount(document) + + def 
_resolve_document_field_amount(self, document: dict[str, Any]) -> Decimal | None: + return resolve_document_field_amount(document) + + def _resolve_document_text_amount(self, text: str) -> Decimal | None: + return resolve_document_text_amount(text) + + def _parse_document_amount_value(self, value: str) -> Decimal | None: + return parse_document_amount_value(value) + + @staticmethod + def _parse_plain_document_amount_value(value: str) -> Decimal | None: + return parse_plain_document_amount_value(value) + + @staticmethod + def _is_probable_year_amount(amount: Decimal | None) -> bool: + return is_probable_year_amount(amount) + + @classmethod + def _is_date_like_amount_candidate(cls, amount: Decimal | None, text: str) -> bool: + return is_date_like_amount_candidate(amount, text) + + @staticmethod + def _format_decimal_amount(amount: Decimal | None) -> str: + return format_decimal_amount(amount) + + def _resolve_document_item_date(self, document: dict[str, Any], *, fallback: date) -> date: + return self._resolve_document_item_date_candidate(document) or fallback + + def _resolve_document_item_date_candidate(self, document: dict[str, Any]) -> date | None: + document_type = str(document.get("document_type") or "").strip().lower() + if document_type in DOCUMENT_TRIP_DATE_LABELS: + parsed = self._resolve_document_date_from_fields( + document, + keys=DOCUMENT_TRIP_DATE_KEYS, + labels=DOCUMENT_TRIP_DATE_LABEL_TOKENS, + ) + if parsed is not None: + return parsed + + parsed = self._resolve_document_date_from_fields( + document, + keys=DOCUMENT_GENERIC_DATE_KEYS, + labels=DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + excluded_labels=DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + ) + if parsed is not None: + return parsed + + parsed = self._parse_document_date( + " ".join( + [ + str(document.get("summary") or "").strip(), + str(document.get("text") or "").strip(), + ] + ).strip() + ) + if parsed is not None: + return parsed + + return None + + for field in list(document.get("document_fields") or 
[]): + if not isinstance(field, dict): + continue + key = str(field.get("key") or "").strip().lower().replace("_", "") + label = str(field.get("label") or "").replace(" ", "") + value = str(field.get("value") or "").strip() + if not value: + continue + if key in {"date", "time", "issuedat", "issuedate", "invoicedate"} or any( + token in label for token in ("日期", "时间", "开票日期", "发生时间") + ): + parsed = self._parse_document_date(value) + if parsed is not None: + return parsed + + parsed = self._parse_document_date( + " ".join( + [ + str(document.get("summary") or "").strip(), + str(document.get("text") or "").strip(), + ] + ).strip() + ) + return parsed + + def _resolve_document_date_from_fields( + self, + document: dict[str, Any], + *, + keys: set[str], + labels: tuple[str, ...], + excluded_labels: tuple[str, ...] = (), + ) -> date | None: + for field in list(document.get("document_fields") or []): + if not isinstance(field, dict): + continue + key = str(field.get("key") or "").strip().lower().replace("_", "") + label = str(field.get("label") or "").replace(" ", "") + if excluded_labels and any(token in label for token in excluded_labels): + continue + if key not in keys and not any(token in label for token in labels): + continue + parsed = self._parse_document_date(str(field.get("value") or "")) + if parsed is not None: + return parsed + return None + + @staticmethod + def _parse_document_date(value: str) -> date | None: + match = DOCUMENT_DATE_PATTERN.search(str(value or "")) + if not match: + return None + raw_value = str(match.group(1) or "").strip() + normalized = raw_value.replace("年", "-").replace("月", "-").replace("日", "") + normalized = normalized.replace("/", "-").replace(".", "-") + parts = [part for part in normalized.split("-") if part] + if len(parts) != 3: + return None + try: + return date(int(parts[0]), int(parts[1]), int(parts[2])) + except ValueError: + return None diff --git a/server/src/app/services/expense_claim_draft_flow.py 
b/server/src/app/services/expense_claim_draft_flow.py new file mode 100644 index 0000000..85e0c65 --- /dev/null +++ b/server/src/app/services/expense_claim_draft_flow.py @@ -0,0 +1,612 @@ +from __future__ import annotations + +import json +import re +import shutil +import uuid +from collections import defaultdict +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal, InvalidOperation +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import func, or_, select +from sqlalchemy import inspect as sqlalchemy_inspect +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session, selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.ontology import OntologyEntity, OntologyParseResult +from app.schemas.reimbursement import ( + ExpenseClaimItemCreate, + ExpenseClaimItemUpdate, + ExpenseClaimUpdate, + TravelReimbursementCalculatorRequest, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.agent_foundation import AgentFoundationService +from app.services.audit import AuditLogService +from app.services.document_intelligence import build_document_insight +from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + 
TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + TRAVEL_POLICY_HOTEL_LIMITS, + TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin +from app.services.expense_amounts import ( + extract_amount_candidates, + format_decimal_amount, + is_amount_match_date_fragment, + is_date_like_amount_candidate, + is_probable_year_amount, + parse_document_amount_value, + parse_plain_document_amount_value, + resolve_document_field_amount, + resolve_document_item_amount, + resolve_document_text_amount, +) +from 
app.services.expense_rule_runtime import ( + DEFAULT_SCENE_RULE_ASSET_CODE, + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, + resolve_document_type_label, +) +from app.services.ocr import OcrService + + +class ExpenseClaimDraftFlowMixin: + def upsert_draft_from_ontology( + self, + *, + run_id: str, + user_id: str | None, + message: str, + ontology: OntologyParseResult, + context_json: dict[str, Any], + ) -> dict[str, Any]: + self._ensure_ready() + context_json = dict(context_json or {}) + retry_count = self._resolve_claim_no_retry_count(context_json) + + review_action = str(context_json.get("review_action") or "").strip() + attachment_names = self._resolve_attachment_names(context_json) + context_documents = self._resolve_context_documents(context_json) + + employee = self._resolve_employee( + ontology=ontology, + context_json=context_json, + user_id=user_id, + ) + draft_owner_name = ( + employee.name + if employee is not None + else self._resolve_employee_name( + ontology=ontology, + context_json=context_json, + user_id=user_id, + ) + ) + + association_candidate = self._find_association_candidate( + ontology=ontology, + context_json=context_json, + user_id=user_id, + employee=employee, + ) + if self._should_defer_multi_document_association( + context_json=context_json, + review_action=review_action, + association_candidate=association_candidate, + context_documents=context_documents, + ): + document_count = max(len(context_documents), len(attachment_names), self._resolve_attachment_count(context_json)) + return { + "message": ( + f"检测到你已有草稿 {association_candidate.claim_no}," + f"当前新上传了 {document_count} 张票据,请先选择关联到现有草稿,或单独建立新的报销单。" + ), + "draft_only": False, + "status": "pending_association_decision", + "pending_association_decision": True, + "association_candidate_claim_id": association_candidate.id, + "association_candidate_claim_no": association_candidate.claim_no, + } + + claim = self._find_target_claim( + 
ontology=ontology, + context_json=context_json, + review_action=review_action, + association_candidate=association_candidate, + ) + is_new_claim = claim is None + before_json = self._serialize_claim(claim) if claim is not None else None + if is_new_claim: + existing_draft_count = self._count_draft_claims_for_owner( + employee=employee, + user_id=user_id, + ) + if existing_draft_count >= MAX_DRAFT_CLAIMS_PER_USER: + return { + "message": ( + f"你当前已保存 {MAX_DRAFT_CLAIMS_PER_USER} 个草稿,请先完成已保存的草稿," + "才能再次新建草稿。" + ), + "draft_limit_reached": True, + "draft_only": False, + "status": "blocked", + "draft_count": existing_draft_count, + "max_draft_count": MAX_DRAFT_CLAIMS_PER_USER, + } + + amount = self._resolve_amount(ontology.entities, context_json=context_json) + occurred_at = self._resolve_occurred_at(ontology, context_json=context_json) + explicit_expense_type = self._resolve_explicit_review_expense_type(context_json) + inferred_expense_type = self._resolve_expense_type(ontology.entities, context_json=context_json) + locked_expense_type = explicit_expense_type + if not locked_expense_type and claim is not None and review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS: + locked_expense_type = str(claim.expense_type or "").strip() + expense_type = locked_expense_type or inferred_expense_type + location = self._resolve_location(message=message, context_json=context_json) + reason = self._resolve_reason( + message=message, + context_json=context_json, + allow_message_fallback=is_new_claim, + ) + attachment_count = len(attachment_names) or self._resolve_attachment_count(context_json) + + final_amount = amount if amount is not None else (claim.amount if claim is not None else Decimal("0.00")) + final_occurred_at = ( + occurred_at if occurred_at is not None else (claim.occurred_at if claim is not None else datetime.now(UTC)) + ) + final_expense_type = expense_type or (claim.expense_type if claim is not None else "other") + final_location = location or (claim.location if claim 
is not None else "待补充") + final_reason = reason or (claim.reason if claim is not None else "待补充") + final_attachment_count = ( + attachment_count if attachment_count > 0 else int(claim.invoice_count or 0) if claim is not None else 0 + ) + final_risk_flags = self._merge_persistent_claim_risk_flags( + existing_flags=list(claim.risk_flags_json or []) if claim is not None else [], + next_flags=list(ontology.risk_flags), + ) + if context_documents or attachment_names: + document_specs = self._build_context_item_specs( + context_documents=context_documents, + attachment_names=attachment_names, + occurred_at=final_occurred_at, + expense_type=final_expense_type, + amount=final_amount, + reason=final_reason, + location=final_location, + context_json=context_json, + employee_grade=str(employee.grade or "").strip() if employee is not None else "", + user_id=user_id, + ) + else: + document_specs = [] + + if claim is not None and review_action == "link_to_existing_draft" and document_specs: + duplicate_result = self._build_duplicate_attachment_block_result( + claim=claim, + document_specs=document_specs, + context_documents=context_documents, + ) + if duplicate_result is not None: + return duplicate_result + + try: + if claim is None: + claim = ExpenseClaim( + claim_no=self._generate_claim_no(final_occurred_at), + employee_id=employee.id if employee is not None else None, + employee_name=draft_owner_name, + department_id=employee.organization_unit_id if employee is not None else None, + department_name=self._resolve_department_name( + employee=employee, + context_json=context_json, + ), + project_code=self._resolve_project_code(ontology.entities), + expense_type=final_expense_type, + reason=final_reason, + location=final_location, + amount=final_amount, + currency="CNY", + invoice_count=final_attachment_count, + occurred_at=final_occurred_at, + status="draft", + approval_stage="待提交", + risk_flags_json=final_risk_flags, + ) + self.db.add(claim) + else: + claim.employee_id = 
employee.id if employee is not None else claim.employee_id + claim.employee_name = ( + employee.name + if employee is not None + else self._resolve_employee_name( + ontology=ontology, + context_json=context_json, + user_id=user_id, + fallback=claim.employee_name, + ) + ) + claim.department_id = employee.organization_unit_id if employee is not None else claim.department_id + claim.department_name = self._resolve_department_name( + employee=employee, + context_json=context_json, + fallback=claim.department_name, + ) + claim.project_code = self._resolve_project_code(ontology.entities) or claim.project_code + claim.expense_type = final_expense_type + claim.reason = final_reason + claim.location = final_location + claim.amount = final_amount + claim.invoice_count = final_attachment_count + claim.occurred_at = final_occurred_at + claim.status = "draft" + claim.approval_stage = "待提交" + claim.risk_flags_json = final_risk_flags + + self.db.flush() + if document_specs and (is_new_claim or review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS): + if review_action == "link_to_existing_draft" and claim.items: + self._append_document_items( + claim=claim, + item_specs=document_specs, + ) + else: + self._replace_claim_items( + claim=claim, + item_specs=document_specs, + ) + self._sync_claim_from_items(claim) + else: + self._upsert_primary_item( + claim=claim, + occurred_at=final_occurred_at, + expense_type=final_expense_type, + amount=final_amount, + reason=final_reason, + location=final_location, + attachment_names=attachment_names, + ) + self._sync_claim_from_items(claim) + if locked_expense_type: + claim.expense_type = locked_expense_type + self.db.commit() + self.db.refresh(claim) + except IntegrityError as exc: + self.db.rollback() + if ( + is_new_claim + and retry_count < MAX_CLAIM_NO_RETRY_ATTEMPTS + and self._is_claim_no_conflict_error(exc) + ): + retry_context = dict(context_json) + retry_context["_claim_no_retry_count"] = retry_count + 1 + return 
self.upsert_draft_from_ontology( + run_id=run_id, + user_id=user_id, + message=message, + ontology=ontology, + context_json=retry_context, + ) + raise + except Exception: + self.db.rollback() + raise + + self.audit_service.log_action( + actor=user_id or claim.employee_name or "anonymous", + action="expense_claim.draft_upsert", + resource_type="expense_claim", + resource_id=claim.id, + before_json=before_json, + after_json=self._serialize_claim(claim), + request_id=run_id, + ) + + return { + "message": ( + f"已{'创建' if is_new_claim else '更新'}报销草稿 {claim.claim_no},当前状态为 draft。" + "请核对识别结果,确认无误后继续提交。" + ), + "draft_only": True, + "claim_id": claim.id, + "claim_no": claim.claim_no, + "status": claim.status, + "amount": float(claim.amount), + "invoice_count": int(claim.invoice_count or 0), + } + + def _find_target_claim( + self, + *, + ontology: OntologyParseResult, + context_json: dict[str, Any], + review_action: str = "", + association_candidate: ExpenseClaim | None = None, + ) -> ExpenseClaim | None: + if review_action == "create_new_claim_from_documents": + return None + if review_action == "link_to_existing_draft" and association_candidate is not None: + return association_candidate + + draft_claim_id = str(context_json.get("draft_claim_id") or "").strip() + if draft_claim_id: + claim = self.db.get(ExpenseClaim, draft_claim_id) + if claim is not None and self._is_editable_claim_status(claim.status): + return claim + return None + + claim_codes = [ + item.normalized_value + for item in ontology.entities + if item.type == "expense_claim" and item.normalized_value + ] + if not claim_codes: + return None + + stmt = ( + select(ExpenseClaim) + .where(ExpenseClaim.claim_no.in_(claim_codes)) + .where(ExpenseClaim.status.in_(EDITABLE_CLAIM_STATUSES)) + .limit(1) + ) + return self.db.scalar(stmt) + + def _find_association_candidate( + self, + *, + ontology: OntologyParseResult, + context_json: dict[str, Any], + user_id: str | None, + employee: Employee | None, + ) -> 
ExpenseClaim | None: + draft_claim_id = str(context_json.get("draft_claim_id") or "").strip() + if draft_claim_id: + claim = self.db.get(ExpenseClaim, draft_claim_id) + if claim is not None and self._is_editable_claim_status(claim.status): + return claim + + owner_filters = self._build_draft_owner_filters( + employee=employee, + user_id=user_id, + ) + if not owner_filters: + fallback_name = self._resolve_employee_name( + ontology=ontology, + context_json=context_json, + user_id=user_id, + fallback="", + ) + if fallback_name: + owner_filters = [ExpenseClaim.employee_name == fallback_name] + + if not owner_filters: + return None + + stmt = ( + select(ExpenseClaim) + .where(ExpenseClaim.status.in_(EDITABLE_CLAIM_STATUSES)) + .where(or_(*owner_filters)) + .order_by(ExpenseClaim.updated_at.desc(), ExpenseClaim.created_at.desc()) + .limit(1) + ) + return self.db.scalar(stmt) + + def _should_defer_multi_document_association( + self, + *, + context_json: dict[str, Any], + review_action: str, + association_candidate: ExpenseClaim | None, + context_documents: list[dict[str, Any]], + ) -> bool: + if association_candidate is None: + return False + if review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS: + return False + document_count = max( + len(context_documents), + len(self._resolve_attachment_names(context_json)), + self._resolve_attachment_count(context_json), + ) + return document_count > 1 + + def _replace_claim_items( + self, + *, + claim: ExpenseClaim, + item_specs: list[dict[str, Any]], + ) -> None: + existing_items = sorted( + list(claim.items), + key=lambda item: ( + item.item_date or date.max, + self._normalize_sort_datetime(item.created_at), + ), + ) + for index, spec in enumerate(item_specs): + item = existing_items[index] if index < len(existing_items) else None + if item is None: + item = ExpenseClaimItem(claim_id=claim.id) + claim.items.append(item) + self.db.add(item) + item.item_date = spec["item_date"] + item.item_type = spec["item_type"] + 
item.item_reason = spec["item_reason"] + item.item_location = spec["item_location"] + item.item_amount = spec["item_amount"] + item.invoice_id = ( + None + if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES + else self._attachment_presentation.merge_reference(item.invoice_id, spec["invoice_id"]) + ) + + for stale_item in existing_items[len(item_specs) :]: + claim.items.remove(stale_item) + self.db.delete(stale_item) + + def _append_document_items( + self, + *, + claim: ExpenseClaim, + item_specs: list[dict[str, Any]], + ) -> None: + system_specs = [ + spec for spec in item_specs if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES + ] + normal_specs = [ + spec for spec in item_specs if str(spec.get("item_type") or "").strip() not in SYSTEM_GENERATED_ITEM_TYPES + ] + existing_invoice_ids = { + str(item.invoice_id or "").strip() + for item in claim.items + if str(item.invoice_id or "").strip() + } + existing_invoice_names = { + self._attachment_presentation.resolve_display_name(item.invoice_id) + for item in claim.items + if str(item.invoice_id or "").strip() + } + for spec in normal_specs: + invoice_id = str(spec.get("invoice_id") or "").strip() + invoice_name = self._attachment_presentation.resolve_display_name(invoice_id) + if invoice_id and (invoice_id in existing_invoice_ids or invoice_name in existing_invoice_names): + continue + claim.items.append( + ExpenseClaimItem( + claim_id=claim.id, + item_date=spec["item_date"], + item_type=spec["item_type"], + item_reason=spec["item_reason"], + item_location=spec["item_location"], + item_amount=spec["item_amount"], + invoice_id=spec["invoice_id"], + ) + ) + self.db.add(claim.items[-1]) + if invoice_id: + existing_invoice_ids.add(invoice_id) + existing_invoice_names.add(invoice_name) + + if system_specs: + existing_system_items = [ + item for item in list(claim.items) if str(item.item_type or "").strip() in SYSTEM_GENERATED_ITEM_TYPES + ] + for stale_item in 
existing_system_items: + claim.items.remove(stale_item) + self.db.delete(stale_item) + for spec in system_specs: + claim.items.append( + ExpenseClaimItem( + claim_id=claim.id, + item_date=spec["item_date"], + item_type=spec["item_type"], + item_reason=spec["item_reason"], + item_location=spec["item_location"], + item_amount=spec["item_amount"], + invoice_id=spec["invoice_id"], + ) + ) + self.db.add(claim.items[-1]) + + def _build_duplicate_attachment_block_result( + self, + *, + claim: ExpenseClaim, + document_specs: list[dict[str, Any]], + context_documents: list[dict[str, Any]], + ) -> dict[str, Any] | None: + duplicate_matches = self._find_duplicate_attachment_matches( + claim=claim, + document_specs=document_specs, + context_documents=context_documents, + ) + if not duplicate_matches: + return None + + duplicate_labels = list( + dict.fromkeys( + str(item.get("incoming_label") or item.get("existing_label") or "").strip() + for item in duplicate_matches + if str(item.get("incoming_label") or item.get("existing_label") or "").strip() + ) + ) + duplicate_text = "、".join(duplicate_labels[:3]) or "本次上传票据" + reason = ( + f"检测到本次上传的票据与草稿 {claim.claim_no} 中已有票据重复:{duplicate_text}。" + "请重新上传不同的票据后再归集。" + ) + return { + "message": reason, + "draft_only": False, + "status": "blocked", + "duplicate_attachment_blocked": True, + "duplicate_invoice_blocked": True, + "submission_blocked": True, + "submission_blocked_reasons": [reason], + "missing_fields": [reason], + "risk_flags": ["duplicate_invoice"], + "duplicate_attachments": duplicate_matches, + "claim_id": claim.id, + "claim_no": claim.claim_no, + "amount": float(claim.amount or Decimal("0.00")), + "invoice_count": int(claim.invoice_count or 0), + } diff --git a/server/src/app/services/expense_claim_draft_persistence.py b/server/src/app/services/expense_claim_draft_persistence.py new file mode 100644 index 0000000..c05d816 --- /dev/null +++ b/server/src/app/services/expense_claim_draft_persistence.py @@ -0,0 +1,343 @@ 
+from __future__ import annotations + +import json +import re +import shutil +import uuid +from collections import defaultdict +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal, InvalidOperation +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import func, or_, select +from sqlalchemy import inspect as sqlalchemy_inspect +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session, selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.ontology import OntologyEntity, OntologyParseResult +from app.schemas.reimbursement import ( + ExpenseClaimItemCreate, + ExpenseClaimItemUpdate, + ExpenseClaimUpdate, + TravelReimbursementCalculatorRequest, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.agent_foundation import AgentFoundationService +from app.services.audit import AuditLogService +from app.services.document_intelligence import build_document_insight +from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + 
DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + TRAVEL_POLICY_HOTEL_LIMITS, + TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin +from app.services.expense_amounts import ( + extract_amount_candidates, + format_decimal_amount, + is_amount_match_date_fragment, + is_date_like_amount_candidate, + is_probable_year_amount, + parse_document_amount_value, + parse_plain_document_amount_value, + resolve_document_field_amount, + resolve_document_item_amount, + resolve_document_text_amount, +) +from app.services.expense_rule_runtime import ( + DEFAULT_SCENE_RULE_ASSET_CODE, + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, + resolve_document_type_label, +) +from 
app.services.ocr import OcrService + + +class ExpenseClaimDraftPersistenceMixin: + def _find_duplicate_attachment_matches( + self, + *, + claim: ExpenseClaim, + document_specs: list[dict[str, Any]], + context_documents: list[dict[str, Any]], + ) -> list[dict[str, str]]: + existing_tokens: dict[str, dict[str, str]] = {} + for item in list(claim.items or []): + if str(item.item_type or "").strip() in SYSTEM_GENERATED_ITEM_TYPES: + continue + invoice_id = str(item.invoice_id or "").strip() + if not invoice_id: + continue + + display_name = self._attachment_presentation.resolve_display_name(invoice_id) + for token in self._build_duplicate_attachment_tokens(invoice_id): + existing_tokens.setdefault( + token, + { + "existing_label": display_name or invoice_id, + "existing_item_id": str(item.id or ""), + "match_type": "filename", + }, + ) + + file_path = self._attachment_storage.resolve_item_path(item) + if file_path is not None and file_path.exists(): + metadata = self._attachment_storage.read_meta(file_path) + document_info = metadata.get("document_info") + if isinstance(document_info, dict): + for invoice_key in self._collect_invoice_keys_from_document_info(document_info): + token = self._normalize_duplicate_attachment_token(invoice_key) + if token: + existing_tokens.setdefault( + token, + { + "existing_label": display_name or invoice_id, + "existing_item_id": str(item.id or ""), + "match_type": "invoice_key", + }, + ) + + if not existing_tokens: + return [] + + document_by_filename = { + str(document.get("filename") or "").strip(): document + for document in context_documents + if isinstance(document, dict) and str(document.get("filename") or "").strip() + } + matches: list[dict[str, str]] = [] + seen_tokens: set[str] = set() + for spec in document_specs: + if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES: + continue + invoice_id = str(spec.get("invoice_id") or "").strip() + if not invoice_id: + continue + incoming_tokens = 
self._build_duplicate_attachment_tokens(invoice_id) + document = document_by_filename.get(invoice_id) + if document is not None: + incoming_tokens.extend( + self._normalize_duplicate_attachment_token(invoice_key) + for invoice_key in self._collect_invoice_keys_from_incoming_document(document) + ) + for token in incoming_tokens: + if not token or token in seen_tokens or token not in existing_tokens: + continue + seen_tokens.add(token) + existing = existing_tokens[token] + matches.append( + { + "incoming_label": self._attachment_presentation.resolve_display_name(invoice_id) or invoice_id, + "existing_label": existing.get("existing_label", ""), + "existing_item_id": existing.get("existing_item_id", ""), + "match_type": existing.get("match_type", "filename"), + } + ) + return matches + + @classmethod + def _build_duplicate_attachment_tokens(cls, value: str | None) -> list[str]: + raw = str(value or "").strip() + display_name = ExpenseClaimAttachmentPresentation.resolve_display_name(raw) + candidates = [raw, display_name] + return list( + dict.fromkeys( + token + for token in (cls._normalize_duplicate_attachment_token(candidate) for candidate in candidates) + if token + ) + ) + + @staticmethod + def _normalize_duplicate_attachment_token(value: str | None) -> str: + normalized = Path(str(value or "").strip()).name.lower() + normalized = re.sub(r"\s+", "", normalized) + normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._") + return normalized + + def _upsert_primary_item( + self, + *, + claim: ExpenseClaim, + occurred_at: datetime, + expense_type: str, + amount: Decimal, + reason: str, + location: str, + attachment_names: list[str], + ) -> None: + item = claim.items[0] if claim.items else None + if item is None: + item = ExpenseClaimItem( + claim_id=claim.id, + item_date=occurred_at.date(), + item_type=expense_type, + item_reason=reason, + item_location=location, + item_amount=amount, + invoice_id=attachment_names[0] if attachment_names else None, + 
) + claim.items.append(item) + self.db.add(item) + return + + item.item_date = occurred_at.date() + item.item_type = expense_type + item.item_reason = reason + item.item_location = location + item.item_amount = amount + item.invoice_id = ( + self._attachment_presentation.merge_reference(item.invoice_id, attachment_names[0]) + if attachment_names + else item.invoice_id + ) + + def _generate_claim_no(self, occurred_at: datetime) -> str: + month_code = occurred_at.strftime("%Y%m") + prefix = f"EXP-{month_code}-" + existing_claim_nos = list( + self.db.scalars( + select(ExpenseClaim.claim_no).where(ExpenseClaim.claim_no.like(f"{prefix}%")) + ) + ) + max_suffix = 0 + for claim_no in existing_claim_nos: + normalized = str(claim_no or "").strip() + if not normalized.startswith(prefix): + continue + suffix = normalized[len(prefix):] + if not suffix.isdigit(): + continue + max_suffix = max(max_suffix, int(suffix)) + return f"{prefix}{max_suffix + 1:03d}" + + @staticmethod + def _resolve_claim_no_retry_count(context_json: dict[str, Any]) -> int: + try: + return max(0, int(context_json.get("_claim_no_retry_count") or 0)) + except (TypeError, ValueError): + return 0 + + @staticmethod + def _is_claim_no_conflict_error(exc: IntegrityError) -> bool: + message = str(exc).lower() + return ( + "claim_no" in message + and ( + "unique" in message + or "duplicate key" in message + or "ix_expense_claims_claim_no" in message + or "expense_claims.claim_no" in message + ) + ) + + def _count_draft_claims_for_owner( + self, + *, + employee: Employee | None, + user_id: str | None, + ) -> int: + owner_filters = self._build_draft_owner_filters( + employee=employee, + user_id=user_id, + ) + if not owner_filters: + return 0 + + stmt = ( + select(func.count()) + .select_from(ExpenseClaim) + .where(ExpenseClaim.status == "draft") + .where(or_(*owner_filters)) + ) + return int(self.db.scalar(stmt) or 0) + + def _build_draft_owner_filters( + self, + *, + employee: Employee | None, + user_id: str | 
None, + ) -> list[Any]: + conditions: list[Any] = [] + seen: set[tuple[str, str]] = set() + + def add_condition(field_name: str, value: str | None) -> None: + normalized = str(value or "").strip() + if not normalized or normalized == "待补充": + return + + marker = (field_name, normalized.lower()) + if marker in seen: + return + seen.add(marker) + + if field_name == "employee_id": + conditions.append(ExpenseClaim.employee_id == normalized) + return + conditions.append(ExpenseClaim.employee_name == normalized) + + if employee is not None: + add_condition("employee_id", employee.id) + add_condition("employee_name", employee.email) + if self._access_policy.employee_name_is_unique(employee): + add_condition("employee_name", employee.name) + + add_condition("employee_name", user_id) + return conditions diff --git a/server/src/app/services/expense_claim_errors.py b/server/src/app/services/expense_claim_errors.py new file mode 100644 index 0000000..1f2a9a8 --- /dev/null +++ b/server/src/app/services/expense_claim_errors.py @@ -0,0 +1,7 @@ +from __future__ import annotations + + +class ExpenseClaimSubmissionBlockedError(ValueError): + def __init__(self, issues: list[str]) -> None: + self.issues = [str(issue or "").strip() for issue in issues if str(issue or "").strip()] + super().__init__("提交前请先补全信息:" + ";".join(self.issues)) diff --git a/server/src/app/services/expense_claim_item_sync.py b/server/src/app/services/expense_claim_item_sync.py new file mode 100644 index 0000000..8af4bf7 --- /dev/null +++ b/server/src/app/services/expense_claim_item_sync.py @@ -0,0 +1,461 @@ +from __future__ import annotations + +import re +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import or_, select +from sqlalchemy import inspect as sqlalchemy_inspect + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, 
AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.reimbursement import TravelReimbursementCalculatorRequest +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.expense_claim_constants import ( + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + DOCUMENT_FACT_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_rule_runtime import ( + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, +) + + +class ExpenseClaimItemSyncMixin: + def _sync_travel_allowance_item(self, claim: ExpenseClaim) -> None: + items = list(claim.items or []) + allowance_items = [ + item for item in items if str(item.item_type or "").strip().lower() == "travel_allowance" + ] + business_items = [ + item for item in items if str(item.item_type or "").strip().lower() != "travel_allowance" + ] + business_types = {str(item.item_type or "").strip().lower() for item in business_items} + is_travel_claim = str(claim.expense_type or "").strip().lower() == "travel" + has_travel_detail = bool(business_types & TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES) + if not is_travel_claim and not has_travel_detail: + for item in allowance_items: + self._discard_claim_item(claim, item) + return + + grade = str(claim.employee_grade or "").strip() + if not grade: + return + + allowance_location = self._resolve_travel_allowance_location_from_claim( + claim=claim, + business_items=business_items, + ) + if not allowance_location: + return + + existing_allowance = allowance_items[0] if allowance_items else None + days, start_date, end_date = self._resolve_travel_allowance_days_from_claim( + claim=claim, + 
business_items=business_items, + existing_allowance=existing_allowance, + ) + if days < 1: + return + + try: + from app.services.travel_reimbursement_calculator import ( + TravelReimbursementCalculatorService, + ) + + result = TravelReimbursementCalculatorService(self.db).calculate( + TravelReimbursementCalculatorRequest( + days=days, + location=allowance_location, + grade=grade, + ), + CurrentUserContext( + username=str(claim.employee_id or claim.employee_name or "system"), + name=str(claim.employee_name or ""), + role_codes=[], + is_admin=False, + ), + ) + except ValueError: + return + + allowance_amount = Decimal(result.allowance_amount or Decimal("0.00")).quantize(Decimal("0.01")) + allowance_rate = Decimal(result.total_allowance_rate or Decimal("0.00")).quantize(Decimal("0.01")) + if allowance_amount <= Decimal("0.00") or allowance_rate <= Decimal("0.00"): + return + + item = existing_allowance + if item is None: + item = ExpenseClaimItem(claim_id=claim.id) + claim.items.append(item) + self.db.add(item) + + for duplicate in allowance_items[1:]: + self._discard_claim_item(claim, duplicate) + + item.item_date = end_date + item.item_type = "travel_allowance" + item.item_reason = ( + f"系统自动计算出差补贴:{result.matched_city},{days}天," + f"{allowance_rate:.2f}元/天" + ) + item.item_location = str(result.allowance_region or allowance_location).strip() + item.item_amount = allowance_amount + item.invoice_id = None + + def _discard_claim_item(self, claim: ExpenseClaim, item: ExpenseClaimItem) -> None: + if item in claim.items: + claim.items.remove(item) + state = sqlalchemy_inspect(item) + if state.persistent: + self.db.delete(item) + elif state.pending: + self.db.expunge(item) + + def _resolve_travel_allowance_days_from_claim( + self, + *, + claim: ExpenseClaim, + business_items: list[ExpenseClaimItem], + existing_allowance: ExpenseClaimItem | None, + ) -> tuple[int, date, date]: + dated_items = sorted( + [item.item_date for item in business_items if item.item_date is not 
None] + ) + if dated_items: + start_date = dated_items[0] + end_date = dated_items[-1] + elif claim.occurred_at is not None: + start_date = claim.occurred_at.date() + end_date = start_date + else: + start_date = date.today() + end_date = start_date + + days = (end_date - start_date).days + 1 + explicit_days = max( + (self._extract_travel_day_count(item.item_reason) for item in business_items), + default=0, + ) + if explicit_days > 0: + days = explicit_days + end_date = start_date + timedelta(days=days - 1) + return max(1, days), start_date, end_date + existing_days = self._extract_travel_allowance_days(existing_allowance) + unique_dates = {value for value in dated_items} + if existing_days > days and len(unique_dates) <= 1: + days = existing_days + end_date = start_date + timedelta(days=days - 1) + return max(1, days), start_date, end_date + + @staticmethod + def _extract_travel_allowance_days(item: ExpenseClaimItem | None) -> int: + if item is None: + return 0 + match = re.search(r"(\d+)\s*天", str(item.item_reason or "")) + if not match: + return 0 + try: + return max(0, int(match.group(1))) + except ValueError: + return 0 + + def _resolve_travel_allowance_location_from_claim( + self, + *, + claim: ExpenseClaim, + business_items: list[ExpenseClaimItem], + ) -> str: + claim_location = str(claim.location or "").strip() + if claim_location and claim_location not in {"待补充", "未知", "暂无", "非必填"}: + return claim_location + + sorted_items = sorted( + business_items, + key=lambda item: (item.item_date or date.max, self._normalize_sort_datetime(item.created_at)), + ) + for item in sorted_items: + location = str(item.item_location or "").strip() + if location and location not in {"待补充", "未知", "暂无", "非必填"}: + return location + reason = str(item.item_reason or "").strip() + for separator in ("-", "至", "到", "→", "->"): + if separator in reason: + destination = reason.split(separator)[-1].strip() + if destination: + return destination + return "" + + def 
_sync_claim_from_items(self, claim: ExpenseClaim) -> None: + self._sync_travel_allowance_item(claim) + if not claim.items: + claim.amount = Decimal("0.00") + claim.invoice_count = 0 + claim.risk_flags_json = self._merge_claim_attachment_risk_flags(claim, []) + return + + ordered_items = sorted( + claim.items, + key=lambda item: ( + item.item_date or date.max, + self._normalize_sort_datetime(item.created_at), + ), + ) + primary_item = ordered_items[0] + total_amount = sum((item.item_amount for item in ordered_items), Decimal("0.00")) + + claim.amount = total_amount.quantize(Decimal("0.01")) + claim.invoice_count = sum(1 for item in ordered_items if str(item.invoice_id or "").strip()) + claim.occurred_at = datetime( + primary_item.item_date.year, + primary_item.item_date.month, + primary_item.item_date.day, + tzinfo=UTC, + ) + claim.expense_type = self._resolve_claim_expense_type_from_items( + ordered_items, + fallback=str(primary_item.item_type or claim.expense_type or "other").strip() or "other", + ) + primary_item_type = str(primary_item.item_type or "").strip() + if primary_item_type not in DOCUMENT_FACT_ITEM_TYPES: + claim.reason = ( + self._normalize_optional_text(primary_item.item_reason, fallback=claim.reason or "待补充") + or "待补充" + ) + claim.location = ( + self._normalize_optional_text(primary_item.item_location, fallback=claim.location or "待补充") + or "待补充" + ) + claim.risk_flags_json = self._merge_claim_attachment_risk_flags( + claim, + self._build_claim_attachment_risk_flags(ordered_items), + ) + if str(claim.status or "").strip().lower() == "draft": + claim.approval_stage = "待提交" + + @staticmethod + def _resolve_claim_expense_type_from_items( + items: list[ExpenseClaimItem], + *, + fallback: str, + ) -> str: + fallback_type = str(fallback or "").strip() or "other" + item_types = {str(item.item_type or "").strip().lower() for item in items} + if item_types & (TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES | {"travel_allowance"}): + return "travel" + return 
fallback_type + + def _refresh_item_attachment_analysis(self, item: ExpenseClaimItem) -> None: + file_path = self._attachment_storage.resolve_path(item.invoice_id) + if file_path is None or not file_path.exists(): + return + + metadata = self._attachment_storage.read_meta(file_path) + media_type = str(metadata.get("media_type") or self._attachment_presentation.resolve_media_type(file_path.name)).strip() + ocr_status = str(metadata.get("ocr_status") or "").strip().lower() + + if ocr_status == "failed": + analysis = self._build_failed_ocr_attachment_analysis( + media_type=media_type, + error_message=str(metadata.get("ocr_error") or ""), + item=item, + ) + elif ocr_status == "recognized" or any( + ( + str(metadata.get("ocr_text") or "").strip(), + str(metadata.get("ocr_summary") or "").strip(), + int(metadata.get("ocr_line_count") or 0), + list(metadata.get("ocr_warnings") or []), + ) + ): + stored_document_info = metadata.get("document_info") + if not isinstance(stored_document_info, dict): + stored_document_info = {} + document = SimpleNamespace( + filename=str(metadata.get("file_name") or file_path.name), + text=str(metadata.get("ocr_text") or ""), + summary=str(metadata.get("ocr_summary") or ""), + avg_score=float(metadata.get("ocr_avg_score") or 0.0), + line_count=int(metadata.get("ocr_line_count") or 0), + document_type=str(stored_document_info.get("document_type") or ""), + document_type_label=str(stored_document_info.get("document_type_label") or ""), + scene_code=str(stored_document_info.get("scene_code") or ""), + scene_label=str(stored_document_info.get("scene_label") or ""), + document_fields=list(stored_document_info.get("fields") or []), + warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()], + ) + document_info = self._build_attachment_document_info(document) + requirement_check = self._build_attachment_requirement_check( + item=item, + document_info=document_info, + ) + analysis = 
self._build_attachment_analysis( + document=document, + item=item, + claim=getattr(item, "claim", None), + document_info=document_info, + requirement_check=requirement_check, + ) + metadata["document_info"] = document_info + metadata["requirement_check"] = requirement_check + else: + analysis = self._build_fallback_attachment_analysis(media_type=media_type, item=item) + + metadata["analysis"] = analysis + self._attachment_storage.write_meta(file_path, metadata) + + def _build_claim_attachment_risk_flags( + self, ordered_items: list[ExpenseClaimItem] + ) -> list[dict[str, Any]]: + derived_flags: list[dict[str, Any]] = [] + for index, item in enumerate(ordered_items, start=1): + file_path = self._attachment_storage.resolve_path(item.invoice_id) + if file_path is None or not file_path.exists(): + continue + + metadata = self._attachment_storage.read_meta(file_path) + analysis = metadata.get("analysis") + if not isinstance(analysis, dict): + continue + + severity = str(analysis.get("severity") or "").strip().lower() + if severity in {"", "pass", "low"}: + continue + + summary = ( + str(analysis.get("summary") or analysis.get("headline") or "").strip() + or "附件存在待核对风险。" + ) + points = [ + str(point or "").strip() + for point in list(analysis.get("points") or []) + if str(point or "").strip() + ] + message_detail = ";".join(points[:3]) if points else summary + label = str( + analysis.get("label") or ("高风险" if severity == "high" else "中风险") + ).strip() + derived_flags.append( + { + "source": "attachment_analysis", + "item_id": item.id, + "severity": severity, + "label": label, + "message": f"费用明细第 {index} 条:{message_detail}", + "summary": summary, + "points": points, + } + ) + return derived_flags + + def _get_expense_rule_catalog(self) -> Any: + cached = getattr(self, "_expense_rule_catalog", None) + if cached is not None: + return cached + + db = getattr(self, "db", None) + if db is None: + catalog = build_default_expense_rule_catalog() + else: + catalog = 
ExpenseRuleRuntimeService(db).load_catalog() + setattr(self, "_expense_rule_catalog", catalog) + return catalog + + def _get_expense_scene_policy(self, expense_type: str | None) -> Any | None: + return self._get_expense_rule_catalog().get_scene_policy(expense_type) + + def _resolve_min_attachment_count(self, expense_type: str | None) -> int: + policy = self._get_expense_scene_policy(expense_type) + if policy is None: + return 1 + return max(0, int(policy.min_attachment_count or 0)) + + def _build_scene_reason_corpus(self, claim: ExpenseClaim) -> str: + parts = [str(claim.reason or "").strip(), str(claim.location or "").strip()] + for item in claim.items: + parts.append(str(item.item_reason or "").strip()) + parts.append(str(item.item_location or "").strip()) + return "\n".join(part for part in parts if part) + + @staticmethod + def _merge_claim_attachment_risk_flags( + claim: ExpenseClaim, + attachment_risk_flags: list[dict[str, Any]], + ) -> list[Any]: + preserved_flags = [ + flag + for flag in list(claim.risk_flags_json or []) + if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "attachment_analysis") + ] + return preserved_flags + attachment_risk_flags + + @staticmethod + def _format_submission_blocked_message(issues: list[str]) -> str: + normalized_issues = [str(issue or "").strip() for issue in issues if str(issue or "").strip()] + if not normalized_issues: + return "AI预审未通过,但没有返回明确原因,请刷新草稿后重试。" + + return "AI预审暂未通过,原因如下:\n" + "\n".join( + f"{index}. 
{issue}" for index, issue in enumerate(normalized_issues, start=1) + ) + + def _validate_claim_for_submission(self, claim: ExpenseClaim) -> list[str]: + issues: list[str] = [] + claim_location_required = self._is_location_required_expense_type(claim.expense_type) + claim_min_attachment_count = self._resolve_min_attachment_count(claim.expense_type) + + if self._is_missing_value(claim.employee_name): + issues.append("申请人未完善") + if self._is_missing_value(claim.department_name): + issues.append("所属部门未完善") + if self._is_missing_value(claim.expense_type): + issues.append("报销类型未完善") + if self._is_missing_value(claim.reason): + issues.append("报销事由未完善") + if claim_location_required and self._is_missing_value(claim.location): + issues.append("业务地点未完善") + if claim.amount is None or claim.amount <= Decimal("0.00"): + issues.append("报销金额未完善") + if claim.occurred_at is None: + issues.append("发生时间未完善") + if int(claim.invoice_count or 0) < claim_min_attachment_count: + issues.append("票据附件数量不足") + if not claim.items: + issues.append("费用明细不能为空") + + for index, item in enumerate(claim.items, start=1): + prefix = f"费用明细第 {index} 条" + is_system_generated = str(item.item_type or "").strip().lower() in SYSTEM_GENERATED_ITEM_TYPES + item_location_required = self._is_location_required_expense_type(item.item_type or claim.expense_type) + if item.item_date is None: + issues.append(f"{prefix}缺少日期") + if self._is_missing_value(item.item_type): + issues.append(f"{prefix}缺少费用项目") + if self._is_missing_value(item.item_reason): + issues.append(f"{prefix}缺少说明") + if item_location_required and self._is_missing_value(item.item_location): + issues.append(f"{prefix}缺少地点") + if item.item_amount is None or item.item_amount <= Decimal("0.00"): + issues.append(f"{prefix}缺少金额") + if not is_system_generated and self._is_missing_value(item.invoice_id): + issues.append(f"{prefix}缺少票据标识") + + return issues + + def _is_location_required_expense_type(self, expense_type: str | None) -> bool: + policy = 
self._get_expense_scene_policy(expense_type) + if policy is None: + return str(expense_type or "").strip().lower() in LOCATION_REQUIRED_EXPENSE_TYPES + return bool(policy.location_required) diff --git a/server/src/app/services/expense_claim_ontology_resolvers.py b/server/src/app/services/expense_claim_ontology_resolvers.py new file mode 100644 index 0000000..340d6fa --- /dev/null +++ b/server/src/app/services/expense_claim_ontology_resolvers.py @@ -0,0 +1,392 @@ +from __future__ import annotations + +import json +import re +import shutil +import uuid +from collections import defaultdict +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal, InvalidOperation +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import func, or_, select +from sqlalchemy import inspect as sqlalchemy_inspect +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session, selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.ontology import OntologyEntity, OntologyParseResult +from app.schemas.reimbursement import ( + ExpenseClaimItemCreate, + ExpenseClaimItemUpdate, + ExpenseClaimUpdate, + TravelReimbursementCalculatorRequest, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.agent_foundation import AgentFoundationService +from app.services.audit import AuditLogService +from app.services.document_intelligence import build_document_insight +from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import 
ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + TRAVEL_POLICY_HOTEL_LIMITS, + TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin +from app.services.expense_amounts import ( + extract_amount_candidates, + format_decimal_amount, 
+ is_amount_match_date_fragment, + is_date_like_amount_candidate, + is_probable_year_amount, + parse_document_amount_value, + parse_plain_document_amount_value, + resolve_document_field_amount, + resolve_document_item_amount, + resolve_document_text_amount, +) +from app.services.expense_rule_runtime import ( + DEFAULT_SCENE_RULE_ASSET_CODE, + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, + resolve_document_type_label, +) +from app.services.ocr import OcrService + + +class ExpenseClaimOntologyResolverMixin: + def _resolve_employee( + self, + *, + ontology: OntologyParseResult, + context_json: dict[str, Any], + user_id: str | None, + ) -> Employee | None: + normalized_user_id = str(user_id or "").strip() + if normalized_user_id: + stmt = ( + select(Employee) + .options(selectinload(Employee.organization_unit), selectinload(Employee.manager)) + .where(func.lower(Employee.email) == normalized_user_id.lower()) + .limit(1) + ) + employee = self.db.scalar(stmt) + if employee is not None: + return employee + + employee_name = self._resolve_employee_name( + ontology=ontology, + context_json=context_json, + user_id=None, + ) + if not employee_name: + return None + + stmt = ( + select(Employee) + .options(selectinload(Employee.organization_unit), selectinload(Employee.manager)) + .where(Employee.name == employee_name) + .limit(1) + ) + return self.db.scalar(stmt) + + @staticmethod + def _resolve_employee_name( + *, + ontology: OntologyParseResult, + context_json: dict[str, Any], + user_id: str | None, + fallback: str = "待补充", + ) -> str: + review_form_values = context_json.get("review_form_values") + if isinstance(review_form_values, dict): + for key in ("reporter_name", "employee_name", "claimant_name"): + value = str(review_form_values.get(key) or "").strip() + if value: + return value + for item in ontology.entities: + if item.type == "employee" and item.value.strip(): + return item.value.strip() + for key in ("name", "user_name", 
"employee_name"): + value = str(context_json.get(key) or "").strip() + if value: + return value + return str(user_id or fallback).strip() or fallback + + @staticmethod + def _resolve_department_name( + *, + employee: Employee | None, + context_json: dict[str, Any], + fallback: str = "待补充", + ) -> str: + if employee is not None and employee.organization_unit is not None: + return employee.organization_unit.name + + request_context = context_json.get("request_context") + if isinstance(request_context, dict): + for key in ("department", "department_name", "deptName"): + value = str(request_context.get(key) or "").strip() + if value: + return value + + for key in ("department_name", "department"): + value = str(context_json.get(key) or "").strip() + if value: + return value + return fallback + + @staticmethod + def _resolve_project_code(entities: list[OntologyEntity]) -> str | None: + for item in entities: + if item.type == "project" and item.normalized_value.strip(): + return item.normalized_value.strip() + return None + + @staticmethod + def _resolve_explicit_review_expense_type(context_json: dict[str, Any]) -> str | None: + review_form_values = context_json.get("review_form_values") + if isinstance(review_form_values, dict): + compact = str( + review_form_values.get("expense_type") + or review_form_values.get("reimbursement_type") + or "" + ).replace(" ", "") + if compact: + if "招待" in compact or ("客户" in compact and any(word in compact for word in ("吃饭", "宴请", "请客", "用餐"))): + return "entertainment" + if any(word in compact for word in ("差旅", "出差", "机票", "行程")): + return "travel" + if any(word in compact for word in ("住宿", "酒店", "宾馆")): + return "hotel" + if any(word in compact for word in ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车")): + return "transport" + if any(word in compact for word in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")): + return "meal" + if "会务" in compact: + return "meeting" + if any(word in compact for word in ("办公费", "办公用品", 
"文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")): + return "office" + if any(word in compact for word in ("培训费", "培训", "讲师费", "课时费", "课程费")): + return "training" + if any(word in compact for word in ("通讯费", "话费", "流量费", "宽带费")): + return "communication" + if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")): + return "welfare" + return None + + @staticmethod + def _resolve_expense_type( + entities: list[OntologyEntity], + *, + context_json: dict[str, Any], + ) -> str | None: + explicit_expense_type = ExpenseClaimOntologyResolverMixin._resolve_explicit_review_expense_type(context_json) + if explicit_expense_type: + return explicit_expense_type + for item in entities: + if item.type == "expense_type": + normalized = item.normalized_value.strip() + if normalized: + return normalized + return None + + @staticmethod + def _resolve_reason( + *, + message: str, + context_json: dict[str, Any], + allow_message_fallback: bool, + ) -> str | None: + review_form_values = context_json.get("review_form_values") + if isinstance(review_form_values, dict): + for key in ("reason", "business_reason"): + value = str(review_form_values.get(key) or "").strip() + if value: + return ExpenseClaimOntologyResolverMixin._strip_leading_time_from_reason(value) + + explicit_text = context_json.get("user_input_text") + if isinstance(explicit_text, str): + normalized_explicit_text = explicit_text.strip() + if normalized_explicit_text: + return ExpenseClaimOntologyResolverMixin._strip_leading_time_from_reason(normalized_explicit_text)[:500] or None + return None + + request_context = context_json.get("request_context") + if ( + isinstance(request_context, dict) + and str(context_json.get("entry_source") or "").strip() == "detail" + ): + for key in ("reason", "title"): + value = str(request_context.get(key) or "").strip() + if value: + return value + if not allow_message_fallback: + return None + + normalized_message = str(message or "").strip() + compact_message = 
re.sub(r"\s+", "", normalized_message) + if compact_message.startswith(SYSTEM_GENERATED_REASON_PREFIXES): + return None + return ExpenseClaimOntologyResolverMixin._strip_leading_time_from_reason(normalized_message)[:500] or None + + @staticmethod + def _strip_leading_time_from_reason(value: str) -> str: + reason = str(value or "").strip() + for pattern in LEADING_REASON_TIME_PATTERNS: + next_reason = pattern.sub("", reason).strip() + if next_reason != reason: + return next_reason + return reason + + @staticmethod + def _resolve_location(*, message: str, context_json: dict[str, Any]) -> str | None: + review_form_values = context_json.get("review_form_values") + if isinstance(review_form_values, dict): + for key in ("business_location", "location"): + value = str(review_form_values.get(key) or "").strip() + if value: + return value + + request_context = context_json.get("request_context") + if ( + isinstance(request_context, dict) + and str(context_json.get("entry_source") or "").strip() == "detail" + ): + for key in ("city", "location"): + value = str(request_context.get(key) or "").strip() + if value: + return value + compact = str(message or "").replace(" ", "") + city_match = re.search( + r"去(?P<city>[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)", + compact, + ) + if city_match: + return city_match.group("city").strip() + if "客户现场" in compact: + return "客户现场" + return None + + @staticmethod + def _resolve_occurred_at( + ontology: OntologyParseResult, + *, + context_json: dict[str, Any], + ) -> datetime | None: + review_form_values = context_json.get("review_form_values") + if isinstance(review_form_values, dict): + for key in ("occurred_date", "time_range", "business_time"): + value = str(review_form_values.get(key) or "").strip() + if not value: + continue + try: + parsed = date.fromisoformat(value) + return datetime(parsed.year, parsed.month, parsed.day, tzinfo=UTC) + except ValueError: + continue + + start_date = ontology.time_range.start_date + if
start_date: + try: + parsed = date.fromisoformat(start_date) + return datetime(parsed.year, parsed.month, parsed.day, tzinfo=UTC) + except ValueError: + pass + return None + + @staticmethod + def _resolve_amount( + entities: list[OntologyEntity], + *, + context_json: dict[str, Any], + ) -> Decimal | None: + review_form_values = context_json.get("review_form_values") + if isinstance(review_form_values, dict): + raw_value = str(review_form_values.get("amount") or "").strip() + if raw_value: + compact = raw_value.replace("元", "").replace(",", "").strip() + try: + return Decimal(compact).quantize(Decimal("0.01")) + except (InvalidOperation, ValueError): + pass + for item in entities: + if item.type != "amount" or item.role == "threshold": + continue + try: + return Decimal(item.normalized_value).quantize(Decimal("0.01")) + except (InvalidOperation, ValueError): + continue + return None + + @staticmethod + def _resolve_attachment_names(context_json: dict[str, Any]) -> list[str]: + names = context_json.get("attachment_names") + if not isinstance(names, list): + return [] + return [str(name).strip() for name in names if str(name).strip()] + + def _resolve_attachment_count(self, context_json: dict[str, Any]) -> int: + names = self._resolve_attachment_names(context_json) + if names: + return len(names) + try: + return max(0, int(context_json.get("attachment_count") or 0)) + except (TypeError, ValueError): + return 0 diff --git a/server/src/app/services/expense_claim_platform_risk.py b/server/src/app/services/expense_claim_platform_risk.py new file mode 100644 index 0000000..52f9dfc --- /dev/null +++ b/server/src/app/services/expense_claim_platform_risk.py @@ -0,0 +1,733 @@ +from __future__ import annotations + +import re +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import or_, select +from sqlalchemy import inspect as sqlalchemy_inspect + +from app.api.deps 
import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.reimbursement import TravelReimbursementCalculatorRequest +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.expense_claim_constants import ( + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + DOCUMENT_FACT_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_rule_runtime import ( + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, +) + + +class ExpenseClaimPlatformRiskMixin: + def evaluate_platform_risk_rules( + self, + claim: ExpenseClaim, + *, + rule_codes: list[str] | None = None, + ) -> dict[str, list[Any]]: + manifests = self._load_platform_risk_rule_manifests(rule_codes=rule_codes) + if not manifests: + return {"flags": [], "blocking_reasons": []} + + contexts = self._build_claim_attachment_contexts(claim) + flags: list[dict[str, Any]] = [] + blocking_reasons: list[str] = [] + + for manifest in manifests: + if not self._risk_manifest_applies_to_claim(manifest, claim=claim, contexts=contexts): + continue + + flag = self._evaluate_platform_risk_manifest( + manifest, + claim=claim, + contexts=contexts, + ) + if flag is None: + continue + + flags.append(flag) + severity = str(flag.get("severity") or "").strip().lower() + action = str(flag.get("action") or "").strip().lower() + if severity == "high" or action == "block": + blocking_reasons.append(str(flag.get("message") or flag.get("label") or "").strip()) + + deduplicated_reasons = list( + dict.fromkeys(reason for reason in 
blocking_reasons if reason) + ) + return {"flags": flags, "blocking_reasons": deduplicated_reasons} + + def _load_platform_risk_rule_manifests( + self, + *, + rule_codes: list[str] | None, + ) -> list[dict[str, Any]]: + code_filter = { + str(code or "").strip() + for code in list(rule_codes or []) + if str(code or "").strip() + } + manifests_by_code: dict[str, dict[str, Any]] = {} + + assets = list( + self.db.scalars( + select(AgentAsset) + .where(AgentAsset.asset_type == AgentAssetType.RULE.value) + .where(AgentAsset.status == AgentAssetStatus.ACTIVE.value) + .where(AgentAsset.domain == AgentAssetDomain.EXPENSE.value) + .order_by(AgentAsset.updated_at.desc(), AgentAsset.created_at.desc()) + ).all() + ) + library_manager = AgentAssetRuleLibraryManager() + + for asset in assets: + config_json = asset.config_json if isinstance(asset.config_json, dict) else {} + if str(config_json.get("detail_mode") or "").strip().lower() != "json_risk": + continue + rule_code = str(asset.code or "").strip() + if code_filter and rule_code not in code_filter: + continue + + rule_document = config_json.get("rule_document") + if not isinstance(rule_document, dict): + continue + file_name = str(rule_document.get("file_name") or "").strip() + rule_library = ( + str(config_json.get("rule_library") or RISK_RULES_LIBRARY).strip() + or RISK_RULES_LIBRARY + ) + if not file_name: + continue + + try: + payload = library_manager.read_rule_library_json( + library=rule_library, + file_name=file_name, + ) + except (FileNotFoundError, ValueError): + continue + + manifest_code = str(payload.get("rule_code") or rule_code).strip() + if not manifest_code or (code_filter and manifest_code not in code_filter): + continue + if payload.get("enabled") is False: + continue + + payload = dict(payload) + payload.setdefault("rule_code", manifest_code) + payload["_rule_version"] = str( + asset.published_version or asset.current_version or "v1.0.0" + ) + payload["_rule_asset_id"] = asset.id + 
manifests_by_code[manifest_code] = payload + + missing_codes = code_filter - set(manifests_by_code) + should_load_fallback = not code_filter or bool(missing_codes) + if should_load_fallback: + try: + files = library_manager.list_rule_library_json_files(library=RISK_RULES_LIBRARY) + except ValueError: + files = [] + for file_name in files: + try: + payload = library_manager.read_rule_library_json( + library=RISK_RULES_LIBRARY, + file_name=file_name, + ) + except (FileNotFoundError, ValueError): + continue + rule_code = str(payload.get("rule_code") or "").strip() + if not rule_code or rule_code in manifests_by_code: + continue + if code_filter and rule_code not in missing_codes: + continue + if payload.get("enabled") is False: + continue + payload = dict(payload) + payload["_rule_version"] = "v1.0.0" + manifests_by_code[rule_code] = payload + + return list(manifests_by_code.values()) + + def _risk_manifest_applies_to_claim( + self, + manifest: dict[str, Any], + *, + claim: ExpenseClaim, + contexts: list[dict[str, Any]], + ) -> bool: + applies_to = manifest.get("applies_to") + if not isinstance(applies_to, dict): + applies_to = {} + + try: + min_attachments = int(applies_to.get("min_attachments") or 0) + except (TypeError, ValueError): + min_attachments = 0 + if min_attachments and int(claim.invoice_count or 0) < min_attachments and not contexts: + return False + + expense_types = { + str(claim.expense_type or "").strip().lower(), + *{ + str(item.item_type or "").strip().lower() + for item in list(claim.items or []) + if str(item.item_type or "").strip() + }, + } + domains = { + str(value or "").strip().lower() + for value in list(applies_to.get("domains") or []) + if str(value or "").strip() + } + configured_expense_types = { + str(value or "").strip().lower() + for value in list(applies_to.get("expense_types") or []) + if str(value or "").strip() + } + + if configured_expense_types and not (expense_types & configured_expense_types): + return False + if domains and 
not self._risk_domains_match_claim( + domains, + expense_types=expense_types, + contexts=contexts, + ): + return False + + return True + + def _risk_domains_match_claim( + self, + domains: set[str], + *, + expense_types: set[str], + contexts: list[dict[str, Any]], + ) -> bool: + normalized_contexts: list[dict[str, str]] = [] + for context in contexts: + document_info = context.get("document_info") or {} + normalized_contexts.append( + { + "scene_code": str(document_info.get("scene_code") or "").strip().lower(), + "document_type": str( + document_info.get("document_type") or "" + ).strip().lower(), + "item_type": str( + getattr(context.get("item"), "item_type", "") or "" + ).strip().lower(), + } + ) + + if "travel" in domains: + if expense_types & {"travel", "hotel", "transport"}: + return True + if any( + item["scene_code"] in {"travel", "hotel", "transport"} + or item["document_type"] + in { + "flight_itinerary", + "train_ticket", + "hotel_invoice", + "taxi_receipt", + } + for item in normalized_contexts + ): + return True + if "meal" in domains: + if expense_types & {"meal", "entertainment"}: + return True + if any( + item["scene_code"] == "meal" or item["document_type"] == "meal_receipt" + for item in normalized_contexts + ): + return True + return bool(domains & expense_types) + + def _evaluate_platform_risk_manifest( + self, + manifest: dict[str, Any], + *, + claim: ExpenseClaim, + contexts: list[dict[str, Any]], + ) -> dict[str, Any] | None: + evaluator = str(manifest.get("evaluator") or "").strip().lower() + if evaluator == "reason_too_brief": + return self._evaluate_reason_too_brief_risk(manifest, claim=claim) + if evaluator == "entertainment_reason_missing": + return self._evaluate_entertainment_reason_missing_risk(manifest, claim=claim) + if evaluator == "document_expense_mismatch": + return self._evaluate_document_expense_mismatch_risk( + manifest, + claim=claim, + contexts=contexts, + ) + if evaluator == "location_consistency": + return 
self._evaluate_location_consistency_risk( + manifest, + claim=claim, + contexts=contexts, + ) + if evaluator == "duplicate_invoice": + return self._evaluate_duplicate_invoice_risk(manifest, claim=claim, contexts=contexts) + if evaluator == "identity_consistency": + return self._evaluate_identity_consistency_risk( + manifest, + claim=claim, + contexts=contexts, + ) + if evaluator == "cross_year_invoice": + return self._evaluate_cross_year_invoice_risk(manifest, claim=claim, contexts=contexts) + if evaluator == "void_or_red_invoice": + return self._evaluate_text_keyword_risk( + manifest, + contexts=contexts, + keywords=["作废", "红冲", "红字", "冲红"], + fallback_message="票据文本中出现作废、红冲或红字发票相关信息,建议退回补充或人工复核。", + ) + if evaluator == "vague_goods_description": + return self._evaluate_text_keyword_risk( + manifest, + contexts=contexts, + keywords=["详见清单", "服务费", "咨询费", "其他", "办公用品"], + fallback_message="票据商品或服务描述较笼统,建议审批人核对真实用途和明细清单。", + ) + if evaluator == "multi_city_reason_required": + return self._evaluate_multi_city_reason_required_risk( + manifest, + claim=claim, + contexts=contexts, + ) + return None + + def _evaluate_reason_too_brief_risk( + self, + manifest: dict[str, Any], + *, + claim: ExpenseClaim, + ) -> dict[str, Any] | None: + params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {} + try: + min_reason_length = max(1, int(params.get("min_reason_length") or 6)) + except (TypeError, ValueError): + min_reason_length = 6 + reason_corpus = re.sub(r"\s+", "", self._build_scene_reason_corpus(claim)) + if len(reason_corpus) >= min_reason_length: + return None + return self._build_platform_risk_flag( + manifest, + message=f"报销事由有效描述不足 {min_reason_length} 个字符,暂不足以支撑真实性判断。", + evidence={"reason_length": len(reason_corpus), "min_reason_length": min_reason_length}, + ) + + def _evaluate_entertainment_reason_missing_risk( + self, + manifest: dict[str, Any], + *, + claim: ExpenseClaim, + ) -> dict[str, Any] | None: + expense_types = { + 
str(claim.expense_type or "").strip().lower(), + *{str(item.item_type or "").strip().lower() for item in list(claim.items or [])}, + } + reason_corpus = self._build_scene_reason_corpus(claim) + compact_reason = re.sub(r"\s+", "", reason_corpus) + looks_like_entertainment = ( + "entertainment" in expense_types + or "招待" in compact_reason + or "客户" in compact_reason + ) + if not looks_like_entertainment: + return None + required_keywords = ("客户", "项目", "参与", "人员", "对象", "商务", "会议") + has_detail = any(keyword in compact_reason for keyword in required_keywords) + if has_detail: + return None + return self._build_platform_risk_flag( + manifest, + message="招待或餐饮类费用未识别到客户、项目、参与人员等必要说明,建议补充后再流转。", + evidence={"reason": reason_corpus[:300]}, + ) + + def _evaluate_document_expense_mismatch_risk( + self, + manifest: dict[str, Any], + *, + claim: ExpenseClaim, + contexts: list[dict[str, Any]], + ) -> dict[str, Any] | None: + mismatches: list[str] = [] + for context in contexts: + item = context["item"] + item_type = ( + str(item.item_type or claim.expense_type or "other").strip().lower() + or "other" + ) + policy = self._get_expense_scene_policy(item_type) + if policy is None: + continue + document_info = context.get("document_info") or {} + recognized_scene_code = ( + str(document_info.get("scene_code") or "other").strip().lower() + or "other" + ) + recognized_document_type = ( + str(document_info.get("document_type") or "other").strip().lower() + or "other" + ) + if ( + recognized_scene_code in set(policy.allowed_scene_codes) + or recognized_document_type in set(policy.allowed_document_types) + ): + continue + recognized_label = str( + document_info.get("document_type_label") + or recognized_document_type + or "未知票据" + ) + mismatches.append(f"第 {context['index']} 条明细为{policy.label},附件识别为{recognized_label}") + + if not mismatches: + return None + return self._build_platform_risk_flag( + manifest, + message=";".join(mismatches[:3]) + ",与当前费用场景不匹配。", + evidence={"mismatches": 
mismatches[:5]}, + ) + + def _evaluate_location_consistency_risk( + self, + manifest: dict[str, Any], + *, + claim: ExpenseClaim, + contexts: list[dict[str, Any]], + ) -> dict[str, Any] | None: + policy = self._get_expense_rule_catalog().travel_policy + if policy is None: + return None + declared_cities = self._extract_known_cities_from_text( + " ".join( + [ + str(claim.location or ""), + *[str(item.item_location or "") for item in list(claim.items or [])], + ] + ), + policy, + ) + evidence_cities = self._collect_attachment_cities(contexts, policy) + if not declared_cities or not evidence_cities: + return None + if set(declared_cities) & set(evidence_cities): + return None + declared_text = "、".join(declared_cities) + evidence_text = "、".join(evidence_cities[:5]) + return self._build_platform_risk_flag( + manifest, + message=f"申报地点 {declared_text} 与票据识别地点 {evidence_text} 不一致,建议补充异地说明或更换附件。", + evidence={"declared_cities": declared_cities, "evidence_cities": evidence_cities}, + ) + + def _evaluate_duplicate_invoice_risk( + self, + manifest: dict[str, Any], + *, + claim: ExpenseClaim, + contexts: list[dict[str, Any]], + ) -> dict[str, Any] | None: + invoice_keys = self._collect_invoice_keys_from_contexts(contexts) + duplicate_keys = [ + key + for key, count in self._count_values(invoice_keys).items() + if count > 1 + ] + if duplicate_keys: + return self._build_platform_risk_flag( + manifest, + message=f"当前报销单内存在重复票据号码:{'、'.join(duplicate_keys[:3])}。", + evidence={"duplicate_invoice_keys": duplicate_keys[:5]}, + ) + + if not invoice_keys: + return None + + other_items = list( + self.db.scalars( + select(ExpenseClaimItem) + .where(ExpenseClaimItem.claim_id != claim.id) + .where(ExpenseClaimItem.invoice_id.is_not(None)) + ).all() + ) + matched_claim_ids: set[str] = set() + for other_item in other_items: + other_path = self._attachment_storage.resolve_path(other_item.invoice_id) + if other_path is None or not other_path.exists(): + continue + other_meta = 
self._attachment_storage.read_meta(other_path) + other_document_info = other_meta.get("document_info") + if not isinstance(other_document_info, dict): + continue + other_keys = self._collect_invoice_keys_from_document_info(other_document_info) + if set(invoice_keys) & set(other_keys): + matched_claim_ids.add(str(other_item.claim_id or "")) + + if not matched_claim_ids: + return None + return self._build_platform_risk_flag( + manifest, + message=f"票据号码已在其他报销单中出现,疑似重复报销:{'、'.join(invoice_keys[:3])}。", + evidence={ + "invoice_keys": invoice_keys[:5], + "matched_claim_ids": sorted(matched_claim_ids)[:5], + }, + ) + + def _evaluate_identity_consistency_risk( + self, + manifest: dict[str, Any], + *, + claim: ExpenseClaim, + contexts: list[dict[str, Any]], + ) -> dict[str, Any] | None: + params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {} + allow_keywords = [ + str(value) + for value in list(params.get("allow_keywords") or []) + if str(value).strip() + ] + claimant = str(claim.employee_name or "").strip() + if not claimant: + return None + mismatched_buyers: list[str] = [] + for context in contexts: + buyer = self._resolve_first_document_field_value( + context.get("document_info") or {}, + keys={"buyer_name", "buyer", "purchaser_name", "claimant"}, + labels={"购买方", "抬头", "买方", "购方"}, + ) + if not buyer: + continue + if claimant in buyer or any(keyword in buyer for keyword in allow_keywords): + continue + mismatched_buyers.append(buyer) + if not mismatched_buyers: + return None + return self._build_platform_risk_flag( + manifest, + message=f"发票抬头 {mismatched_buyers[0]} 与报销人 {claimant} 不一致,建议人工复核。", + evidence={"claimant": claimant, "buyers": mismatched_buyers[:5]}, + ) + + def _evaluate_cross_year_invoice_risk( + self, + manifest: dict[str, Any], + *, + claim: ExpenseClaim, + contexts: list[dict[str, Any]], + ) -> dict[str, Any] | None: + claim_year = claim.occurred_at.year if claim.occurred_at is not None else None + if claim_year is None: 
+ return None + issue_years: list[int] = [] + for context in contexts: + text = " ".join( + [ + self._resolve_first_document_field_value( + context.get("document_info") or {}, + keys={"date", "issue_date", "invoice_date"}, + labels={"日期", "开票日期", "发生时间"}, + ), + str(context.get("ocr_summary") or ""), + str(context.get("ocr_text") or ""), + ] + ) + for match in re.findall(r"(20\d{2}|19\d{2})[年/\-.]", text): + try: + issue_years.append(int(match)) + except ValueError: + continue + mismatch_years = sorted({year for year in issue_years if year != claim_year}) + if not mismatch_years: + return None + return self._build_platform_risk_flag( + manifest, + message=f"票据年份 {mismatch_years[0]} 与费用发生年份 {claim_year} 不一致,建议确认是否跨年报销。", + evidence={"claim_year": claim_year, "invoice_years": mismatch_years}, + ) + + def _evaluate_text_keyword_risk( + self, + manifest: dict[str, Any], + *, + contexts: list[dict[str, Any]], + keywords: list[str], + fallback_message: str, + ) -> dict[str, Any] | None: + matched: list[str] = [] + for context in contexts: + text = f"{context.get('ocr_summary') or ''}\n{context.get('ocr_text') or ''}" + for keyword in keywords: + if keyword in text and keyword not in matched: + matched.append(keyword) + if not matched: + return None + return self._build_platform_risk_flag( + manifest, + message=fallback_message, + evidence={"matched_keywords": matched}, + ) + + def _evaluate_multi_city_reason_required_risk( + self, + manifest: dict[str, Any], + *, + claim: ExpenseClaim, + contexts: list[dict[str, Any]], + ) -> dict[str, Any] | None: + policy = self._get_expense_rule_catalog().travel_policy + if policy is None: + return None + cities = self._collect_attachment_cities(contexts, policy) + for item in list(claim.items or []): + for city in self._extract_known_cities_from_text(str(item.item_location or ""), policy): + if city not in cities: + cities.append(city) + if len(cities) <= 2: + return None + reason_corpus = self._build_travel_reason_corpus(claim) + if 
self._text_contains_keywords(reason_corpus, policy.route_exception_keywords): + return None + return self._build_platform_risk_flag( + manifest, + message=f"本次报销识别到多城市行程({'、'.join(cities[:5])}),但事由中未说明中转、多地拜访或改签原因。", + evidence={"cities": cities[:8]}, + ) + + def _build_platform_risk_flag( + self, + manifest: dict[str, Any], + *, + message: str, + evidence: dict[str, Any], + ) -> dict[str, Any]: + outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {} + fail_outcome = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {} + severity = str(fail_outcome.get("severity") or "medium").strip().lower() or "medium" + default_action = "block" if severity == "high" else "manual_review" + action = str(fail_outcome.get("action") or default_action).strip() + label = str(manifest.get("name") or manifest.get("rule_code") or "风险规则命中").strip() + + return { + "source": "submission_review", + "hit_source": "rule_center", + "rule_type": "risk", + "rule_code": str(manifest.get("rule_code") or "").strip(), + "rule_version": str(manifest.get("_rule_version") or "v1.0.0").strip(), + "severity": severity, + "action": action, + "label": label, + "message": message, + "evidence": evidence, + } + + @staticmethod + def _count_values(values: list[str]) -> dict[str, int]: + counts: dict[str, int] = {} + for value in values: + normalized = str(value or "").strip() + if not normalized: + continue + counts[normalized] = counts.get(normalized, 0) + 1 + return counts + + def _collect_invoice_keys_from_contexts(self, contexts: list[dict[str, Any]]) -> list[str]: + invoice_keys: list[str] = [] + for context in contexts: + document_info = context.get("document_info") or {} + for key in self._collect_invoice_keys_from_document_info(document_info): + if key not in invoice_keys: + invoice_keys.append(key) + return invoice_keys + + def _collect_invoice_keys_from_document_info(self, document_info: dict[str, Any]) -> list[str]: + keys: list[str] = [] + for 
field in list(document_info.get("fields") or []): + if not isinstance(field, dict): + continue + field_key = str(field.get("key") or "").strip().lower().replace("_", "") + label = str(field.get("label") or "").replace(" ", "") + value = str(field.get("value") or "").strip() + if not value: + continue + if field_key in {"invoiceno", "invoicenumber", "number", "code"} or any( + token in label for token in ("发票号码", "票号", "发票代码", "号码") + ): + normalized = re.sub(r"\s+", "", value) + if normalized and normalized not in keys: + keys.append(normalized) + return keys + + def _collect_attachment_cities( + self, + contexts: list[dict[str, Any]], + policy: RuntimeTravelPolicy, + ) -> list[str]: + cities: list[str] = [] + for context in contexts: + document_info = context.get("document_info") or {} + parts = [ + str(context.get("ocr_summary") or ""), + str(context.get("ocr_text") or ""), + str(context.get("item").item_location if context.get("item") is not None else ""), + ] + for field in list(document_info.get("fields") or []): + if isinstance(field, dict): + parts.append(str(field.get("value") or "")) + for city in self._extract_known_cities_from_text(" ".join(parts), policy): + if city not in cities: + cities.append(city) + return cities + + @staticmethod + def _extract_known_cities_from_text(text: str, policy: RuntimeTravelPolicy) -> list[str]: + normalized = str(text or "").strip() + if not normalized: + return [] + cities: list[str] = [] + for city in sorted(policy.city_tiers.keys(), key=lambda item: len(item), reverse=True): + if city in normalized and city not in cities: + cities.append(city) + return cities + + @staticmethod + def _resolve_first_document_field_value( + document_info: dict[str, Any], + *, + keys: set[str], + labels: set[str], + ) -> str: + normalized_keys = {key.replace("_", "").lower() for key in keys} + for field in list(document_info.get("fields") or []): + if not isinstance(field, dict): + continue + field_key = str(field.get("key") or 
"").strip().lower().replace("_", "") + label = str(field.get("label") or "").replace(" ", "") + value = str(field.get("value") or "").strip() + if not value: + continue + if field_key in normalized_keys or any(token in label for token in labels): + return value + return "" diff --git a/server/src/app/services/expense_claim_policy_review.py b/server/src/app/services/expense_claim_policy_review.py new file mode 100644 index 0000000..c00c532 --- /dev/null +++ b/server/src/app/services/expense_claim_policy_review.py @@ -0,0 +1,654 @@ +from __future__ import annotations + +import re +from collections import defaultdict +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import or_, select +from sqlalchemy import inspect as sqlalchemy_inspect + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.reimbursement import TravelReimbursementCalculatorRequest +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.expense_claim_constants import ( + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + DOCUMENT_FACT_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_rule_runtime import ( + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, +) + + +class ExpenseClaimPolicyReviewMixin: + def _run_scene_policy_review(self, claim: ExpenseClaim) -> dict[str, list[Any]]: + catalog = self._get_expense_rule_catalog() + flags: list[dict[str, 
Any]] = [] + blocking_reasons: list[str] = [] + reason_corpus = self._build_scene_reason_corpus(claim) + scene_totals: dict[str, Decimal] = defaultdict(lambda: Decimal("0.00")) + scene_warned: set[str] = set() + + for item in claim.items: + item_type = str(item.item_type or claim.expense_type or "other").strip().lower() or "other" + policy = catalog.get_scene_policy(item_type) + if policy is None: + continue + + scene_totals[item_type] += Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) + + if policy.always_warn and item_type not in scene_warned: + scene_warned.add(item_type) + flags.append( + { + "source": "submission_review", + "severity": "medium", + "label": f"{policy.label}人工重点复核", + "message": policy.always_warn_message or f"{policy.label}默认需要人工重点复核。", + "rule_code": policy.rule_code, + } + ) + + item_limit = policy.item_amount_limit + item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) + if item_limit is not None and item_amount > Decimal("0.00"): + exceeded = self._evaluate_amount_limit( + amount=item_amount, + limit_config=item_limit, + reason_text="\n".join( + part + for part in [reason_corpus, str(item.item_reason or "").strip()] + if part + ), + ) + if exceeded is not None: + severity, threshold = exceeded + label = ( + f"{policy.label}金额超标待说明" + if severity == "high" + else f"{policy.label}金额超标提醒" + ) + message = ( + f"{policy.label}当前识别金额为 {item_amount} 元," + f"已超过制度阈值 {threshold} 元。" + ) + if severity == "high": + message += " 当前未识别到例外说明,请先补充原因。" + blocking_reasons.append(f"{policy.label}金额超出制度阈值,且未补充例外说明。") + else: + message += " 已识别到例外说明,请审批人重点复核。" + flags.append( + { + "source": "submission_review", + "severity": severity, + "label": label, + "message": message, + "rule_code": policy.rule_code, + } + ) + + for scene_code, total_amount in scene_totals.items(): + policy = catalog.get_scene_policy(scene_code) + if policy is None or policy.claim_amount_limit is None or total_amount <= 
Decimal("0.00"): + continue + exceeded = self._evaluate_amount_limit( + amount=total_amount, + limit_config=policy.claim_amount_limit, + reason_text=reason_corpus, + ) + if exceeded is None: + continue + + severity, threshold = exceeded + label = f"{policy.label}合计超标待说明" if severity == "high" else f"{policy.label}合计超标提醒" + message = ( + f"{policy.label}当前合计金额为 {total_amount} 元," + f"已超过制度阈值 {threshold} 元。" + ) + if severity == "high": + message += " 当前未识别到例外说明,请先补充原因。" + blocking_reasons.append(f"{policy.label}合计金额超出制度阈值,且未补充例外说明。") + else: + message += " 已识别到例外说明,请审批人重点复核。" + flags.append( + { + "source": "submission_review", + "severity": severity, + "label": label, + "message": message, + "rule_code": policy.rule_code, + } + ) + + return { + "flags": flags, + "blocking_reasons": list(dict.fromkeys(reason for reason in blocking_reasons if reason)), + } + + def _evaluate_amount_limit( + self, + *, + amount: Decimal, + limit_config: Any, + reason_text: str, + ) -> tuple[str, Decimal] | None: + block_amount = getattr(limit_config, "block_amount", None) + warn_amount = getattr(limit_config, "warn_amount", None) + exception_keywords = list(getattr(limit_config, "exception_keywords", []) or []) + has_exception = self._text_contains_keywords(reason_text, exception_keywords) + + if block_amount is not None and amount > Decimal(block_amount): + return ("medium" if has_exception else "high", Decimal(block_amount)) + if warn_amount is not None and amount > Decimal(warn_amount): + return ("medium", Decimal(warn_amount)) + return None + + def _run_travel_policy_review(self, claim: ExpenseClaim) -> dict[str, list[Any]]: + policy = self._get_expense_rule_catalog().travel_policy + if policy is None: + return {"flags": [], "blocking_reasons": []} + contexts = [ + context + for context in self._build_claim_attachment_contexts(claim) + if self._is_travel_policy_relevant_context(context, policy) + ] + if not contexts: + return {"flags": [], "blocking_reasons": []} + + reason_corpus 
= self._build_travel_reason_corpus(claim) + has_route_exception = self._text_contains_keywords( + reason_corpus, + policy.route_exception_keywords, + ) + has_standard_exception = self._text_contains_keywords( + reason_corpus, + policy.standard_exception_keywords, + ) + grade_band = self._resolve_travel_policy_band(claim.employee_grade) + band_label = policy.band_labels.get(grade_band or "", str(claim.employee_grade or "").strip() or "当前职级") + + itinerary_segments: list[dict[str, Any]] = [] + itinerary_cities: list[str] = [] + hotel_contexts: list[dict[str, Any]] = [] + flags: list[dict[str, Any]] = [] + blocking_reasons: list[str] = [] + + for context in contexts: + route_segment = self._extract_route_segment(context, policy) + if route_segment and self._is_long_distance_travel_context(context, policy): + itinerary_segments.append( + { + "item": context["item"], + "origin": route_segment[0], + "destination": route_segment[1], + } + ) + itinerary_cities.extend([route_segment[0], route_segment[1]]) + + scene_code = str(context["document_info"].get("scene_code") or "").strip().lower() + document_type = str(context["document_info"].get("document_type") or "").strip().lower() + item_type = str(context["item"].item_type or "").strip().lower() + if "hotel" in {scene_code, document_type, item_type} or document_type == "hotel_invoice": + hotel_contexts.append(context) + + unique_itinerary_cities = list(dict.fromkeys(city for city in itinerary_cities if city)) + expected_destination_city = self._resolve_expected_travel_city( + claim, + contexts, + unique_itinerary_cities, + policy, + ) + + if itinerary_segments: + unique_destinations = list( + dict.fromkeys(segment["destination"] for segment in itinerary_segments if segment["destination"]) + ) + first_origin = str(itinerary_segments[0]["origin"] or "").strip() + last_destination = str(itinerary_segments[-1]["destination"] or "").strip() + + for previous, current in zip(itinerary_segments, itinerary_segments[1:]): + 
previous_destination = str(previous["destination"] or "").strip() + current_origin = str(current["origin"] or "").strip() + if previous_destination and current_origin and previous_destination != current_origin: + message = ( + f"差旅行程未形成连续链路:上一段到达 {previous_destination}," + f"下一段却从 {current_origin} 出发,请补充中转或改签说明。" + ) + flags.append( + { + "source": "submission_review", + "severity": "high", + "label": "行程闭环异常", + "message": message, + "rule_code": policy.rule_code, + } + ) + blocking_reasons.append("差旅行程未形成连续闭环,请补充中转、改签或异地出发原因。") + break + + if ( + expected_destination_city + and last_destination + and last_destination not in {expected_destination_city, first_origin} + ): + message = ( + f"差旅行程终点识别为 {last_destination}," + f"与申报目的地 {expected_destination_city} 不一致,请补充多地出差或后续行程说明。" + ) + flags.append( + { + "source": "submission_review", + "severity": "high", + "label": "行程终点异常", + "message": message, + "rule_code": policy.rule_code, + } + ) + blocking_reasons.append("差旅行程终点与申报目的地不一致,请补充多地出差说明或补齐后续票据。") + + expected_city_set = { + city + for city in (expected_destination_city, first_origin) + if city + } + extra_destinations = [ + city + for city in unique_destinations + if city and city not in expected_city_set + ] + if extra_destinations and not has_route_exception: + destinations_text = "、".join(extra_destinations[:3]) + flags.append( + { + "source": "submission_review", + "severity": "high", + "label": "多城市行程待说明", + "message": ( + f"检测到本次差旅涉及 {destinations_text} 多个目的地," + "但当前报销事由未说明中转、多地拜访或改签原因。" + ), + "rule_code": policy.rule_code, + } + ) + blocking_reasons.append("检测到多城市差旅行程,但当前未补充中转或多地出差说明。") + + allowed_hotel_cities = { + city + for city in [expected_destination_city, *unique_itinerary_cities] + if city + } + for context in hotel_contexts: + hotel_city = self._extract_hotel_city(context, policy) + if hotel_city and allowed_hotel_cities and hotel_city not in allowed_hotel_cities: + expected_text = "、".join(sorted(allowed_hotel_cities)) + flags.append( + { + 
"source": "submission_review", + "severity": "high", + "label": "酒店地点异常", + "message": ( + f"酒店票据识别城市为 {hotel_city}," + f"与当前差旅目的地/行程城市 {expected_text} 不一致,请补充异地住宿原因。" + ), + "rule_code": policy.rule_code, + } + ) + blocking_reasons.append("酒店票据地点与差旅目的地不一致,请补充异地住宿原因或更换附件。") + + if grade_band is None: + continue + + baseline_city = hotel_city or expected_destination_city + standard = self._resolve_travel_policy_hotel_standard( + policy=policy, + grade_band=grade_band, + city=baseline_city, + ) + if standard is None: + continue + cap, standard_label = standard + night_count = self._extract_hotel_night_count(context) + item_amount = Decimal(context["item"].item_amount or Decimal("0.00")).quantize(Decimal("0.01")) + nightly_amount = (item_amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01")) + + if nightly_amount <= cap: + continue + + hotel_message = ( + f"{band_label} 职级在{standard_label}的住宿标准为 {cap} 元/晚," + f"当前酒店识别金额约 {nightly_amount} 元/晚。" + ) + item_reason = str(context["item"].item_reason or "").strip() + item_has_exception = self._text_contains_keywords(item_reason, policy.standard_exception_keywords) + if has_standard_exception or item_has_exception: + flags.append( + { + "source": "submission_review", + "severity": "medium", + "label": "住宿超标提醒", + "message": hotel_message + " 已识别到补充说明,请直属领导重点复核。", + "rule_code": policy.rule_code, + } + ) + else: + flags.append( + { + "source": "submission_review", + "severity": "high", + "label": "住宿超标待说明", + "message": hotel_message + " 当前未识别到超标说明,请先补充原因。", + "rule_code": policy.rule_code, + } + ) + blocking_reasons.append("住宿金额超出当前职级差标,且未补充超标说明。") + + if grade_band is not None: + for context in contexts: + transport_class = self._detect_transport_class(context, policy) + if transport_class is None: + continue + + transport_kind, class_label, class_level = transport_class + allowed_level = policy.transport_limits.get(grade_band, {}).get(transport_kind) + if allowed_level is None or class_level <= allowed_level: + 
continue + + item_reason = str(context["item"].item_reason or "").strip() + item_has_exception = self._text_contains_keywords(item_reason, policy.standard_exception_keywords) + message = f"{band_label} 职级当前默认不可报销 {class_label}。" + if has_standard_exception or item_has_exception: + flags.append( + { + "source": "submission_review", + "severity": "medium", + "label": "交通舱位超标提醒", + "message": message + " 已识别到补充说明,请审批人重点复核。", + "rule_code": policy.rule_code, + } + ) + else: + flags.append( + { + "source": "submission_review", + "severity": "high", + "label": "交通舱位超标待说明", + "message": message + " 当前未识别到例外说明,请先补充原因。", + "rule_code": policy.rule_code, + } + ) + blocking_reasons.append("交通舱位或席别超出当前职级差标,且未补充例外说明。") + + return { + "flags": flags, + "blocking_reasons": list(dict.fromkeys(reason for reason in blocking_reasons if reason)), + } + + def _build_claim_attachment_contexts(self, claim: ExpenseClaim) -> list[dict[str, Any]]: + contexts: list[dict[str, Any]] = [] + ordered_items = sorted( + claim.items, + key=lambda item: ( + item.item_date or date.max, + self._normalize_sort_datetime(item.created_at), + ), + ) + for index, item in enumerate(ordered_items, start=1): + file_path = self._attachment_storage.resolve_path(item.invoice_id) + if file_path is None or not file_path.exists(): + continue + + metadata = self._attachment_storage.read_meta(file_path) + document_info = metadata.get("document_info") + contexts.append( + { + "index": index, + "item": item, + "document_info": document_info if isinstance(document_info, dict) else {}, + "ocr_text": str(metadata.get("ocr_text") or ""), + "ocr_summary": str(metadata.get("ocr_summary") or ""), + } + ) + return contexts + + def _is_travel_policy_relevant_context( + self, + context: dict[str, Any], + policy: RuntimeTravelPolicy, + ) -> bool: + item = context.get("item") + document_info = context.get("document_info") or {} + item_type = str(getattr(item, "item_type", "") or "").strip().lower() + scene_code = 
str(document_info.get("scene_code") or "").strip().lower() + document_type = str(document_info.get("document_type") or "").strip().lower() + return ( + item_type in set(policy.relevant_expense_types) + or scene_code in set(policy.relevant_expense_types) + or document_type in {"hotel_invoice", *set(policy.long_distance_document_types)} + ) + + @staticmethod + def _resolve_document_field_value(document_info: dict[str, Any], key: str) -> str: + normalized_key = str(key or "").strip().lower() + for field in list(document_info.get("fields") or []): + if not isinstance(field, dict): + continue + field_key = str(field.get("key") or "").strip().lower() + if field_key == normalized_key: + return str(field.get("value") or "").strip() + return "" + + @staticmethod + def _text_contains_keywords(text: str, keywords: tuple[str, ...] | list[str]) -> bool: + compact = re.sub(r"\s+", "", str(text or "")) + if not compact: + return False + return any(keyword in compact for keyword in keywords) + + def _build_travel_reason_corpus(self, claim: ExpenseClaim) -> str: + parts = [str(claim.reason or "").strip(), str(claim.location or "").strip()] + for item in claim.items: + parts.append(str(item.item_reason or "").strip()) + parts.append(str(item.item_location or "").strip()) + return "\n".join(part for part in parts if part) + + @staticmethod + def _resolve_travel_policy_band(grade: str | None) -> str | None: + normalized = str(grade or "").strip().upper() + if not normalized: + return None + + p_match = re.search(r"P(\d+)", normalized) + if p_match: + level = int(p_match.group(1)) + if level <= 3: + return "junior" + if level <= 5: + return "mid" + return "senior" + + m_match = re.search(r"M(\d+)", normalized) + if m_match: + level = int(m_match.group(1)) + if level <= 2: + return "manager" + return "executive" + + if normalized.startswith("D"): + return "executive" + return None + + def _resolve_expected_travel_city( + self, + claim: ExpenseClaim, + contexts: list[dict[str, Any]], + 
itinerary_cities: list[str], + policy: RuntimeTravelPolicy, + ) -> str: + claim_city = self._extract_city_from_text(str(claim.location or ""), policy) + if claim_city: + return claim_city + + for context in contexts: + hotel_city = self._extract_hotel_city(context, policy) + if hotel_city: + return hotel_city + + if len(itinerary_cities) >= 2 and itinerary_cities[1]: + return itinerary_cities[1] + for city in itinerary_cities: + if city: + return city + return "" + + def _extract_route_segment( + self, + context: dict[str, Any], + policy: RuntimeTravelPolicy, + ) -> tuple[str, str] | None: + document_info = context["document_info"] + route_value = self._resolve_document_field_value(document_info, "route") + if not route_value or "-" not in route_value: + return None + + origin_text, destination_text = [segment.strip() for segment in route_value.split("-", 1)] + origin_city = self._extract_city_from_text(origin_text, policy) + destination_city = self._extract_city_from_text(destination_text, policy) + if not origin_city or not destination_city or origin_city == destination_city: + return None + return origin_city, destination_city + + def _extract_hotel_city(self, context: dict[str, Any], policy: RuntimeTravelPolicy) -> str: + document_info = context["document_info"] + item = context["item"] + merchant_name = self._resolve_document_field_value(document_info, "merchant_name") + for candidate in ( + merchant_name, + str(item.item_location or ""), + str(context.get("ocr_summary") or ""), + str(context.get("ocr_text") or ""), + ): + city = self._extract_city_from_text(candidate, policy) + if city: + return city + return "" + + @staticmethod + def _format_travel_policy_city_tier(city_tier: str) -> str: + return { + "tier_1": "一线城市", + "tier_2": "重点城市", + "tier_3": "其他城市", + }.get(str(city_tier or "").strip(), "当前城市") + + def _resolve_travel_policy_hotel_standard( + self, + *, + policy: RuntimeTravelPolicy, + grade_band: str, + city: str, + ) -> tuple[Decimal, str] | 
None: + normalized_city = str(city or "").strip() + city_limits = getattr(policy, "hotel_city_limits", {}) or {} + city_entry = city_limits.get(normalized_city) if normalized_city else None + if city_entry and city_entry.get(grade_band) is not None: + cap = Decimal(city_entry[grade_band]).quantize(Decimal("0.01")) + return cap, normalized_city + + city_tier = (getattr(policy, "city_tiers", {}) or {}).get(normalized_city, "tier_3") + tier_entry = (getattr(policy, "hotel_limits", {}) or {}).get(grade_band, {}) + tier_cap = tier_entry.get(city_tier) + if tier_cap is None: + return None + tier_label = self._format_travel_policy_city_tier(city_tier) + cap = Decimal(tier_cap).quantize(Decimal("0.01")) + return cap, tier_label + + @staticmethod + def _extract_city_from_text(text: str, policy: RuntimeTravelPolicy) -> str: + normalized = str(text or "").strip() + if not normalized: + return "" + city_names = set(policy.city_tiers.keys()) + city_names.update((getattr(policy, "hotel_city_limits", {}) or {}).keys()) + city_match_order = sorted(city_names, key=lambda item: len(item), reverse=True) + for city in city_match_order: + if city in normalized: + return city + return "" + + @staticmethod + def _extract_hotel_night_count(context: dict[str, Any]) -> int: + text = " ".join( + [ + str(context.get("ocr_summary") or "").strip(), + str(context.get("ocr_text") or "").strip(), + ] + ).strip() + match = TRAVEL_POLICY_HOTEL_NIGHT_PATTERN.search(text) + if not match: + return 1 + try: + return max(1, int(match.group(1))) + except (TypeError, ValueError): + return 1 + + def _detect_transport_class( + self, + context: dict[str, Any], + policy: RuntimeTravelPolicy, + ) -> tuple[str, str, int] | None: + document_info = context["document_info"] + document_type = str(document_info.get("document_type") or "").strip().lower() + text = " ".join( + [ + str(context.get("ocr_summary") or "").strip(), + str(context.get("ocr_text") or "").strip(), + ] + ).strip() + compact_text = re.sub(r"\s+", 
"", text) + if not compact_text: + return None + + if document_type == "flight_itinerary": + for config in policy.flight_classes: + label = str(config.keyword or "").strip() + level = int(config.level) + if label in compact_text: + return "flight", label, level + return None + + if document_type == "train_ticket": + for config in policy.train_classes: + label = str(config.keyword or "").strip() + level = int(config.level) + if label in compact_text: + return "train", label, level + return None + + return None + + def _is_long_distance_travel_context( + self, + context: dict[str, Any], + policy: RuntimeTravelPolicy, + ) -> bool: + document_info = context["document_info"] + document_type = str(document_info.get("document_type") or "").strip().lower() + scene_code = str(document_info.get("scene_code") or "").strip().lower() + if document_type in set(policy.long_distance_document_types): + return True + return scene_code == "travel" diff --git a/server/src/app/services/expense_claim_read_model.py b/server/src/app/services/expense_claim_read_model.py new file mode 100644 index 0000000..89cff40 --- /dev/null +++ b/server/src/app/services/expense_claim_read_model.py @@ -0,0 +1,269 @@ +from __future__ import annotations + +import json +import re +import shutil +import uuid +from collections import defaultdict +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal, InvalidOperation +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import func, or_, select +from sqlalchemy import inspect as sqlalchemy_inspect +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session, selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from 
app.schemas.ontology import OntologyEntity, OntologyParseResult +from app.schemas.reimbursement import ( + ExpenseClaimItemCreate, + ExpenseClaimItemUpdate, + ExpenseClaimUpdate, + TravelReimbursementCalculatorRequest, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from app.services.agent_foundation import AgentFoundationService +from app.services.audit import AuditLogService +from app.services.document_intelligence import build_document_insight +from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + 
AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + TRAVEL_POLICY_HOTEL_LIMITS, + TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin +from app.services.expense_amounts import ( + extract_amount_candidates, + format_decimal_amount, + is_amount_match_date_fragment, + is_date_like_amount_candidate, + is_probable_year_amount, + parse_document_amount_value, + parse_plain_document_amount_value, + resolve_document_field_amount, + resolve_document_item_amount, + resolve_document_text_amount, +) +from app.services.expense_rule_runtime import ( + DEFAULT_SCENE_RULE_ASSET_CODE, + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, + resolve_document_type_label, +) +from app.services.ocr import OcrService + + +class ExpenseClaimReadModelMixin: + @staticmethod + def _serialize_claim(claim: ExpenseClaim) -> dict[str, Any]: + return { + "id": claim.id, + "claim_no": claim.claim_no, + "employee_name": claim.employee_name, + "department_name": claim.department_name, + "project_code": claim.project_code, + "expense_type": claim.expense_type, + "reason": claim.reason, + "location": claim.location, + "amount": float(claim.amount), + "invoice_count": int(claim.invoice_count or 0), + "status": claim.status, + "approval_stage": claim.approval_stage, + "risk_flags_json": list(claim.risk_flags_json or []), + } + + @staticmethod + def _collect_return_flags(risk_flags: Any) -> list[dict[str, Any]]: + if not isinstance(risk_flags, list): + return [] + + return [ + flag + for flag in 
risk_flags + if isinstance(flag, dict) and str(flag.get("source") or "").strip() == "manual_return" + ] + + @staticmethod + def _normalize_return_reason_codes(reason_codes: list[str] | None) -> list[str]: + return ExpenseClaimReadModelMixin._normalize_return_reason_code_payload(reason_codes)["reason_codes"] + + @staticmethod + def _normalize_return_reason_code_payload(reason_codes: list[str] | None) -> dict[str, list[str]]: + normalized_codes: list[str] = [] + unknown_codes: list[str] = [] + for item in reason_codes or []: + code = str(item or "").strip() + if not code: + continue + if code in RETURN_REASON_OPTIONS and code not in normalized_codes: + normalized_codes.append(code) + elif code not in RETURN_REASON_OPTIONS and code not in unknown_codes: + unknown_codes.append(code) + return { + "reason_codes": normalized_codes, + "unknown_reason_codes": unknown_codes, + } + + @staticmethod + def _merge_persistent_claim_risk_flags(*, existing_flags: list[Any], next_flags: list[Any]) -> list[Any]: + if not next_flags: + return list(existing_flags or []) + + merged_flags = list(next_flags or []) + next_return_markers = { + ExpenseClaimReadModelMixin._build_return_flag_marker(flag) + for flag in merged_flags + if isinstance(flag, dict) and str(flag.get("source") or "").strip() == "manual_return" + } + for flag in list(existing_flags or []): + if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "manual_return"): + continue + marker = ExpenseClaimReadModelMixin._build_return_flag_marker(flag) + if marker in next_return_markers: + continue + merged_flags.append(flag) + next_return_markers.add(marker) + return merged_flags + + @staticmethod + def _build_return_flag_marker(flag: dict[str, Any]) -> tuple[str, str, str]: + event_id = str(flag.get("return_event_id") or "").strip() + if event_id: + return ("event_id", event_id, "") + return ( + str(flag.get("return_count") or "").strip(), + str(flag.get("created_at") or "").strip(), + 
str(flag.get("message") or flag.get("reason") or "").strip(), + ) + + @staticmethod + def _build_default_return_message(*, operator: str, risk_points: list[str]) -> str: + if risk_points: + return f"{operator} 退回该报销单:{'、'.join(risk_points)}。请申请人调整后重新提交。" + return f"{operator} 已退回该报销单,请申请人调整后重新提交。" + + @staticmethod + def _normalize_return_stage_key(stage: str | None) -> str: + normalized = str(stage or "").strip() + if "直属" in normalized or "领导" in normalized or "负责人" in normalized: + return "direct_manager" + if "财务" in normalized: + return "finance" + if "AI" in normalized or "预审" in normalized: + return "ai_review" + if "归档" in normalized or "入账" in normalized: + return "archive" + return "unknown" + + @staticmethod + def _is_editable_claim_status(status: str | None) -> bool: + return str(status or "").strip().lower() in EDITABLE_CLAIM_STATUSES + + @staticmethod + def _normalize_optional_text(value: str | None, *, fallback: str = "", allow_empty: bool = False) -> str | None: + normalized = str(value or "").strip() + if normalized: + return normalized + if allow_empty: + return None + return fallback + + @staticmethod + def _normalize_sort_datetime(value: datetime | None) -> datetime: + if value is None: + return datetime.max.replace(tzinfo=UTC) + if value.tzinfo is None: + return value.replace(tzinfo=UTC) + return value + + @staticmethod + def _is_missing_value(value: Any) -> bool: + text = str(value or "").strip() + if not text: + return True + compact = text.replace(" ", "") + return compact in {"待补充", "暂无", "无", "未知", "处理中"} + + def _ensure_draft_claim(self, claim: ExpenseClaim) -> None: + if not self._is_editable_claim_status(claim.status): + raise ValueError("只有草稿、待补充或退回待提交状态的报销单才允许执行该操作。") + + @staticmethod + def _ensure_draft_pending_claim(claim: ExpenseClaim) -> None: + status = str(claim.status or "").strip().lower() + if status != "draft": + raise ValueError("只有草稿待提交状态的报销单才允许编辑附加说明。") + + @staticmethod + def _ensure_mutable_claim_item(item: 
ExpenseClaimItem) -> None: + if str(item.item_type or "").strip().lower() in SYSTEM_GENERATED_ITEM_TYPES: + raise ValueError("系统自动计算的费用明细不可手动修改。") + + def _delete_claim_assistant_sessions(self, claim_id: str | None) -> None: + from app.services.agent_conversations import AgentConversationService + + AgentConversationService(self.db).delete_conversations_for_draft_claim( + claim_id=claim_id, + source="user_message", + session_type="expense", + ) + + def _ensure_ready(self) -> None: + AgentFoundationService(self.db).ensure_foundation_ready() diff --git a/server/src/app/services/expense_claim_review_preview.py b/server/src/app/services/expense_claim_review_preview.py new file mode 100644 index 0000000..72dde54 --- /dev/null +++ b/server/src/app/services/expense_claim_review_preview.py @@ -0,0 +1,393 @@ +from __future__ import annotations + +import json +import re +import shutil +import uuid +from collections import defaultdict +from datetime import UTC, date, datetime, timedelta +from decimal import Decimal, InvalidOperation +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +from sqlalchemy import func, or_, select +from sqlalchemy import inspect as sqlalchemy_inspect +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import Session, selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType +from app.models.agent_asset import AgentAsset +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim, ExpenseClaimItem +from app.schemas.ontology import OntologyEntity, OntologyParseResult +from app.schemas.reimbursement import ( + ExpenseClaimItemCreate, + ExpenseClaimItemUpdate, + ExpenseClaimUpdate, + TravelReimbursementCalculatorRequest, +) +from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager +from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY +from 
app.services.agent_foundation import AgentFoundationService +from app.services.audit import AuditLogService +from app.services.document_intelligence import build_document_insight +from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_errors import ExpenseClaimSubmissionBlockedError +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + TRAVEL_POLICY_HOTEL_LIMITS, + 
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin +from app.services.expense_amounts import ( + extract_amount_candidates, + format_decimal_amount, + is_amount_match_date_fragment, + is_date_like_amount_candidate, + is_probable_year_amount, + parse_document_amount_value, + parse_plain_document_amount_value, + resolve_document_field_amount, + resolve_document_item_amount, + resolve_document_text_amount, +) +from app.services.expense_rule_runtime import ( + DEFAULT_SCENE_RULE_ASSET_CODE, + ExpenseRuleRuntimeService, + RuntimeTravelPolicy, + build_default_expense_rule_catalog, + resolve_document_type_label, +) +from app.services.ocr import OcrService + + +class ExpenseClaimReviewPreviewMixin: + def save_or_submit_from_ontology( + self, + *, + run_id: str, + user_id: str | None, + message: str, + ontology: OntologyParseResult, + context_json: dict[str, Any], + ) -> dict[str, Any]: + review_action = str(context_json.get("review_action") or "").strip() + if review_action not in PERSISTENT_EXPENSE_REVIEW_ACTIONS: + return self._build_expense_review_preview_result( + user_id=user_id, + message=message, + ontology=ontology, + context_json=context_json, + ) + + result = self.upsert_draft_from_ontology( + run_id=run_id, + user_id=user_id, + message=message, + ontology=ontology, + context_json=context_json, + ) + + if review_action != "next_step": + return result + + claim_id = str(result.get("claim_id") or "").strip() + if not claim_id or result.get("draft_limit_reached"): + return result + + current_user = CurrentUserContext( + username=str(user_id or context_json.get("name") or "anonymous").strip() or "anonymous", + name=str(context_json.get("name") or user_id or "anonymous").strip() or "anonymous", + 
role_codes=[ + str(item).strip() + for item in list(context_json.get("role_codes") or []) + if str(item).strip() + ], + is_admin=bool(context_json.get("is_admin")), + department_name=str(context_json.get("department_name") or context_json.get("department") or "").strip(), + ) + + try: + claim = self.submit_claim(claim_id, current_user) + except ExpenseClaimSubmissionBlockedError as exc: + return { + **result, + "message": self._format_submission_blocked_message(exc.issues), + "submission_blocked": True, + "submission_blocked_reasons": exc.issues, + "missing_fields": exc.issues, + "draft_only": False, + } + except ValueError as exc: + message = str(exc) + return { + **result, + "message": message, + "submission_blocked": True, + "submission_blocked_reasons": [message] if message else [], + "missing_fields": [message] if message else [], + "draft_only": False, + } + + if claim is None: + return { + **result, + "message": "未找到可提交的报销单,请刷新后重试。", + "submission_blocked": True, + "draft_only": False, + } + + if str(claim.status or "").strip().lower() != "submitted": + review_message = "" + for flag in list(claim.risk_flags_json or []): + if not isinstance(flag, dict): + continue + if str(flag.get("source") or "").strip() != "submission_review": + continue + review_message = str(flag.get("message") or "").strip() + if review_message: + break + return { + "message": review_message or f"报销单 {claim.claim_no} 经 AI预审后转为待补充,请先修正后再提交。", + "submission_blocked": True, + "draft_only": False, + "claim_id": claim.id, + "claim_no": claim.claim_no, + "status": claim.status, + "approval_stage": claim.approval_stage, + "amount": float(claim.amount), + "invoice_count": int(claim.invoice_count or 0), + } + + return { + "message": ( + f"报销单 {claim.claim_no} 已完成 AI预审," + f"当前节点为 {claim.approval_stage or '审批中'}。" + ), + "draft_only": False, + "claim_id": claim.id, + "claim_no": claim.claim_no, + "status": claim.status, + "approval_stage": claim.approval_stage, + "amount": float(claim.amount), + 
"invoice_count": int(claim.invoice_count or 0), + } + + def _build_expense_review_preview_result( + self, + *, + user_id: str | None, + message: str, + ontology: OntologyParseResult, + context_json: dict[str, Any], + ) -> dict[str, Any]: + attachment_count = self._resolve_attachment_count(context_json) + calculation_copy = self._build_expense_review_preview_calculation_copy( + user_id=user_id, + message=message, + ontology=ontology, + context_json=context_json, + ) + return { + "message": "\n\n".join( + item + for item in [ + "我已先整理出本次报销的待核对信息。下面是基于当前信息的制度测算,票据补齐后会按真实金额重新复核。", + calculation_copy, + ] + if item + ), + "draft_only": True, + "preview_only": True, + "status": "preview", + "invoice_count": attachment_count, + } + + def _build_expense_review_preview_calculation_copy( + self, + *, + user_id: str | None, + message: str, + ontology: OntologyParseResult, + context_json: dict[str, Any], + ) -> str: + expense_type = self._resolve_explicit_review_expense_type(context_json) or self._resolve_expense_type( + ontology.entities, + context_json=context_json, + ) + if expense_type == "travel" or ( + (not expense_type or expense_type == "other") + and self._should_preview_as_travel(message=message, context_json=context_json) + ): + return self._build_travel_review_preview_calculation_copy( + user_id=user_id, + message=message, + ontology=ontology, + context_json=context_json, + ) + + amount = self._resolve_amount(ontology.entities, context_json=context_json) or Decimal("0.00") + expense_label = EXPENSE_TYPE_LABELS.get(str(expense_type or "").strip(), "当前费用") + return "\n".join( + [ + "报销测算参考:", + "", + "| 项目 | 当前信息 | 复核口径 |", + "| --- | --- | --- |", + f"| 费用类型 | {expense_label} | 匹配规则中心对应费用标准 |", + f"| 票据金额 | {self._format_decimal_amount(amount)} 元 | 以真实票据识别金额和用户确认金额为准 |", + "| 规则校验 | 待票据和关键信息补齐 | 按费用类型、发生地点、业务事由和审批口径复核 |", + ] + ) + + def _build_travel_review_preview_calculation_copy( + self, + *, + user_id: str | None, + message: str, + ontology: 
OntologyParseResult, + context_json: dict[str, Any], + ) -> str: + location = self._resolve_location(message=message, context_json=context_json) or "待确认" + occurred_at = self._resolve_occurred_at(ontology, context_json=context_json) or datetime.now(UTC) + days, _, _ = self._resolve_travel_allowance_days( + context_json=context_json, + occurred_at=occurred_at, + ) + amount = self._resolve_amount(ontology.entities, context_json=context_json) or Decimal("0.00") + employee = self._resolve_employee( + ontology=ontology, + context_json=context_json, + user_id=user_id, + ) + grade = str( + context_json.get("employee_grade") + or context_json.get("grade") + or context_json.get("user_grade") + or (employee.grade if employee is not None else "") + or "" + ).strip() + + if location == "待确认" or not grade: + return "\n".join( + [ + "报销测算参考:", + "", + "| 项目 | 当前信息 | 测算说明 |", + "| --- | --- | --- |", + f"| 出差地点 | {location} | 用于匹配城市住宿标准和补贴区域 |", + f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |", + f"| 职级 | {grade or '待确认'} | 补齐后才能匹配住宿标准和补贴档位 |", + f"| 交通票据 | {self._format_decimal_amount(amount)} 元 | 上传票据后按真实金额重新复核 |", + ] + ) + + try: + from app.services.travel_reimbursement_calculator import ( + TravelReimbursementCalculatorService, + ) + + result = TravelReimbursementCalculatorService(self.db).calculate( + TravelReimbursementCalculatorRequest(days=days, location=location, grade=grade), + CurrentUserContext( + username=str(user_id or context_json.get("name") or "anonymous").strip() or "anonymous", + name=str(context_json.get("name") or user_id or "anonymous").strip() or "anonymous", + role_codes=[], + is_admin=False, + ), + ) + except ValueError: + return "\n".join( + [ + "报销测算参考:", + "", + "| 项目 | 当前信息 | 测算说明 |", + "| --- | --- | --- |", + f"| 出差地点 | {location} | 暂时未能匹配规则中心地点 |", + f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |", + f"| 职级 | {grade} | 暂时无法自动匹配差旅标准 |", + f"| 交通票据 | {self._format_decimal_amount(amount)} 元 | 上传票据后按真实金额重新复核 |", + ] + ) + + ticket_amount = 
amount.quantize(Decimal("0.01")) + total_amount = ( + ticket_amount + + Decimal(result.hotel_amount or Decimal("0.00")) + + Decimal(result.allowance_amount or Decimal("0.00")) + ).quantize(Decimal("0.01")) + ticket_basis = "当前未上传交通票据,先按 0.00 元占位" if ticket_amount <= Decimal("0.00") else "已识别或填写的交通票据金额" + return "\n".join( + [ + "报销测算参考:", + "", + f"职级 {grade},目的地 {location},匹配城市 {result.matched_city};补齐交通、酒店等票据后,我会按真实票据金额和规则中心标准重新复核。", + "", + "| 项目 | 测算口径 | 金额 |", + "| --- | --- | ---: |", + f"| 交通票据 | {ticket_basis} | {self._format_decimal_amount(ticket_amount)} 元 |", + f"| 住宿标准 | {self._format_decimal_amount(result.hotel_rate)} 元/天 × {days} 天 | {self._format_decimal_amount(result.hotel_amount)} 元 |", + f"| 出差补贴 | {self._format_decimal_amount(result.total_allowance_rate)} 元/天 × {days} 天 | {self._format_decimal_amount(result.allowance_amount)} 元 |", + f"| 参考合计 | 交通票据 + 住宿标准 + 出差补贴 | {self._format_decimal_amount(total_amount)} 元 |", + ] + ) + + @staticmethod + def _should_preview_as_travel(*, message: str, context_json: dict[str, Any]) -> bool: + text_parts = [message] + review_form_values = context_json.get("review_form_values") + if isinstance(review_form_values, dict): + text_parts.extend(str(value or "") for value in review_form_values.values()) + text_parts.extend(str(context_json.get(key) or "") for key in ("user_input_text", "raw_text", "ocr_summary")) + compact = "".join(text_parts) + return any(keyword in compact for keyword in ("差旅", "出差", "火车票", "机票", "酒店", "住宿票")) diff --git a/server/src/app/services/expense_claim_risk_review.py b/server/src/app/services/expense_claim_risk_review.py new file mode 100644 index 0000000..e0428d9 --- /dev/null +++ b/server/src/app/services/expense_claim_risk_review.py @@ -0,0 +1,177 @@ +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from typing import Any + +from sqlalchemy import or_, select + +from app.models.financial_record import ExpenseClaim +from 
class ExpenseClaimRiskReviewMixin(
    ExpenseClaimPlatformRiskMixin,
    ExpenseClaimPolicyReviewMixin,
    ExpenseClaimItemSyncMixin,
):
    """AI pre-review executed when a claim is submitted."""

    def _run_ai_submission_review(self, claim: ExpenseClaim) -> dict[str, Any]:
        """Build the submission-review outcome for ``claim``.

        Earlier ``submission_review`` flags are dropped and fresh ones are
        produced from attachment risk levels, the presence of a direct manager,
        the employee's recent risky claims and the travel, scene-policy and
        platform-risk sub-reviews. The returned dict always reports status
        ``submitted`` with ``passed`` set to True; findings travel as flags.
        """

        def source_of(flag: Any) -> str | None:
            # Non-dict entries have no source and are therefore always kept.
            if not isinstance(flag, dict):
                return None
            return str(flag.get("source") or "").strip()

        stored_flags = list(claim.risk_flags_json or [])
        preserved_flags = [flag for flag in stored_flags if source_of(flag) != "submission_review"]

        # Count attachment-analysis flags per severity in a single pass.
        high_count = 0
        medium_count = 0
        for flag in stored_flags:
            if source_of(flag) != "attachment_analysis":
                continue
            severity = str(flag.get("severity") or "").strip().lower()
            if severity == "high":
                high_count += 1
            elif severity == "medium":
                medium_count += 1

        review_flags: list[dict[str, Any]] = []
        attention_reasons: list[str] = []

        if high_count:
            attention_reasons.append("存在高风险票据,需审批人重点复核。")
            review_flags.append(
                {
                    "source": "submission_review",
                    "severity": "high",
                    "label": "AI预审重点复核",
                    "message": (
                        f"AI预审发现 {high_count} 条高风险附件,"
                        "已随单流转给审批人重点复核。"
                    ),
                }
            )
        elif medium_count:
            review_flags.append(
                {
                    "source": "submission_review",
                    "severity": "medium",
                    "label": "AI预审提醒",
                    "message": f"AI预审发现 {medium_count} 条中风险附件,已随单流转给审批人复核。",
                }
            )

        manager_name = self._resolve_claim_manager_name(claim)
        if not manager_name:
            attention_reasons.append("未识别到该员工的直属领导,需审批环节补充分配。")
            review_flags.append(
                {
                    "source": "submission_review",
                    "severity": "medium",
                    "label": "审批链待分配",
                    "message": "AI预审发现直属领导缺失,已提交到审批环节等待分配或复核。",
                }
            )

        risky_history = self._count_recent_risky_claims(claim)
        if risky_history >= AI_REVIEW_REPEAT_RISK_BLOCK_COUNT:
            review_flags.append(
                {
                    "source": "submission_review",
                    "severity": "medium",
                    "label": "历史风险偏高",
                    "message": (
                        f"近 {AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {risky_history} 笔带风险标记的报销,"
                        "本次已追加到审批链重点关注。"
                    ),
                }
            )
        elif risky_history >= AI_REVIEW_REPEAT_RISK_WARNING_COUNT:
            review_flags.append(
                {
                    "source": "submission_review",
                    "severity": "low",
                    "label": "历史风险提醒",
                    "message": (
                        f"近 {AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {risky_history} 笔带风险标记的报销,"
                        "建议直属领导重点复核。"
                    ),
                }
            )

        # Sub-reviews run in this fixed order: travel, scene policy, platform risk.
        sub_reviews = (
            self._run_travel_policy_review,
            self._run_scene_policy_review,
            self.evaluate_platform_risk_rules,
        )
        for run_sub_review in sub_reviews:
            outcome = run_sub_review(claim)
            attention_reasons.extend(outcome["blocking_reasons"])
            review_flags.extend(outcome["flags"])

        if attention_reasons:
            # dict.fromkeys de-duplicates the reasons while keeping their order.
            unique_reasons = dict.fromkeys(attention_reasons)
            summary_flag = {
                "source": "submission_review",
                "severity": "medium",
                "label": "AI预审重点复核",
                "message": "AI预审发现需审批重点关注事项:" + ";".join(unique_reasons),
            }
            review_flags.insert(0, summary_flag)

        return {
            "status": "submitted",
            "approval_stage": "直属领导审批",
            "risk_flags": preserved_flags + review_flags,
            "message": (
                f"报销单 {claim.claim_no} 已完成 AI预审,"
                f"现已提交给直属领导 {manager_name or '审批人'} 审批。"
            ),
            "passed": True,
        }

    @staticmethod
    def _resolve_claim_manager_name(claim: ExpenseClaim) -> str:
        """Return the manager name for the claim's employee, or ``""``.

        The employee's own manager is preferred; the organization unit's
        ``manager_name`` is the fallback.
        """
        employee = claim.employee
        if employee is None:
            return ""
        manager = employee.manager
        if manager is not None and manager.name:
            return str(manager.name).strip()
        unit = employee.organization_unit
        if unit is not None and unit.manager_name:
            return str(unit.manager_name).strip()
        return ""

    def _count_recent_risky_claims(self, claim: ExpenseClaim) -> int:
        """Count the employee's other recent claims that carry risk flags.

        The employee is matched by id when present, otherwise by name; with
        neither the count is 0. Only claims whose ``occurred_at`` falls within
        the last ``AI_REVIEW_LOOKBACK_DAYS`` days are considered, and the
        claim under review is excluded.
        """
        owner_filters = []
        if claim.employee_id:
            owner_filters.append(ExpenseClaim.employee_id == claim.employee_id)
        elif claim.employee_name:
            owner_filters.append(ExpenseClaim.employee_name == claim.employee_name)
        if not owner_filters:
            return 0

        window_start = datetime.now(UTC) - timedelta(days=AI_REVIEW_LOOKBACK_DAYS)
        query = select(ExpenseClaim).where(or_(*owner_filters))
        query = query.where(ExpenseClaim.id != claim.id)
        query = query.where(ExpenseClaim.occurred_at >= window_start)

        earlier_claims = list(self.db.scalars(query).all())
        flagged = [earlier for earlier in earlier_claims if list(earlier.risk_flags_json or [])]
        return len(flagged)
ExpenseClaim, ExpenseClaimItem -from app.models.organization import OrganizationUnit from app.schemas.ontology import OntologyEntity, OntologyParseResult from app.schemas.reimbursement import ( ExpenseClaimItemCreate, @@ -39,6 +33,72 @@ from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY from app.services.agent_foundation import AgentFoundationService from app.services.audit import AuditLogService from app.services.document_intelligence import build_document_insight +from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy +from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage +from app.services.expense_claim_attachment_analysis import ExpenseClaimAttachmentAnalysisMixin +from app.services.expense_claim_attachment_document import ExpenseClaimAttachmentDocumentMixin +from app.services.expense_claim_attachment_operations import ExpenseClaimAttachmentOperationsMixin +from app.services.expense_claim_document_item_builder import ExpenseClaimDocumentItemBuilderMixin +from app.services.expense_claim_document_parsing import ExpenseClaimDocumentParsingMixin +from app.services.expense_claim_draft_flow import ExpenseClaimDraftFlowMixin +from app.services.expense_claim_draft_persistence import ExpenseClaimDraftPersistenceMixin +from app.services.expense_claim_errors import ExpenseClaimSubmissionBlockedError +from app.services.expense_claim_ontology_resolvers import ExpenseClaimOntologyResolverMixin +from app.services.expense_claim_read_model import ExpenseClaimReadModelMixin +from app.services.expense_claim_review_preview import ExpenseClaimReviewPreviewMixin +from app.services.expense_claim_constants import ( + EXPENSE_TYPE_LABELS, + MAX_DRAFT_CLAIMS_PER_USER, + EDITABLE_CLAIM_STATUSES, + SYSTEM_GENERATED_ITEM_TYPES, + TRAVEL_DETAIL_ITEM_TYPES, + TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES, + 
DOCUMENT_TYPE_ITEM_TYPE_MAP, + DOCUMENT_TYPE_SCENE_MAP, + DOCUMENT_FACT_ITEM_TYPES, + ROUTE_DESCRIPTION_ITEM_TYPES, + DOCUMENT_TRIP_DATE_LABELS, + DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS, + DOCUMENT_TRIP_DATE_KEYS, + DOCUMENT_GENERIC_DATE_KEYS, + DOCUMENT_INVOICE_DATE_KEYS, + DOCUMENT_TRIP_DATE_LABEL_TOKENS, + DOCUMENT_GENERIC_DATE_LABEL_TOKENS, + DOCUMENT_INVOICE_DATE_LABEL_TOKENS, + DOCUMENT_ROUTE_FORMAT_PATTERN, + DOCUMENT_ROUTE_TEXT_PATTERN, + DOCUMENT_ROUTE_ORIGIN_LABELS, + DOCUMENT_ROUTE_DESTINATION_LABELS, + GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES, + LOCATION_REQUIRED_EXPENSE_TYPES, + EXPENSE_SCENE_KEYWORDS, + EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES, + DOCUMENT_SCENE_LABELS, + DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, + PERSISTENT_EXPENSE_REVIEW_ACTIONS, + RETURN_REASON_OPTIONS, + MAX_CLAIM_NO_RETRY_ATTEMPTS, + DOCUMENT_DATE_PATTERN, + SYSTEM_GENERATED_REASON_PREFIXES, + LEADING_REASON_TIME_PATTERNS, + AI_REVIEW_LOOKBACK_DAYS, + AI_REVIEW_REPEAT_RISK_WARNING_COUNT, + AI_REVIEW_REPEAT_RISK_BLOCK_COUNT, + TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES, + TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES, + TRAVEL_POLICY_CITY_TIERS, + TRAVEL_POLICY_CITY_MATCH_ORDER, + TRAVEL_POLICY_BAND_LABELS, + TRAVEL_POLICY_HOTEL_LIMITS, + TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS, + TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS, + TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS, + TRAVEL_POLICY_TRAIN_CLASS_PATTERNS, + TRAVEL_POLICY_HOTEL_NIGHT_PATTERN, +) +from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin from app.services.expense_amounts import ( extract_amount_candidates, format_decimal_amount, @@ -60,382 +120,28 @@ from app.services.expense_rule_runtime import ( ) from app.services.ocr import OcrService -EXPENSE_TYPE_LABELS = { - "travel": "差旅", - "train_ticket": "火车票", - "flight_ticket": "机票", - "hotel_ticket": "住宿票", - "ride_ticket": "乘车", - "travel_allowance": "出差补贴", - "hotel": "住宿", - "transport": "交通", - "meal": "餐费", - "meeting": "会务", - 
"entertainment": "招待", - "office": "办公", - "training": "培训", - "communication": "通讯", - "welfare": "福利", -} - -PRIVILEGED_CLAIM_ROLE_CODES = {"finance", "executive"} -APPROVAL_VISIBLE_CLAIM_ROLE_CODES = {"manager", "approver"} -CLAIM_DELETE_ROLE_CODES = {"executive"} -MAX_DRAFT_CLAIMS_PER_USER = 3 -EDITABLE_CLAIM_STATUSES = ("draft", "supplement", "returned") -SYSTEM_GENERATED_ITEM_TYPES = {"travel_allowance"} -TRAVEL_DETAIL_ITEM_TYPES = { - "train_ticket", - "flight_ticket", - "hotel_ticket", - "ride_ticket", - "travel_allowance", -} -TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES = {"train_ticket", "flight_ticket"} -DOCUMENT_TYPE_ITEM_TYPE_MAP = { - "train_ticket": "train_ticket", - "flight_itinerary": "flight_ticket", - "hotel_invoice": "hotel_ticket", - "taxi_receipt": "ride_ticket", - "transport_receipt": "ride_ticket", -} -DOCUMENT_TYPE_SCENE_MAP = { - "train_ticket": "travel", - "flight_itinerary": "travel", - "hotel_invoice": "hotel", - "taxi_receipt": "transport", - "transport_receipt": "transport", - "parking_toll_receipt": "transport", - "meal_receipt": "meal", - "office_invoice": "office", - "meeting_invoice": "meeting", - "training_invoice": "training", -} -DOCUMENT_FACT_ITEM_TYPES = {"train_ticket", "flight_ticket", "hotel_ticket", "ride_ticket", "ship_ticket", "ferry_ticket"} -ROUTE_DESCRIPTION_ITEM_TYPES = {"train_ticket", "flight_ticket", "ship_ticket", "ferry_ticket", "ride_ticket"} -DOCUMENT_TRIP_DATE_LABELS = { - "train_ticket": "列车出发时间", - "flight_itinerary": "起飞日期", - "taxi_receipt": "乘车时间", - "transport_receipt": "乘车时间", - "parking_toll_receipt": "通行日期", -} -DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS = { - "train_ticket": "列车出发时间或乘车日期", - "flight_itinerary": "起飞日期或航班日期", - "taxi_receipt": "乘车时间", - "transport_receipt": "乘车时间", - "parking_toll_receipt": "通行日期", - "hotel_invoice": "入住或离店日期", -} -DOCUMENT_TRIP_DATE_KEYS = { - "traveldate", - "tripdate", - "journeydate", - "departuredate", - "departuretime", - "departdate", - "departtime", - "boardingdate", - 
"boardingtime", - "traindate", - "traintime", - "traindeparturetime", - "scheduleddeparturetime", - "flightdate", - "flighttime", - "ridedate", - "ridetime", - "pickuptime", - "starttime", -} -DOCUMENT_GENERIC_DATE_KEYS = {"date", "time", "occurredat", "occurreddate", "businessdate"} -DOCUMENT_INVOICE_DATE_KEYS = {"issuedat", "issuedate", "invoicedate", "billingdate"} -DOCUMENT_TRIP_DATE_LABEL_TOKENS = ( - "出发日期", - "出发时间", - "列车出发时间", - "发车日期", - "发车时间", - "开车时间", - "乘车日期", - "乘车时间", - "起飞日期", - "航班日期", - "行程日期", - "上车时间", - "用车时间", - "通行日期", -) -DOCUMENT_GENERIC_DATE_LABEL_TOKENS = ("日期", "时间", "发生时间", "业务发生日期") -DOCUMENT_INVOICE_DATE_LABEL_TOKENS = ("开票日期", "发票日期") -DOCUMENT_ROUTE_FORMAT_PATTERN = re.compile( - r"^[A-Za-z0-9\u4e00-\u9fa5()()·]{2,40}\s*-\s*" - r"[A-Za-z0-9\u4e00-\u9fa5()()·]{2,40}$" -) -DOCUMENT_ROUTE_TEXT_PATTERN = re.compile( - r"([A-Za-z0-9\u4e00-\u9fa5()()·]{2,40})\s*(?:至|到|→|->|—|–|-)\s*" - r"([A-Za-z0-9\u4e00-\u9fa5()()·]{2,40})" -) -DOCUMENT_ROUTE_ORIGIN_LABELS = {"起点", "上车", "上车地点", "上车地址", "出发", "出发地", "出发站", "始发站", "乘车起点"} -DOCUMENT_ROUTE_DESTINATION_LABELS = { - "终点", - "下车", - "下车地点", - "下车地址", - "到达", - "到达地", - "到达站", - "目的地", - "乘车终点", -} -GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES = {"", "other", "travel", "transport", "hotel"} -LOCATION_REQUIRED_EXPENSE_TYPES = {"travel", "meeting", "entertainment"} -class ExpenseClaimSubmissionBlockedError(ValueError): - def __init__(self, issues: list[str]) -> None: - self.issues = [str(issue or "").strip() for issue in issues if str(issue or "").strip()] - super().__init__("提交前请先补全信息:" + ";".join(self.issues)) -EXPENSE_SCENE_KEYWORDS = { - "travel": ("差旅", "出差", "行程"), - "hotel": ("酒店", "住宿", "房费", "客房", "入住", "离店"), - "transport": ( - "交通", - "打车", - "出租车", - "网约车", - "滴滴", - "出行", - "乘车", - "用车", - "叫车", - "车费", - "车资", - "的士", - "高铁", - "动车", - "火车", - "机票", - "航班", - "行程单", - "登机", - "客票", - "公交", - "地铁", - "过路费", - "通行费", - "停车", - ), - "meal": ("餐饮", "餐费", "用餐", "外卖", "快餐", "酒楼", "饭店", "饭馆", 
"食品", "咖啡"), - "entertainment": ("招待", "宴请", "接待", "客户餐", "商务餐", "业务招待"), - "office": ("办公", "办公用品", "文具", "耗材", "打印", "纸张", "硒鼓", "墨盒", "鼠标", "键盘", "电脑"), - "meeting": ("会议", "会务", "会展", "会议室", "会场", "场地费", "论坛"), - "training": ("培训", "课程", "讲师", "教材", "学费", "认证"), -} - -EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = { - "travel": {"travel", "hotel", "transport", "meal"}, - "train_ticket": {"travel"}, - "flight_ticket": {"travel"}, - "hotel_ticket": {"hotel"}, - "ride_ticket": {"transport"}, - "travel_allowance": set(), - "hotel": {"hotel"}, - "transport": {"transport", "travel"}, - "meal": {"meal", "entertainment"}, - "entertainment": {"entertainment", "meal"}, - "office": {"office"}, - "meeting": {"meeting"}, - "training": {"training"}, -} - -DOCUMENT_SCENE_LABELS = { - "travel": "差旅", - "hotel": "住宿", - "transport": "交通", - "meal": "餐饮", - "entertainment": "业务招待", - "office": "办公用品", - "meeting": "会务", - "training": "培训", - "other": "其他票据", -} - -DOCUMENT_ASSOCIATION_REVIEW_ACTIONS = { - "link_to_existing_draft", - "create_new_claim_from_documents", -} -PERSISTENT_EXPENSE_REVIEW_ACTIONS = { - "save_draft", - "next_step", - *DOCUMENT_ASSOCIATION_REVIEW_ACTIONS, -} -RETURN_REASON_OPTIONS = { - "missing_attachment": "附件缺失或不清晰", - "invoice_mismatch": "票据类型/金额与明细不一致", - "over_policy": "超出制度标准或缺少超标说明", - "business_explanation": "业务事由/地点/人员信息不完整", - "duplicate_or_abnormal": "疑似重复或异常票据", - "approval_question": "审批人需要补充说明", -} -MAX_CLAIM_NO_RETRY_ATTEMPTS = 3 -DOCUMENT_DATE_PATTERN = re.compile(r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)") -SYSTEM_GENERATED_REASON_PREFIXES = ( - "我上传了", - "请按当前已识别信息", - "请把当前上传的票据", - "请基于当前上传的多张票据", - "我已核对右侧识别结果", - "请同步修正逐票据识别结果", - "我已修改识别信息", - "查看报销草稿", - "请解释一下当前这笔报销的合规风险和待补充项", -) -LEADING_REASON_TIME_PATTERNS = ( - re.compile( - r"^\s*(?:识别事项(?:有)?[::]\s*)?" - r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*" - r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" 
- r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" - r"\s*[,,。;;、]?\s*" - ), - re.compile( - r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" - r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" - r"\s*[,,。;;、]\s*" - ), -) -AI_REVIEW_LOOKBACK_DAYS = 90 -AI_REVIEW_REPEAT_RISK_WARNING_COUNT = 1 -AI_REVIEW_REPEAT_RISK_BLOCK_COUNT = 2 -TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES = {"travel", "hotel", "transport"} -TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES = {"flight_itinerary", "train_ticket"} -TRAVEL_POLICY_CITY_TIERS = { - "北京": "tier_1", - "上海": "tier_1", - "广州": "tier_1", - "深圳": "tier_1", - "杭州": "tier_2", - "南京": "tier_2", - "苏州": "tier_2", - "武汉": "tier_2", - "成都": "tier_2", - "重庆": "tier_2", - "西安": "tier_2", - "天津": "tier_2", - "宁波": "tier_2", - "厦门": "tier_2", - "青岛": "tier_2", - "长沙": "tier_2", - "郑州": "tier_2", - "合肥": "tier_2", - "济南": "tier_2", - "沈阳": "tier_2", - "大连": "tier_2", - "福州": "tier_2", - "昆明": "tier_2", - "海口": "tier_2", - "三亚": "tier_2", - "无锡": "tier_2", - "东莞": "tier_2", - "佛山": "tier_2", -} -TRAVEL_POLICY_CITY_MATCH_ORDER = tuple( - sorted(TRAVEL_POLICY_CITY_TIERS.keys(), key=lambda item: len(item), reverse=True) -) -TRAVEL_POLICY_BAND_LABELS = { - "junior": "P1-P3", - "mid": "P4-P5", - "senior": "P6-P7", - "manager": "M1-M2", - "executive": "M3及以上 / D序列", -} -TRAVEL_POLICY_HOTEL_LIMITS = { - "junior": { - "tier_1": Decimal("450.00"), - "tier_2": Decimal("380.00"), - "tier_3": Decimal("320.00"), - }, - "mid": { - "tier_1": Decimal("550.00"), - "tier_2": Decimal("480.00"), - "tier_3": Decimal("380.00"), - }, - "senior": { - "tier_1": Decimal("700.00"), - "tier_2": Decimal("620.00"), - "tier_3": Decimal("520.00"), - }, - "manager": { - "tier_1": Decimal("900.00"), - "tier_2": Decimal("820.00"), - "tier_3": Decimal("720.00"), - }, - "executive": { - "tier_1": Decimal("1200.00"), - "tier_2": Decimal("1000.00"), - "tier_3": Decimal("900.00"), - }, -} -TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS = { - "junior": 
{"flight": 1, "train": 1}, - "mid": {"flight": 1, "train": 1}, - "senior": {"flight": 2, "train": 2}, - "manager": {"flight": 3, "train": 3}, - "executive": {"flight": 4, "train": 3}, -} -TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS = ( - "中转", - "转机", - "经停", - "改签", - "多地出差", - "多城市", - "多站", - "异地返程", - "异地结束", - "临时变更", - "继续前往", - "第二站", -) -TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS = ( - "超标说明", - "无直达", - "展会高峰", - "会议高峰", - "协议酒店满房", - "客户指定", - "临时改签", - "行程变更", - "红眼航班", - "晚到店", -) -TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS = ( - ("头等舱", 4), - ("公务舱", 3), - ("商务舱", 3), - ("超级经济舱", 2), - ("高端经济舱", 2), - ("明珠经济舱", 2), - ("经济舱", 1), -) -TRAVEL_POLICY_TRAIN_CLASS_PATTERNS = ( - ("商务座", 3), - ("一等座", 2), - ("软卧", 2), - ("二等座", 1), - ("二等卧", 1), - ("硬卧", 1), -) -TRAVEL_POLICY_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)") - - -class ExpenseClaimService: +class ExpenseClaimService( + ExpenseClaimAttachmentOperationsMixin, + ExpenseClaimReviewPreviewMixin, + ExpenseClaimDraftFlowMixin, + ExpenseClaimDraftPersistenceMixin, + ExpenseClaimDocumentItemBuilderMixin, + ExpenseClaimDocumentParsingMixin, + ExpenseClaimOntologyResolverMixin, + ExpenseClaimAttachmentDocumentMixin, + ExpenseClaimAttachmentAnalysisMixin, + ExpenseClaimReadModelMixin, + ExpenseClaimRiskReviewMixin, +): def __init__(self, db: Session) -> None: self.db = db self.audit_service = AuditLogService(db) + self._access_policy = ExpenseClaimAccessPolicy(db) + self._attachment_storage = ExpenseClaimAttachmentStorage() + self._attachment_presentation = ExpenseClaimAttachmentPresentation(self._attachment_storage) def list_claims(self, current_user: CurrentUserContext) -> list[ExpenseClaim]: stmt = ( @@ -447,7 +153,7 @@ class ExpenseClaimService: ) .order_by(ExpenseClaim.created_at.desc(), ExpenseClaim.occurred_at.desc()) ) - stmt = self._apply_claim_scope(stmt, current_user) + stmt = self._access_policy.apply_claim_scope(stmt, current_user) return list(self.db.scalars(stmt).all()) def 
list_approval_claims(self, current_user: CurrentUserContext) -> list[ExpenseClaim]: @@ -460,7 +166,7 @@ class ExpenseClaimService: ) .order_by(ExpenseClaim.submitted_at.desc(), ExpenseClaim.created_at.desc()) ) - stmt = self._apply_approval_claim_scope(stmt, current_user) + stmt = self._access_policy.apply_approval_claim_scope(stmt, current_user) return list(self.db.scalars(stmt).all()) def get_claim(self, claim_id: str, current_user: CurrentUserContext) -> ExpenseClaim | None: @@ -473,7 +179,7 @@ class ExpenseClaimService: ) .where(ExpenseClaim.id == claim_id) ) - stmt = self._apply_claim_scope(stmt, current_user, include_approval_scope=True) + stmt = self._access_policy.apply_claim_scope(stmt, current_user, include_approval_scope=True) return self.db.scalar(stmt) def update_claim( @@ -635,7 +341,7 @@ class ExpenseClaimService: before_json = self._serialize_claim(claim) item_label = str(item.item_reason or "").strip() or self._resolve_expense_type_label(item.item_type) - self._delete_item_attachment_files(item) + self._attachment_storage.delete_item_files(item) claim.items = [entry for entry in claim.items if entry.id != item.id] self.db.delete(item) @@ -658,256 +364,10 @@ class ExpenseClaimService: "item_id": item.id, } - def upload_claim_item_attachment( - self, - *, - claim_id: str, - item_id: str, - filename: str, - content: bytes, - media_type: str | None, - current_user: CurrentUserContext, - ) -> dict[str, Any] | None: - claim, item = self._get_claim_item_or_raise( - claim_id=claim_id, - item_id=item_id, - current_user=current_user, - ) - if claim is None: - return None - self._ensure_draft_claim(claim) - self._ensure_mutable_claim_item(item) - normalized_name = self._normalize_attachment_filename(filename) - if not content: - raise ValueError("上传文件不能为空。") - before_json = self._serialize_claim(claim) - attachment_dir = self._build_item_attachment_dir(claim.id, item.id) - shutil.rmtree(attachment_dir, ignore_errors=True) - attachment_dir.mkdir(parents=True, 
exist_ok=True) - file_path = attachment_dir / normalized_name - file_path.write_bytes(content) - resolved_media_type = self._resolve_attachment_media_type( - normalized_name, - fallback=media_type, - ) - attachment_analysis = self._build_fallback_attachment_analysis( - media_type=media_type, - item=item, - ) - ocr_document = None - document_info = None - requirement_check = None - ocr_status = "empty" - ocr_error = "" - try: - ocr_result = OcrService(self.db).recognize_files( - [(normalized_name, content, media_type or "application/octet-stream")] - ) - documents = list(ocr_result.documents or []) - if documents: - ocr_document = documents[0] - ocr_status = "recognized" - document_info = self._build_attachment_document_info(ocr_document) - self._backfill_item_type_from_attachment( - item=item, - document_info=document_info, - ) - self._backfill_item_amount_from_attachment( - item=item, - document=ocr_document, - document_info=document_info, - ) - self._backfill_item_date_from_attachment( - item=item, - document=ocr_document, - document_info=document_info, - ) - self._backfill_item_reason_from_attachment( - item=item, - document=ocr_document, - document_info=document_info, - ) - requirement_check = self._build_attachment_requirement_check( - item=item, - document_info=document_info, - ) - attachment_analysis = self._build_attachment_analysis( - document=ocr_document, - item=item, - claim=claim, - document_info=document_info, - requirement_check=requirement_check, - ) - except Exception as exc: # pragma: no cover - fallback path depends on OCR runtime - ocr_status = "failed" - ocr_error = str(exc) - attachment_analysis = self._build_failed_ocr_attachment_analysis( - media_type=media_type, - error_message=ocr_error, - item=item, - ) - - item.invoice_id = self._to_attachment_storage_key(file_path) - preview_meta = self._build_attachment_preview_meta( - file_path=file_path, - media_type=resolved_media_type, - ocr_document=ocr_document, - ) - meta = { - "file_name": 
normalized_name, - "storage_key": item.invoice_id, - "media_type": resolved_media_type, - "size_bytes": len(content), - "uploaded_at": datetime.now(UTC).isoformat(), - "previewable": bool(preview_meta["previewable"]), - "preview_kind": str(preview_meta["preview_kind"]), - "preview_storage_key": str(preview_meta["preview_storage_key"]), - "preview_media_type": str(preview_meta["preview_media_type"]), - "preview_file_name": str(preview_meta["preview_file_name"]), - "analysis": attachment_analysis, - "document_info": document_info, - "requirement_check": requirement_check, - "ocr_status": ocr_status, - "ocr_error": ocr_error, - "ocr_text": str(getattr(ocr_document, "text", "") or ""), - "ocr_summary": str(getattr(ocr_document, "summary", "") or ""), - "ocr_avg_score": float(getattr(ocr_document, "avg_score", 0.0) or 0.0), - "ocr_line_count": int(getattr(ocr_document, "line_count", 0) or 0), - "ocr_classification_source": str(getattr(ocr_document, "classification_source", "") or ""), - "ocr_classification_confidence": float(getattr(ocr_document, "classification_confidence", 0.0) or 0.0), - "ocr_classification_evidence": [ - str(item) - for item in getattr(ocr_document, "classification_evidence", []) or [] - if str(item).strip() - ], - "ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []], - } - self._write_attachment_meta(file_path, meta) - - self._sync_claim_from_items(claim) - self.db.commit() - self.db.refresh(claim) - - self.audit_service.log_action( - actor=current_user.name or current_user.username, - action="expense_claim.attachment_upload", - resource_type="expense_claim", - resource_id=claim.id, - before_json=before_json, - after_json=self._serialize_claim(claim), - ) - - return { - "message": f"{normalized_name} 已上传并关联到当前费用明细。", - "claim_id": claim.id, - "item_id": item.id, - "invoice_id": item.invoice_id, - "item_date": item.item_date.isoformat() if item.item_date else None, - "item_type": item.item_type, - "item_reason": 
item.item_reason, - "item_location": item.item_location, - "item_amount": item.item_amount, - "claim_amount": claim.amount, - "attachment": self._build_attachment_payload(item), - } - - def get_claim_item_attachment_meta( - self, - *, - claim_id: str, - item_id: str, - current_user: CurrentUserContext, - ) -> dict[str, Any] | None: - claim, item = self._get_claim_item_or_raise( - claim_id=claim_id, - item_id=item_id, - current_user=current_user, - ) - if claim is None: - return None - - return self._build_attachment_payload(item) - - def get_claim_item_attachment_content( - self, - *, - claim_id: str, - item_id: str, - current_user: CurrentUserContext, - ) -> tuple[Path, str, str] | None: - claim, item = self._get_claim_item_or_raise( - claim_id=claim_id, - item_id=item_id, - current_user=current_user, - ) - if claim is None: - return None - - return self._resolve_item_attachment_content(item) - - def get_claim_item_attachment_preview_content( - self, - *, - claim_id: str, - item_id: str, - current_user: CurrentUserContext, - ) -> tuple[Path, str, str] | None: - claim, item = self._get_claim_item_or_raise( - claim_id=claim_id, - item_id=item_id, - current_user=current_user, - ) - if claim is None: - return None - - return self._resolve_item_attachment_preview_content(item) - - def delete_claim_item_attachment( - self, - *, - claim_id: str, - item_id: str, - current_user: CurrentUserContext, - ) -> dict[str, Any] | None: - claim, item = self._get_claim_item_or_raise( - claim_id=claim_id, - item_id=item_id, - current_user=current_user, - ) - if claim is None: - return None - - self._ensure_draft_claim(claim) - self._ensure_mutable_claim_item(item) - before_json = self._serialize_claim(claim) - previous_name = self._resolve_attachment_display_name(item.invoice_id) - self._delete_item_attachment_files(item) - item.invoice_id = None - - self._sync_claim_from_items(claim) - self.db.commit() - self.db.refresh(claim) - - self.audit_service.log_action( - 
actor=current_user.name or current_user.username, - action="expense_claim.attachment_delete", - resource_type="expense_claim", - resource_id=claim.id, - before_json=before_json, - after_json=self._serialize_claim(claim), - ) - - return { - "message": f"{previous_name or '附件'} 已删除。", - "claim_id": claim.id, - "item_id": item.id, - "invoice_id": item.invoice_id, - "attachment": None, - } def submit_claim(self, claim_id: str, current_user: CurrentUserContext) -> ExpenseClaim | None: claim = self.get_claim(claim_id, current_user) @@ -915,7 +375,7 @@ class ExpenseClaimService: return None self._ensure_draft_claim(claim) - self._backfill_claim_identity_from_current_user(claim, current_user) + self._access_policy.backfill_claim_identity_from_current_user(claim, current_user) self._sync_claim_from_items(claim) missing_fields = self._validate_claim_for_submission(claim) if missing_fields: @@ -945,300 +405,25 @@ class ExpenseClaimService: return claim - def save_or_submit_from_ontology( - self, - *, - run_id: str, - user_id: str | None, - message: str, - ontology: OntologyParseResult, - context_json: dict[str, Any], - ) -> dict[str, Any]: - review_action = str(context_json.get("review_action") or "").strip() - if review_action not in PERSISTENT_EXPENSE_REVIEW_ACTIONS: - return self._build_expense_review_preview_result( - user_id=user_id, - message=message, - ontology=ontology, - context_json=context_json, - ) - result = self.upsert_draft_from_ontology( - run_id=run_id, - user_id=user_id, - message=message, - ontology=ontology, - context_json=context_json, - ) - if review_action != "next_step": - return result - claim_id = str(result.get("claim_id") or "").strip() - if not claim_id or result.get("draft_limit_reached"): - return result - current_user = CurrentUserContext( - username=str(user_id or context_json.get("name") or "anonymous").strip() or "anonymous", - name=str(context_json.get("name") or user_id or "anonymous").strip() or "anonymous", - role_codes=[ - 
str(item).strip() - for item in list(context_json.get("role_codes") or []) - if str(item).strip() - ], - is_admin=bool(context_json.get("is_admin")), - department_name=str(context_json.get("department_name") or context_json.get("department") or "").strip(), - ) - - try: - claim = self.submit_claim(claim_id, current_user) - except ExpenseClaimSubmissionBlockedError as exc: - return { - **result, - "message": self._format_submission_blocked_message(exc.issues), - "submission_blocked": True, - "submission_blocked_reasons": exc.issues, - "missing_fields": exc.issues, - "draft_only": False, - } - except ValueError as exc: - message = str(exc) - return { - **result, - "message": message, - "submission_blocked": True, - "submission_blocked_reasons": [message] if message else [], - "missing_fields": [message] if message else [], - "draft_only": False, - } - - if claim is None: - return { - **result, - "message": "未找到可提交的报销单,请刷新后重试。", - "submission_blocked": True, - "draft_only": False, - } - - if str(claim.status or "").strip().lower() != "submitted": - review_message = "" - for flag in list(claim.risk_flags_json or []): - if not isinstance(flag, dict): - continue - if str(flag.get("source") or "").strip() != "submission_review": - continue - review_message = str(flag.get("message") or "").strip() - if review_message: - break - return { - "message": review_message or f"报销单 {claim.claim_no} 经 AI预审后转为待补充,请先修正后再提交。", - "submission_blocked": True, - "draft_only": False, - "claim_id": claim.id, - "claim_no": claim.claim_no, - "status": claim.status, - "approval_stage": claim.approval_stage, - "amount": float(claim.amount), - "invoice_count": int(claim.invoice_count or 0), - } - - return { - "message": ( - f"报销单 {claim.claim_no} 已完成 AI预审," - f"当前节点为 {claim.approval_stage or '审批中'}。" - ), - "draft_only": False, - "claim_id": claim.id, - "claim_no": claim.claim_no, - "status": claim.status, - "approval_stage": claim.approval_stage, - "amount": float(claim.amount), - 
"invoice_count": int(claim.invoice_count or 0), - } - - def _build_expense_review_preview_result( - self, - *, - user_id: str | None, - message: str, - ontology: OntologyParseResult, - context_json: dict[str, Any], - ) -> dict[str, Any]: - attachment_count = self._resolve_attachment_count(context_json) - calculation_copy = self._build_expense_review_preview_calculation_copy( - user_id=user_id, - message=message, - ontology=ontology, - context_json=context_json, - ) - return { - "message": "\n\n".join( - item - for item in [ - "我已先整理出本次报销的待核对信息。下面是基于当前信息的制度测算,票据补齐后会按真实金额重新复核。", - calculation_copy, - ] - if item - ), - "draft_only": True, - "preview_only": True, - "status": "preview", - "invoice_count": attachment_count, - } - - def _build_expense_review_preview_calculation_copy( - self, - *, - user_id: str | None, - message: str, - ontology: OntologyParseResult, - context_json: dict[str, Any], - ) -> str: - expense_type = self._resolve_explicit_review_expense_type(context_json) or self._resolve_expense_type( - ontology.entities, - context_json=context_json, - ) - if expense_type == "travel" or ( - (not expense_type or expense_type == "other") - and self._should_preview_as_travel(message=message, context_json=context_json) - ): - return self._build_travel_review_preview_calculation_copy( - user_id=user_id, - message=message, - ontology=ontology, - context_json=context_json, - ) - - amount = self._resolve_amount(ontology.entities, context_json=context_json) or Decimal("0.00") - expense_label = EXPENSE_TYPE_LABELS.get(str(expense_type or "").strip(), "当前费用") - return "\n".join( - [ - "报销测算参考:", - "", - "| 项目 | 当前信息 | 复核口径 |", - "| --- | --- | --- |", - f"| 费用类型 | {expense_label} | 匹配规则中心对应费用标准 |", - f"| 票据金额 | {self._format_decimal_amount(amount)} 元 | 以真实票据识别金额和用户确认金额为准 |", - "| 规则校验 | 待票据和关键信息补齐 | 按费用类型、发生地点、业务事由和审批口径复核 |", - ] - ) - - def _build_travel_review_preview_calculation_copy( - self, - *, - user_id: str | None, - message: str, - ontology: 
OntologyParseResult, - context_json: dict[str, Any], - ) -> str: - location = self._resolve_location(message=message, context_json=context_json) or "待确认" - occurred_at = self._resolve_occurred_at(ontology, context_json=context_json) or datetime.now(UTC) - days, _, _ = self._resolve_travel_allowance_days( - context_json=context_json, - occurred_at=occurred_at, - ) - amount = self._resolve_amount(ontology.entities, context_json=context_json) or Decimal("0.00") - employee = self._resolve_employee( - ontology=ontology, - context_json=context_json, - user_id=user_id, - ) - grade = str( - context_json.get("employee_grade") - or context_json.get("grade") - or context_json.get("user_grade") - or (employee.grade if employee is not None else "") - or "" - ).strip() - - if location == "待确认" or not grade: - return "\n".join( - [ - "报销测算参考:", - "", - "| 项目 | 当前信息 | 测算说明 |", - "| --- | --- | --- |", - f"| 出差地点 | {location} | 用于匹配城市住宿标准和补贴区域 |", - f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |", - f"| 职级 | {grade or '待确认'} | 补齐后才能匹配住宿标准和补贴档位 |", - f"| 交通票据 | {self._format_decimal_amount(amount)} 元 | 上传票据后按真实金额重新复核 |", - ] - ) - - try: - from app.services.travel_reimbursement_calculator import ( - TravelReimbursementCalculatorService, - ) - - result = TravelReimbursementCalculatorService(self.db).calculate( - TravelReimbursementCalculatorRequest(days=days, location=location, grade=grade), - CurrentUserContext( - username=str(user_id or context_json.get("name") or "anonymous").strip() or "anonymous", - name=str(context_json.get("name") or user_id or "anonymous").strip() or "anonymous", - role_codes=[], - is_admin=False, - ), - ) - except ValueError: - return "\n".join( - [ - "报销测算参考:", - "", - "| 项目 | 当前信息 | 测算说明 |", - "| --- | --- | --- |", - f"| 出差地点 | {location} | 暂时未能匹配规则中心地点 |", - f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |", - f"| 职级 | {grade} | 暂时无法自动匹配差旅标准 |", - f"| 交通票据 | {self._format_decimal_amount(amount)} 元 | 上传票据后按真实金额重新复核 |", - ] - ) - - ticket_amount = 
amount.quantize(Decimal("0.01")) - total_amount = ( - ticket_amount - + Decimal(result.hotel_amount or Decimal("0.00")) - + Decimal(result.allowance_amount or Decimal("0.00")) - ).quantize(Decimal("0.01")) - ticket_basis = "当前未上传交通票据,先按 0.00 元占位" if ticket_amount <= Decimal("0.00") else "已识别或填写的交通票据金额" - return "\n".join( - [ - "报销测算参考:", - "", - f"职级 {grade},目的地 {location},匹配城市 {result.matched_city};补齐交通、酒店等票据后,我会按真实票据金额和规则中心标准重新复核。", - "", - "| 项目 | 测算口径 | 金额 |", - "| --- | --- | ---: |", - f"| 交通票据 | {ticket_basis} | {self._format_decimal_amount(ticket_amount)} 元 |", - f"| 住宿标准 | {self._format_decimal_amount(result.hotel_rate)} 元/天 × {days} 天 | {self._format_decimal_amount(result.hotel_amount)} 元 |", - f"| 出差补贴 | {self._format_decimal_amount(result.total_allowance_rate)} 元/天 × {days} 天 | {self._format_decimal_amount(result.allowance_amount)} 元 |", - f"| 参考合计 | 交通票据 + 住宿标准 + 出差补贴 | {self._format_decimal_amount(total_amount)} 元 |", - ] - ) - - @staticmethod - def _should_preview_as_travel(*, message: str, context_json: dict[str, Any]) -> bool: - text_parts = [message] - review_form_values = context_json.get("review_form_values") - if isinstance(review_form_values, dict): - text_parts.extend(str(value or "") for value in review_form_values.values()) - text_parts.extend(str(context_json.get(key) or "") for key in ("user_input_text", "raw_text", "ocr_summary")) - compact = "".join(text_parts) - return any(keyword in compact for keyword in ("差旅", "出差", "火车票", "机票", "酒店", "住宿票")) def delete_claim(self, claim_id: str, current_user: CurrentUserContext) -> ExpenseClaim | None: claim = self.get_claim(claim_id, current_user) if claim is None: return None - if not self._has_claim_delete_access(current_user): + if not self._access_policy.has_claim_delete_access(current_user): self._ensure_draft_claim(claim) - if not self._is_claim_owned_by_current_user(claim, current_user): + if not self._access_policy.is_claim_owned_by_current_user(claim, current_user): raise 
ValueError("只有高级管理人员可以删除非本人单据,申请人仅可删除自己的草稿、待补充或退回单据。") before_json = self._serialize_claim(claim) resource_id = claim.id - self._delete_claim_attachment_files(claim) + self._attachment_storage.delete_claim_files(claim) self.db.delete(claim) self.db.commit() @@ -1266,7 +451,7 @@ class ExpenseClaimService: if claim is None: return None - if not self._can_return_claim(current_user, claim): + if not self._access_policy.can_return_claim(current_user, claim): raise ValueError("只有财务人员、高级管理人员或当前审批人可以退回报销单。") normalized_status = str(claim.status or "").strip().lower() @@ -1278,7 +463,7 @@ class ExpenseClaimService: raise ValueError("已完成单据不允许退回。") before_json = self._serialize_claim(claim) - operator = self._resolve_current_user_display_name(current_user) + operator = self._access_policy.resolve_current_user_display_name(current_user) previous_status = str(claim.status or "").strip() previous_stage = str(claim.approval_stage or "").strip() or "未标记审批环节" previous_stage_key = self._normalize_return_stage_key(previous_stage) @@ -1368,7 +553,7 @@ class ExpenseClaimService: previous_stage = str(claim.approval_stage or "").strip() if previous_stage == "直属领导审批": - if not self._can_approve_claim(current_user, claim): + if not self._access_policy.can_approve_claim(current_user, claim): raise ValueError("只有当前直属领导审批人可以审批通过该报销单。") approval_source = "manual_approval" event_type = "expense_claim_approval" @@ -1377,7 +562,7 @@ class ExpenseClaimService: next_stage = "财务审批" default_message = "{operator} 已审批通过,流转至{next_stage}。" elif previous_stage == "财务审批": - if not self._can_approve_claim(current_user, claim): + if not self._access_policy.can_approve_claim(current_user, claim): raise ValueError("只有财务人员可以完成财务终审。") approval_source = "finance_approval" event_type = "expense_claim_finance_approval" @@ -1389,7 +574,7 @@ class ExpenseClaimService: raise ValueError("当前节点不支持审批通过。") before_json = self._serialize_claim(claim) - operator = self._resolve_current_user_display_name(current_user) + 
operator = self._access_policy.resolve_current_user_display_name(current_user) approval_opinion = str(opinion or "").strip() approval_flag = { "source": approval_source, @@ -1433,5321 +618,110 @@ class ExpenseClaimService: return claim - def upsert_draft_from_ontology( - self, - *, - run_id: str, - user_id: str | None, - message: str, - ontology: OntologyParseResult, - context_json: dict[str, Any], - ) -> dict[str, Any]: - self._ensure_ready() - context_json = dict(context_json or {}) - retry_count = self._resolve_claim_no_retry_count(context_json) - review_action = str(context_json.get("review_action") or "").strip() - attachment_names = self._resolve_attachment_names(context_json) - context_documents = self._resolve_context_documents(context_json) - employee = self._resolve_employee( - ontology=ontology, - context_json=context_json, - user_id=user_id, - ) - draft_owner_name = ( - employee.name - if employee is not None - else self._resolve_employee_name( - ontology=ontology, - context_json=context_json, - user_id=user_id, - ) - ) - association_candidate = self._find_association_candidate( - ontology=ontology, - context_json=context_json, - user_id=user_id, - employee=employee, - ) - if self._should_defer_multi_document_association( - context_json=context_json, - review_action=review_action, - association_candidate=association_candidate, - context_documents=context_documents, - ): - document_count = max(len(context_documents), len(attachment_names), self._resolve_attachment_count(context_json)) - return { - "message": ( - f"检测到你已有草稿 {association_candidate.claim_no}," - f"当前新上传了 {document_count} 张票据,请先选择关联到现有草稿,或单独建立新的报销单。" - ), - "draft_only": False, - "status": "pending_association_decision", - "pending_association_decision": True, - "association_candidate_claim_id": association_candidate.id, - "association_candidate_claim_no": association_candidate.claim_no, - } - claim = self._find_target_claim( - ontology=ontology, - context_json=context_json, - 
review_action=review_action, - association_candidate=association_candidate, - ) - is_new_claim = claim is None - before_json = self._serialize_claim(claim) if claim is not None else None - if is_new_claim: - existing_draft_count = self._count_draft_claims_for_owner( - employee=employee, - user_id=user_id, - ) - if existing_draft_count >= MAX_DRAFT_CLAIMS_PER_USER: - return { - "message": ( - f"你当前已保存 {MAX_DRAFT_CLAIMS_PER_USER} 个草稿,请先完成已保存的草稿," - "才能再次新建草稿。" - ), - "draft_limit_reached": True, - "draft_only": False, - "status": "blocked", - "draft_count": existing_draft_count, - "max_draft_count": MAX_DRAFT_CLAIMS_PER_USER, - } - amount = self._resolve_amount(ontology.entities, context_json=context_json) - occurred_at = self._resolve_occurred_at(ontology, context_json=context_json) - explicit_expense_type = self._resolve_explicit_review_expense_type(context_json) - inferred_expense_type = self._resolve_expense_type(ontology.entities, context_json=context_json) - locked_expense_type = explicit_expense_type - if not locked_expense_type and claim is not None and review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS: - locked_expense_type = str(claim.expense_type or "").strip() - expense_type = locked_expense_type or inferred_expense_type - location = self._resolve_location(message=message, context_json=context_json) - reason = self._resolve_reason( - message=message, - context_json=context_json, - allow_message_fallback=is_new_claim, - ) - attachment_count = len(attachment_names) or self._resolve_attachment_count(context_json) - final_amount = amount if amount is not None else (claim.amount if claim is not None else Decimal("0.00")) - final_occurred_at = ( - occurred_at if occurred_at is not None else (claim.occurred_at if claim is not None else datetime.now(UTC)) - ) - final_expense_type = expense_type or (claim.expense_type if claim is not None else "other") - final_location = location or (claim.location if claim is not None else "待补充") - final_reason = reason or 
(claim.reason if claim is not None else "待补充") - final_attachment_count = ( - attachment_count if attachment_count > 0 else int(claim.invoice_count or 0) if claim is not None else 0 - ) - final_risk_flags = self._merge_persistent_claim_risk_flags( - existing_flags=list(claim.risk_flags_json or []) if claim is not None else [], - next_flags=list(ontology.risk_flags), - ) - if context_documents or attachment_names: - document_specs = self._build_context_item_specs( - context_documents=context_documents, - attachment_names=attachment_names, - occurred_at=final_occurred_at, - expense_type=final_expense_type, - amount=final_amount, - reason=final_reason, - location=final_location, - context_json=context_json, - employee_grade=str(employee.grade or "").strip() if employee is not None else "", - user_id=user_id, - ) - else: - document_specs = [] - if claim is not None and review_action == "link_to_existing_draft" and document_specs: - duplicate_result = self._build_duplicate_attachment_block_result( - claim=claim, - document_specs=document_specs, - context_documents=context_documents, - ) - if duplicate_result is not None: - return duplicate_result - try: - if claim is None: - claim = ExpenseClaim( - claim_no=self._generate_claim_no(final_occurred_at), - employee_id=employee.id if employee is not None else None, - employee_name=draft_owner_name, - department_id=employee.organization_unit_id if employee is not None else None, - department_name=self._resolve_department_name( - employee=employee, - context_json=context_json, - ), - project_code=self._resolve_project_code(ontology.entities), - expense_type=final_expense_type, - reason=final_reason, - location=final_location, - amount=final_amount, - currency="CNY", - invoice_count=final_attachment_count, - occurred_at=final_occurred_at, - status="draft", - approval_stage="待提交", - risk_flags_json=final_risk_flags, - ) - self.db.add(claim) - else: - claim.employee_id = employee.id if employee is not None else claim.employee_id 
- claim.employee_name = ( - employee.name - if employee is not None - else self._resolve_employee_name( - ontology=ontology, - context_json=context_json, - user_id=user_id, - fallback=claim.employee_name, - ) - ) - claim.department_id = employee.organization_unit_id if employee is not None else claim.department_id - claim.department_name = self._resolve_department_name( - employee=employee, - context_json=context_json, - fallback=claim.department_name, - ) - claim.project_code = self._resolve_project_code(ontology.entities) or claim.project_code - claim.expense_type = final_expense_type - claim.reason = final_reason - claim.location = final_location - claim.amount = final_amount - claim.invoice_count = final_attachment_count - claim.occurred_at = final_occurred_at - claim.status = "draft" - claim.approval_stage = "待提交" - claim.risk_flags_json = final_risk_flags - self.db.flush() - if document_specs and (is_new_claim or review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS): - if review_action == "link_to_existing_draft" and claim.items: - self._append_document_items( - claim=claim, - item_specs=document_specs, - ) - else: - self._replace_claim_items( - claim=claim, - item_specs=document_specs, - ) - self._sync_claim_from_items(claim) - else: - self._upsert_primary_item( - claim=claim, - occurred_at=final_occurred_at, - expense_type=final_expense_type, - amount=final_amount, - reason=final_reason, - location=final_location, - attachment_names=attachment_names, - ) - self._sync_claim_from_items(claim) - if locked_expense_type: - claim.expense_type = locked_expense_type - self.db.commit() - self.db.refresh(claim) - except IntegrityError as exc: - self.db.rollback() - if ( - is_new_claim - and retry_count < MAX_CLAIM_NO_RETRY_ATTEMPTS - and self._is_claim_no_conflict_error(exc) - ): - retry_context = dict(context_json) - retry_context["_claim_no_retry_count"] = retry_count + 1 - return self.upsert_draft_from_ontology( - run_id=run_id, - user_id=user_id, - 
message=message, - ontology=ontology, - context_json=retry_context, - ) - raise - except Exception: - self.db.rollback() - raise - self.audit_service.log_action( - actor=user_id or claim.employee_name or "anonymous", - action="expense_claim.draft_upsert", - resource_type="expense_claim", - resource_id=claim.id, - before_json=before_json, - after_json=self._serialize_claim(claim), - request_id=run_id, - ) - return { - "message": ( - f"已{'创建' if is_new_claim else '更新'}报销草稿 {claim.claim_no},当前状态为 draft。" - "请核对识别结果,确认无误后继续提交。" - ), - "draft_only": True, - "claim_id": claim.id, - "claim_no": claim.claim_no, - "status": claim.status, - "amount": float(claim.amount), - "invoice_count": int(claim.invoice_count or 0), - } - def _find_target_claim( - self, - *, - ontology: OntologyParseResult, - context_json: dict[str, Any], - review_action: str = "", - association_candidate: ExpenseClaim | None = None, - ) -> ExpenseClaim | None: - if review_action == "create_new_claim_from_documents": - return None - if review_action == "link_to_existing_draft" and association_candidate is not None: - return association_candidate - draft_claim_id = str(context_json.get("draft_claim_id") or "").strip() - if draft_claim_id: - claim = self.db.get(ExpenseClaim, draft_claim_id) - if claim is not None and self._is_editable_claim_status(claim.status): - return claim - return None - claim_codes = [ - item.normalized_value - for item in ontology.entities - if item.type == "expense_claim" and item.normalized_value - ] - if not claim_codes: - return None - stmt = ( - select(ExpenseClaim) - .where(ExpenseClaim.claim_no.in_(claim_codes)) - .where(ExpenseClaim.status.in_(EDITABLE_CLAIM_STATUSES)) - .limit(1) - ) - return self.db.scalar(stmt) - def _find_association_candidate( - self, - *, - ontology: OntologyParseResult, - context_json: dict[str, Any], - user_id: str | None, - employee: Employee | None, - ) -> ExpenseClaim | None: - draft_claim_id = str(context_json.get("draft_claim_id") or "").strip() 
- if draft_claim_id: - claim = self.db.get(ExpenseClaim, draft_claim_id) - if claim is not None and self._is_editable_claim_status(claim.status): - return claim - owner_filters = self._build_draft_owner_filters( - employee=employee, - user_id=user_id, - ) - if not owner_filters: - fallback_name = self._resolve_employee_name( - ontology=ontology, - context_json=context_json, - user_id=user_id, - fallback="", - ) - if fallback_name: - owner_filters = [ExpenseClaim.employee_name == fallback_name] - if not owner_filters: - return None - stmt = ( - select(ExpenseClaim) - .where(ExpenseClaim.status.in_(EDITABLE_CLAIM_STATUSES)) - .where(or_(*owner_filters)) - .order_by(ExpenseClaim.updated_at.desc(), ExpenseClaim.created_at.desc()) - .limit(1) - ) - return self.db.scalar(stmt) - def _should_defer_multi_document_association( - self, - *, - context_json: dict[str, Any], - review_action: str, - association_candidate: ExpenseClaim | None, - context_documents: list[dict[str, Any]], - ) -> bool: - if association_candidate is None: - return False - if review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS: - return False - document_count = max( - len(context_documents), - len(self._resolve_attachment_names(context_json)), - self._resolve_attachment_count(context_json), - ) - return document_count > 1 - def _resolve_context_documents(self, context_json: dict[str, Any]) -> list[dict[str, Any]]: - documents = context_json.get("ocr_documents") - if not isinstance(documents, list): - documents = [] - normalized: list[dict[str, Any]] = [] - for index, item in enumerate(documents[:10], start=1): - if not isinstance(item, dict): - continue - normalized.append( - { - "index": index, - "filename": str(item.get("filename") or "").strip(), - "summary": str(item.get("summary") or "").strip(), - "text": str(item.get("text") or "").strip(), - "document_type": str(item.get("document_type") or "").strip(), - "scene_code": str(item.get("scene_code") or "").strip(), - "scene_label": 
str(item.get("scene_label") or "").strip(), - "document_fields": self._normalize_document_fields(item.get("document_fields")), - } - ) - overrides = context_json.get("review_document_form_values") - if not isinstance(overrides, list) or not normalized: - return normalized - override_map: dict[tuple[int, str], dict[str, Any]] = {} - for item in overrides: - if not isinstance(item, dict): - continue - filename = str(item.get("filename") or "").strip() - index = int(item.get("index") or 0) - if not filename and index <= 0: - continue - override_map[(index, filename)] = item - for item in normalized: - override = override_map.get((int(item["index"]), str(item["filename"]))) - if override is None: - override = override_map.get((int(item["index"]), "")) - if override is None: - continue - summary = str(override.get("summary") or "").strip() - scene_label = str(override.get("scene_label") or "").strip() - fields = override.get("fields") - if summary: - item["summary"] = summary - if scene_label: - item["scene_label"] = scene_label - if isinstance(fields, list): - item["document_fields"] = self._normalize_document_fields(fields) - return normalized - @staticmethod - def _normalize_document_fields(raw_fields: Any) -> list[dict[str, str]]: - if not isinstance(raw_fields, list): - return [] - normalized: list[dict[str, str]] = [] - for field in raw_fields: - if not isinstance(field, dict): - continue - label = str(field.get("label") or "").strip() - value = str(field.get("value") or "").strip() - key = str(field.get("key") or label or "").strip() - if not label or not value: - continue - normalized.append( - { - "key": key, - "label": label, - "value": value, - } - ) - return normalized - def _build_context_item_specs( - self, - *, - context_documents: list[dict[str, Any]], - attachment_names: list[str], - occurred_at: datetime, - expense_type: str, - amount: Decimal, - reason: str, - location: str, - context_json: dict[str, Any], - employee_grade: str | None = None, - 
user_id: str = "", - ) -> list[dict[str, Any]]: - specs: list[dict[str, Any]] = [] - if context_documents: - for document in context_documents: - specs.append( - { - "item_date": self._resolve_document_item_date(document, fallback=occurred_at.date()), - "item_type": self._resolve_document_item_type(document, fallback=expense_type), - "item_reason": self._resolve_document_item_reason(document, fallback=reason), - "item_location": location, - "item_amount": self._resolve_document_item_amount(document), - "invoice_id": str(document.get("filename") or "").strip() or None, - } - ) - elif attachment_names: - for attachment_name in attachment_names: - specs.append( - { - "item_date": occurred_at.date(), - "item_type": expense_type, - "item_reason": reason, - "item_location": location, - "item_amount": None, - "invoice_id": attachment_name, - } - ) - if not specs: - return [] - total_recognized = sum( - spec["item_amount"] for spec in specs if isinstance(spec.get("item_amount"), Decimal) - ) - missing_specs = [spec for spec in specs if spec.get("item_amount") is None] - if missing_specs: - remaining = (amount - total_recognized).quantize(Decimal("0.01")) - if remaining > Decimal("0.00"): - missing_specs[0]["item_amount"] = remaining - for spec in specs: - if spec.get("item_amount") is None: - spec["item_amount"] = Decimal("0.00") - allowance_spec = self._build_travel_allowance_item_spec( - context_documents=context_documents, - specs=specs, - occurred_at=occurred_at, - expense_type=expense_type, - location=location, - context_json=context_json, - employee_grade=employee_grade, - user_id=user_id, - ) - if allowance_spec is not None: - specs = [spec for spec in specs if str(spec.get("item_type") or "").strip() != "travel_allowance"] - specs.append(allowance_spec) - return specs - def _build_travel_allowance_item_spec( - self, - *, - context_documents: list[dict[str, Any]], - specs: list[dict[str, Any]], - occurred_at: datetime, - expense_type: str, - location: str, - 
context_json: dict[str, Any], - employee_grade: str | None, - user_id: str, - ) -> dict[str, Any] | None: - if not self._should_add_travel_allowance_item( - expense_type=expense_type, - context_documents=context_documents, - context_json=context_json, - ): - return None - grade = str(employee_grade or context_json.get("grade") or "").strip() - if not grade: - return None - days, _, end_date = self._resolve_travel_allowance_days( - context_json=context_json, - occurred_at=occurred_at, - ) - allowance_location = self._resolve_travel_allowance_location( - location=location, - context_documents=context_documents, - ) - if days < 1 or not allowance_location: - return None - try: - from app.services.travel_reimbursement_calculator import ( - TravelReimbursementCalculatorService, - ) - result = TravelReimbursementCalculatorService(self.db).calculate( - TravelReimbursementCalculatorRequest( - days=days, - location=allowance_location, - grade=grade, - ), - CurrentUserContext( - username=user_id, - name="", - role_codes=[], - is_admin=False, - ), - ) - except ValueError: - return None - allowance_amount = Decimal(result.allowance_amount or Decimal("0.00")).quantize(Decimal("0.01")) - allowance_rate = Decimal(result.total_allowance_rate or Decimal("0.00")).quantize(Decimal("0.01")) - if allowance_amount <= Decimal("0.00") or allowance_rate <= Decimal("0.00"): - return None - return { - "item_date": end_date, - "item_type": "travel_allowance", - "item_reason": ( - f"系统自动计算出差补贴:{result.matched_city},{days}天," - f"{allowance_rate:.2f}元/天" - ), - "item_location": str(result.allowance_region or allowance_location).strip(), - "item_amount": allowance_amount, - "invoice_id": None, - } - @staticmethod - def _should_add_travel_allowance_item( - *, - expense_type: str, - context_documents: list[dict[str, Any]], - context_json: dict[str, Any], - ) -> bool: - normalized_expense_type = str(expense_type or "").strip().lower() - if normalized_expense_type == "travel": - return True - 
review_form_values = context_json.get("review_form_values") - if isinstance(review_form_values, dict): - review_type = str( - review_form_values.get("expense_type") - or review_form_values.get("scene_label") - or review_form_values.get("reason_value") - or "" - ) - if any(keyword in review_type for keyword in ("差旅", "出差")): - return True - for document in context_documents: - document_type = str(document.get("document_type") or "").strip() - scene_code = str(document.get("scene_code") or "").strip() - if document_type in {"train_ticket", "flight_itinerary"} or scene_code == "travel": - return True - return False - def _resolve_travel_allowance_days( - self, - *, - context_json: dict[str, Any], - occurred_at: datetime, - ) -> tuple[int, date, date]: - start_date = occurred_at.date() - end_date = start_date - explicit_days = self._extract_travel_allowance_days_from_context(context_json) - business_time_context = context_json.get("business_time_context") - if isinstance(business_time_context, dict): - start_date = self._parse_iso_date_or_default(business_time_context.get("start_date"), start_date) - end_date = self._parse_iso_date_or_default(business_time_context.get("end_date"), start_date) - else: - review_form_values = context_json.get("review_form_values") - if isinstance(review_form_values, dict): - time_text = str( - review_form_values.get("time_range") - or review_form_values.get("business_time") - or review_form_values.get("occurred_date") - or "" - ).strip() - matched_dates = re.findall(r"\d{4}-\d{2}-\d{2}", time_text) - if matched_dates: - start_date = self._parse_iso_date_or_default(matched_dates[0], start_date) - end_date = self._parse_iso_date_or_default(matched_dates[-1], start_date) - if end_date < start_date: - end_date = start_date - if explicit_days > 0: - return explicit_days, start_date, start_date + timedelta(days=explicit_days - 1) - days = (end_date - start_date).days + 1 - return max(1, days), start_date, end_date - @staticmethod - def 
_extract_travel_allowance_days_from_context(context_json: dict[str, Any]) -> int: - review_form_values = context_json.get("review_form_values") - text_parts: list[str] = [] - if isinstance(review_form_values, dict): - text_parts.extend( - str(review_form_values.get(key) or "") - for key in ( - "reason", - "business_reason", - "reason_value", - "scene_label", - "time_range", - "business_time", - ) - ) - text_parts.extend( - str(context_json.get(key) or "") - for key in ("user_input_text", "message", "raw_text", "ocr_summary") - ) - return ExpenseClaimService._extract_travel_day_count(" ".join(text_parts)) - @staticmethod - def _extract_travel_day_count(text: str) -> int: - normalized = str(text or "").replace(" ", "") - if not normalized: - return 0 - patterns = ( - r"(?:出差|差旅|行程|支撑|支持|部署|项目|业务)\D{0,12}?(\d{1,2})天", - r"(\d{1,2})天(?:出差|差旅|行程)", - ) - for pattern in patterns: - match = re.search(pattern, normalized) - if not match: - continue - try: - return max(1, int(match.group(1))) - except ValueError: - continue - return 0 - @staticmethod - def _parse_iso_date_or_default(value: Any, fallback: date) -> date: - try: - return date.fromisoformat(str(value or "").strip()) - except ValueError: - return fallback - @staticmethod - def _resolve_travel_allowance_location( - *, - location: str, - context_documents: list[dict[str, Any]], - ) -> str: - normalized_location = str(location or "").strip() - if normalized_location and normalized_location not in {"待补充", "未知", "暂无"}: - return normalized_location - for document in context_documents: - for field in list(document.get("document_fields") or []): - if not isinstance(field, dict): - continue - key = str(field.get("key") or "").strip().lower() - label = str(field.get("label") or "").strip() - value = str(field.get("value") or "").strip() - if key == "route" or "行程" in label: - separators = ("-", "至", "→", "->") - for separator in separators: - if separator in value: - return value.split(separator)[-1].strip() - if key in 
{"destination", "arrival_city"} or label in {"目的地", "到达城市"}: - return value - return "" - def _replace_claim_items( - self, - *, - claim: ExpenseClaim, - item_specs: list[dict[str, Any]], - ) -> None: - existing_items = sorted( - list(claim.items), - key=lambda item: ( - item.item_date or date.max, - self._normalize_sort_datetime(item.created_at), - ), - ) - for index, spec in enumerate(item_specs): - item = existing_items[index] if index < len(existing_items) else None - if item is None: - item = ExpenseClaimItem(claim_id=claim.id) - claim.items.append(item) - self.db.add(item) - item.item_date = spec["item_date"] - item.item_type = spec["item_type"] - item.item_reason = spec["item_reason"] - item.item_location = spec["item_location"] - item.item_amount = spec["item_amount"] - item.invoice_id = ( - None - if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES - else self._merge_attachment_reference(item.invoice_id, spec["invoice_id"]) - ) - for stale_item in existing_items[len(item_specs) :]: - claim.items.remove(stale_item) - self.db.delete(stale_item) - def _append_document_items( - self, - *, - claim: ExpenseClaim, - item_specs: list[dict[str, Any]], - ) -> None: - system_specs = [ - spec for spec in item_specs if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES - ] - normal_specs = [ - spec for spec in item_specs if str(spec.get("item_type") or "").strip() not in SYSTEM_GENERATED_ITEM_TYPES - ] - existing_invoice_ids = { - str(item.invoice_id or "").strip() - for item in claim.items - if str(item.invoice_id or "").strip() - } - existing_invoice_names = { - self._resolve_attachment_display_name(item.invoice_id) - for item in claim.items - if str(item.invoice_id or "").strip() - } - for spec in normal_specs: - invoice_id = str(spec.get("invoice_id") or "").strip() - invoice_name = self._resolve_attachment_display_name(invoice_id) - if invoice_id and (invoice_id in existing_invoice_ids or invoice_name in 
existing_invoice_names): - continue - claim.items.append( - ExpenseClaimItem( - claim_id=claim.id, - item_date=spec["item_date"], - item_type=spec["item_type"], - item_reason=spec["item_reason"], - item_location=spec["item_location"], - item_amount=spec["item_amount"], - invoice_id=spec["invoice_id"], - ) - ) - self.db.add(claim.items[-1]) - if invoice_id: - existing_invoice_ids.add(invoice_id) - existing_invoice_names.add(invoice_name) - if system_specs: - existing_system_items = [ - item for item in list(claim.items) if str(item.item_type or "").strip() in SYSTEM_GENERATED_ITEM_TYPES - ] - for stale_item in existing_system_items: - claim.items.remove(stale_item) - self.db.delete(stale_item) - for spec in system_specs: - claim.items.append( - ExpenseClaimItem( - claim_id=claim.id, - item_date=spec["item_date"], - item_type=spec["item_type"], - item_reason=spec["item_reason"], - item_location=spec["item_location"], - item_amount=spec["item_amount"], - invoice_id=spec["invoice_id"], - ) - ) - self.db.add(claim.items[-1]) - def _build_duplicate_attachment_block_result( - self, - *, - claim: ExpenseClaim, - document_specs: list[dict[str, Any]], - context_documents: list[dict[str, Any]], - ) -> dict[str, Any] | None: - duplicate_matches = self._find_duplicate_attachment_matches( - claim=claim, - document_specs=document_specs, - context_documents=context_documents, - ) - if not duplicate_matches: - return None - duplicate_labels = list( - dict.fromkeys( - str(item.get("incoming_label") or item.get("existing_label") or "").strip() - for item in duplicate_matches - if str(item.get("incoming_label") or item.get("existing_label") or "").strip() - ) - ) - duplicate_text = "、".join(duplicate_labels[:3]) or "本次上传票据" - reason = ( - f"检测到本次上传的票据与草稿 {claim.claim_no} 中已有票据重复:{duplicate_text}。" - "请重新上传不同的票据后再归集。" - ) - return { - "message": reason, - "draft_only": False, - "status": "blocked", - "duplicate_attachment_blocked": True, - "duplicate_invoice_blocked": True, - 
"submission_blocked": True, - "submission_blocked_reasons": [reason], - "missing_fields": [reason], - "risk_flags": ["duplicate_invoice"], - "duplicate_attachments": duplicate_matches, - "claim_id": claim.id, - "claim_no": claim.claim_no, - "amount": float(claim.amount or Decimal("0.00")), - "invoice_count": int(claim.invoice_count or 0), - } - def _find_duplicate_attachment_matches( - self, - *, - claim: ExpenseClaim, - document_specs: list[dict[str, Any]], - context_documents: list[dict[str, Any]], - ) -> list[dict[str, str]]: - existing_tokens: dict[str, dict[str, str]] = {} - for item in list(claim.items or []): - if str(item.item_type or "").strip() in SYSTEM_GENERATED_ITEM_TYPES: - continue - invoice_id = str(item.invoice_id or "").strip() - if not invoice_id: - continue - display_name = self._resolve_attachment_display_name(invoice_id) - for token in self._build_duplicate_attachment_tokens(invoice_id): - existing_tokens.setdefault( - token, - { - "existing_label": display_name or invoice_id, - "existing_item_id": str(item.id or ""), - "match_type": "filename", - }, - ) - file_path = self._resolve_item_attachment_path(item) - if file_path is not None and file_path.exists(): - metadata = self._read_attachment_meta(file_path) - document_info = metadata.get("document_info") - if isinstance(document_info, dict): - for invoice_key in self._collect_invoice_keys_from_document_info(document_info): - token = self._normalize_duplicate_attachment_token(invoice_key) - if token: - existing_tokens.setdefault( - token, - { - "existing_label": display_name or invoice_id, - "existing_item_id": str(item.id or ""), - "match_type": "invoice_key", - }, - ) - if not existing_tokens: - return [] - document_by_filename = { - str(document.get("filename") or "").strip(): document - for document in context_documents - if isinstance(document, dict) and str(document.get("filename") or "").strip() - } - matches: list[dict[str, str]] = [] - seen_tokens: set[str] = set() - for spec in 
document_specs: - if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES: - continue - invoice_id = str(spec.get("invoice_id") or "").strip() - if not invoice_id: - continue - incoming_tokens = self._build_duplicate_attachment_tokens(invoice_id) - document = document_by_filename.get(invoice_id) - if document is not None: - incoming_tokens.extend( - self._normalize_duplicate_attachment_token(invoice_key) - for invoice_key in self._collect_invoice_keys_from_incoming_document(document) - ) - for token in incoming_tokens: - if not token or token in seen_tokens or token not in existing_tokens: - continue - seen_tokens.add(token) - existing = existing_tokens[token] - matches.append( - { - "incoming_label": self._resolve_attachment_display_name(invoice_id) or invoice_id, - "existing_label": existing.get("existing_label", ""), - "existing_item_id": existing.get("existing_item_id", ""), - "match_type": existing.get("match_type", "filename"), - } - ) - return matches - @classmethod - def _build_duplicate_attachment_tokens(cls, value: str | None) -> list[str]: - raw = str(value or "").strip() - display_name = cls._resolve_attachment_display_name(raw) - candidates = [raw, display_name] - return list( - dict.fromkeys( - token - for token in (cls._normalize_duplicate_attachment_token(candidate) for candidate in candidates) - if token - ) - ) - @staticmethod - def _normalize_duplicate_attachment_token(value: str | None) -> str: - normalized = Path(str(value or "").strip()).name.lower() - normalized = re.sub(r"\s+", "", normalized) - normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._") - return normalized - def _collect_invoice_keys_from_incoming_document(self, document: dict[str, Any]) -> list[str]: - document_info = dict(document or {}) - if "fields" not in document_info and isinstance(document_info.get("document_fields"), list): - document_info["fields"] = document_info.get("document_fields") - return 
self._collect_invoice_keys_from_document_info(document_info) - def _resolve_document_item_type(self, document: dict[str, Any], *, fallback: str) -> str: - document_type = str(document.get("document_type") or "").strip() - mapped_type = DOCUMENT_TYPE_ITEM_TYPE_MAP.get(document_type) - if mapped_type: - return mapped_type - scene_code = str(document.get("scene_code") or "").strip() - if scene_code in {"travel", "hotel", "transport", "meal", "office", "meeting", "training"}: - return scene_code - if document_type in {"flight_itinerary", "train_ticket"}: - return "travel" - if document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"}: - return "transport" - if document_type == "hotel_invoice": - return "hotel" - if document_type == "meal_receipt": - return "meal" - if document_type == "office_invoice": - return "office" - if document_type == "meeting_invoice": - return "meeting" - if document_type == "training_invoice": - return "training" - scene_label = str(document.get("scene_label") or "").strip() - if "交通" in scene_label: - return "transport" - if "住宿" in scene_label: - return "hotel" - if "餐" in scene_label: - return "meal" - if "会务" in scene_label or "会议" in scene_label: - return "meeting" - if "培训" in scene_label: - return "training" - return fallback or "other" - def _resolve_document_item_reason(self, document: dict[str, Any], *, fallback: str) -> str: - document_type = str(document.get("document_type") or "").strip().lower() - item_type = self._resolve_document_item_type(document, fallback="") - if document_type in {"train_ticket", "flight_itinerary"} or item_type in {"train_ticket", "flight_ticket"}: - route = self._resolve_document_route_value(document) - trip_no = self._resolve_document_fact_field( - document, - keys={"trip_no", "flight_no", "train_no"}, - labels={"车次", "航班"}, - ) - if route and trip_no: - return f"{self._format_document_route(route)}({trip_no})" - if route: - return self._format_document_route(route) - if 
document_type in {"taxi_receipt", "transport_receipt"} or item_type == "ride_ticket": - route = self._resolve_document_route_value(document) - if route: - return self._format_document_route(route) - if document_type == "hotel_invoice" or item_type == "hotel_ticket": - merchant = self._resolve_document_fact_field( - document, - keys={"merchant_name", "merchant", "seller_name", "vendor_name", "hotel_name"}, - labels={"商户", "酒店", "宾馆", "销售方", "开票方"}, - ) - stay_range = self._resolve_document_stay_range(document) - if merchant and stay_range: - return f"{merchant},{stay_range}" - if merchant: - return merchant - if stay_range: - return stay_range - merchant = self._resolve_document_fact_field( - document, - keys={"merchant_name", "merchant", "seller_name", "vendor_name"}, - labels={"商户", "销售方", "开票方", "收款方"}, - ) - if merchant: - return merchant - summary = str(document.get("summary") or "").strip() - return summary or fallback or "" - def _resolve_document_route_value(self, document: dict[str, Any]) -> str: - route = self._resolve_document_fact_field( - document, - keys={"route", "trip_route"}, - labels={"行程", "路线"}, - ) - if route: - return route - origin = self._resolve_document_fact_field( - document, - keys={ - "origin", - "from", - "from_city", - "departure", - "departure_city", - "start", - "start_location", - "start_address", - "pickup_location", - "pickup_address", - "boarding_station", - }, - labels=DOCUMENT_ROUTE_ORIGIN_LABELS, - ) - destination = self._resolve_document_fact_field( - document, - keys={ - "destination", - "to", - "to_city", - "arrival", - "arrival_city", - "end", - "end_location", - "end_address", - "dropoff_location", - "dropoff_address", - "alighting_station", - }, - labels=DOCUMENT_ROUTE_DESTINATION_LABELS, - ) - if origin and destination: - return f"{origin}-{destination}" - text = " ".join( - [ - str(document.get("summary") or "").strip(), - str(document.get("text") or "").strip(), - ] - ).strip() - text_route = 
self._extract_document_route_from_text(text) - if text_route: - return text_route - text_origin = self._extract_document_labeled_text_value(text, DOCUMENT_ROUTE_ORIGIN_LABELS) - text_destination = self._extract_document_labeled_text_value(text, DOCUMENT_ROUTE_DESTINATION_LABELS) - if text_origin and text_destination: - return f"{text_origin}-{text_destination}" - return "" - @staticmethod - def _resolve_document_fact_field( - document: dict[str, Any], - *, - keys: set[str], - labels: set[str], - ) -> str: - raw_fields = document.get("document_fields") - if not isinstance(raw_fields, list): - raw_fields = document.get("fields") - if not isinstance(raw_fields, list): - return "" - normalized_keys = {str(key or "").strip().lower().replace("_", "") for key in keys} - for field in raw_fields: - if not isinstance(field, dict): - continue - field_key = str(field.get("key") or "").strip().lower().replace("_", "") - label = str(field.get("label") or "").replace(" ", "") - value = str(field.get("value") or "").strip() - if not value: - continue - if field_key in normalized_keys or any(token in label for token in labels): - return value - return "" - @staticmethod - def _format_document_route(route: str) -> str: - normalized = ( - str(route or "") - .strip() - .replace("->", "-") - .replace("→", "-") - .replace("—", "-") - .replace("–", "-") - .replace("至", "-") - .replace("到", "-") - ) - if "-" not in normalized: - return str(route or "").strip() - origin, destination = [part.strip() for part in normalized.split("-", 1)] - origin = origin.removeprefix("从").strip() - destination = destination.removeprefix("至").removeprefix("到").strip() - if not origin or not destination or origin == destination: - return str(route or "").strip() - return f"{origin}-{destination}" - @staticmethod - def _extract_document_route_from_text(text: str) -> str: - for match in DOCUMENT_ROUTE_TEXT_PATTERN.finditer(str(text or "")): - origin = str(match.group(1) or "").strip() - destination = 
str(match.group(2) or "").strip() - if not origin or not destination or origin == destination: - continue - if origin.isdigit() and destination.isdigit(): - continue - if DOCUMENT_DATE_PATTERN.search(f"{origin}-{destination}"): - continue - return f"{origin}-{destination}" - return "" - @staticmethod - def _extract_document_labeled_text_value(text: str, labels: set[str]) -> str: - for label in sorted(labels, key=len, reverse=True): - pattern = re.compile( - rf"{re.escape(label)}[::\s]*" - r"([A-Za-z0-9\u4e00-\u9fa5()()·\-路街道号弄区县市省园桥站机场中心]{2,50})" - ) - match = pattern.search(str(text or "")) - if match: - return str(match.group(1) or "").strip() - return "" - def _resolve_document_stay_range(self, document: dict[str, Any]) -> str: - check_in = self._resolve_document_fact_field( - document, - keys={"check_in", "checkin", "arrival_date", "start_date"}, - labels={"入住", "入住日期", "到店", "开始日期"}, - ) - check_out = self._resolve_document_fact_field( - document, - keys={"check_out", "checkout", "departure_date", "end_date"}, - labels={"离店", "退房", "离店日期", "结束日期"}, - ) - if check_in and check_out: - return f"{check_in}至{check_out}" - nights = self._resolve_document_fact_field( - document, - keys={"nights", "night_count", "room_nights"}, - labels={"间夜", "晚数", "入住天数"}, - ) - if nights: - return f"{nights}晚" - return "" - def _resolve_document_item_amount(self, document: dict[str, Any]) -> Decimal | None: - return resolve_document_item_amount(document) - def _resolve_document_field_amount(self, document: dict[str, Any]) -> Decimal | None: - return resolve_document_field_amount(document) - def _resolve_document_text_amount(self, text: str) -> Decimal | None: - return resolve_document_text_amount(text) - def _parse_document_amount_value(self, value: str) -> Decimal | None: - return parse_document_amount_value(value) - @staticmethod - def _parse_plain_document_amount_value(value: str) -> Decimal | None: - return parse_plain_document_amount_value(value) - @staticmethod - def 
_is_probable_year_amount(amount: Decimal | None) -> bool: - return is_probable_year_amount(amount) - @classmethod - def _is_date_like_amount_candidate(cls, amount: Decimal | None, text: str) -> bool: - return is_date_like_amount_candidate(amount, text) - @staticmethod - def _format_decimal_amount(amount: Decimal | None) -> str: - return format_decimal_amount(amount) - def _resolve_document_item_date(self, document: dict[str, Any], *, fallback: date) -> date: - return self._resolve_document_item_date_candidate(document) or fallback - def _resolve_document_item_date_candidate(self, document: dict[str, Any]) -> date | None: - document_type = str(document.get("document_type") or "").strip().lower() - if document_type in DOCUMENT_TRIP_DATE_LABELS: - parsed = self._resolve_document_date_from_fields( - document, - keys=DOCUMENT_TRIP_DATE_KEYS, - labels=DOCUMENT_TRIP_DATE_LABEL_TOKENS, - ) - if parsed is not None: - return parsed - parsed = self._resolve_document_date_from_fields( - document, - keys=DOCUMENT_GENERIC_DATE_KEYS, - labels=DOCUMENT_GENERIC_DATE_LABEL_TOKENS, - excluded_labels=DOCUMENT_INVOICE_DATE_LABEL_TOKENS, - ) - if parsed is not None: - return parsed - parsed = self._parse_document_date( - " ".join( - [ - str(document.get("summary") or "").strip(), - str(document.get("text") or "").strip(), - ] - ).strip() - ) - if parsed is not None: - return parsed - return None - for field in list(document.get("document_fields") or []): - if not isinstance(field, dict): - continue - key = str(field.get("key") or "").strip().lower().replace("_", "") - label = str(field.get("label") or "").replace(" ", "") - value = str(field.get("value") or "").strip() - if not value: - continue - if key in {"date", "time", "issuedat", "issuedate", "invoicedate"} or any( - token in label for token in ("日期", "时间", "开票日期", "发生时间") - ): - parsed = self._parse_document_date(value) - if parsed is not None: - return parsed - parsed = self._parse_document_date( - " ".join( - [ - 
str(document.get("summary") or "").strip(), - str(document.get("text") or "").strip(), - ] - ).strip() - ) - return parsed - def _resolve_document_date_from_fields( - self, - document: dict[str, Any], - *, - keys: set[str], - labels: tuple[str, ...], - excluded_labels: tuple[str, ...] = (), - ) -> date | None: - for field in list(document.get("document_fields") or []): - if not isinstance(field, dict): - continue - key = str(field.get("key") or "").strip().lower().replace("_", "") - label = str(field.get("label") or "").replace(" ", "") - if excluded_labels and any(token in label for token in excluded_labels): - continue - if key not in keys and not any(token in label for token in labels): - continue - parsed = self._parse_document_date(str(field.get("value") or "")) - if parsed is not None: - return parsed - return None - @staticmethod - def _parse_document_date(value: str) -> date | None: - match = DOCUMENT_DATE_PATTERN.search(str(value or "")) - if not match: - return None - raw_value = str(match.group(1) or "").strip() - normalized = raw_value.replace("年", "-").replace("月", "-").replace("日", "") - normalized = normalized.replace("/", "-").replace(".", "-") - parts = [part for part in normalized.split("-") if part] - if len(parts) != 3: - return None - try: - return date(int(parts[0]), int(parts[1]), int(parts[2])) - except ValueError: - return None - def _upsert_primary_item( - self, - *, - claim: ExpenseClaim, - occurred_at: datetime, - expense_type: str, - amount: Decimal, - reason: str, - location: str, - attachment_names: list[str], - ) -> None: - item = claim.items[0] if claim.items else None - if item is None: - item = ExpenseClaimItem( - claim_id=claim.id, - item_date=occurred_at.date(), - item_type=expense_type, - item_reason=reason, - item_location=location, - item_amount=amount, - invoice_id=attachment_names[0] if attachment_names else None, - ) - claim.items.append(item) - self.db.add(item) - return - item.item_date = occurred_at.date() - 
item.item_type = expense_type - item.item_reason = reason - item.item_location = location - item.item_amount = amount - item.invoice_id = ( - self._merge_attachment_reference(item.invoice_id, attachment_names[0]) - if attachment_names - else item.invoice_id - ) - def _generate_claim_no(self, occurred_at: datetime) -> str: - month_code = occurred_at.strftime("%Y%m") - prefix = f"EXP-{month_code}-" - existing_claim_nos = list( - self.db.scalars( - select(ExpenseClaim.claim_no).where(ExpenseClaim.claim_no.like(f"{prefix}%")) - ) - ) - max_suffix = 0 - for claim_no in existing_claim_nos: - normalized = str(claim_no or "").strip() - if not normalized.startswith(prefix): - continue - suffix = normalized[len(prefix):] - if not suffix.isdigit(): - continue - max_suffix = max(max_suffix, int(suffix)) - return f"{prefix}{max_suffix + 1:03d}" - @staticmethod - def _resolve_claim_no_retry_count(context_json: dict[str, Any]) -> int: - try: - return max(0, int(context_json.get("_claim_no_retry_count") or 0)) - except (TypeError, ValueError): - return 0 - @staticmethod - def _is_claim_no_conflict_error(exc: IntegrityError) -> bool: - message = str(exc).lower() - return ( - "claim_no" in message - and ( - "unique" in message - or "duplicate key" in message - or "ix_expense_claims_claim_no" in message - or "expense_claims.claim_no" in message - ) - ) - - def _count_draft_claims_for_owner( - self, - *, - employee: Employee | None, - user_id: str | None, - ) -> int: - owner_filters = self._build_draft_owner_filters( - employee=employee, - user_id=user_id, - ) - if not owner_filters: - return 0 - - stmt = ( - select(func.count()) - .select_from(ExpenseClaim) - .where(ExpenseClaim.status == "draft") - .where(or_(*owner_filters)) - ) - return int(self.db.scalar(stmt) or 0) - - def _build_draft_owner_filters( - self, - *, - employee: Employee | None, - user_id: str | None, - ) -> list[Any]: - conditions: list[Any] = [] - seen: set[tuple[str, str]] = set() - - def 
add_condition(field_name: str, value: str | None) -> None: - normalized = str(value or "").strip() - if not normalized or normalized == "待补充": - return - - marker = (field_name, normalized.lower()) - if marker in seen: - return - seen.add(marker) - - if field_name == "employee_id": - conditions.append(ExpenseClaim.employee_id == normalized) - return - conditions.append(ExpenseClaim.employee_name == normalized) - - if employee is not None: - add_condition("employee_id", employee.id) - add_condition("employee_name", employee.email) - if self._employee_name_is_unique(employee): - add_condition("employee_name", employee.name) - - add_condition("employee_name", user_id) - return conditions - - def _resolve_employee( - self, - *, - ontology: OntologyParseResult, - context_json: dict[str, Any], - user_id: str | None, - ) -> Employee | None: - normalized_user_id = str(user_id or "").strip() - if normalized_user_id: - stmt = ( - select(Employee) - .options(selectinload(Employee.organization_unit), selectinload(Employee.manager)) - .where(func.lower(Employee.email) == normalized_user_id.lower()) - .limit(1) - ) - employee = self.db.scalar(stmt) - if employee is not None: - return employee - - employee_name = self._resolve_employee_name( - ontology=ontology, - context_json=context_json, - user_id=None, - ) - if not employee_name: - return None - - stmt = ( - select(Employee) - .options(selectinload(Employee.organization_unit), selectinload(Employee.manager)) - .where(Employee.name == employee_name) - .limit(1) - ) - return self.db.scalar(stmt) - - @staticmethod - def _resolve_employee_name( - *, - ontology: OntologyParseResult, - context_json: dict[str, Any], - user_id: str | None, - fallback: str = "待补充", - ) -> str: - review_form_values = context_json.get("review_form_values") - if isinstance(review_form_values, dict): - for key in ("reporter_name", "employee_name", "claimant_name"): - value = str(review_form_values.get(key) or "").strip() - if value: - return value - for 
item in ontology.entities: - if item.type == "employee" and item.value.strip(): - return item.value.strip() - for key in ("name", "user_name", "employee_name"): - value = str(context_json.get(key) or "").strip() - if value: - return value - return str(user_id or fallback).strip() or fallback - - @staticmethod - def _resolve_department_name( - *, - employee: Employee | None, - context_json: dict[str, Any], - fallback: str = "待补充", - ) -> str: - if employee is not None and employee.organization_unit is not None: - return employee.organization_unit.name - - request_context = context_json.get("request_context") - if isinstance(request_context, dict): - for key in ("department", "department_name", "deptName"): - value = str(request_context.get(key) or "").strip() - if value: - return value - - for key in ("department_name", "department"): - value = str(context_json.get(key) or "").strip() - if value: - return value - return fallback - - @staticmethod - def _resolve_project_code(entities: list[OntologyEntity]) -> str | None: - for item in entities: - if item.type == "project" and item.normalized_value.strip(): - return item.normalized_value.strip() - return None - - @staticmethod - def _resolve_explicit_review_expense_type(context_json: dict[str, Any]) -> str | None: - review_form_values = context_json.get("review_form_values") - if isinstance(review_form_values, dict): - compact = str( - review_form_values.get("expense_type") - or review_form_values.get("reimbursement_type") - or "" - ).replace(" ", "") - if compact: - if "招待" in compact or ("客户" in compact and any(word in compact for word in ("吃饭", "宴请", "请客", "用餐"))): - return "entertainment" - if any(word in compact for word in ("差旅", "出差", "机票", "行程")): - return "travel" - if any(word in compact for word in ("住宿", "酒店", "宾馆")): - return "hotel" - if any(word in compact for word in ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车")): - return "transport" - if any(word in compact for word in ("餐费", 
"用餐", "午餐", "晚餐", "早餐", "伙食")): - return "meal" - if "会务" in compact: - return "meeting" - if any(word in compact for word in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")): - return "office" - if any(word in compact for word in ("培训费", "培训", "讲师费", "课时费", "课程费")): - return "training" - if any(word in compact for word in ("通讯费", "话费", "流量费", "宽带费")): - return "communication" - if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")): - return "welfare" - return None - - @staticmethod - def _resolve_expense_type( - entities: list[OntologyEntity], - *, - context_json: dict[str, Any], - ) -> str | None: - explicit_expense_type = ExpenseClaimService._resolve_explicit_review_expense_type(context_json) - if explicit_expense_type: - return explicit_expense_type - for item in entities: - if item.type == "expense_type": - normalized = item.normalized_value.strip() - if normalized: - return normalized - return None - - @staticmethod - def _resolve_reason( - *, - message: str, - context_json: dict[str, Any], - allow_message_fallback: bool, - ) -> str | None: - review_form_values = context_json.get("review_form_values") - if isinstance(review_form_values, dict): - for key in ("reason", "business_reason"): - value = str(review_form_values.get(key) or "").strip() - if value: - return ExpenseClaimService._strip_leading_time_from_reason(value) - - explicit_text = context_json.get("user_input_text") - if isinstance(explicit_text, str): - normalized_explicit_text = explicit_text.strip() - if normalized_explicit_text: - return ExpenseClaimService._strip_leading_time_from_reason(normalized_explicit_text)[:500] or None - return None - - request_context = context_json.get("request_context") - if ( - isinstance(request_context, dict) - and str(context_json.get("entry_source") or "").strip() == "detail" - ): - for key in ("reason", "title"): - value = str(request_context.get(key) or "").strip() - if value: - return value - if not 
allow_message_fallback: - return None - - normalized_message = str(message or "").strip() - compact_message = re.sub(r"\s+", "", normalized_message) - if compact_message.startswith(SYSTEM_GENERATED_REASON_PREFIXES): - return None - return ExpenseClaimService._strip_leading_time_from_reason(normalized_message)[:500] or None - - @staticmethod - def _strip_leading_time_from_reason(value: str) -> str: - reason = str(value or "").strip() - for pattern in LEADING_REASON_TIME_PATTERNS: - next_reason = pattern.sub("", reason).strip() - if next_reason != reason: - return next_reason - return reason - - @staticmethod - def _resolve_location(*, message: str, context_json: dict[str, Any]) -> str | None: - review_form_values = context_json.get("review_form_values") - if isinstance(review_form_values, dict): - for key in ("business_location", "location"): - value = str(review_form_values.get(key) or "").strip() - if value: - return value - - request_context = context_json.get("request_context") - if ( - isinstance(request_context, dict) - and str(context_json.get("entry_source") or "").strip() == "detail" - ): - for key in ("city", "location"): - value = str(request_context.get(key) or "").strip() - if value: - return value - compact = str(message or "").replace(" ", "") - city_match = re.search( - r"去(?P[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)", - compact, - ) - if city_match: - return city_match.group("city").strip() - if "客户现场" in compact: - return "客户现场" - return None - - @staticmethod - def _resolve_occurred_at( - ontology: OntologyParseResult, - *, - context_json: dict[str, Any], - ) -> datetime | None: - review_form_values = context_json.get("review_form_values") - if isinstance(review_form_values, dict): - for key in ("occurred_date", "time_range", "business_time"): - value = str(review_form_values.get(key) or "").strip() - if not value: - continue - try: - parsed = date.fromisoformat(value) - return datetime(parsed.year, parsed.month, parsed.day, 
tzinfo=UTC) - except ValueError: - continue - - start_date = ontology.time_range.start_date - if start_date: - try: - parsed = date.fromisoformat(start_date) - return datetime(parsed.year, parsed.month, parsed.day, tzinfo=UTC) - except ValueError: - pass - return None - - @staticmethod - def _resolve_amount( - entities: list[OntologyEntity], - *, - context_json: dict[str, Any], - ) -> Decimal | None: - review_form_values = context_json.get("review_form_values") - if isinstance(review_form_values, dict): - raw_value = str(review_form_values.get("amount") or "").strip() - if raw_value: - compact = raw_value.replace("元", "").replace(",", "").strip() - try: - return Decimal(compact).quantize(Decimal("0.01")) - except (InvalidOperation, ValueError): - pass - for item in entities: - if item.type != "amount" or item.role == "threshold": - continue - try: - return Decimal(item.normalized_value).quantize(Decimal("0.01")) - except (InvalidOperation, ValueError): - continue - return None - - @staticmethod - def _resolve_attachment_names(context_json: dict[str, Any]) -> list[str]: - names = context_json.get("attachment_names") - if not isinstance(names, list): - return [] - return [str(name).strip() for name in names if str(name).strip()] - - def _resolve_attachment_count(self, context_json: dict[str, Any]) -> int: - names = self._resolve_attachment_names(context_json) - if names: - return len(names) - try: - return max(0, int(context_json.get("attachment_count") or 0)) - except (TypeError, ValueError): - return 0 - - def _get_claim_item_or_raise( - self, - *, - claim_id: str, - item_id: str, - current_user: CurrentUserContext, - ) -> tuple[ExpenseClaim | None, ExpenseClaimItem]: - claim = self.get_claim(claim_id, current_user) - if claim is None: - return None, None # type: ignore[return-value] - - item = next((entry for entry in claim.items if entry.id == item_id), None) - if item is None: - raise LookupError("Item not found") - return claim, item - - def 
_get_attachment_storage_root(self) -> Path: - return (get_settings().resolved_storage_root_dir / "expense_claims").resolve() - - def _build_item_attachment_dir(self, claim_id: str, item_id: str) -> Path: - return (self._get_attachment_storage_root() / claim_id / item_id).resolve() - - def _delete_claim_attachment_files(self, claim: ExpenseClaim) -> None: - for item in list(claim.items or []): - self._delete_item_attachment_files(item) - self._delete_claim_attachment_root(claim.id) - - def _delete_claim_attachment_root(self, claim_id: str) -> None: - claim_root = self._assert_attachment_storage_child(self._get_attachment_storage_root() / claim_id) - self._delete_attachment_path(claim_root) - - @staticmethod - def _normalize_attachment_filename(filename: str | None) -> str: - normalized = Path(str(filename or "").strip()).name - normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._") - suffix = Path(normalized).suffix - if normalized: - return normalized - return f"attachment{suffix or '.bin'}" - - def _resolve_attachment_path(self, storage_key: str | None) -> Path | None: - normalized = str(storage_key or "").strip() - if not normalized: - return None - - root = self._get_attachment_storage_root() - path = (root / normalized).resolve() - try: - path.relative_to(root) - except ValueError as exc: - raise FileNotFoundError("Attachment path is invalid") from exc - return path - - def _resolve_item_attachment_path(self, item: ExpenseClaimItem) -> Path | None: - if not str(item.invoice_id or "").strip(): - return None - - file_path = self._resolve_attachment_path(item.invoice_id) - if file_path is not None and file_path.exists(): - return file_path - - filename = self._normalize_attachment_filename(item.invoice_id) - if not filename: - return file_path - - fallback_path = (self._build_item_attachment_dir(item.claim_id, item.id) / filename).resolve() - try: - fallback_path.relative_to(self._get_attachment_storage_root()) - except ValueError as exc: - 
raise FileNotFoundError("Attachment path is invalid") from exc - return fallback_path - - def _to_attachment_storage_key(self, file_path: Path) -> str: - root = self._get_attachment_storage_root() - return file_path.resolve().relative_to(root).as_posix() - - def _resolve_item_attachment_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]: - file_path = self._resolve_item_attachment_path(item) - if file_path is None or not file_path.exists(): - raise FileNotFoundError("Attachment not found") - - metadata = self._read_attachment_meta(file_path) - filename = str(metadata.get("file_name") or file_path.name) - media_type = self._resolve_attachment_media_type( - filename, - fallback=str(metadata.get("media_type") or ""), - ) - return file_path, media_type, filename - - def _delete_item_attachment_files(self, item: ExpenseClaimItem) -> None: - file_path = self._resolve_item_attachment_path(item) - if file_path is None: - return - - root = self._get_attachment_storage_root() - if file_path.parent == root: - self._delete_attachment_path(file_path) - self._delete_attachment_path(self._attachment_meta_path(file_path)) - return - - self._delete_attachment_path(file_path.parent) - - def _assert_attachment_storage_child(self, path: Path) -> Path: - root = self._get_attachment_storage_root() - resolved = path.resolve() - try: - resolved.relative_to(root) - except ValueError as exc: - raise FileNotFoundError("Attachment path is invalid") from exc - return resolved - - def _delete_attachment_path(self, path: Path | None) -> None: - if path is None: - return - - target = self._assert_attachment_storage_child(path) - if not target.exists(): - return - - if target.is_dir(): - shutil.rmtree(target) - else: - target.unlink() - - if target.exists(): - raise OSError(f"Attachment path was not deleted: {target}") - - @staticmethod - def _attachment_meta_path(file_path: Path) -> Path: - return file_path.with_name(f"{file_path.name}.meta.json") - - def _write_attachment_meta(self, 
file_path: Path, payload: dict[str, Any]) -> None: - meta_path = self._attachment_meta_path(file_path) - meta_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") - - def _read_attachment_meta(self, file_path: Path) -> dict[str, Any]: - meta_path = self._attachment_meta_path(file_path) - if not meta_path.exists(): - return {} - - try: - payload = json.loads(meta_path.read_text(encoding="utf-8")) - except (json.JSONDecodeError, OSError): - return {} - return payload if isinstance(payload, dict) else {} - - def _repair_pdf_text_layer_metadata_if_needed( - self, - *, - file_path: Path, - metadata: dict[str, Any], - item: ExpenseClaimItem | None = None, - ) -> dict[str, Any]: - if not metadata: - return metadata - - media_type = str(metadata.get("media_type") or self._resolve_attachment_media_type(file_path.name)).strip() - if media_type != "application/pdf": - return metadata - - ocr_text = str(metadata.get("ocr_text") or "") - ocr_summary = str(metadata.get("ocr_summary") or "") - if OcrService._placeholder_ratio(f"{ocr_summary}\n{ocr_text}") < 0.12: - return metadata - - text_layer = OcrService(self.db)._extract_pdf_text_layer(file_path) - repaired_text, used_text_layer = OcrService._choose_document_text( - ocr_text=ocr_text, - text_layer=text_layer, - ) - if not used_text_layer or not repaired_text: - return metadata - - repaired_summary = OcrService._summarize_text(repaired_text) - document = SimpleNamespace( - filename=str(metadata.get("file_name") or file_path.name), - text=repaired_text, - summary=repaired_summary, - avg_score=float(metadata.get("ocr_avg_score") or 0.0), - line_count=int(metadata.get("ocr_line_count") or 0), - document_type="", - document_type_label="", - scene_code="", - scene_label="", - document_fields=[], - warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()], - ) - document_info = self._build_attachment_document_info(document) - document.document_type = 
document_info.get("document_type", "") - document.document_type_label = document_info.get("document_type_label", "") - document.scene_code = document_info.get("scene_code", "") - document.scene_label = document_info.get("scene_label", "") - document.document_fields = list(document_info.get("fields") or []) - - metadata["ocr_text"] = repaired_text - metadata["ocr_summary"] = repaired_summary - metadata["document_info"] = document_info - metadata["previewable"] = True - metadata["preview_kind"] = "pdf" - metadata["preview_storage_key"] = str(metadata.get("storage_key") or self._to_attachment_storage_key(file_path)) - metadata["preview_media_type"] = "application/pdf" - metadata["preview_file_name"] = str(metadata.get("file_name") or file_path.name) - - if item is not None: - requirement_check = self._build_attachment_requirement_check( - item=item, - document_info=document_info, - ) - metadata["requirement_check"] = requirement_check - metadata["analysis"] = self._build_attachment_analysis( - document=document, - item=item, - claim=getattr(item, "claim", None), - document_info=document_info, - requirement_check=requirement_check, - ) - - self._write_attachment_meta(file_path, metadata) - return metadata - - def _build_attachment_preview_meta( - self, - *, - file_path: Path, - media_type: str, - ocr_document: Any | None, - ) -> dict[str, Any]: - filename = file_path.name - storage_key = self._to_attachment_storage_key(file_path) - preview_kind = self._resolve_preview_kind(media_type, filename) - - preview_data_url = str(getattr(ocr_document, "preview_data_url", "") or "").strip() - preview_source_kind = str(getattr(ocr_document, "preview_kind", "") or "").strip() - if preview_source_kind == "image" and preview_data_url: - preview_asset = self._write_preview_asset_from_data_url( - attachment_dir=file_path.parent, - original_filename=filename, - preview_data_url=preview_data_url, - ) - if preview_asset is not None: - preview_path, preview_media_type, preview_file_name = 
preview_asset - return { - "previewable": True, - "preview_kind": "image", - "preview_storage_key": self._to_attachment_storage_key(preview_path), - "preview_media_type": preview_media_type, - "preview_file_name": preview_file_name, - } - - if preview_kind: - return { - "previewable": True, - "preview_kind": preview_kind, - "preview_storage_key": storage_key, - "preview_media_type": media_type, - "preview_file_name": filename, - } - - return { - "previewable": False, - "preview_kind": "", - "preview_storage_key": "", - "preview_media_type": "", - "preview_file_name": "", - } - - def _resolve_item_attachment_preview_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]: - file_path, media_type, filename = self._resolve_item_attachment_content(item) - metadata = self._read_attachment_meta(file_path) - metadata = self._repair_pdf_text_layer_metadata_if_needed( - file_path=file_path, - metadata=metadata, - item=item, - ) - preview_storage_key = str(metadata.get("preview_storage_key") or "").strip() - preview_file_name = str(metadata.get("preview_file_name") or "").strip() - preview_media_type = str(metadata.get("preview_media_type") or "").strip() - - if preview_storage_key: - preview_path = self._resolve_attachment_path(preview_storage_key) - if preview_path is not None and preview_path.exists(): - resolved_name = preview_file_name or preview_path.name - resolved_media_type = self._resolve_attachment_media_type( - resolved_name, - fallback=preview_media_type, - ) - return preview_path, resolved_media_type, resolved_name - - if self._is_previewable_media_type(media_type, filename): - return file_path, media_type, filename - - raise FileNotFoundError("Attachment preview not found") - - def _build_attachment_payload(self, item: ExpenseClaimItem) -> dict[str, Any]: - file_path, media_type, filename = self._resolve_item_attachment_content(item) - metadata = self._read_attachment_meta(file_path) - metadata = self._repair_pdf_text_layer_metadata_if_needed( - 
file_path=file_path, - metadata=metadata, - item=item, - ) - uploaded_at_value = metadata.get("uploaded_at") - uploaded_at = None - if isinstance(uploaded_at_value, str) and uploaded_at_value.strip(): - try: - uploaded_at = datetime.fromisoformat(uploaded_at_value) - except ValueError: - uploaded_at = None - - analysis = metadata.get("analysis") - if not isinstance(analysis, dict): - analysis = None - - document_info = metadata.get("document_info") - if not isinstance(document_info, dict): - document_info = None - - requirement_check = metadata.get("requirement_check") - if not isinstance(requirement_check, dict): - requirement_check = None - - preview_kind = str(metadata.get("preview_kind") or "").strip() - previewable = bool(metadata.get("previewable", self._is_previewable_media_type(media_type, filename))) - preview_url = self._build_attachment_preview_client_path(item.claim_id, item.id) if previewable else "" - - return { - "file_name": str(metadata.get("file_name") or filename), - "storage_key": str(item.invoice_id or ""), - "media_type": str(metadata.get("media_type") or media_type), - "size_bytes": int(metadata.get("size_bytes") or file_path.stat().st_size), - "uploaded_at": uploaded_at, - "previewable": previewable, - "preview_kind": preview_kind or self._resolve_preview_kind(media_type, filename), - "preview_url": preview_url, - "analysis": analysis, - "document_info": document_info, - "requirement_check": requirement_check, - } - - @staticmethod - def _resolve_preview_kind(media_type: str | None, filename: str) -> str: - resolved = str(media_type or "").strip() or (mimetypes.guess_type(filename)[0] or "") - if resolved.startswith("image/"): - return "image" - if resolved == "application/pdf": - return "pdf" - return "" - - @staticmethod - def _decode_data_url(payload: str) -> tuple[str, bytes] | None: - normalized = str(payload or "").strip() - matched = re.match(r"^data:(?P[\w.+-]+/[\w.+-]+);base64,(?P.+)$", normalized, flags=re.DOTALL) - if not matched: 
- return None - try: - content = base64.b64decode(matched.group("body"), validate=True) - except (binascii.Error, ValueError): - return None - return matched.group("media"), content - - def _write_preview_asset_from_data_url( - self, - *, - attachment_dir: Path, - original_filename: str, - preview_data_url: str, - ) -> tuple[Path, str, str] | None: - decoded = self._decode_data_url(preview_data_url) - if decoded is None: - return None - - preview_media_type, preview_content = decoded - suffix = mimetypes.guess_extension(preview_media_type) or ".bin" - preview_name = f"{Path(original_filename).stem}.preview{suffix}" - preview_path = attachment_dir / preview_name - preview_path.write_bytes(preview_content) - return preview_path, preview_media_type, preview_name - - @staticmethod - def _build_attachment_preview_client_path(claim_id: str, item_id: str) -> str: - return ( - "/reimbursements/claims/" - f"{quote(str(claim_id or '').strip(), safe='')}" - f"/items/{quote(str(item_id or '').strip(), safe='')}/attachment/preview" - ) - - @staticmethod - def _resolve_attachment_media_type(filename: str, *, fallback: str | None = None) -> str: - guessed = mimetypes.guess_type(filename)[0] - return str(guessed or fallback or "application/octet-stream") - - @staticmethod - def _is_previewable_media_type(media_type: str | None, filename: str) -> bool: - resolved = str(media_type or "").strip() or (mimetypes.guess_type(filename)[0] or "") - return resolved.startswith("image/") or resolved == "application/pdf" - - @staticmethod - def _resolve_attachment_display_name(storage_key: str | None) -> str: - return Path(str(storage_key or "").strip()).name - - @classmethod - def _merge_attachment_reference(cls, current_invoice_id: str | None, next_invoice_id: str | None) -> str | None: - normalized_next = str(next_invoice_id or "").strip() - if not normalized_next: - return None - - normalized_current = str(current_invoice_id or "").strip() - if ( - normalized_current - and 
cls._resolve_attachment_display_name(normalized_current) - == cls._resolve_attachment_display_name(normalized_next) - ): - return normalized_current - - return normalized_next - - def _build_attachment_document_info(self, document: Any) -> dict[str, Any]: - insight = build_document_insight( - filename=str(getattr(document, "filename", "") or ""), - summary=str(getattr(document, "summary", "") or ""), - text=str(getattr(document, "text", "") or ""), - ) - document_type = str(getattr(document, "document_type", "") or "").strip() - if document_type in {"", "other"}: - document_type = insight.document_type - - document_type_label = str(getattr(document, "document_type_label", "") or "").strip() - if not document_type_label or document_type_label == "其他单据": - document_type_label = insight.document_type_label - - scene_code = str(getattr(document, "scene_code", "") or "").strip() - if scene_code in {"", "other"}: - scene_code = insight.scene_code - - scene_label = str(getattr(document, "scene_label", "") or "").strip() - if not scene_label or scene_label == "其他票据": - scene_label = insight.scene_label - - raw_fields = list(getattr(document, "document_fields", []) or []) - normalized_fields: list[dict[str, str]] = [] - for item in raw_fields: - key = "" - label = "" - value = "" - if isinstance(item, dict): - key = str(item.get("key") or "").strip() - label = str(item.get("label") or "").strip() - value = str(item.get("value") or "").strip() - else: - key = str(getattr(item, "key", "") or "").strip() - label = str(getattr(item, "label", "") or "").strip() - value = str(getattr(item, "value", "") or "").strip() - if key and label and value: - label = self._resolve_document_field_display_label( - document_type=document_type, - key=key, - label=label, - ) - normalized_fields.append( - { - "key": key, - "label": label, - "value": value, - } - ) - - if not normalized_fields: - normalized_fields = [ - { - "key": field.key, - "label": field.label, - "value": field.value, - } - 
for field in insight.fields - if field.value - ] - - return { - "document_type": document_type, - "document_type_label": document_type_label, - "scene_code": scene_code, - "scene_label": scene_label, - "fields": normalized_fields, - } - - @staticmethod - def _resolve_document_field_display_label( - *, - document_type: str, - key: str, - label: str, - ) -> str: - trip_label = DOCUMENT_TRIP_DATE_LABELS.get( - str(document_type or "").strip().lower() - ) - if not trip_label: - return label - - normalized_key = str(key or "").strip().lower().replace("_", "") - normalized_label = str(label or "").replace(" ", "") - if normalized_key in DOCUMENT_INVOICE_DATE_KEYS or any( - token in normalized_label for token in DOCUMENT_INVOICE_DATE_LABEL_TOKENS - ): - return label - - is_date_field = ( - normalized_key - in DOCUMENT_TRIP_DATE_KEYS - | DOCUMENT_GENERIC_DATE_KEYS - or any( - token in normalized_label - for token in ( - *DOCUMENT_TRIP_DATE_LABEL_TOKENS, - *DOCUMENT_GENERIC_DATE_LABEL_TOKENS, - ) - ) - ) - return trip_label if is_date_field else label - - def _backfill_item_type_from_attachment( - self, - *, - item: ExpenseClaimItem, - document_info: dict[str, Any], - ) -> None: - current_type = str(item.item_type or "").strip().lower() - if current_type not in GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES: - return - - document_type = str(document_info.get("document_type") or "").strip() - mapped_type = DOCUMENT_TYPE_ITEM_TYPE_MAP.get(document_type) - if mapped_type: - item.item_type = mapped_type - - def _backfill_item_amount_from_attachment( - self, - *, - item: ExpenseClaimItem, - document: Any, - document_info: dict[str, Any], - ) -> None: - current_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) - if current_amount > Decimal("0.00"): - return - - amount = self._resolve_document_item_amount( - { - "document_fields": document_info.get("fields") or [], - "summary": str(getattr(document, "summary", "") or ""), - "text": str(getattr(document, 
"text", "") or ""), - } - ) - if amount is not None and amount > Decimal("0.00"): - item.item_amount = amount - - def _build_attachment_expense_audit_points( - self, - *, - document: Any, - item: ExpenseClaimItem, - document_info: dict[str, Any], - ) -> list[str]: - text = " ".join( - [ - str(getattr(document, "summary", "") or "").strip(), - str(getattr(document, "text", "") or "").strip(), - ] - ).strip() - document_payload = { - "document_fields": document_info.get("fields") or [], - "summary": str(getattr(document, "summary", "") or ""), - "text": str(getattr(document, "text", "") or ""), - } - field_amount = self._resolve_document_field_amount(document_payload) - audited_amount = self._resolve_document_item_amount(document_payload) - item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) - - points: list[str] = [] - if ( - field_amount is not None - and audited_amount is not None - and self._is_date_like_amount_candidate(field_amount, text) - and abs(field_amount - audited_amount) > Decimal("1.00") - ): - points.append( - "费用核算:OCR 金额疑似误取日期" - f" {self._format_decimal_amount(field_amount)}," - f"已按票据文本中的总费用 {self._format_decimal_amount(audited_amount)} 元回填," - "请核对酒店或票据原文总额。" - ) - - if ( - audited_amount is not None - and item_amount > Decimal("0.00") - and abs(audited_amount - item_amount) > Decimal("1.00") - ): - points.append( - f"费用核算:票据文本复核金额为 {self._format_decimal_amount(audited_amount)} 元," - f"当前明细金额为 {self._format_decimal_amount(item_amount)} 元,请确认是否需要调整。" - ) - - return points - - def _build_attachment_travel_policy_audit( - self, - *, - document: Any, - item: ExpenseClaimItem, - document_info: dict[str, Any], - claim: ExpenseClaim | None = None, - ) -> dict[str, Any]: - policy = self._get_expense_rule_catalog().travel_policy - if policy is None: - return {"points": [], "rule_basis": [], "has_high_risk": False} - - item_type = str(item.item_type or "").strip().lower() - document_type = 
str(document_info.get("document_type") or "").strip().lower() - scene_code = str(document_info.get("scene_code") or "").strip().lower() - if not ( - item_type in {"hotel", "hotel_ticket"} - or document_type == "hotel_invoice" - or scene_code == "hotel" - ): - return {"points": [], "rule_basis": [], "has_high_risk": False} - - item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) - if item_amount <= Decimal("0.00"): - return {"points": [], "rule_basis": [], "has_high_risk": False} - - claim = claim or getattr(item, "claim", None) - grade_band = self._resolve_travel_policy_band(getattr(claim, "employee_grade", None)) - rule_name = str(policy.standard_rule_name or policy.rule_name or "公司差旅费报销规则").strip() - rule_version = str(policy.standard_rule_version or policy.rule_version or "").strip() - version_text = f"({rule_version})" if rule_version else "" - rule_basis = [ - f"依据《{rule_name}》{version_text},住宿费按员工职级、出差城市和每晚金额进行差标核算。" - ] - if grade_band is None: - return { - "points": ["住宿标准:当前员工职级缺失,无法匹配规则中心的住宿报销标准。"], - "rule_basis": rule_basis, - "has_high_risk": False, - } - - text = " ".join( - [ - str(getattr(document, "summary", "") or "").strip(), - str(getattr(document, "text", "") or "").strip(), - ] - ).strip() - context = { - "item": item, - "document_info": document_info, - "ocr_summary": str(getattr(document, "summary", "") or "").strip(), - "ocr_text": str(getattr(document, "text", "") or "").strip(), - } - hotel_city = self._extract_hotel_city(context, policy) - claim_city = self._extract_city_from_text(str(getattr(claim, "location", "") or ""), policy) if claim else "" - reason_city = self._extract_city_from_text(str(getattr(claim, "reason", "") or ""), policy) if claim else "" - baseline_city = hotel_city or claim_city or reason_city - if not baseline_city: - baseline_city = self._extract_city_from_text(text, policy) - if not baseline_city: - return { - "points": ["住宿标准:未能从酒店名称、出差地点或票据内容匹配到规则中心城市,无法核算住宿差标。"], - "rule_basis": 
rule_basis, - "has_high_risk": False, - } - - standard = self._resolve_travel_policy_hotel_standard( - policy=policy, - grade_band=grade_band, - city=baseline_city, - ) - if standard is None: - return {"points": [], "rule_basis": rule_basis, "has_high_risk": False} - - cap, standard_label = standard - night_count = self._extract_hotel_night_count(context) - nightly_amount = (item_amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01")) - if nightly_amount <= cap: - return {"points": [], "rule_basis": rule_basis, "has_high_risk": False} - - band_label = policy.band_labels.get(grade_band, str(getattr(claim, "employee_grade", "") or "当前职级").strip()) - over_amount = (nightly_amount - cap).quantize(Decimal("0.01")) - return { - "points": [ - ( - f"住宿标准:{band_label}在{standard_label}的住宿标准为 " - f"{self._format_decimal_amount(cap)} 元/晚,票据识别金额 " - f"{self._format_decimal_amount(item_amount)} 元 / {night_count} 晚," - f"约 {self._format_decimal_amount(nightly_amount)} 元/晚," - f"超出 {self._format_decimal_amount(over_amount)} 元/晚。" - ) - ], - "rule_basis": rule_basis, - "has_high_risk": True, - } - - def _backfill_item_date_from_attachment( - self, - *, - item: ExpenseClaimItem, - document: Any, - document_info: dict[str, Any], - ) -> None: - document_payload = { - "document_type": str(document_info.get("document_type") or "").strip(), - "scene_code": str(document_info.get("scene_code") or "").strip(), - "summary": str(getattr(document, "summary", "") or "").strip(), - "text": str(getattr(document, "text", "") or "").strip(), - "document_fields": list(document_info.get("fields") or []), - } - parsed = self._resolve_document_item_date_candidate(document_payload) - if parsed is not None: - item.item_date = parsed - - def _backfill_item_reason_from_attachment( - self, - *, - item: ExpenseClaimItem, - document: Any, - document_info: dict[str, Any], - ) -> None: - reason = self._resolve_document_item_reason( - { - "document_type": str(document_info.get("document_type") or 
"").strip(), - "scene_code": str(document_info.get("scene_code") or "").strip(), - "scene_label": str(document_info.get("scene_label") or "").strip(), - "document_fields": document_info.get("fields") or [], - "summary": str(getattr(document, "summary", "") or ""), - "text": str(getattr(document, "text", "") or ""), - }, - fallback=str(item.item_reason or "").strip(), - ) - if reason: - item.item_reason = reason - - def _build_attachment_requirement_check( - self, - *, - item: ExpenseClaimItem, - document_info: dict[str, Any], - ) -> dict[str, Any]: - expense_type = str(item.item_type or "").strip().lower() or "other" - policy = self._get_expense_scene_policy(expense_type) - expense_label = policy.label if policy is not None else self._resolve_expense_type_label(expense_type) - allowed_scenes = set(policy.allowed_scene_codes) if policy is not None else set() - allowed_document_types = set(policy.allowed_document_types) if policy is not None else set() - allowed_scene_labels = [self._resolve_document_scene_label(code) for code in sorted(allowed_scenes)] - allowed_document_type_labels = [ - resolve_document_type_label(document_type) - for document_type in sorted(allowed_document_types) - ] - recognized_scene_code = str(document_info.get("scene_code") or "other").strip() or "other" - recognized_scene_label = str( - document_info.get("scene_label") or self._resolve_document_scene_label(recognized_scene_code) - ).strip() - recognized_document_type = str(document_info.get("document_type") or "other").strip() or "other" - recognized_document_type_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据" - matches = ( - (not allowed_scenes and not allowed_document_types) - or recognized_scene_code in allowed_scenes - or recognized_document_type in allowed_document_types - ) - - if matches: - if allowed_scene_labels or allowed_document_type_labels: - message = ( - f"当前费用项目为{expense_label},已识别为{recognized_document_type_label}," - 
f"符合当前{expense_label}场景的附件要求。" - ) - else: - message = f"当前费用项目为{expense_label},已识别为{recognized_document_type_label}。" - else: - expected_parts = [label + "相关票据" for label in allowed_scene_labels] - expected_parts.extend(allowed_document_type_labels) - expected_text = "、".join(dict.fromkeys(part for part in expected_parts if part)) or "对应场景票据" - message = ( - f"当前费用项目为{expense_label},要求上传{expected_text};" - f"当前识别为{recognized_document_type_label},不符合当前场景,建议过滤或更换附件。" - ) - - return { - "matches": matches, - "current_expense_type": expense_type, - "current_expense_type_label": expense_label, - "allowed_scene_labels": allowed_scene_labels, - "allowed_document_type_labels": allowed_document_type_labels, - "recognized_scene_code": recognized_scene_code, - "recognized_scene_label": recognized_scene_label, - "recognized_document_type": recognized_document_type, - "recognized_document_type_label": recognized_document_type_label, - "mismatch_severity": policy.attachment_mismatch_severity if policy is not None else "high", - "rule_code": policy.rule_code if policy is not None else DEFAULT_SCENE_RULE_ASSET_CODE, - "rule_name": policy.rule_name if policy is not None else "报销场景提交与附件标准", - "message": message, - } - - @staticmethod - def _resolve_document_scene_label(scene_code: str) -> str: - normalized = str(scene_code or "").strip().lower() - return DOCUMENT_SCENE_LABELS.get(normalized, "其他票据") - - @staticmethod - def _extract_amount_candidates(text: str) -> list[Decimal]: - return extract_amount_candidates(text) - - @staticmethod - def _is_amount_match_date_fragment( - amount: Decimal, - text: str, - start: int, - end: int, - ) -> bool: - return is_amount_match_date_fragment(amount, text, start, end) - - @staticmethod - def _has_date_like_text(text: str) -> bool: - return bool(re.search(r"(20\d{2}[年/\-.]\d{1,2}[月/\-.]\d{1,2}日?)", text)) - - @staticmethod - def _normalize_match_text(text: str) -> str: - return re.sub(r"\s+", "", str(text or "")).lower() - - @staticmethod - def 
_resolve_expense_type_label(expense_type: str | None) -> str: - normalized = str(expense_type or "").strip().lower() - return EXPENSE_TYPE_LABELS.get(normalized, "其他") - - def _resolve_allowed_document_scenes(self, expense_type: str | None) -> set[str]: - normalized = str(expense_type or "").strip().lower() - policy = self._get_expense_scene_policy(normalized) - allowed_scenes = set(policy.allowed_scene_codes) if policy is not None else set() - allowed_scenes.update(EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES.get(normalized, set())) - return allowed_scenes - - def _resolve_document_analysis_scenes(self, document_info: dict[str, Any], text: str) -> set[str]: - scenes: set[str] = set() - recognized_scene_code = str(document_info.get("scene_code") or "").strip().lower() - if recognized_scene_code and recognized_scene_code != "other": - scenes.add(recognized_scene_code) - - recognized_document_type = str(document_info.get("document_type") or "").strip().lower() - mapped_scene = DOCUMENT_TYPE_SCENE_MAP.get(recognized_document_type) - if mapped_scene: - scenes.add(mapped_scene) - - if scenes: - return scenes - return set(self._detect_expense_scenes(text).keys()) - - def _detect_expense_scenes(self, text: str) -> dict[str, list[str]]: - normalized = self._normalize_match_text(text) - if not normalized: - return {} - - matches: dict[str, list[str]] = {} - for scene, keywords in EXPENSE_SCENE_KEYWORDS.items(): - matched = [keyword for keyword in keywords if keyword in normalized] - if matched: - matches[scene] = matched[:3] - return matches - - def _format_scene_labels(self, scene_codes: set[str]) -> str: - labels = [self._resolve_expense_type_label(code) for code in scene_codes] - unique_labels = list(dict.fromkeys(label for label in labels if label)) - return "、".join(unique_labels) if unique_labels else "其他" - - def _build_purpose_mismatch_point( - self, - *, - item: ExpenseClaimItem, - document_scenes: set[str], - ) -> str | None: - if not document_scenes: - return None - - 
allowed_scenes = self._resolve_allowed_document_scenes(item.item_type) - document_scene_labels = self._format_scene_labels(document_scenes) - - if allowed_scenes and document_scenes.isdisjoint(allowed_scenes): - expense_label = self._resolve_expense_type_label(item.item_type) - return f"附件类型:当前费用项目为{expense_label},但附件内容更像{document_scene_labels}相关票据。" - - return None - - @staticmethod - def _is_valid_route_description(value: str) -> bool: - text = str(value or "").strip() - if not text: - return False - if DOCUMENT_DATE_PATTERN.search(text): - return False - return bool(DOCUMENT_ROUTE_FORMAT_PATTERN.match(text)) - - def _build_route_format_point( - self, - *, - item: ExpenseClaimItem, - document_info: dict[str, Any], - ) -> str | None: - item_type = str(item.item_type or "").strip().lower() - document_type = str(document_info.get("document_type") or "").strip().lower() - route_required = item_type in ROUTE_DESCRIPTION_ITEM_TYPES or document_type in { - "train_ticket", - "flight_itinerary", - "taxi_receipt", - "transport_receipt", - } - if not route_required: - return None - - reason = str(item.item_reason or "").strip() - if self._is_valid_route_description(reason): - return None - - example = "广州南-北京南" if item_type != "ride_ticket" else "深圳北站-腾讯滨海大厦" - current = f"当前为“{reason[:30]}”," if reason else "" - return ( - f"行程说明:{current}格式应为“起始地-目的地”," - f"例如“{example}”,请按票据行程补充。" - ) - - def _build_fallback_attachment_analysis( - self, - *, - media_type: str | None, - item: ExpenseClaimItem, - ) -> dict[str, Any]: - return { - "severity": "medium", - "label": "中风险", - "headline": "AI提示:附件已上传,待识别结果", - "summary": "附件已成功保存,但当前尚未拿到有效识别结果,建议人工先核对票据内容。", - "points": [ - f"附件格式:{self._resolve_attachment_media_type('attachment', fallback=media_type)}", - f"费用金额:当前明细金额为 {item.item_amount} 元", - ], - "suggestion": "建议打开附件确认金额、日期和票据类型是否完整,再继续提交审批。", - } - - def _build_failed_ocr_attachment_analysis( - self, - *, - media_type: str | None, - error_message: str, - item: 
ExpenseClaimItem, - ) -> dict[str, Any]: - return { - "severity": "medium", - "label": "中风险", - "headline": "AI提示:附件已上传,但识别失败", - "summary": "文件已经保存成功,但本次 AI 识别未完成,因此无法给出完整票据核验结论。", - "points": [ - f"识别异常:{error_message or 'OCR 服务暂不可用'}", - f"费用金额:当前明细金额为 {item.item_amount} 元", - f"附件格式:{self._resolve_attachment_media_type('attachment', fallback=media_type)}", - ], - "suggestion": "建议重新上传更清晰的票据图片,或稍后重试识别后再提交。", - } - - def _build_attachment_analysis( - self, - *, - document: Any, - item: ExpenseClaimItem, - claim: ExpenseClaim | None = None, - document_info: dict[str, Any] | None = None, - requirement_check: dict[str, Any] | None = None, - ) -> dict[str, Any]: - warnings = [str(value).strip() for value in list(getattr(document, "warnings", []) or []) if str(value).strip()] - text = " ".join( - [ - str(getattr(document, "summary", "") or "").strip(), - str(getattr(document, "text", "") or "").strip(), - ] - ).strip() - compact_text = text.replace(" ", "") - avg_score = float(getattr(document, "avg_score", 0.0) or 0.0) - line_count = int(getattr(document, "line_count", 0) or 0) - document_info = document_info or self._build_attachment_document_info(document) - requirement_check = requirement_check or self._build_attachment_requirement_check( - item=item, - document_info=document_info, - ) - document_scenes = self._resolve_document_analysis_scenes(document_info, text) - purpose_mismatch_point = self._build_purpose_mismatch_point( - item=item, - document_scenes=document_scenes, - ) - route_format_point = self._build_route_format_point( - item=item, - document_info=document_info, - ) - expense_audit_points = self._build_attachment_expense_audit_points( - document=document, - item=item, - document_info=document_info, - ) - travel_policy_audit = self._build_attachment_travel_policy_audit( - document=document, - item=item, - claim=claim, - document_info=document_info, - ) - travel_policy_points = [ - str(point).strip() - for point in list(travel_policy_audit.get("points") 
or []) - if str(point).strip() - ] - travel_policy_rule_basis = [ - str(point).strip() - for point in list(travel_policy_audit.get("rule_basis") or []) - if str(point).strip() - ] - travel_policy_high_risk = bool(travel_policy_audit.get("has_high_risk")) - recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other" - recognized_document_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据" - requirement_matches = bool(requirement_check.get("matches")) - mismatch_severity = str(requirement_check.get("mismatch_severity") or "high").strip().lower() or "high" - - has_ticket_keyword = any( - keyword in compact_text - for keyword in ( - "发票", - "票据", - "增值税", - "电子行程单", - "购买方", - "销售方", - "税额", - "价税", - "票号", - "发票代码", - "凭证", - ) - ) - amount_candidates = self._extract_amount_candidates(text) - item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) - has_matching_amount = any(abs(candidate - item_amount) <= Decimal("1.00") for candidate in amount_candidates) - has_date_text = self._has_date_like_text(text) - amount_mismatch = bool(amount_candidates) and item_amount > Decimal("0.00") and not has_matching_amount - - points: list[str] = [] - if warnings: - points.append(f"识别提示:{warnings[0]}") - if line_count == 0 or not compact_text: - points.append("附件内容:未识别到有效文字,当前附件更像普通图片或内容过于模糊。") - if recognized_document_type == "other" and not has_ticket_keyword: - points.append("票据类型:未识别到发票、票据、电子行程单等关键字,暂无法判断票据类型。") - if not amount_candidates: - points.append("金额字段:未识别到可用于核对的金额。") - elif amount_mismatch: - candidate_text = "、".join(str(candidate) for candidate in amount_candidates[:3]) - points.append(f"金额字段:附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。") - if not has_date_text: - date_requirement = DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS.get( - recognized_document_type, - "开票日期或业务发生日期", - ) - points.append(f"日期字段:未识别到{date_requirement}。") - if not requirement_matches: - 
points.append(f"附件类型要求:{requirement_check.get('message')}") - points.extend(expense_audit_points) - points.extend(travel_policy_points) - if purpose_mismatch_point: - points.append(purpose_mismatch_point) - if route_format_point: - points.append(route_format_point) - if avg_score and avg_score < 0.72: - points.append(f"识别质量:OCR 置信度偏低({avg_score:.0%}),可能影响票据核验准确性。") - - issue_count = len(points) - if issue_count == 0: - return { - "severity": "pass", - "label": "AI提示符合条件", - "headline": "AI提示:附件符合基础校验条件", - "summary": "已识别到票据类型和关键字段,且符合当前费用场景的附件要求。", - "points": [ - f"票据类型:已识别为{recognized_document_label}。", - f"附件类型要求:{requirement_check.get('message')}", - f"金额字段:已识别到与当前明细接近的金额 {item_amount} 元。", - ], - "rule_basis": travel_policy_rule_basis, - "suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。", - } - - severity = "low" - label = "低风险" - headline = "AI提示:附件存在轻微待核对项" - summary = "当前附件已识别出部分票据要素,但仍建议人工继续复核。" - - if travel_policy_high_risk: - severity = "high" - label = "高风险" - headline = "AI提示:住宿金额超出报销标准" - summary = "当前住宿票据金额超过规则中心差旅住宿标准,强行提交前需补充超标原因。" - elif ( - line_count == 0 - or not compact_text - or (recognized_document_type == "other" and not has_ticket_keyword and issue_count >= 2) - or (not requirement_matches and mismatch_severity == "high") - or (purpose_mismatch_point and amount_mismatch) - ): - severity = "high" - label = "高风险" - headline = "AI提示:附件不符合票据校验条件" - summary = "当前附件存在明显异常,票据类型与当前费用场景不匹配,或无法作为有效报销材料。" - elif ( - purpose_mismatch_point - or route_format_point - or expense_audit_points - or travel_policy_points - or amount_mismatch - or issue_count >= 2 - or warnings - or (avg_score and avg_score < 0.72) - or (not requirement_matches and mismatch_severity in {"medium", "low"}) - ): - severity = "medium" - label = "中风险" - headline = "AI提示:附件存在明显待整改项" - summary = "当前附件可见部分内容,但金额、用途、日期或附件类型仍有缺失或不一致。" - if route_format_point and issue_count == 1: - summary = "票据行程已识别,但费用明细说明未按“起始地-目的地”格式填写。" - elif expense_audit_points and issue_count == 
len(expense_audit_points): - summary = "OCR 金额已完成二次核算,请按票据原文总额复核。" - elif travel_policy_points and issue_count == len(travel_policy_points): - summary = "住宿票据已识别,但当前缺少职级或城市信息,无法完成差旅住宿标准核算。" - - suggestion = { - "high": "建议过滤当前不匹配的票据,重新上传符合当前费用场景的清晰原件。", - "medium": "建议根据风险点补齐清晰票据,或修正金额、日期、费用说明后再提交。", - "low": "建议人工再次核对金额和业务说明,确认后可继续流转。", - }[severity] - if travel_policy_high_risk: - suggestion = "请核对住宿发票金额、晚数和出差城市;如确需超标,需在附加说明中补充超标说明并提交审批重点复核。" - - return { - "severity": severity, - "label": label, - "headline": headline, - "summary": summary, - "points": points, - "rule_basis": list(dict.fromkeys(travel_policy_rule_basis)), - "suggestion": suggestion, - } - - @staticmethod - def _serialize_claim(claim: ExpenseClaim) -> dict[str, Any]: - return { - "id": claim.id, - "claim_no": claim.claim_no, - "employee_name": claim.employee_name, - "department_name": claim.department_name, - "project_code": claim.project_code, - "expense_type": claim.expense_type, - "reason": claim.reason, - "location": claim.location, - "amount": float(claim.amount), - "invoice_count": int(claim.invoice_count or 0), - "status": claim.status, - "approval_stage": claim.approval_stage, - "risk_flags_json": list(claim.risk_flags_json or []), - } - - @staticmethod - def _collect_return_flags(risk_flags: Any) -> list[dict[str, Any]]: - if not isinstance(risk_flags, list): - return [] - - return [ - flag - for flag in risk_flags - if isinstance(flag, dict) and str(flag.get("source") or "").strip() == "manual_return" - ] - - @staticmethod - def _normalize_return_reason_codes(reason_codes: list[str] | None) -> list[str]: - return ExpenseClaimService._normalize_return_reason_code_payload(reason_codes)["reason_codes"] - - @staticmethod - def _normalize_return_reason_code_payload(reason_codes: list[str] | None) -> dict[str, list[str]]: - normalized_codes: list[str] = [] - unknown_codes: list[str] = [] - for item in reason_codes or []: - code = str(item or "").strip() - if not code: - continue - if code in 
RETURN_REASON_OPTIONS and code not in normalized_codes: - normalized_codes.append(code) - elif code not in RETURN_REASON_OPTIONS and code not in unknown_codes: - unknown_codes.append(code) - return { - "reason_codes": normalized_codes, - "unknown_reason_codes": unknown_codes, - } - - @staticmethod - def _merge_persistent_claim_risk_flags(*, existing_flags: list[Any], next_flags: list[Any]) -> list[Any]: - if not next_flags: - return list(existing_flags or []) - - merged_flags = list(next_flags or []) - next_return_markers = { - ExpenseClaimService._build_return_flag_marker(flag) - for flag in merged_flags - if isinstance(flag, dict) and str(flag.get("source") or "").strip() == "manual_return" - } - for flag in list(existing_flags or []): - if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "manual_return"): - continue - marker = ExpenseClaimService._build_return_flag_marker(flag) - if marker in next_return_markers: - continue - merged_flags.append(flag) - next_return_markers.add(marker) - return merged_flags - - @staticmethod - def _build_return_flag_marker(flag: dict[str, Any]) -> tuple[str, str, str]: - event_id = str(flag.get("return_event_id") or "").strip() - if event_id: - return ("event_id", event_id, "") - return ( - str(flag.get("return_count") or "").strip(), - str(flag.get("created_at") or "").strip(), - str(flag.get("message") or flag.get("reason") or "").strip(), - ) - - @staticmethod - def _build_default_return_message(*, operator: str, risk_points: list[str]) -> str: - if risk_points: - return f"{operator} 退回该报销单:{'、'.join(risk_points)}。请申请人调整后重新提交。" - return f"{operator} 已退回该报销单,请申请人调整后重新提交。" - - @staticmethod - def _normalize_return_stage_key(stage: str | None) -> str: - normalized = str(stage or "").strip() - if "直属" in normalized or "领导" in normalized or "负责人" in normalized: - return "direct_manager" - if "财务" in normalized: - return "finance" - if "AI" in normalized or "预审" in normalized: - return "ai_review" - if "归档" 
in normalized or "入账" in normalized: - return "archive" - return "unknown" - - @staticmethod - def _is_editable_claim_status(status: str | None) -> bool: - return str(status or "").strip().lower() in EDITABLE_CLAIM_STATUSES - - @staticmethod - def _normalize_optional_text(value: str | None, *, fallback: str = "", allow_empty: bool = False) -> str | None: - normalized = str(value or "").strip() - if normalized: - return normalized - if allow_empty: - return None - return fallback - - @staticmethod - def _normalize_sort_datetime(value: datetime | None) -> datetime: - if value is None: - return datetime.max.replace(tzinfo=UTC) - if value.tzinfo is None: - return value.replace(tzinfo=UTC) - return value - - @staticmethod - def _is_missing_value(value: Any) -> bool: - text = str(value or "").strip() - if not text: - return True - compact = text.replace(" ", "") - return compact in {"待补充", "暂无", "无", "未知", "处理中"} - - def _ensure_draft_claim(self, claim: ExpenseClaim) -> None: - if not self._is_editable_claim_status(claim.status): - raise ValueError("只有草稿、待补充或退回待提交状态的报销单才允许执行该操作。") - - @staticmethod - def _ensure_draft_pending_claim(claim: ExpenseClaim) -> None: - status = str(claim.status or "").strip().lower() - if status != "draft": - raise ValueError("只有草稿待提交状态的报销单才允许编辑附加说明。") - - @staticmethod - def _ensure_mutable_claim_item(item: ExpenseClaimItem) -> None: - if str(item.item_type or "").strip().lower() in SYSTEM_GENERATED_ITEM_TYPES: - raise ValueError("系统自动计算的费用明细不可手动修改。") - - def _delete_claim_assistant_sessions(self, claim_id: str | None) -> None: - from app.services.agent_conversations import AgentConversationService - - AgentConversationService(self.db).delete_conversations_for_draft_claim( - claim_id=claim_id, - source="user_message", - session_type="expense", - ) - - def _run_ai_submission_review(self, claim: ExpenseClaim) -> dict[str, Any]: - base_flags = list(claim.risk_flags_json or []) - attachment_flags = [ - flag - for flag in base_flags - if 
isinstance(flag, dict) and str(flag.get("source") or "").strip() == "attachment_analysis" - ] - preserved_flags = [ - flag - for flag in base_flags - if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "submission_review") - ] - - review_flags: list[dict[str, Any]] = [] - attention_reasons: list[str] = [] - - high_attachment_flags = [ - flag - for flag in attachment_flags - if str(flag.get("severity") or "").strip().lower() == "high" - ] - medium_attachment_flags = [ - flag - for flag in attachment_flags - if str(flag.get("severity") or "").strip().lower() == "medium" - ] - if high_attachment_flags: - attention_reasons.append("存在高风险票据,需审批人重点复核。") - review_flags.append( - { - "source": "submission_review", - "severity": "high", - "label": "AI预审重点复核", - "message": ( - f"AI预审发现 {len(high_attachment_flags)} 条高风险附件," - "已随单流转给审批人重点复核。" - ), - } - ) - elif medium_attachment_flags: - review_flags.append( - { - "source": "submission_review", - "severity": "medium", - "label": "AI预审提醒", - "message": f"AI预审发现 {len(medium_attachment_flags)} 条中风险附件,已随单流转给审批人复核。", - } - ) - - manager_name = self._resolve_claim_manager_name(claim) - if not manager_name: - attention_reasons.append("未识别到该员工的直属领导,需审批环节补充分配。") - review_flags.append( - { - "source": "submission_review", - "severity": "medium", - "label": "审批链待分配", - "message": "AI预审发现直属领导缺失,已提交到审批环节等待分配或复核。", - } - ) - - historical_risk_count = self._count_recent_risky_claims(claim) - if historical_risk_count >= AI_REVIEW_REPEAT_RISK_BLOCK_COUNT: - review_flags.append( - { - "source": "submission_review", - "severity": "medium", - "label": "历史风险偏高", - "message": ( - f"近 {AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {historical_risk_count} 笔带风险标记的报销," - "本次已追加到审批链重点关注。" - ), - } - ) - elif historical_risk_count >= AI_REVIEW_REPEAT_RISK_WARNING_COUNT: - review_flags.append( - { - "source": "submission_review", - "severity": "low", - "label": "历史风险提醒", - "message": ( - f"近 {AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 
{historical_risk_count} 笔带风险标记的报销," - "建议直属领导重点复核。" - ), - } - ) - - travel_review = self._run_travel_policy_review(claim) - attention_reasons.extend(travel_review["blocking_reasons"]) - review_flags.extend(travel_review["flags"]) - - scene_policy_review = self._run_scene_policy_review(claim) - attention_reasons.extend(scene_policy_review["blocking_reasons"]) - review_flags.extend(scene_policy_review["flags"]) - - platform_risk_review = self.evaluate_platform_risk_rules(claim) - attention_reasons.extend(platform_risk_review["blocking_reasons"]) - review_flags.extend(platform_risk_review["flags"]) - - if attention_reasons: - summary_message = "AI预审发现需审批重点关注事项:" + ";".join( - dict.fromkeys(attention_reasons) - ) - review_flags.insert( - 0, - { - "source": "submission_review", - "severity": "medium", - "label": "AI预审重点复核", - "message": summary_message, - }, - ) - - return { - "status": "submitted", - "approval_stage": "直属领导审批", - "risk_flags": preserved_flags + review_flags, - "message": ( - f"报销单 {claim.claim_no} 已完成 AI预审," - f"现已提交给直属领导 {manager_name or '审批人'} 审批。" - ), - "passed": True, - } - - @staticmethod - def _resolve_claim_manager_name(claim: ExpenseClaim) -> str: - if claim.employee is not None: - if claim.employee.manager is not None and claim.employee.manager.name: - return str(claim.employee.manager.name).strip() - if claim.employee.organization_unit is not None and claim.employee.organization_unit.manager_name: - return str(claim.employee.organization_unit.manager_name).strip() - return "" - - def _count_recent_risky_claims(self, claim: ExpenseClaim) -> int: - filters = [] - if claim.employee_id: - filters.append(ExpenseClaim.employee_id == claim.employee_id) - elif claim.employee_name: - filters.append(ExpenseClaim.employee_name == claim.employee_name) - if not filters: - return 0 - - since = datetime.now(UTC) - timedelta(days=AI_REVIEW_LOOKBACK_DAYS) - stmt = ( - select(ExpenseClaim) - .where(or_(*filters)) - .where(ExpenseClaim.id != claim.id) - 
.where(ExpenseClaim.occurred_at >= since) - ) - recent_claims = list(self.db.scalars(stmt).all()) - return sum(1 for item in recent_claims if list(item.risk_flags_json or [])) - - def evaluate_platform_risk_rules( - self, - claim: ExpenseClaim, - *, - rule_codes: list[str] | None = None, - ) -> dict[str, list[Any]]: - manifests = self._load_platform_risk_rule_manifests(rule_codes=rule_codes) - if not manifests: - return {"flags": [], "blocking_reasons": []} - - contexts = self._build_claim_attachment_contexts(claim) - flags: list[dict[str, Any]] = [] - blocking_reasons: list[str] = [] - - for manifest in manifests: - if not self._risk_manifest_applies_to_claim(manifest, claim=claim, contexts=contexts): - continue - - flag = self._evaluate_platform_risk_manifest( - manifest, - claim=claim, - contexts=contexts, - ) - if flag is None: - continue - - flags.append(flag) - severity = str(flag.get("severity") or "").strip().lower() - action = str(flag.get("action") or "").strip().lower() - if severity == "high" or action == "block": - blocking_reasons.append(str(flag.get("message") or flag.get("label") or "").strip()) - - deduplicated_reasons = list( - dict.fromkeys(reason for reason in blocking_reasons if reason) - ) - return {"flags": flags, "blocking_reasons": deduplicated_reasons} - - def _load_platform_risk_rule_manifests( - self, - *, - rule_codes: list[str] | None, - ) -> list[dict[str, Any]]: - code_filter = { - str(code or "").strip() - for code in list(rule_codes or []) - if str(code or "").strip() - } - manifests_by_code: dict[str, dict[str, Any]] = {} - - assets = list( - self.db.scalars( - select(AgentAsset) - .where(AgentAsset.asset_type == AgentAssetType.RULE.value) - .where(AgentAsset.status == AgentAssetStatus.ACTIVE.value) - .where(AgentAsset.domain == AgentAssetDomain.EXPENSE.value) - .order_by(AgentAsset.updated_at.desc(), AgentAsset.created_at.desc()) - ).all() - ) - library_manager = AgentAssetRuleLibraryManager() - - for asset in assets: - 
config_json = asset.config_json if isinstance(asset.config_json, dict) else {} - if str(config_json.get("detail_mode") or "").strip().lower() != "json_risk": - continue - rule_code = str(asset.code or "").strip() - if code_filter and rule_code not in code_filter: - continue - - rule_document = config_json.get("rule_document") - if not isinstance(rule_document, dict): - continue - file_name = str(rule_document.get("file_name") or "").strip() - rule_library = ( - str(config_json.get("rule_library") or RISK_RULES_LIBRARY).strip() - or RISK_RULES_LIBRARY - ) - if not file_name: - continue - - try: - payload = library_manager.read_rule_library_json( - library=rule_library, - file_name=file_name, - ) - except (FileNotFoundError, ValueError): - continue - - manifest_code = str(payload.get("rule_code") or rule_code).strip() - if not manifest_code or (code_filter and manifest_code not in code_filter): - continue - if payload.get("enabled") is False: - continue - - payload = dict(payload) - payload.setdefault("rule_code", manifest_code) - payload["_rule_version"] = str( - asset.published_version or asset.current_version or "v1.0.0" - ) - payload["_rule_asset_id"] = asset.id - manifests_by_code[manifest_code] = payload - - missing_codes = code_filter - set(manifests_by_code) - should_load_fallback = not code_filter or bool(missing_codes) - if should_load_fallback: - try: - files = library_manager.list_rule_library_json_files(library=RISK_RULES_LIBRARY) - except ValueError: - files = [] - for file_name in files: - try: - payload = library_manager.read_rule_library_json( - library=RISK_RULES_LIBRARY, - file_name=file_name, - ) - except (FileNotFoundError, ValueError): - continue - rule_code = str(payload.get("rule_code") or "").strip() - if not rule_code or rule_code in manifests_by_code: - continue - if code_filter and rule_code not in missing_codes: - continue - if payload.get("enabled") is False: - continue - payload = dict(payload) - payload["_rule_version"] = "v1.0.0" - 
manifests_by_code[rule_code] = payload - - return list(manifests_by_code.values()) - - def _risk_manifest_applies_to_claim( - self, - manifest: dict[str, Any], - *, - claim: ExpenseClaim, - contexts: list[dict[str, Any]], - ) -> bool: - applies_to = manifest.get("applies_to") - if not isinstance(applies_to, dict): - applies_to = {} - - try: - min_attachments = int(applies_to.get("min_attachments") or 0) - except (TypeError, ValueError): - min_attachments = 0 - if min_attachments and int(claim.invoice_count or 0) < min_attachments and not contexts: - return False - - expense_types = { - str(claim.expense_type or "").strip().lower(), - *{ - str(item.item_type or "").strip().lower() - for item in list(claim.items or []) - if str(item.item_type or "").strip() - }, - } - domains = { - str(value or "").strip().lower() - for value in list(applies_to.get("domains") or []) - if str(value or "").strip() - } - configured_expense_types = { - str(value or "").strip().lower() - for value in list(applies_to.get("expense_types") or []) - if str(value or "").strip() - } - - if configured_expense_types and not (expense_types & configured_expense_types): - return False - if domains and not self._risk_domains_match_claim( - domains, - expense_types=expense_types, - contexts=contexts, - ): - return False - - return True - - def _risk_domains_match_claim( - self, - domains: set[str], - *, - expense_types: set[str], - contexts: list[dict[str, Any]], - ) -> bool: - normalized_contexts: list[dict[str, str]] = [] - for context in contexts: - document_info = context.get("document_info") or {} - normalized_contexts.append( - { - "scene_code": str(document_info.get("scene_code") or "").strip().lower(), - "document_type": str( - document_info.get("document_type") or "" - ).strip().lower(), - "item_type": str( - getattr(context.get("item"), "item_type", "") or "" - ).strip().lower(), - } - ) - - if "travel" in domains: - if expense_types & {"travel", "hotel", "transport"}: - return True - if 
any( - item["scene_code"] in {"travel", "hotel", "transport"} - or item["document_type"] - in { - "flight_itinerary", - "train_ticket", - "hotel_invoice", - "taxi_receipt", - } - for item in normalized_contexts - ): - return True - if "meal" in domains: - if expense_types & {"meal", "entertainment"}: - return True - if any( - item["scene_code"] == "meal" or item["document_type"] == "meal_receipt" - for item in normalized_contexts - ): - return True - return bool(domains & expense_types) - - def _evaluate_platform_risk_manifest( - self, - manifest: dict[str, Any], - *, - claim: ExpenseClaim, - contexts: list[dict[str, Any]], - ) -> dict[str, Any] | None: - evaluator = str(manifest.get("evaluator") or "").strip().lower() - if evaluator == "reason_too_brief": - return self._evaluate_reason_too_brief_risk(manifest, claim=claim) - if evaluator == "entertainment_reason_missing": - return self._evaluate_entertainment_reason_missing_risk(manifest, claim=claim) - if evaluator == "document_expense_mismatch": - return self._evaluate_document_expense_mismatch_risk( - manifest, - claim=claim, - contexts=contexts, - ) - if evaluator == "location_consistency": - return self._evaluate_location_consistency_risk( - manifest, - claim=claim, - contexts=contexts, - ) - if evaluator == "duplicate_invoice": - return self._evaluate_duplicate_invoice_risk(manifest, claim=claim, contexts=contexts) - if evaluator == "identity_consistency": - return self._evaluate_identity_consistency_risk( - manifest, - claim=claim, - contexts=contexts, - ) - if evaluator == "cross_year_invoice": - return self._evaluate_cross_year_invoice_risk(manifest, claim=claim, contexts=contexts) - if evaluator == "void_or_red_invoice": - return self._evaluate_text_keyword_risk( - manifest, - contexts=contexts, - keywords=["作废", "红冲", "红字", "冲红"], - fallback_message="票据文本中出现作废、红冲或红字发票相关信息,建议退回补充或人工复核。", - ) - if evaluator == "vague_goods_description": - return self._evaluate_text_keyword_risk( - manifest, - 
contexts=contexts, - keywords=["详见清单", "服务费", "咨询费", "其他", "办公用品"], - fallback_message="票据商品或服务描述较笼统,建议审批人核对真实用途和明细清单。", - ) - if evaluator == "multi_city_reason_required": - return self._evaluate_multi_city_reason_required_risk( - manifest, - claim=claim, - contexts=contexts, - ) - return None - - def _evaluate_reason_too_brief_risk( - self, - manifest: dict[str, Any], - *, - claim: ExpenseClaim, - ) -> dict[str, Any] | None: - params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {} - try: - min_reason_length = max(1, int(params.get("min_reason_length") or 6)) - except (TypeError, ValueError): - min_reason_length = 6 - reason_corpus = re.sub(r"\s+", "", self._build_scene_reason_corpus(claim)) - if len(reason_corpus) >= min_reason_length: - return None - return self._build_platform_risk_flag( - manifest, - message=f"报销事由有效描述不足 {min_reason_length} 个字符,暂不足以支撑真实性判断。", - evidence={"reason_length": len(reason_corpus), "min_reason_length": min_reason_length}, - ) - - def _evaluate_entertainment_reason_missing_risk( - self, - manifest: dict[str, Any], - *, - claim: ExpenseClaim, - ) -> dict[str, Any] | None: - expense_types = { - str(claim.expense_type or "").strip().lower(), - *{str(item.item_type or "").strip().lower() for item in list(claim.items or [])}, - } - reason_corpus = self._build_scene_reason_corpus(claim) - compact_reason = re.sub(r"\s+", "", reason_corpus) - looks_like_entertainment = ( - "entertainment" in expense_types - or "招待" in compact_reason - or "客户" in compact_reason - ) - if not looks_like_entertainment: - return None - required_keywords = ("客户", "项目", "参与", "人员", "对象", "商务", "会议") - has_detail = any(keyword in compact_reason for keyword in required_keywords) - if has_detail: - return None - return self._build_platform_risk_flag( - manifest, - message="招待或餐饮类费用未识别到客户、项目、参与人员等必要说明,建议补充后再流转。", - evidence={"reason": reason_corpus[:300]}, - ) - - def _evaluate_document_expense_mismatch_risk( - self, - manifest: dict[str, 
Any], - *, - claim: ExpenseClaim, - contexts: list[dict[str, Any]], - ) -> dict[str, Any] | None: - mismatches: list[str] = [] - for context in contexts: - item = context["item"] - item_type = ( - str(item.item_type or claim.expense_type or "other").strip().lower() - or "other" - ) - policy = self._get_expense_scene_policy(item_type) - if policy is None: - continue - document_info = context.get("document_info") or {} - recognized_scene_code = ( - str(document_info.get("scene_code") or "other").strip().lower() - or "other" - ) - recognized_document_type = ( - str(document_info.get("document_type") or "other").strip().lower() - or "other" - ) - if ( - recognized_scene_code in set(policy.allowed_scene_codes) - or recognized_document_type in set(policy.allowed_document_types) - ): - continue - recognized_label = str( - document_info.get("document_type_label") - or recognized_document_type - or "未知票据" - ) - mismatches.append(f"第 {context['index']} 条明细为{policy.label},附件识别为{recognized_label}") - - if not mismatches: - return None - return self._build_platform_risk_flag( - manifest, - message=";".join(mismatches[:3]) + ",与当前费用场景不匹配。", - evidence={"mismatches": mismatches[:5]}, - ) - - def _evaluate_location_consistency_risk( - self, - manifest: dict[str, Any], - *, - claim: ExpenseClaim, - contexts: list[dict[str, Any]], - ) -> dict[str, Any] | None: - policy = self._get_expense_rule_catalog().travel_policy - if policy is None: - return None - declared_cities = self._extract_known_cities_from_text( - " ".join( - [ - str(claim.location or ""), - *[str(item.item_location or "") for item in list(claim.items or [])], - ] - ), - policy, - ) - evidence_cities = self._collect_attachment_cities(contexts, policy) - if not declared_cities or not evidence_cities: - return None - if set(declared_cities) & set(evidence_cities): - return None - declared_text = "、".join(declared_cities) - evidence_text = "、".join(evidence_cities[:5]) - return self._build_platform_risk_flag( - manifest, - 
message=f"申报地点 {declared_text} 与票据识别地点 {evidence_text} 不一致,建议补充异地说明或更换附件。", - evidence={"declared_cities": declared_cities, "evidence_cities": evidence_cities}, - ) - - def _evaluate_duplicate_invoice_risk( - self, - manifest: dict[str, Any], - *, - claim: ExpenseClaim, - contexts: list[dict[str, Any]], - ) -> dict[str, Any] | None: - invoice_keys = self._collect_invoice_keys_from_contexts(contexts) - duplicate_keys = [ - key - for key, count in self._count_values(invoice_keys).items() - if count > 1 - ] - if duplicate_keys: - return self._build_platform_risk_flag( - manifest, - message=f"当前报销单内存在重复票据号码:{'、'.join(duplicate_keys[:3])}。", - evidence={"duplicate_invoice_keys": duplicate_keys[:5]}, - ) - - if not invoice_keys: - return None - - other_items = list( - self.db.scalars( - select(ExpenseClaimItem) - .where(ExpenseClaimItem.claim_id != claim.id) - .where(ExpenseClaimItem.invoice_id.is_not(None)) - ).all() - ) - matched_claim_ids: set[str] = set() - for other_item in other_items: - other_path = self._resolve_attachment_path(other_item.invoice_id) - if other_path is None or not other_path.exists(): - continue - other_meta = self._read_attachment_meta(other_path) - other_document_info = other_meta.get("document_info") - if not isinstance(other_document_info, dict): - continue - other_keys = self._collect_invoice_keys_from_document_info(other_document_info) - if set(invoice_keys) & set(other_keys): - matched_claim_ids.add(str(other_item.claim_id or "")) - - if not matched_claim_ids: - return None - return self._build_platform_risk_flag( - manifest, - message=f"票据号码已在其他报销单中出现,疑似重复报销:{'、'.join(invoice_keys[:3])}。", - evidence={ - "invoice_keys": invoice_keys[:5], - "matched_claim_ids": sorted(matched_claim_ids)[:5], - }, - ) - - def _evaluate_identity_consistency_risk( - self, - manifest: dict[str, Any], - *, - claim: ExpenseClaim, - contexts: list[dict[str, Any]], - ) -> dict[str, Any] | None: - params = manifest.get("params") if 
isinstance(manifest.get("params"), dict) else {} - allow_keywords = [ - str(value) - for value in list(params.get("allow_keywords") or []) - if str(value).strip() - ] - claimant = str(claim.employee_name or "").strip() - if not claimant: - return None - mismatched_buyers: list[str] = [] - for context in contexts: - buyer = self._resolve_first_document_field_value( - context.get("document_info") or {}, - keys={"buyer_name", "buyer", "purchaser_name", "claimant"}, - labels={"购买方", "抬头", "买方", "购方"}, - ) - if not buyer: - continue - if claimant in buyer or any(keyword in buyer for keyword in allow_keywords): - continue - mismatched_buyers.append(buyer) - if not mismatched_buyers: - return None - return self._build_platform_risk_flag( - manifest, - message=f"发票抬头 {mismatched_buyers[0]} 与报销人 {claimant} 不一致,建议人工复核。", - evidence={"claimant": claimant, "buyers": mismatched_buyers[:5]}, - ) - - def _evaluate_cross_year_invoice_risk( - self, - manifest: dict[str, Any], - *, - claim: ExpenseClaim, - contexts: list[dict[str, Any]], - ) -> dict[str, Any] | None: - claim_year = claim.occurred_at.year if claim.occurred_at is not None else None - if claim_year is None: - return None - issue_years: list[int] = [] - for context in contexts: - text = " ".join( - [ - self._resolve_first_document_field_value( - context.get("document_info") or {}, - keys={"date", "issue_date", "invoice_date"}, - labels={"日期", "开票日期", "发生时间"}, - ), - str(context.get("ocr_summary") or ""), - str(context.get("ocr_text") or ""), - ] - ) - for match in re.findall(r"(20\d{2}|19\d{2})[年/\-.]", text): - try: - issue_years.append(int(match)) - except ValueError: - continue - mismatch_years = sorted({year for year in issue_years if year != claim_year}) - if not mismatch_years: - return None - return self._build_platform_risk_flag( - manifest, - message=f"票据年份 {mismatch_years[0]} 与费用发生年份 {claim_year} 不一致,建议确认是否跨年报销。", - evidence={"claim_year": claim_year, "invoice_years": mismatch_years}, - ) - - def 
_evaluate_text_keyword_risk( - self, - manifest: dict[str, Any], - *, - contexts: list[dict[str, Any]], - keywords: list[str], - fallback_message: str, - ) -> dict[str, Any] | None: - matched: list[str] = [] - for context in contexts: - text = f"{context.get('ocr_summary') or ''}\n{context.get('ocr_text') or ''}" - for keyword in keywords: - if keyword in text and keyword not in matched: - matched.append(keyword) - if not matched: - return None - return self._build_platform_risk_flag( - manifest, - message=fallback_message, - evidence={"matched_keywords": matched}, - ) - - def _evaluate_multi_city_reason_required_risk( - self, - manifest: dict[str, Any], - *, - claim: ExpenseClaim, - contexts: list[dict[str, Any]], - ) -> dict[str, Any] | None: - policy = self._get_expense_rule_catalog().travel_policy - if policy is None: - return None - cities = self._collect_attachment_cities(contexts, policy) - for item in list(claim.items or []): - for city in self._extract_known_cities_from_text(str(item.item_location or ""), policy): - if city not in cities: - cities.append(city) - if len(cities) <= 2: - return None - reason_corpus = self._build_travel_reason_corpus(claim) - if self._text_contains_keywords(reason_corpus, policy.route_exception_keywords): - return None - return self._build_platform_risk_flag( - manifest, - message=f"本次报销识别到多城市行程({'、'.join(cities[:5])}),但事由中未说明中转、多地拜访或改签原因。", - evidence={"cities": cities[:8]}, - ) - - def _build_platform_risk_flag( - self, - manifest: dict[str, Any], - *, - message: str, - evidence: dict[str, Any], - ) -> dict[str, Any]: - outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {} - fail_outcome = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {} - severity = str(fail_outcome.get("severity") or "medium").strip().lower() or "medium" - default_action = "block" if severity == "high" else "manual_review" - action = str(fail_outcome.get("action") or default_action).strip() - label 
= str(manifest.get("name") or manifest.get("rule_code") or "风险规则命中").strip() - - return { - "source": "submission_review", - "hit_source": "rule_center", - "rule_type": "risk", - "rule_code": str(manifest.get("rule_code") or "").strip(), - "rule_version": str(manifest.get("_rule_version") or "v1.0.0").strip(), - "severity": severity, - "action": action, - "label": label, - "message": message, - "evidence": evidence, - } - - @staticmethod - def _count_values(values: list[str]) -> dict[str, int]: - counts: dict[str, int] = {} - for value in values: - normalized = str(value or "").strip() - if not normalized: - continue - counts[normalized] = counts.get(normalized, 0) + 1 - return counts - - def _collect_invoice_keys_from_contexts(self, contexts: list[dict[str, Any]]) -> list[str]: - invoice_keys: list[str] = [] - for context in contexts: - document_info = context.get("document_info") or {} - for key in self._collect_invoice_keys_from_document_info(document_info): - if key not in invoice_keys: - invoice_keys.append(key) - return invoice_keys - - def _collect_invoice_keys_from_document_info(self, document_info: dict[str, Any]) -> list[str]: - keys: list[str] = [] - for field in list(document_info.get("fields") or []): - if not isinstance(field, dict): - continue - field_key = str(field.get("key") or "").strip().lower().replace("_", "") - label = str(field.get("label") or "").replace(" ", "") - value = str(field.get("value") or "").strip() - if not value: - continue - if field_key in {"invoiceno", "invoicenumber", "number", "code"} or any( - token in label for token in ("发票号码", "票号", "发票代码", "号码") - ): - normalized = re.sub(r"\s+", "", value) - if normalized and normalized not in keys: - keys.append(normalized) - return keys - - def _collect_attachment_cities( - self, - contexts: list[dict[str, Any]], - policy: RuntimeTravelPolicy, - ) -> list[str]: - cities: list[str] = [] - for context in contexts: - document_info = context.get("document_info") or {} - parts = [ - 
str(context.get("ocr_summary") or ""), - str(context.get("ocr_text") or ""), - str(context.get("item").item_location if context.get("item") is not None else ""), - ] - for field in list(document_info.get("fields") or []): - if isinstance(field, dict): - parts.append(str(field.get("value") or "")) - for city in self._extract_known_cities_from_text(" ".join(parts), policy): - if city not in cities: - cities.append(city) - return cities - - @staticmethod - def _extract_known_cities_from_text(text: str, policy: RuntimeTravelPolicy) -> list[str]: - normalized = str(text or "").strip() - if not normalized: - return [] - cities: list[str] = [] - for city in sorted(policy.city_tiers.keys(), key=lambda item: len(item), reverse=True): - if city in normalized and city not in cities: - cities.append(city) - return cities - - @staticmethod - def _resolve_first_document_field_value( - document_info: dict[str, Any], - *, - keys: set[str], - labels: set[str], - ) -> str: - normalized_keys = {key.replace("_", "").lower() for key in keys} - for field in list(document_info.get("fields") or []): - if not isinstance(field, dict): - continue - field_key = str(field.get("key") or "").strip().lower().replace("_", "") - label = str(field.get("label") or "").replace(" ", "") - value = str(field.get("value") or "").strip() - if not value: - continue - if field_key in normalized_keys or any(token in label for token in labels): - return value - return "" - - def _run_scene_policy_review(self, claim: ExpenseClaim) -> dict[str, list[Any]]: - catalog = self._get_expense_rule_catalog() - flags: list[dict[str, Any]] = [] - blocking_reasons: list[str] = [] - reason_corpus = self._build_scene_reason_corpus(claim) - scene_totals: dict[str, Decimal] = defaultdict(lambda: Decimal("0.00")) - scene_warned: set[str] = set() - - for item in claim.items: - item_type = str(item.item_type or claim.expense_type or "other").strip().lower() or "other" - policy = catalog.get_scene_policy(item_type) - if policy is 
None: - continue - - scene_totals[item_type] += Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) - - if policy.always_warn and item_type not in scene_warned: - scene_warned.add(item_type) - flags.append( - { - "source": "submission_review", - "severity": "medium", - "label": f"{policy.label}人工重点复核", - "message": policy.always_warn_message or f"{policy.label}默认需要人工重点复核。", - "rule_code": policy.rule_code, - } - ) - - item_limit = policy.item_amount_limit - item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01")) - if item_limit is not None and item_amount > Decimal("0.00"): - exceeded = self._evaluate_amount_limit( - amount=item_amount, - limit_config=item_limit, - reason_text="\n".join( - part - for part in [reason_corpus, str(item.item_reason or "").strip()] - if part - ), - ) - if exceeded is not None: - severity, threshold = exceeded - label = ( - f"{policy.label}金额超标待说明" - if severity == "high" - else f"{policy.label}金额超标提醒" - ) - message = ( - f"{policy.label}当前识别金额为 {item_amount} 元," - f"已超过制度阈值 {threshold} 元。" - ) - if severity == "high": - message += " 当前未识别到例外说明,请先补充原因。" - blocking_reasons.append(f"{policy.label}金额超出制度阈值,且未补充例外说明。") - else: - message += " 已识别到例外说明,请审批人重点复核。" - flags.append( - { - "source": "submission_review", - "severity": severity, - "label": label, - "message": message, - "rule_code": policy.rule_code, - } - ) - - for scene_code, total_amount in scene_totals.items(): - policy = catalog.get_scene_policy(scene_code) - if policy is None or policy.claim_amount_limit is None or total_amount <= Decimal("0.00"): - continue - exceeded = self._evaluate_amount_limit( - amount=total_amount, - limit_config=policy.claim_amount_limit, - reason_text=reason_corpus, - ) - if exceeded is None: - continue - - severity, threshold = exceeded - label = f"{policy.label}合计超标待说明" if severity == "high" else f"{policy.label}合计超标提醒" - message = ( - f"{policy.label}当前合计金额为 {total_amount} 元," - f"已超过制度阈值 {threshold} 
元。" - ) - if severity == "high": - message += " 当前未识别到例外说明,请先补充原因。" - blocking_reasons.append(f"{policy.label}合计金额超出制度阈值,且未补充例外说明。") - else: - message += " 已识别到例外说明,请审批人重点复核。" - flags.append( - { - "source": "submission_review", - "severity": severity, - "label": label, - "message": message, - "rule_code": policy.rule_code, - } - ) - - return { - "flags": flags, - "blocking_reasons": list(dict.fromkeys(reason for reason in blocking_reasons if reason)), - } - - @staticmethod - def _evaluate_amount_limit( - *, - amount: Decimal, - limit_config: Any, - reason_text: str, - ) -> tuple[str, Decimal] | None: - block_amount = getattr(limit_config, "block_amount", None) - warn_amount = getattr(limit_config, "warn_amount", None) - exception_keywords = list(getattr(limit_config, "exception_keywords", []) or []) - has_exception = ExpenseClaimService._text_contains_keywords(reason_text, exception_keywords) - - if block_amount is not None and amount > Decimal(block_amount): - return ("medium" if has_exception else "high", Decimal(block_amount)) - if warn_amount is not None and amount > Decimal(warn_amount): - return ("medium", Decimal(warn_amount)) - return None - - def _run_travel_policy_review(self, claim: ExpenseClaim) -> dict[str, list[Any]]: - policy = self._get_expense_rule_catalog().travel_policy - if policy is None: - return {"flags": [], "blocking_reasons": []} - contexts = [ - context - for context in self._build_claim_attachment_contexts(claim) - if self._is_travel_policy_relevant_context(context, policy) - ] - if not contexts: - return {"flags": [], "blocking_reasons": []} - - reason_corpus = self._build_travel_reason_corpus(claim) - has_route_exception = self._text_contains_keywords( - reason_corpus, - policy.route_exception_keywords, - ) - has_standard_exception = self._text_contains_keywords( - reason_corpus, - policy.standard_exception_keywords, - ) - grade_band = self._resolve_travel_policy_band(claim.employee_grade) - band_label = 
policy.band_labels.get(grade_band or "", str(claim.employee_grade or "").strip() or "当前职级") - - itinerary_segments: list[dict[str, Any]] = [] - itinerary_cities: list[str] = [] - hotel_contexts: list[dict[str, Any]] = [] - flags: list[dict[str, Any]] = [] - blocking_reasons: list[str] = [] - - for context in contexts: - route_segment = self._extract_route_segment(context, policy) - if route_segment and self._is_long_distance_travel_context(context, policy): - itinerary_segments.append( - { - "item": context["item"], - "origin": route_segment[0], - "destination": route_segment[1], - } - ) - itinerary_cities.extend([route_segment[0], route_segment[1]]) - - scene_code = str(context["document_info"].get("scene_code") or "").strip().lower() - document_type = str(context["document_info"].get("document_type") or "").strip().lower() - item_type = str(context["item"].item_type or "").strip().lower() - if "hotel" in {scene_code, document_type, item_type} or document_type == "hotel_invoice": - hotel_contexts.append(context) - - unique_itinerary_cities = list(dict.fromkeys(city for city in itinerary_cities if city)) - expected_destination_city = self._resolve_expected_travel_city( - claim, - contexts, - unique_itinerary_cities, - policy, - ) - - if itinerary_segments: - unique_destinations = list( - dict.fromkeys(segment["destination"] for segment in itinerary_segments if segment["destination"]) - ) - first_origin = str(itinerary_segments[0]["origin"] or "").strip() - last_destination = str(itinerary_segments[-1]["destination"] or "").strip() - - for previous, current in zip(itinerary_segments, itinerary_segments[1:]): - previous_destination = str(previous["destination"] or "").strip() - current_origin = str(current["origin"] or "").strip() - if previous_destination and current_origin and previous_destination != current_origin: - message = ( - f"差旅行程未形成连续链路:上一段到达 {previous_destination}," - f"下一段却从 {current_origin} 出发,请补充中转或改签说明。" - ) - flags.append( - { - "source": 
"submission_review", - "severity": "high", - "label": "行程闭环异常", - "message": message, - "rule_code": policy.rule_code, - } - ) - blocking_reasons.append("差旅行程未形成连续闭环,请补充中转、改签或异地出发原因。") - break - - if ( - expected_destination_city - and last_destination - and last_destination not in {expected_destination_city, first_origin} - ): - message = ( - f"差旅行程终点识别为 {last_destination}," - f"与申报目的地 {expected_destination_city} 不一致,请补充多地出差或后续行程说明。" - ) - flags.append( - { - "source": "submission_review", - "severity": "high", - "label": "行程终点异常", - "message": message, - "rule_code": policy.rule_code, - } - ) - blocking_reasons.append("差旅行程终点与申报目的地不一致,请补充多地出差说明或补齐后续票据。") - - expected_city_set = { - city - for city in (expected_destination_city, first_origin) - if city - } - extra_destinations = [ - city - for city in unique_destinations - if city and city not in expected_city_set - ] - if extra_destinations and not has_route_exception: - destinations_text = "、".join(extra_destinations[:3]) - flags.append( - { - "source": "submission_review", - "severity": "high", - "label": "多城市行程待说明", - "message": ( - f"检测到本次差旅涉及 {destinations_text} 多个目的地," - "但当前报销事由未说明中转、多地拜访或改签原因。" - ), - "rule_code": policy.rule_code, - } - ) - blocking_reasons.append("检测到多城市差旅行程,但当前未补充中转或多地出差说明。") - - allowed_hotel_cities = { - city - for city in [expected_destination_city, *unique_itinerary_cities] - if city - } - for context in hotel_contexts: - hotel_city = self._extract_hotel_city(context, policy) - if hotel_city and allowed_hotel_cities and hotel_city not in allowed_hotel_cities: - expected_text = "、".join(sorted(allowed_hotel_cities)) - flags.append( - { - "source": "submission_review", - "severity": "high", - "label": "酒店地点异常", - "message": ( - f"酒店票据识别城市为 {hotel_city}," - f"与当前差旅目的地/行程城市 {expected_text} 不一致,请补充异地住宿原因。" - ), - "rule_code": policy.rule_code, - } - ) - blocking_reasons.append("酒店票据地点与差旅目的地不一致,请补充异地住宿原因或更换附件。") - - if grade_band is None: - continue - - baseline_city = hotel_city or 
expected_destination_city - standard = self._resolve_travel_policy_hotel_standard( - policy=policy, - grade_band=grade_band, - city=baseline_city, - ) - if standard is None: - continue - cap, standard_label = standard - night_count = self._extract_hotel_night_count(context) - item_amount = Decimal(context["item"].item_amount or Decimal("0.00")).quantize(Decimal("0.01")) - nightly_amount = (item_amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01")) - - if nightly_amount <= cap: - continue - - hotel_message = ( - f"{band_label} 职级在{standard_label}的住宿标准为 {cap} 元/晚," - f"当前酒店识别金额约 {nightly_amount} 元/晚。" - ) - item_reason = str(context["item"].item_reason or "").strip() - item_has_exception = self._text_contains_keywords(item_reason, policy.standard_exception_keywords) - if has_standard_exception or item_has_exception: - flags.append( - { - "source": "submission_review", - "severity": "medium", - "label": "住宿超标提醒", - "message": hotel_message + " 已识别到补充说明,请直属领导重点复核。", - "rule_code": policy.rule_code, - } - ) - else: - flags.append( - { - "source": "submission_review", - "severity": "high", - "label": "住宿超标待说明", - "message": hotel_message + " 当前未识别到超标说明,请先补充原因。", - "rule_code": policy.rule_code, - } - ) - blocking_reasons.append("住宿金额超出当前职级差标,且未补充超标说明。") - - if grade_band is not None: - for context in contexts: - transport_class = self._detect_transport_class(context, policy) - if transport_class is None: - continue - - transport_kind, class_label, class_level = transport_class - allowed_level = policy.transport_limits.get(grade_band, {}).get(transport_kind) - if allowed_level is None or class_level <= allowed_level: - continue - - item_reason = str(context["item"].item_reason or "").strip() - item_has_exception = self._text_contains_keywords(item_reason, policy.standard_exception_keywords) - message = f"{band_label} 职级当前默认不可报销 {class_label}。" - if has_standard_exception or item_has_exception: - flags.append( - { - "source": "submission_review", - "severity": 
"medium", - "label": "交通舱位超标提醒", - "message": message + " 已识别到补充说明,请审批人重点复核。", - "rule_code": policy.rule_code, - } - ) - else: - flags.append( - { - "source": "submission_review", - "severity": "high", - "label": "交通舱位超标待说明", - "message": message + " 当前未识别到例外说明,请先补充原因。", - "rule_code": policy.rule_code, - } - ) - blocking_reasons.append("交通舱位或席别超出当前职级差标,且未补充例外说明。") - - return { - "flags": flags, - "blocking_reasons": list(dict.fromkeys(reason for reason in blocking_reasons if reason)), - } - - def _build_claim_attachment_contexts(self, claim: ExpenseClaim) -> list[dict[str, Any]]: - contexts: list[dict[str, Any]] = [] - ordered_items = sorted( - claim.items, - key=lambda item: ( - item.item_date or date.max, - self._normalize_sort_datetime(item.created_at), - ), - ) - for index, item in enumerate(ordered_items, start=1): - file_path = self._resolve_attachment_path(item.invoice_id) - if file_path is None or not file_path.exists(): - continue - - metadata = self._read_attachment_meta(file_path) - document_info = metadata.get("document_info") - contexts.append( - { - "index": index, - "item": item, - "document_info": document_info if isinstance(document_info, dict) else {}, - "ocr_text": str(metadata.get("ocr_text") or ""), - "ocr_summary": str(metadata.get("ocr_summary") or ""), - } - ) - return contexts - - def _is_travel_policy_relevant_context( - self, - context: dict[str, Any], - policy: RuntimeTravelPolicy, - ) -> bool: - item = context.get("item") - document_info = context.get("document_info") or {} - item_type = str(getattr(item, "item_type", "") or "").strip().lower() - scene_code = str(document_info.get("scene_code") or "").strip().lower() - document_type = str(document_info.get("document_type") or "").strip().lower() - return ( - item_type in set(policy.relevant_expense_types) - or scene_code in set(policy.relevant_expense_types) - or document_type in {"hotel_invoice", *set(policy.long_distance_document_types)} - ) - - @staticmethod - def 
_resolve_document_field_value(document_info: dict[str, Any], key: str) -> str: - normalized_key = str(key or "").strip().lower() - for field in list(document_info.get("fields") or []): - if not isinstance(field, dict): - continue - field_key = str(field.get("key") or "").strip().lower() - if field_key == normalized_key: - return str(field.get("value") or "").strip() - return "" - - @staticmethod - def _text_contains_keywords(text: str, keywords: tuple[str, ...] | list[str]) -> bool: - compact = re.sub(r"\s+", "", str(text or "")) - if not compact: - return False - return any(keyword in compact for keyword in keywords) - - def _build_travel_reason_corpus(self, claim: ExpenseClaim) -> str: - parts = [str(claim.reason or "").strip(), str(claim.location or "").strip()] - for item in claim.items: - parts.append(str(item.item_reason or "").strip()) - parts.append(str(item.item_location or "").strip()) - return "\n".join(part for part in parts if part) - - @staticmethod - def _resolve_travel_policy_band(grade: str | None) -> str | None: - normalized = str(grade or "").strip().upper() - if not normalized: - return None - - p_match = re.search(r"P(\d+)", normalized) - if p_match: - level = int(p_match.group(1)) - if level <= 3: - return "junior" - if level <= 5: - return "mid" - return "senior" - - m_match = re.search(r"M(\d+)", normalized) - if m_match: - level = int(m_match.group(1)) - if level <= 2: - return "manager" - return "executive" - - if normalized.startswith("D"): - return "executive" - return None - - def _resolve_expected_travel_city( - self, - claim: ExpenseClaim, - contexts: list[dict[str, Any]], - itinerary_cities: list[str], - policy: RuntimeTravelPolicy, - ) -> str: - claim_city = self._extract_city_from_text(str(claim.location or ""), policy) - if claim_city: - return claim_city - - for context in contexts: - hotel_city = self._extract_hotel_city(context, policy) - if hotel_city: - return hotel_city - - if len(itinerary_cities) >= 2 and 
itinerary_cities[1]: - return itinerary_cities[1] - for city in itinerary_cities: - if city: - return city - return "" - - def _extract_route_segment( - self, - context: dict[str, Any], - policy: RuntimeTravelPolicy, - ) -> tuple[str, str] | None: - document_info = context["document_info"] - route_value = self._resolve_document_field_value(document_info, "route") - if not route_value or "-" not in route_value: - return None - - origin_text, destination_text = [segment.strip() for segment in route_value.split("-", 1)] - origin_city = self._extract_city_from_text(origin_text, policy) - destination_city = self._extract_city_from_text(destination_text, policy) - if not origin_city or not destination_city or origin_city == destination_city: - return None - return origin_city, destination_city - - def _extract_hotel_city(self, context: dict[str, Any], policy: RuntimeTravelPolicy) -> str: - document_info = context["document_info"] - item = context["item"] - merchant_name = self._resolve_document_field_value(document_info, "merchant_name") - for candidate in ( - merchant_name, - str(item.item_location or ""), - str(context.get("ocr_summary") or ""), - str(context.get("ocr_text") or ""), - ): - city = self._extract_city_from_text(candidate, policy) - if city: - return city - return "" - - @staticmethod - def _format_travel_policy_city_tier(city_tier: str) -> str: - return { - "tier_1": "一线城市", - "tier_2": "重点城市", - "tier_3": "其他城市", - }.get(str(city_tier or "").strip(), "当前城市") - - def _resolve_travel_policy_hotel_standard( - self, - *, - policy: RuntimeTravelPolicy, - grade_band: str, - city: str, - ) -> tuple[Decimal, str] | None: - normalized_city = str(city or "").strip() - city_limits = getattr(policy, "hotel_city_limits", {}) or {} - city_entry = city_limits.get(normalized_city) if normalized_city else None - if city_entry and city_entry.get(grade_band) is not None: - cap = Decimal(city_entry[grade_band]).quantize(Decimal("0.01")) - return cap, normalized_city - - 
city_tier = (getattr(policy, "city_tiers", {}) or {}).get(normalized_city, "tier_3") - tier_entry = (getattr(policy, "hotel_limits", {}) or {}).get(grade_band, {}) - tier_cap = tier_entry.get(city_tier) - if tier_cap is None: - return None - tier_label = self._format_travel_policy_city_tier(city_tier) - cap = Decimal(tier_cap).quantize(Decimal("0.01")) - return cap, tier_label - - @staticmethod - def _extract_city_from_text(text: str, policy: RuntimeTravelPolicy) -> str: - normalized = str(text or "").strip() - if not normalized: - return "" - city_names = set(policy.city_tiers.keys()) - city_names.update((getattr(policy, "hotel_city_limits", {}) or {}).keys()) - city_match_order = sorted(city_names, key=lambda item: len(item), reverse=True) - for city in city_match_order: - if city in normalized: - return city - return "" - - @staticmethod - def _extract_hotel_night_count(context: dict[str, Any]) -> int: - text = " ".join( - [ - str(context.get("ocr_summary") or "").strip(), - str(context.get("ocr_text") or "").strip(), - ] - ).strip() - match = TRAVEL_POLICY_HOTEL_NIGHT_PATTERN.search(text) - if not match: - return 1 - try: - return max(1, int(match.group(1))) - except (TypeError, ValueError): - return 1 - - def _detect_transport_class( - self, - context: dict[str, Any], - policy: RuntimeTravelPolicy, - ) -> tuple[str, str, int] | None: - document_info = context["document_info"] - document_type = str(document_info.get("document_type") or "").strip().lower() - text = " ".join( - [ - str(context.get("ocr_summary") or "").strip(), - str(context.get("ocr_text") or "").strip(), - ] - ).strip() - compact_text = re.sub(r"\s+", "", text) - if not compact_text: - return None - - if document_type == "flight_itinerary": - for config in policy.flight_classes: - label = str(config.keyword or "").strip() - level = int(config.level) - if label in compact_text: - return "flight", label, level - return None - - if document_type == "train_ticket": - for config in 
policy.train_classes: - label = str(config.keyword or "").strip() - level = int(config.level) - if label in compact_text: - return "train", label, level - return None - - return None - - def _is_long_distance_travel_context( - self, - context: dict[str, Any], - policy: RuntimeTravelPolicy, - ) -> bool: - document_info = context["document_info"] - document_type = str(document_info.get("document_type") or "").strip().lower() - scene_code = str(document_info.get("scene_code") or "").strip().lower() - if document_type in set(policy.long_distance_document_types): - return True - return scene_code == "travel" - - def _sync_travel_allowance_item(self, claim: ExpenseClaim) -> None: - items = list(claim.items or []) - allowance_items = [ - item for item in items if str(item.item_type or "").strip().lower() == "travel_allowance" - ] - business_items = [ - item for item in items if str(item.item_type or "").strip().lower() != "travel_allowance" - ] - business_types = {str(item.item_type or "").strip().lower() for item in business_items} - is_travel_claim = str(claim.expense_type or "").strip().lower() == "travel" - has_travel_detail = bool(business_types & TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES) - if not is_travel_claim and not has_travel_detail: - for item in allowance_items: - self._discard_claim_item(claim, item) - return - - grade = str(claim.employee_grade or "").strip() - if not grade: - return - - allowance_location = self._resolve_travel_allowance_location_from_claim( - claim=claim, - business_items=business_items, - ) - if not allowance_location: - return - - existing_allowance = allowance_items[0] if allowance_items else None - days, start_date, end_date = self._resolve_travel_allowance_days_from_claim( - claim=claim, - business_items=business_items, - existing_allowance=existing_allowance, - ) - if days < 1: - return - - try: - from app.services.travel_reimbursement_calculator import ( - TravelReimbursementCalculatorService, - ) - - result = 
TravelReimbursementCalculatorService(self.db).calculate( - TravelReimbursementCalculatorRequest( - days=days, - location=allowance_location, - grade=grade, - ), - CurrentUserContext( - username=str(claim.employee_id or claim.employee_name or "system"), - name=str(claim.employee_name or ""), - role_codes=[], - is_admin=False, - ), - ) - except ValueError: - return - - allowance_amount = Decimal(result.allowance_amount or Decimal("0.00")).quantize(Decimal("0.01")) - allowance_rate = Decimal(result.total_allowance_rate or Decimal("0.00")).quantize(Decimal("0.01")) - if allowance_amount <= Decimal("0.00") or allowance_rate <= Decimal("0.00"): - return - - item = existing_allowance - if item is None: - item = ExpenseClaimItem(claim_id=claim.id) - claim.items.append(item) - self.db.add(item) - - for duplicate in allowance_items[1:]: - self._discard_claim_item(claim, duplicate) - - item.item_date = end_date - item.item_type = "travel_allowance" - item.item_reason = ( - f"系统自动计算出差补贴:{result.matched_city},{days}天," - f"{allowance_rate:.2f}元/天" - ) - item.item_location = str(result.allowance_region or allowance_location).strip() - item.item_amount = allowance_amount - item.invoice_id = None - - def _discard_claim_item(self, claim: ExpenseClaim, item: ExpenseClaimItem) -> None: - if item in claim.items: - claim.items.remove(item) - state = sqlalchemy_inspect(item) - if state.persistent: - self.db.delete(item) - elif state.pending: - self.db.expunge(item) - - @staticmethod - def _resolve_travel_allowance_days_from_claim( - *, - claim: ExpenseClaim, - business_items: list[ExpenseClaimItem], - existing_allowance: ExpenseClaimItem | None, - ) -> tuple[int, date, date]: - dated_items = sorted( - [item.item_date for item in business_items if item.item_date is not None] - ) - if dated_items: - start_date = dated_items[0] - end_date = dated_items[-1] - elif claim.occurred_at is not None: - start_date = claim.occurred_at.date() - end_date = start_date - else: - start_date = 
date.today() - end_date = start_date - - days = (end_date - start_date).days + 1 - explicit_days = max( - (ExpenseClaimService._extract_travel_day_count(item.item_reason) for item in business_items), - default=0, - ) - if explicit_days > 0: - days = explicit_days - end_date = start_date + timedelta(days=days - 1) - return max(1, days), start_date, end_date - existing_days = ExpenseClaimService._extract_travel_allowance_days(existing_allowance) - unique_dates = {value for value in dated_items} - if existing_days > days and len(unique_dates) <= 1: - days = existing_days - end_date = start_date + timedelta(days=days - 1) - return max(1, days), start_date, end_date - - @staticmethod - def _extract_travel_allowance_days(item: ExpenseClaimItem | None) -> int: - if item is None: - return 0 - match = re.search(r"(\d+)\s*天", str(item.item_reason or "")) - if not match: - return 0 - try: - return max(0, int(match.group(1))) - except ValueError: - return 0 - - @staticmethod - def _resolve_travel_allowance_location_from_claim( - *, - claim: ExpenseClaim, - business_items: list[ExpenseClaimItem], - ) -> str: - claim_location = str(claim.location or "").strip() - if claim_location and claim_location not in {"待补充", "未知", "暂无", "非必填"}: - return claim_location - - sorted_items = sorted( - business_items, - key=lambda item: (item.item_date or date.max, ExpenseClaimService._normalize_sort_datetime(item.created_at)), - ) - for item in sorted_items: - location = str(item.item_location or "").strip() - if location and location not in {"待补充", "未知", "暂无", "非必填"}: - return location - reason = str(item.item_reason or "").strip() - for separator in ("-", "至", "到", "→", "->"): - if separator in reason: - destination = reason.split(separator)[-1].strip() - if destination: - return destination - return "" - - def _sync_claim_from_items(self, claim: ExpenseClaim) -> None: - self._sync_travel_allowance_item(claim) - if not claim.items: - claim.amount = Decimal("0.00") - claim.invoice_count = 0 - 
claim.risk_flags_json = self._merge_claim_attachment_risk_flags(claim, []) - return - - ordered_items = sorted( - claim.items, - key=lambda item: ( - item.item_date or date.max, - self._normalize_sort_datetime(item.created_at), - ), - ) - primary_item = ordered_items[0] - total_amount = sum((item.item_amount for item in ordered_items), Decimal("0.00")) - - claim.amount = total_amount.quantize(Decimal("0.01")) - claim.invoice_count = sum(1 for item in ordered_items if str(item.invoice_id or "").strip()) - claim.occurred_at = datetime( - primary_item.item_date.year, - primary_item.item_date.month, - primary_item.item_date.day, - tzinfo=UTC, - ) - claim.expense_type = self._resolve_claim_expense_type_from_items( - ordered_items, - fallback=str(primary_item.item_type or claim.expense_type or "other").strip() or "other", - ) - primary_item_type = str(primary_item.item_type or "").strip() - if primary_item_type not in DOCUMENT_FACT_ITEM_TYPES: - claim.reason = ( - self._normalize_optional_text(primary_item.item_reason, fallback=claim.reason or "待补充") - or "待补充" - ) - claim.location = ( - self._normalize_optional_text(primary_item.item_location, fallback=claim.location or "待补充") - or "待补充" - ) - claim.risk_flags_json = self._merge_claim_attachment_risk_flags( - claim, - self._build_claim_attachment_risk_flags(ordered_items), - ) - if str(claim.status or "").strip().lower() == "draft": - claim.approval_stage = "待提交" - - @staticmethod - def _resolve_claim_expense_type_from_items( - items: list[ExpenseClaimItem], - *, - fallback: str, - ) -> str: - fallback_type = str(fallback or "").strip() or "other" - item_types = {str(item.item_type or "").strip().lower() for item in items} - if item_types & (TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES | {"travel_allowance"}): - return "travel" - return fallback_type - - def _refresh_item_attachment_analysis(self, item: ExpenseClaimItem) -> None: - file_path = self._resolve_attachment_path(item.invoice_id) - if file_path is None or not 
file_path.exists(): - return - - metadata = self._read_attachment_meta(file_path) - media_type = str(metadata.get("media_type") or self._resolve_attachment_media_type(file_path.name)).strip() - ocr_status = str(metadata.get("ocr_status") or "").strip().lower() - - if ocr_status == "failed": - analysis = self._build_failed_ocr_attachment_analysis( - media_type=media_type, - error_message=str(metadata.get("ocr_error") or ""), - item=item, - ) - elif ocr_status == "recognized" or any( - ( - str(metadata.get("ocr_text") or "").strip(), - str(metadata.get("ocr_summary") or "").strip(), - int(metadata.get("ocr_line_count") or 0), - list(metadata.get("ocr_warnings") or []), - ) - ): - stored_document_info = metadata.get("document_info") - if not isinstance(stored_document_info, dict): - stored_document_info = {} - document = SimpleNamespace( - filename=str(metadata.get("file_name") or file_path.name), - text=str(metadata.get("ocr_text") or ""), - summary=str(metadata.get("ocr_summary") or ""), - avg_score=float(metadata.get("ocr_avg_score") or 0.0), - line_count=int(metadata.get("ocr_line_count") or 0), - document_type=str(stored_document_info.get("document_type") or ""), - document_type_label=str(stored_document_info.get("document_type_label") or ""), - scene_code=str(stored_document_info.get("scene_code") or ""), - scene_label=str(stored_document_info.get("scene_label") or ""), - document_fields=list(stored_document_info.get("fields") or []), - warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()], - ) - document_info = self._build_attachment_document_info(document) - requirement_check = self._build_attachment_requirement_check( - item=item, - document_info=document_info, - ) - analysis = self._build_attachment_analysis( - document=document, - item=item, - claim=getattr(item, "claim", None), - document_info=document_info, - requirement_check=requirement_check, - ) - metadata["document_info"] = document_info - 
metadata["requirement_check"] = requirement_check - else: - analysis = self._build_fallback_attachment_analysis(media_type=media_type, item=item) - - metadata["analysis"] = analysis - self._write_attachment_meta(file_path, metadata) - - def _build_claim_attachment_risk_flags( - self, ordered_items: list[ExpenseClaimItem] - ) -> list[dict[str, Any]]: - derived_flags: list[dict[str, Any]] = [] - for index, item in enumerate(ordered_items, start=1): - file_path = self._resolve_attachment_path(item.invoice_id) - if file_path is None or not file_path.exists(): - continue - - metadata = self._read_attachment_meta(file_path) - analysis = metadata.get("analysis") - if not isinstance(analysis, dict): - continue - - severity = str(analysis.get("severity") or "").strip().lower() - if severity in {"", "pass", "low"}: - continue - - summary = ( - str(analysis.get("summary") or analysis.get("headline") or "").strip() - or "附件存在待核对风险。" - ) - points = [ - str(point or "").strip() - for point in list(analysis.get("points") or []) - if str(point or "").strip() - ] - message_detail = ";".join(points[:3]) if points else summary - label = str( - analysis.get("label") or ("高风险" if severity == "high" else "中风险") - ).strip() - derived_flags.append( - { - "source": "attachment_analysis", - "item_id": item.id, - "severity": severity, - "label": label, - "message": f"费用明细第 {index} 条:{message_detail}", - "summary": summary, - "points": points, - } - ) - return derived_flags - - def _get_expense_rule_catalog(self) -> Any: - cached = getattr(self, "_expense_rule_catalog", None) - if cached is not None: - return cached - - db = getattr(self, "db", None) - if db is None: - catalog = build_default_expense_rule_catalog() - else: - catalog = ExpenseRuleRuntimeService(db).load_catalog() - setattr(self, "_expense_rule_catalog", catalog) - return catalog - - def _get_expense_scene_policy(self, expense_type: str | None) -> Any | None: - return 
self._get_expense_rule_catalog().get_scene_policy(expense_type) - - def _resolve_min_attachment_count(self, expense_type: str | None) -> int: - policy = self._get_expense_scene_policy(expense_type) - if policy is None: - return 1 - return max(0, int(policy.min_attachment_count or 0)) - - def _build_scene_reason_corpus(self, claim: ExpenseClaim) -> str: - parts = [str(claim.reason or "").strip(), str(claim.location or "").strip()] - for item in claim.items: - parts.append(str(item.item_reason or "").strip()) - parts.append(str(item.item_location or "").strip()) - return "\n".join(part for part in parts if part) - - @staticmethod - def _merge_claim_attachment_risk_flags( - claim: ExpenseClaim, - attachment_risk_flags: list[dict[str, Any]], - ) -> list[Any]: - preserved_flags = [ - flag - for flag in list(claim.risk_flags_json or []) - if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "attachment_analysis") - ] - return preserved_flags + attachment_risk_flags - - @staticmethod - def _format_submission_blocked_message(issues: list[str]) -> str: - normalized_issues = [str(issue or "").strip() for issue in issues if str(issue or "").strip()] - if not normalized_issues: - return "AI预审未通过,但没有返回明确原因,请刷新草稿后重试。" - - return "AI预审暂未通过,原因如下:\n" + "\n".join( - f"{index}. 
{issue}" for index, issue in enumerate(normalized_issues, start=1) - ) - - def _validate_claim_for_submission(self, claim: ExpenseClaim) -> list[str]: - issues: list[str] = [] - claim_location_required = self._is_location_required_expense_type(claim.expense_type) - claim_min_attachment_count = self._resolve_min_attachment_count(claim.expense_type) - - if self._is_missing_value(claim.employee_name): - issues.append("申请人未完善") - if self._is_missing_value(claim.department_name): - issues.append("所属部门未完善") - if self._is_missing_value(claim.expense_type): - issues.append("报销类型未完善") - if self._is_missing_value(claim.reason): - issues.append("报销事由未完善") - if claim_location_required and self._is_missing_value(claim.location): - issues.append("业务地点未完善") - if claim.amount is None or claim.amount <= Decimal("0.00"): - issues.append("报销金额未完善") - if claim.occurred_at is None: - issues.append("发生时间未完善") - if int(claim.invoice_count or 0) < claim_min_attachment_count: - issues.append("票据附件数量不足") - if not claim.items: - issues.append("费用明细不能为空") - - for index, item in enumerate(claim.items, start=1): - prefix = f"费用明细第 {index} 条" - is_system_generated = str(item.item_type or "").strip().lower() in SYSTEM_GENERATED_ITEM_TYPES - item_location_required = self._is_location_required_expense_type(item.item_type or claim.expense_type) - if item.item_date is None: - issues.append(f"{prefix}缺少日期") - if self._is_missing_value(item.item_type): - issues.append(f"{prefix}缺少费用项目") - if self._is_missing_value(item.item_reason): - issues.append(f"{prefix}缺少说明") - if item_location_required and self._is_missing_value(item.item_location): - issues.append(f"{prefix}缺少地点") - if item.item_amount is None or item.item_amount <= Decimal("0.00"): - issues.append(f"{prefix}缺少金额") - if not is_system_generated and self._is_missing_value(item.invoice_id): - issues.append(f"{prefix}缺少票据标识") - - return issues - - def _is_location_required_expense_type(self, expense_type: str | None) -> bool: - policy = 
self._get_expense_scene_policy(expense_type) - if policy is None: - return str(expense_type or "").strip().lower() in LOCATION_REQUIRED_EXPENSE_TYPES - return bool(policy.location_required) - - @staticmethod - def _has_privileged_claim_access(current_user: CurrentUserContext) -> bool: - if current_user.is_admin: - return True - role_codes = { - str(item).strip().lower() - for item in current_user.role_codes - if str(item).strip() - } - return bool(role_codes & PRIVILEGED_CLAIM_ROLE_CODES) - - @staticmethod - def _has_claim_delete_access(current_user: CurrentUserContext) -> bool: - if current_user.is_admin: - return True - role_codes = { - str(item).strip().lower() - for item in current_user.role_codes - if str(item).strip() - } - return bool(role_codes & CLAIM_DELETE_ROLE_CODES) - - def _can_return_claim(self, current_user: CurrentUserContext, claim: ExpenseClaim) -> bool: - if self._has_privileged_claim_access(current_user): - return True - - role_codes = self._normalize_role_codes(current_user) - if not (role_codes & APPROVAL_VISIBLE_CLAIM_ROLE_CODES): - return False - if str(claim.status or "").strip().lower() != "submitted": - return False - if str(claim.approval_stage or "").strip() != "直属领导审批": - return False - - current_employee = self._resolve_current_employee(current_user) - if current_employee is not None and str(claim.employee_id or "").strip() == current_employee.id: - return False - - claim_employee = claim.employee - if current_employee is not None and claim_employee is not None: - if claim_employee.manager_id == current_employee.id: - return True - if claim_employee.manager is not None and claim_employee.manager.id == current_employee.id: - return True - - approver_name = str( - current_employee.name if current_employee is not None and current_employee.name else current_user.name or "" - ).strip() - if not approver_name: - return False - - return self._resolve_claim_manager_name(claim) == approver_name - - def _can_approve_claim(self, current_user: 
CurrentUserContext, claim: ExpenseClaim) -> bool: - stage = str(claim.approval_stage or "").strip() - if stage == "直属领导审批": - return self._is_current_direct_manager_approver(current_user, claim) - if stage == "财务审批": - role_codes = self._normalize_role_codes(current_user) - return current_user.is_admin or "finance" in role_codes - return False - - def _is_current_direct_manager_approver(self, current_user: CurrentUserContext, claim: ExpenseClaim) -> bool: - role_codes = self._normalize_role_codes(current_user) - if not (role_codes & APPROVAL_VISIBLE_CLAIM_ROLE_CODES): - return False - if str(claim.status or "").strip().lower() != "submitted": - return False - if str(claim.approval_stage or "").strip() != "直属领导审批": - return False - - current_employee = self._resolve_current_employee(current_user) - if current_employee is not None and str(claim.employee_id or "").strip() == current_employee.id: - return False - - claim_employee = claim.employee - if current_employee is not None and claim_employee is not None: - if claim_employee.manager_id == current_employee.id: - return True - if claim_employee.manager is not None and claim_employee.manager.id == current_employee.id: - return True - - approver_name = str( - current_employee.name if current_employee is not None and current_employee.name else current_user.name or "" - ).strip() - if not approver_name: - return False - - return self._resolve_claim_manager_name(claim) == approver_name - - @staticmethod - def _normalize_role_codes(current_user: CurrentUserContext) -> set[str]: - return { - str(item).strip().lower() - for item in current_user.role_codes - if str(item).strip() - } - - def _resolve_current_employee(self, current_user: CurrentUserContext) -> Employee | None: - return self._resolve_employee_by_identity_candidates( - [ - str(current_user.username or "").strip(), - str(current_user.name or "").strip(), - ] - ) - - def _resolve_current_user_display_name(self, current_user: CurrentUserContext) -> str: - 
current_employee = self._resolve_current_employee(current_user) - if current_employee is not None and str(current_employee.name or "").strip(): - return str(current_employee.name).strip() - - for candidate in (current_user.name, current_user.username): - normalized = str(candidate or "").strip() - if normalized and not self._is_email_like(normalized): - return normalized - - return str(current_user.username or current_user.name or "anonymous").strip() or "anonymous" - - def _is_claim_owned_by_current_user(self, claim: ExpenseClaim, current_user: CurrentUserContext) -> bool: - current_employee = self._resolve_current_employee(current_user) - if current_employee is not None: - if str(claim.employee_id or "").strip() == current_employee.id: - return True - identity_values = { - str(current_employee.name or "").strip(), - str(current_employee.email or "").strip(), - str(current_employee.employee_no or "").strip(), - } - else: - identity_values = set() - - identity_values.update( - { - str(current_user.username or "").strip(), - str(current_user.name or "").strip(), - } - ) - identity_values.discard("") - return str(claim.employee_name or "").strip() in identity_values - - @staticmethod - def _is_email_like(value: str) -> bool: - return bool(re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", str(value or "").strip())) - - def _resolve_claim_employee_for_backfill(self, claim: ExpenseClaim) -> Employee | None: - if claim.employee is not None: - employee = self.db.scalar( - select(Employee) - .options( - selectinload(Employee.organization_unit), - selectinload(Employee.manager), - selectinload(Employee.roles), - ) - .where(Employee.id == claim.employee.id) - .limit(1) - ) - return employee or claim.employee - - employee_id = str(claim.employee_id or "").strip() - if employee_id: - employee = self.db.scalar( - select(Employee) - .options( - selectinload(Employee.organization_unit), - selectinload(Employee.manager), - selectinload(Employee.roles), - ) - .where(Employee.id == 
employee_id) - .limit(1) - ) - if employee is not None: - return employee - - return self._resolve_employee_by_identity_candidates([str(claim.employee_name or "").strip()]) - - def _resolve_employee_by_identity_candidates(self, candidates: list[str]) -> Employee | None: - normalized_candidates = [ - item - for item in dict.fromkeys(str(candidate or "").strip() for candidate in candidates) - if item - ] - if not normalized_candidates: - return None - - load_options = ( - selectinload(Employee.organization_unit), - selectinload(Employee.manager), - selectinload(Employee.roles), - ) - - for candidate in normalized_candidates: - employee = self.db.scalar( - select(Employee) - .options(*load_options) - .where( - or_( - func.lower(Employee.email) == candidate.lower(), - func.lower(Employee.employee_no) == candidate.lower(), - ) - ) - .limit(1) - ) - if employee is not None: - return employee - - for candidate in normalized_candidates: - matches = list( - self.db.scalars( - select(Employee) - .options(*load_options) - .where(Employee.name == candidate) - .limit(2) - ).all() - ) - if len(matches) == 1: - return matches[0] - - return None - - def _backfill_claim_identity_from_current_user( - self, - claim: ExpenseClaim, - current_user: CurrentUserContext, - ) -> None: - employee = self._resolve_claim_employee_for_backfill(claim) or self._resolve_current_employee(current_user) - - if employee is not None: - claim_employee_id = str(claim.employee_id or "").strip() - claim_employee_name = str(claim.employee_name or "").strip() - employee_names = { - str(employee.name or "").strip(), - str(employee.email or "").strip(), - str(employee.employee_no or "").strip(), - } - employee_names.discard("") - - can_apply_employee = ( - not claim_employee_id - or claim_employee_id == employee.id - or self._is_missing_value(claim_employee_name) - or claim_employee_name in employee_names - ) - - if can_apply_employee: - claim.employee = employee - claim.employee_id = employee.id - if 
employee.name: - claim.employee_name = employee.name - if employee.organization_unit is not None: - claim.department_id = employee.organization_unit_id - claim.department_name = employee.organization_unit.name - return - - context_department = str( - getattr(current_user, "department_name", "") - or getattr(current_user, "department", "") - or getattr(current_user, "departmentName", "") - or "" - ).strip() - if context_department and self._is_missing_value(claim.department_name): - claim.department_name = context_department - - context_name = str(current_user.name or current_user.username or "").strip() - if context_name and self._is_missing_value(claim.employee_name): - claim.employee_name = context_name - - def _employee_name_is_unique(self, employee: Employee) -> bool: - normalized_name = str(employee.name or "").strip() - if not normalized_name: - return False - - same_name_count = int( - self.db.scalar( - select(func.count()).select_from(Employee).where(Employee.name == normalized_name) - ) - or 0 - ) - return same_name_count == 1 - - def _build_personal_claim_conditions(self, current_user: CurrentUserContext) -> list[Any]: - conditions = [] - username = str(current_user.username or "").strip() - employee = self._resolve_current_employee(current_user) - - def add_condition(field_name: str, value: str | None) -> None: - normalized = str(value or "").strip() - if not normalized: - return - if field_name == "employee_id": - conditions.append(ExpenseClaim.employee_id == normalized) - return - conditions.append(ExpenseClaim.employee_name == normalized) - - if employee is not None: - add_condition("employee_id", employee.id) - add_condition("employee_name", employee.email) - if self._employee_name_is_unique(employee): - add_condition("employee_name", employee.name) - else: - add_condition("employee_id", username) - add_condition("employee_name", username) - - return conditions - - def _build_approval_claim_conditions(self, current_user: CurrentUserContext) -> 
list[Any]: - role_codes = self._normalize_role_codes(current_user) - if not (role_codes & APPROVAL_VISIBLE_CLAIM_ROLE_CODES): - return [] - - employee = self._resolve_current_employee(current_user) - manager_name = str( - employee.name if employee is not None and employee.name else current_user.name or "" - ).strip() - pending_leader_approval_parts = [ - ExpenseClaim.status == "submitted", - ExpenseClaim.approval_stage == "直属领导审批", - ] - if employee is not None: - pending_leader_approval_parts.append( - or_(ExpenseClaim.employee_id.is_(None), ExpenseClaim.employee_id != employee.id) - ) - if manager_name: - pending_leader_approval_parts.append(ExpenseClaim.employee_name != manager_name) - - pending_leader_approval = and_(*pending_leader_approval_parts) - conditions = [] - - if employee is not None: - subordinate_ids = select(Employee.id).where(Employee.manager_id == employee.id) - conditions.append(and_(pending_leader_approval, ExpenseClaim.employee_id.in_(subordinate_ids))) - - if manager_name: - managed_department_ids = select(OrganizationUnit.id).where(OrganizationUnit.manager_name == manager_name) - managed_department_names = select(OrganizationUnit.name).where(OrganizationUnit.manager_name == manager_name) - conditions.append(and_(pending_leader_approval, ExpenseClaim.department_id.in_(managed_department_ids))) - conditions.append(and_(pending_leader_approval, ExpenseClaim.department_name.in_(managed_department_names))) - - return conditions - - def _apply_approval_claim_scope(self, stmt: Any, current_user: CurrentUserContext) -> Any: - role_codes = self._normalize_role_codes(current_user) - if current_user.is_admin or "executive" in role_codes: - return stmt.where(ExpenseClaim.status == "submitted") - if "finance" in role_codes: - return stmt.where( - ExpenseClaim.status == "submitted", - ExpenseClaim.approval_stage == "财务审批", - ) - - conditions = self._build_approval_claim_conditions(current_user) - if not conditions: - return stmt.where(ExpenseClaim.id == 
"__no_visible_claim__") - - return stmt.where(or_(*conditions)) - - def _apply_claim_scope( - self, - stmt: Any, - current_user: CurrentUserContext, - *, - include_approval_scope: bool = False, - ) -> Any: - if self._has_privileged_claim_access(current_user): - return stmt - - conditions = self._build_personal_claim_conditions(current_user) - - if not conditions: - return stmt.where(ExpenseClaim.id == "__no_visible_claim__") - - if include_approval_scope: - conditions.extend(self._build_approval_claim_conditions(current_user)) - - return stmt.where(or_(*conditions)) - - def _ensure_ready(self) -> None: - AgentFoundationService(self.db).ensure_foundation_ready() diff --git a/server/src/app/services/expense_rule_runtime.py b/server/src/app/services/expense_rule_runtime.py index 7bc8609..539bc82 100644 --- a/server/src/app/services/expense_rule_runtime.py +++ b/server/src/app/services/expense_rule_runtime.py @@ -2,12 +2,11 @@ from __future__ import annotations import json import re -from dataclasses import dataclass, field from decimal import Decimal -from typing import Any, Literal +from typing import Any from openpyxl import load_workbook -from pydantic import BaseModel, Field, ValidationError +from pydantic import ValidationError from sqlalchemy import select from sqlalchemy.orm import Session @@ -17,558 +16,29 @@ from app.services.agent_asset_spreadsheet import ( COMPANY_TRAVEL_EXPENSE_RULE_CODE, AgentAssetSpreadsheetManager, ) - -EXPENSE_RULE_CODE_BLOCK_PATTERN = re.compile(r"```expense-rule\s*(\{.*?\})\s*```", re.DOTALL) - -DOCUMENT_TYPE_LABELS = { - "flight_itinerary": "机票/航班行程单", - "train_ticket": "火车/高铁票", - "hotel_invoice": "酒店住宿票据", - "taxi_receipt": "出租车/网约车票据", - "parking_toll_receipt": "停车/通行费票据", - "meal_receipt": "餐饮票据", - "office_invoice": "办公用品票据", - "meeting_invoice": "会议/会务票据", - "training_invoice": "培训票据", - "vat_invoice": "增值税发票", - "receipt": "一般收据/凭证", - "other": "其他单据", -} - -SCENE_LABELS = { - "travel": "差旅", - "hotel": "住宿", - "transport": 
"交通", - "meal": "餐饮", - "entertainment": "业务招待", - "office": "办公", - "meeting": "会务", - "training": "培训", - "communication": "通讯", - "welfare": "福利", - "other": "其他", -} - -DEFAULT_SCENE_RULE_ASSET_CODE = "rule.expense.scene_submission_standard" -DEFAULT_TRAVEL_RULE_ASSET_CODE = "rule.expense.travel_risk_control_standard" - -DEFAULT_SCENE_MATRIX_CONFIG: dict[str, Any] = { - "kind": "scene_matrix", - "version": 1, - "scenes": { - "travel": { - "label": "差旅费", - "location_required": True, - "min_attachment_count": 1, - "allowed_scene_codes": ["travel"], - "allowed_document_types": ["flight_itinerary", "train_ticket"], - "attachment_mismatch_severity": "high", - }, - "hotel": { - "label": "住宿费", - "location_required": False, - "min_attachment_count": 1, - "allowed_scene_codes": ["hotel"], - "allowed_document_types": ["hotel_invoice", "vat_invoice", "receipt"], - "attachment_mismatch_severity": "high", - }, - "transport": { - "label": "交通费", - "location_required": False, - "min_attachment_count": 1, - "allowed_scene_codes": ["transport"], - "allowed_document_types": ["taxi_receipt", "parking_toll_receipt", "vat_invoice", "receipt"], - "attachment_mismatch_severity": "high", - "item_amount_limit": { - "scope": "item_amount", - "warn_amount": "300.00", - "block_amount": "800.00", - "exception_keywords": ["跨城", "夜间", "应急", "无公共交通", "机场", "火车站", "超标说明"], - "metric_label": "单笔交通金额", - }, - }, - "meal": { - "label": "餐费", - "location_required": False, - "min_attachment_count": 1, - "allowed_scene_codes": ["meal"], - "allowed_document_types": ["meal_receipt", "vat_invoice", "receipt"], - "attachment_mismatch_severity": "high", - "claim_amount_limit": { - "scope": "claim_total", - "warn_amount": "300.00", - "block_amount": "800.00", - "exception_keywords": ["客户接待", "团队活动", "加班", "展会", "超标说明"], - "metric_label": "餐费合计", - }, - }, - "entertainment": { - "label": "业务招待费", - "location_required": True, - "min_attachment_count": 1, - "allowed_scene_codes": ["meal"], - 
"allowed_document_types": ["meal_receipt", "vat_invoice", "receipt"], - "attachment_mismatch_severity": "high", - "claim_amount_limit": { - "scope": "claim_total", - "warn_amount": "2000.00", - "block_amount": "5000.00", - "exception_keywords": ["重要客户", "商务宴请", "项目签约", "超标说明"], - "metric_label": "招待费合计", - }, - }, - "office": { - "label": "办公费", - "location_required": False, - "min_attachment_count": 1, - "allowed_scene_codes": ["office"], - "allowed_document_types": ["office_invoice", "vat_invoice", "receipt"], - "attachment_mismatch_severity": "high", - "claim_amount_limit": { - "scope": "claim_total", - "warn_amount": "1500.00", - "block_amount": "5000.00", - "exception_keywords": ["批量采购", "固定资产", "部门集中采购", "超标说明"], - "metric_label": "办公费合计", - }, - }, - "meeting": { - "label": "会务费", - "location_required": True, - "min_attachment_count": 1, - "allowed_scene_codes": ["meeting"], - "allowed_document_types": ["meeting_invoice", "vat_invoice", "receipt"], - "attachment_mismatch_severity": "high", - "claim_amount_limit": { - "scope": "claim_total", - "warn_amount": "5000.00", - "block_amount": "30000.00", - "exception_keywords": ["大型会议", "外部场地", "超标说明"], - "metric_label": "会务费合计", - }, - }, - "training": { - "label": "培训费", - "location_required": False, - "min_attachment_count": 1, - "allowed_scene_codes": ["training"], - "allowed_document_types": ["training_invoice", "vat_invoice", "receipt"], - "attachment_mismatch_severity": "high", - "claim_amount_limit": { - "scope": "claim_total", - "warn_amount": "3000.00", - "block_amount": "15000.00", - "exception_keywords": ["认证考试", "外部培训", "超标说明"], - "metric_label": "培训费合计", - }, - }, - "communication": { - "label": "通讯费", - "location_required": False, - "min_attachment_count": 1, - "allowed_scene_codes": ["other"], - "allowed_document_types": ["vat_invoice", "receipt"], - "attachment_mismatch_severity": "medium", - "claim_amount_limit": { - "scope": "claim_total", - "warn_amount": "300.00", - "block_amount": "1000.00", - 
"exception_keywords": ["国际漫游", "专项通信", "超标说明"], - "metric_label": "通讯费合计", - }, - }, - "welfare": { - "label": "福利费", - "location_required": False, - "min_attachment_count": 1, - "allowed_scene_codes": ["other"], - "allowed_document_types": ["vat_invoice", "receipt"], - "attachment_mismatch_severity": "medium", - "claim_amount_limit": { - "scope": "claim_total", - "warn_amount": "1000.00", - "block_amount": "5000.00", - "exception_keywords": ["节日福利", "团队活动", "员工关怀", "超标说明"], - "metric_label": "福利费合计", - }, - }, - "other": { - "label": "其他费用", - "location_required": False, - "min_attachment_count": 1, - "allowed_scene_codes": ["other"], - "allowed_document_types": ["vat_invoice", "receipt"], - "attachment_mismatch_severity": "medium", - "always_warn": True, - "always_warn_message": "其他费用默认进入人工重点复核,请补充清晰用途说明并由审批人重点确认。", - "claim_amount_limit": { - "scope": "claim_total", - "warn_amount": "1000.00", - "block_amount": "3000.00", - "exception_keywords": ["特殊事项", "临时采购", "超标说明"], - "metric_label": "其他费用合计", - }, - }, - }, -} - -DEFAULT_TRAVEL_POLICY_CONFIG: dict[str, Any] = { - "kind": "travel_policy", - "version": 1, - "relevant_expense_types": ["travel", "hotel", "transport"], - "long_distance_document_types": ["flight_itinerary", "train_ticket"], - "route_exception_keywords": [ - "中转", - "转机", - "经停", - "改签", - "多地出差", - "多城市", - "多站", - "异地返程", - "异地结束", - "临时变更", - "继续前往", - "第二站", - ], - "standard_exception_keywords": [ - "超标说明", - "无直达", - "展会高峰", - "会议高峰", - "协议酒店满房", - "客户指定", - "临时改签", - "行程变更", - "红眼航班", - "晚到店", - ], - "band_labels": { - "junior": "P1-P3", - "mid": "P4-P5", - "senior": "P6-P7", - "manager": "M1-M2", - "executive": "M3及以上 / D序列", - }, - "city_tiers": { - "北京": "tier_1", - "上海": "tier_1", - "广州": "tier_1", - "深圳": "tier_1", - "杭州": "tier_2", - "南京": "tier_2", - "苏州": "tier_2", - "武汉": "tier_2", - "成都": "tier_2", - "重庆": "tier_2", - "西安": "tier_2", - "天津": "tier_2", - "宁波": "tier_2", - "厦门": "tier_2", - "青岛": "tier_2", - "长沙": "tier_2", - "郑州": 
"tier_2", - "合肥": "tier_2", - "济南": "tier_2", - "沈阳": "tier_2", - "大连": "tier_2", - "福州": "tier_2", - "昆明": "tier_2", - "海口": "tier_2", - "三亚": "tier_2", - "无锡": "tier_2", - "东莞": "tier_2", - "佛山": "tier_2", - }, - "hotel_limits": { - "junior": {"tier_1": "450.00", "tier_2": "380.00", "tier_3": "320.00"}, - "mid": {"tier_1": "550.00", "tier_2": "480.00", "tier_3": "380.00"}, - "senior": {"tier_1": "700.00", "tier_2": "620.00", "tier_3": "520.00"}, - "manager": {"tier_1": "900.00", "tier_2": "820.00", "tier_3": "720.00"}, - "executive": {"tier_1": "1200.00", "tier_2": "1000.00", "tier_3": "900.00"}, - }, - "transport_limits": { - "junior": {"flight": 1, "train": 1}, - "mid": {"flight": 1, "train": 1}, - "senior": {"flight": 2, "train": 2}, - "manager": {"flight": 3, "train": 3}, - "executive": {"flight": 4, "train": 3}, - }, - "flight_classes": [ - {"keyword": "头等舱", "level": 4}, - {"keyword": "公务舱", "level": 3}, - {"keyword": "商务舱", "level": 3}, - {"keyword": "超级经济舱", "level": 2}, - {"keyword": "高端经济舱", "level": 2}, - {"keyword": "明珠经济舱", "level": 2}, - {"keyword": "经济舱", "level": 1}, - ], - "train_classes": [ - {"keyword": "商务座", "level": 3}, - {"keyword": "一等座", "level": 2}, - {"keyword": "软卧", "level": 2}, - {"keyword": "二等座", "level": 1}, - {"keyword": "二等卧", "level": 1}, - {"keyword": "硬卧", "level": 1}, - ], -} - - -class AmountLimitConfig(BaseModel): - scope: Literal["claim_total", "item_amount"] = "claim_total" - warn_amount: Decimal | None = None - block_amount: Decimal | None = None - exception_keywords: list[str] = Field(default_factory=list) - metric_label: str = "金额" - - -class ScenePolicyConfig(BaseModel): - label: str - location_required: bool = False - min_attachment_count: int = 1 - allowed_scene_codes: list[str] = Field(default_factory=list) - allowed_document_types: list[str] = Field(default_factory=list) - attachment_mismatch_severity: Literal["low", "medium", "high"] = "high" - claim_amount_limit: AmountLimitConfig | None = None - 
item_amount_limit: AmountLimitConfig | None = None - always_warn: bool = False - always_warn_message: str = "" - - -class SceneMatrixRuleConfig(BaseModel): - kind: Literal["scene_matrix"] - version: int = 1 - scenes: dict[str, ScenePolicyConfig] - - -class TravelClassConfig(BaseModel): - keyword: str - level: int - - -class TravelPolicyConfig(BaseModel): - kind: Literal["travel_policy"] - version: int = 1 - relevant_expense_types: list[str] = Field(default_factory=list) - long_distance_document_types: list[str] = Field(default_factory=list) - route_exception_keywords: list[str] = Field(default_factory=list) - standard_exception_keywords: list[str] = Field(default_factory=list) - band_labels: dict[str, str] = Field(default_factory=dict) - city_tiers: dict[str, str] = Field(default_factory=dict) - hotel_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict) - hotel_city_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict) - allowance_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict) - standard_rule_code: str = "" - standard_rule_name: str = "" - standard_rule_version: str = "" - transport_limits: dict[str, dict[str, int]] = Field(default_factory=dict) - flight_classes: list[TravelClassConfig] = Field(default_factory=list) - train_classes: list[TravelClassConfig] = Field(default_factory=list) - - -class ExpenseScenePolicy(ScenePolicyConfig): - expense_type: str - rule_code: str - rule_name: str - rule_version: str - - -class RuntimeTravelPolicy(TravelPolicyConfig): - rule_code: str - rule_name: str - rule_version: str - - -@dataclass -class ExpenseRuleCatalog: - scene_policies: dict[str, ExpenseScenePolicy] = field(default_factory=dict) - travel_policy: RuntimeTravelPolicy | None = None - - def get_scene_policy(self, expense_type: str | None) -> ExpenseScenePolicy | None: - normalized = str(expense_type or "").strip().lower() or "other" - return self.scene_policies.get(normalized) - - -def 
resolve_document_type_label(document_type: str | None) -> str: - normalized = str(document_type or "").strip().lower() or "other" - return DOCUMENT_TYPE_LABELS.get(normalized, normalized or "其他单据") - - -def build_default_expense_rule_catalog() -> ExpenseRuleCatalog: - catalog = ExpenseRuleCatalog() - scene_matrix = SceneMatrixRuleConfig.model_validate(DEFAULT_SCENE_MATRIX_CONFIG) - for expense_type, config in scene_matrix.scenes.items(): - catalog.scene_policies[expense_type] = ExpenseScenePolicy( - expense_type=expense_type, - rule_code=DEFAULT_SCENE_RULE_ASSET_CODE, - rule_name="报销场景提交与附件标准", - rule_version="v1.0.0", - **config.model_dump(), - ) - - travel_policy = TravelPolicyConfig.model_validate(DEFAULT_TRAVEL_POLICY_CONFIG) - catalog.travel_policy = RuntimeTravelPolicy( - rule_code=DEFAULT_TRAVEL_RULE_ASSET_CODE, - rule_name="差旅报销风险管控制度", - rule_version="v1.1.0", - **travel_policy.model_dump(), - ) - return catalog - - -def build_scene_submission_standard_markdown() -> str: - scene_matrix = SceneMatrixRuleConfig.model_validate(DEFAULT_SCENE_MATRIX_CONFIG) - sections: list[str] = [ - "# 报销场景提交与附件标准", - "", - "## 模板信息", - "", - "- 模板类型:系统内置场景矩阵规则", - "- 运行时类型:`scene_matrix`", - "- 适用对象:报销提交与附件校验", - "", - "## 目标", - "", - "统一约束各报销场景的必填字段、附件类型和金额预警口径,在上传附件和提交审核两个时点直接输出可执行风险判断。", - "", - "## 适用范围", - "", - "适用于差旅、住宿、交通、餐费、业务招待、办公、会务、培训、通讯、福利和其他费用场景。", - "", - "## 输入字段", - "", - "- expense_type", - "- attachments", - "- location", - "- amount / item_amount", - "- reason", - "", - "## 判断规则", - "", - ] - - for index, (expense_type, config) in enumerate(scene_matrix.scenes.items(), start=1): - expected_document_labels = "、".join( - resolve_document_type_label(item) for item in config.allowed_document_types - ) - expected_scene_labels = "、".join( - SCENE_LABELS.get(item, item) for item in config.allowed_scene_codes - ) - sections.extend( - [ - f"### 规则 {index} {config.label}(`{expense_type}`)", - "", - f"- 业务地点:{'必填' if config.location_required else '非必填'}", - f"- 
最少附件数:{config.min_attachment_count}", - f"- 允许识别场景:{expected_scene_labels or '不限制'}", - f"- 允许附件类型:{expected_document_labels or '不限制'}", - f"- 附件不匹配处理:{config.attachment_mismatch_severity.upper()}", - ] - ) - if config.claim_amount_limit is not None: - sections.append( - f"- 合计金额阈值:预警 {config.claim_amount_limit.warn_amount or '-'} 元," - f"拦截 {config.claim_amount_limit.block_amount or '-'} 元" - ) - if config.item_amount_limit is not None: - sections.append( - f"- 单笔金额阈值:预警 {config.item_amount_limit.warn_amount or '-'} 元," - f"拦截 {config.item_amount_limit.block_amount or '-'} 元" - ) - if config.always_warn and config.always_warn_message: - sections.append(f"- 特殊处理:{config.always_warn_message}") - sections.append("") - - sections.extend( - [ - "## 输出", - "", - "- 命中高风险时退回待补充。", - "- 命中中风险时继续流转,并提示审批人重点复核。", - "- 命中 always_warn 场景时追加人工重点复核提示。", - "", - "## 来源依据", - "", - "- 公司报销制度中关于场景识别、附件要求、金额阈值和人工复核的统一口径。", - "", - "## 审核约束", - "", - "- 当前规则为系统内置真实运行规则,变更后需重新审核并评估回滚影响。", - "- 规则 JSON 与 Markdown 说明必须保持一致。", - "", - "## 管理员备注", - "", - "如后续制度调整附件类型、金额阈值或人工复核口径,应优先修改运行时 JSON 并同步更新说明。", - "", - "```expense-rule", - json.dumps(DEFAULT_SCENE_MATRIX_CONFIG, ensure_ascii=False, indent=2), - "```", - ] - ) - return "\n".join(sections) - - -def build_travel_risk_control_standard_markdown() -> str: - return "\n".join( - [ - "# 差旅报销风险管控制度", - "", - "## 模板信息", - "", - "- 模板键:`travel_standard_v1`", - "- 运行时类型:`travel_policy`", - "- 适用对象:差旅、住宿、交通相关报销审核", - "", - "## 目标", - "", - "校验差旅行程闭环、酒店地点一致性、住宿标准、飞机舱位和火车席别是否符合制度,并对例外情况保留人工复核入口。", - "", - "## 适用范围", - "", - "适用于差旅费、住宿费和交通费相关报销单,重点覆盖跨城市出差、改签、中转和超标说明场景。", - "", - "## 输入字段", - "", - "- expense_type", - "- attachments / OCR routes", - "- location", - "- employee_grade", - "- reason", - "", - "## 判断规则", - "", - "- 两段及以上长途交通票据必须首尾衔接。", - "- 最终终点应与申报目的地一致,或返回首段出发城市。", - "- 检测到多城市行程但无说明时,按高风险退回待补充。", - "- 酒店城市必须落在目的地或交通链路停留城市中。", - "- 住宿标准、飞机舱位和火车席别按职级与城市分级执行。", - "- 超标但有说明时记为中风险;超标且无说明时记为高风险。", - "", - "## 输出", - "", - "- 
行程异常时输出高风险退回。", - "- 差标超限但有合理说明时输出中风险提醒。", - "- 命中差旅制度规则时,保留 `rule_code` 和 `rule_version` 供审批链追踪。", - "", - "## 来源依据", - "", - "- 公司差旅制度关于行程闭环、酒店地点一致性、职级差标和例外说明的规定。", - "", - "## 审核约束", - "", - "- 当前规则为系统内置真实运行规则,修改前需确认差旅制度版本与灰度回滚方案。", - "- 规则 JSON 与 Markdown 说明必须保持一致。", - "", - "## 管理员备注", - "", - "如制度调整职级带、城市分级或交通等级,应先更新运行时 JSON,再同步修改本说明。", - "", - "```expense-rule", - json.dumps(DEFAULT_TRAVEL_POLICY_CONFIG, ensure_ascii=False, indent=2), - "```", - ] - ) - +from app.services.expense_rule_runtime_defaults import ( + DEFAULT_SCENE_MATRIX_CONFIG, + DEFAULT_SCENE_RULE_ASSET_CODE, + DEFAULT_TRAVEL_POLICY_CONFIG, + DEFAULT_TRAVEL_RULE_ASSET_CODE, + DOCUMENT_TYPE_LABELS, + EXPENSE_RULE_CODE_BLOCK_PATTERN, + SCENE_LABELS, +) +from app.services.expense_rule_runtime_models import ( + AmountLimitConfig, + ExpenseRuleCatalog, + ExpenseScenePolicy, + RuntimeTravelPolicy, + SceneMatrixRuleConfig, + TravelPolicyConfig, + build_default_expense_rule_catalog, + resolve_document_type_label, +) +from app.services.expense_rule_runtime_standards import ( + build_scene_submission_standard_markdown, + build_travel_risk_control_standard_markdown, +) class ExpenseRuleRuntimeService: def __init__(self, db: Session) -> None: diff --git a/server/src/app/services/expense_rule_runtime_defaults.py b/server/src/app/services/expense_rule_runtime_defaults.py new file mode 100644 index 0000000..c6d78b5 --- /dev/null +++ b/server/src/app/services/expense_rule_runtime_defaults.py @@ -0,0 +1,299 @@ +from __future__ import annotations + +import re +from typing import Any + +EXPENSE_RULE_CODE_BLOCK_PATTERN = re.compile(r"```expense-rule\s*(\{.*?\})\s*```", re.DOTALL) + +DOCUMENT_TYPE_LABELS = { + "flight_itinerary": "机票/航班行程单", + "train_ticket": "火车/高铁票", + "hotel_invoice": "酒店住宿票据", + "taxi_receipt": "出租车/网约车票据", + "parking_toll_receipt": "停车/通行费票据", + "meal_receipt": "餐饮票据", + "office_invoice": "办公用品票据", + "meeting_invoice": "会议/会务票据", + "training_invoice": "培训票据", + "vat_invoice": "增值税发票", + 
"receipt": "一般收据/凭证", + "other": "其他单据", +} + +SCENE_LABELS = { + "travel": "差旅", + "hotel": "住宿", + "transport": "交通", + "meal": "餐饮", + "entertainment": "业务招待", + "office": "办公", + "meeting": "会务", + "training": "培训", + "communication": "通讯", + "welfare": "福利", + "other": "其他", +} + +DEFAULT_SCENE_RULE_ASSET_CODE = "rule.expense.scene_submission_standard" +DEFAULT_TRAVEL_RULE_ASSET_CODE = "rule.expense.travel_risk_control_standard" + +DEFAULT_SCENE_MATRIX_CONFIG: dict[str, Any] = { + "kind": "scene_matrix", + "version": 1, + "scenes": { + "travel": { + "label": "差旅费", + "location_required": True, + "min_attachment_count": 1, + "allowed_scene_codes": ["travel"], + "allowed_document_types": ["flight_itinerary", "train_ticket"], + "attachment_mismatch_severity": "high", + }, + "hotel": { + "label": "住宿费", + "location_required": False, + "min_attachment_count": 1, + "allowed_scene_codes": ["hotel"], + "allowed_document_types": ["hotel_invoice", "vat_invoice", "receipt"], + "attachment_mismatch_severity": "high", + }, + "transport": { + "label": "交通费", + "location_required": False, + "min_attachment_count": 1, + "allowed_scene_codes": ["transport"], + "allowed_document_types": ["taxi_receipt", "parking_toll_receipt", "vat_invoice", "receipt"], + "attachment_mismatch_severity": "high", + "item_amount_limit": { + "scope": "item_amount", + "warn_amount": "300.00", + "block_amount": "800.00", + "exception_keywords": ["跨城", "夜间", "应急", "无公共交通", "机场", "火车站", "超标说明"], + "metric_label": "单笔交通金额", + }, + }, + "meal": { + "label": "餐费", + "location_required": False, + "min_attachment_count": 1, + "allowed_scene_codes": ["meal"], + "allowed_document_types": ["meal_receipt", "vat_invoice", "receipt"], + "attachment_mismatch_severity": "high", + "claim_amount_limit": { + "scope": "claim_total", + "warn_amount": "300.00", + "block_amount": "800.00", + "exception_keywords": ["客户接待", "团队活动", "加班", "展会", "超标说明"], + "metric_label": "餐费合计", + }, + }, + "entertainment": { + "label": 
"业务招待费", + "location_required": True, + "min_attachment_count": 1, + "allowed_scene_codes": ["meal"], + "allowed_document_types": ["meal_receipt", "vat_invoice", "receipt"], + "attachment_mismatch_severity": "high", + "claim_amount_limit": { + "scope": "claim_total", + "warn_amount": "2000.00", + "block_amount": "5000.00", + "exception_keywords": ["重要客户", "商务宴请", "项目签约", "超标说明"], + "metric_label": "招待费合计", + }, + }, + "office": { + "label": "办公费", + "location_required": False, + "min_attachment_count": 1, + "allowed_scene_codes": ["office"], + "allowed_document_types": ["office_invoice", "vat_invoice", "receipt"], + "attachment_mismatch_severity": "high", + "claim_amount_limit": { + "scope": "claim_total", + "warn_amount": "1500.00", + "block_amount": "5000.00", + "exception_keywords": ["批量采购", "固定资产", "部门集中采购", "超标说明"], + "metric_label": "办公费合计", + }, + }, + "meeting": { + "label": "会务费", + "location_required": True, + "min_attachment_count": 1, + "allowed_scene_codes": ["meeting"], + "allowed_document_types": ["meeting_invoice", "vat_invoice", "receipt"], + "attachment_mismatch_severity": "high", + "claim_amount_limit": { + "scope": "claim_total", + "warn_amount": "5000.00", + "block_amount": "30000.00", + "exception_keywords": ["大型会议", "外部场地", "超标说明"], + "metric_label": "会务费合计", + }, + }, + "training": { + "label": "培训费", + "location_required": False, + "min_attachment_count": 1, + "allowed_scene_codes": ["training"], + "allowed_document_types": ["training_invoice", "vat_invoice", "receipt"], + "attachment_mismatch_severity": "high", + "claim_amount_limit": { + "scope": "claim_total", + "warn_amount": "3000.00", + "block_amount": "15000.00", + "exception_keywords": ["认证考试", "外部培训", "超标说明"], + "metric_label": "培训费合计", + }, + }, + "communication": { + "label": "通讯费", + "location_required": False, + "min_attachment_count": 1, + "allowed_scene_codes": ["other"], + "allowed_document_types": ["vat_invoice", "receipt"], + "attachment_mismatch_severity": "medium", + 
"claim_amount_limit": { + "scope": "claim_total", + "warn_amount": "300.00", + "block_amount": "1000.00", + "exception_keywords": ["国际漫游", "专项通信", "超标说明"], + "metric_label": "通讯费合计", + }, + }, + "welfare": { + "label": "福利费", + "location_required": False, + "min_attachment_count": 1, + "allowed_scene_codes": ["other"], + "allowed_document_types": ["vat_invoice", "receipt"], + "attachment_mismatch_severity": "medium", + "claim_amount_limit": { + "scope": "claim_total", + "warn_amount": "1000.00", + "block_amount": "5000.00", + "exception_keywords": ["节日福利", "团队活动", "员工关怀", "超标说明"], + "metric_label": "福利费合计", + }, + }, + "other": { + "label": "其他费用", + "location_required": False, + "min_attachment_count": 1, + "allowed_scene_codes": ["other"], + "allowed_document_types": ["vat_invoice", "receipt"], + "attachment_mismatch_severity": "medium", + "always_warn": True, + "always_warn_message": "其他费用默认进入人工重点复核,请补充清晰用途说明并由审批人重点确认。", + "claim_amount_limit": { + "scope": "claim_total", + "warn_amount": "1000.00", + "block_amount": "3000.00", + "exception_keywords": ["特殊事项", "临时采购", "超标说明"], + "metric_label": "其他费用合计", + }, + }, + }, +} + +DEFAULT_TRAVEL_POLICY_CONFIG: dict[str, Any] = { + "kind": "travel_policy", + "version": 1, + "relevant_expense_types": ["travel", "hotel", "transport"], + "long_distance_document_types": ["flight_itinerary", "train_ticket"], + "route_exception_keywords": [ + "中转", + "转机", + "经停", + "改签", + "多地出差", + "多城市", + "多站", + "异地返程", + "异地结束", + "临时变更", + "继续前往", + "第二站", + ], + "standard_exception_keywords": [ + "超标说明", + "无直达", + "展会高峰", + "会议高峰", + "协议酒店满房", + "客户指定", + "临时改签", + "行程变更", + "红眼航班", + "晚到店", + ], + "band_labels": { + "junior": "P1-P3", + "mid": "P4-P5", + "senior": "P6-P7", + "manager": "M1-M2", + "executive": "M3及以上 / D序列", + }, + "city_tiers": { + "北京": "tier_1", + "上海": "tier_1", + "广州": "tier_1", + "深圳": "tier_1", + "杭州": "tier_2", + "南京": "tier_2", + "苏州": "tier_2", + "武汉": "tier_2", + "成都": "tier_2", + "重庆": "tier_2", + "西安": 
"tier_2", + "天津": "tier_2", + "宁波": "tier_2", + "厦门": "tier_2", + "青岛": "tier_2", + "长沙": "tier_2", + "郑州": "tier_2", + "合肥": "tier_2", + "济南": "tier_2", + "沈阳": "tier_2", + "大连": "tier_2", + "福州": "tier_2", + "昆明": "tier_2", + "海口": "tier_2", + "三亚": "tier_2", + "无锡": "tier_2", + "东莞": "tier_2", + "佛山": "tier_2", + }, + "hotel_limits": { + "junior": {"tier_1": "450.00", "tier_2": "380.00", "tier_3": "320.00"}, + "mid": {"tier_1": "550.00", "tier_2": "480.00", "tier_3": "380.00"}, + "senior": {"tier_1": "700.00", "tier_2": "620.00", "tier_3": "520.00"}, + "manager": {"tier_1": "900.00", "tier_2": "820.00", "tier_3": "720.00"}, + "executive": {"tier_1": "1200.00", "tier_2": "1000.00", "tier_3": "900.00"}, + }, + "transport_limits": { + "junior": {"flight": 1, "train": 1}, + "mid": {"flight": 1, "train": 1}, + "senior": {"flight": 2, "train": 2}, + "manager": {"flight": 3, "train": 3}, + "executive": {"flight": 4, "train": 3}, + }, + "flight_classes": [ + {"keyword": "头等舱", "level": 4}, + {"keyword": "公务舱", "level": 3}, + {"keyword": "商务舱", "level": 3}, + {"keyword": "超级经济舱", "level": 2}, + {"keyword": "高端经济舱", "level": 2}, + {"keyword": "明珠经济舱", "level": 2}, + {"keyword": "经济舱", "level": 1}, + ], + "train_classes": [ + {"keyword": "商务座", "level": 3}, + {"keyword": "一等座", "level": 2}, + {"keyword": "软卧", "level": 2}, + {"keyword": "二等座", "level": 1}, + {"keyword": "二等卧", "level": 1}, + {"keyword": "硬卧", "level": 1}, + ], +} diff --git a/server/src/app/services/expense_rule_runtime_models.py b/server/src/app/services/expense_rule_runtime_models.py new file mode 100644 index 0000000..4a1d834 --- /dev/null +++ b/server/src/app/services/expense_rule_runtime_models.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from decimal import Decimal +from typing import Literal + +from pydantic import BaseModel, Field + +from app.services.expense_rule_runtime_defaults import ( + DEFAULT_SCENE_MATRIX_CONFIG, + 
DEFAULT_SCENE_RULE_ASSET_CODE, + DEFAULT_TRAVEL_POLICY_CONFIG, + DEFAULT_TRAVEL_RULE_ASSET_CODE, + DOCUMENT_TYPE_LABELS, +) + +class AmountLimitConfig(BaseModel): + scope: Literal["claim_total", "item_amount"] = "claim_total" + warn_amount: Decimal | None = None + block_amount: Decimal | None = None + exception_keywords: list[str] = Field(default_factory=list) + metric_label: str = "金额" + + +class ScenePolicyConfig(BaseModel): + label: str + location_required: bool = False + min_attachment_count: int = 1 + allowed_scene_codes: list[str] = Field(default_factory=list) + allowed_document_types: list[str] = Field(default_factory=list) + attachment_mismatch_severity: Literal["low", "medium", "high"] = "high" + claim_amount_limit: AmountLimitConfig | None = None + item_amount_limit: AmountLimitConfig | None = None + always_warn: bool = False + always_warn_message: str = "" + + +class SceneMatrixRuleConfig(BaseModel): + kind: Literal["scene_matrix"] + version: int = 1 + scenes: dict[str, ScenePolicyConfig] + + +class TravelClassConfig(BaseModel): + keyword: str + level: int + + +class TravelPolicyConfig(BaseModel): + kind: Literal["travel_policy"] + version: int = 1 + relevant_expense_types: list[str] = Field(default_factory=list) + long_distance_document_types: list[str] = Field(default_factory=list) + route_exception_keywords: list[str] = Field(default_factory=list) + standard_exception_keywords: list[str] = Field(default_factory=list) + band_labels: dict[str, str] = Field(default_factory=dict) + city_tiers: dict[str, str] = Field(default_factory=dict) + hotel_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict) + hotel_city_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict) + allowance_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict) + standard_rule_code: str = "" + standard_rule_name: str = "" + standard_rule_version: str = "" + transport_limits: dict[str, dict[str, int]] = Field(default_factory=dict) + 
flight_classes: list[TravelClassConfig] = Field(default_factory=list) + train_classes: list[TravelClassConfig] = Field(default_factory=list) + + +class ExpenseScenePolicy(ScenePolicyConfig): + expense_type: str + rule_code: str + rule_name: str + rule_version: str + + +class RuntimeTravelPolicy(TravelPolicyConfig): + rule_code: str + rule_name: str + rule_version: str + + +@dataclass +class ExpenseRuleCatalog: + scene_policies: dict[str, ExpenseScenePolicy] = field(default_factory=dict) + travel_policy: RuntimeTravelPolicy | None = None + + def get_scene_policy(self, expense_type: str | None) -> ExpenseScenePolicy | None: + normalized = str(expense_type or "").strip().lower() or "other" + return self.scene_policies.get(normalized) + + +def resolve_document_type_label(document_type: str | None) -> str: + normalized = str(document_type or "").strip().lower() or "other" + return DOCUMENT_TYPE_LABELS.get(normalized, normalized or "其他单据") + + +def build_default_expense_rule_catalog() -> ExpenseRuleCatalog: + catalog = ExpenseRuleCatalog() + scene_matrix = SceneMatrixRuleConfig.model_validate(DEFAULT_SCENE_MATRIX_CONFIG) + for expense_type, config in scene_matrix.scenes.items(): + catalog.scene_policies[expense_type] = ExpenseScenePolicy( + expense_type=expense_type, + rule_code=DEFAULT_SCENE_RULE_ASSET_CODE, + rule_name="报销场景提交与附件标准", + rule_version="v1.0.0", + **config.model_dump(), + ) + + travel_policy = TravelPolicyConfig.model_validate(DEFAULT_TRAVEL_POLICY_CONFIG) + catalog.travel_policy = RuntimeTravelPolicy( + rule_code=DEFAULT_TRAVEL_RULE_ASSET_CODE, + rule_name="差旅报销风险管控制度", + rule_version="v1.1.0", + **travel_policy.model_dump(), + ) + return catalog diff --git a/server/src/app/services/expense_rule_runtime_standards.py b/server/src/app/services/expense_rule_runtime_standards.py new file mode 100644 index 0000000..9bbd9af --- /dev/null +++ b/server/src/app/services/expense_rule_runtime_standards.py @@ -0,0 +1,166 @@ +from __future__ import annotations + 
+import json + +from app.services.expense_rule_runtime_defaults import ( + DEFAULT_SCENE_MATRIX_CONFIG, + DEFAULT_TRAVEL_POLICY_CONFIG, + SCENE_LABELS, +) +from app.services.expense_rule_runtime_models import ( + SceneMatrixRuleConfig, + resolve_document_type_label, +) + +def build_scene_submission_standard_markdown() -> str: + scene_matrix = SceneMatrixRuleConfig.model_validate(DEFAULT_SCENE_MATRIX_CONFIG) + sections: list[str] = [ + "# 报销场景提交与附件标准", + "", + "## 模板信息", + "", + "- 模板类型:系统内置场景矩阵规则", + "- 运行时类型:`scene_matrix`", + "- 适用对象:报销提交与附件校验", + "", + "## 目标", + "", + "统一约束各报销场景的必填字段、附件类型和金额预警口径,在上传附件和提交审核两个时点直接输出可执行风险判断。", + "", + "## 适用范围", + "", + "适用于差旅、住宿、交通、餐费、业务招待、办公、会务、培训、通讯、福利和其他费用场景。", + "", + "## 输入字段", + "", + "- expense_type", + "- attachments", + "- location", + "- amount / item_amount", + "- reason", + "", + "## 判断规则", + "", + ] + + for index, (expense_type, config) in enumerate(scene_matrix.scenes.items(), start=1): + expected_document_labels = "、".join( + resolve_document_type_label(item) for item in config.allowed_document_types + ) + expected_scene_labels = "、".join( + SCENE_LABELS.get(item, item) for item in config.allowed_scene_codes + ) + sections.extend( + [ + f"### 规则 {index} {config.label}(`{expense_type}`)", + "", + f"- 业务地点:{'必填' if config.location_required else '非必填'}", + f"- 最少附件数:{config.min_attachment_count}", + f"- 允许识别场景:{expected_scene_labels or '不限制'}", + f"- 允许附件类型:{expected_document_labels or '不限制'}", + f"- 附件不匹配处理:{config.attachment_mismatch_severity.upper()}", + ] + ) + if config.claim_amount_limit is not None: + sections.append( + f"- 合计金额阈值:预警 {config.claim_amount_limit.warn_amount or '-'} 元," + f"拦截 {config.claim_amount_limit.block_amount or '-'} 元" + ) + if config.item_amount_limit is not None: + sections.append( + f"- 单笔金额阈值:预警 {config.item_amount_limit.warn_amount or '-'} 元," + f"拦截 {config.item_amount_limit.block_amount or '-'} 元" + ) + if config.always_warn and config.always_warn_message: + sections.append(f"- 
特殊处理:{config.always_warn_message}") + sections.append("") + + sections.extend( + [ + "## 输出", + "", + "- 命中高风险时退回待补充。", + "- 命中中风险时继续流转,并提示审批人重点复核。", + "- 命中 always_warn 场景时追加人工重点复核提示。", + "", + "## 来源依据", + "", + "- 公司报销制度中关于场景识别、附件要求、金额阈值和人工复核的统一口径。", + "", + "## 审核约束", + "", + "- 当前规则为系统内置真实运行规则,变更后需重新审核并评估回滚影响。", + "- 规则 JSON 与 Markdown 说明必须保持一致。", + "", + "## 管理员备注", + "", + "如后续制度调整附件类型、金额阈值或人工复核口径,应优先修改运行时 JSON 并同步更新说明。", + "", + "```expense-rule", + json.dumps(DEFAULT_SCENE_MATRIX_CONFIG, ensure_ascii=False, indent=2), + "```", + ] + ) + return "\n".join(sections) + + +def build_travel_risk_control_standard_markdown() -> str: + return "\n".join( + [ + "# 差旅报销风险管控制度", + "", + "## 模板信息", + "", + "- 模板键:`travel_standard_v1`", + "- 运行时类型:`travel_policy`", + "- 适用对象:差旅、住宿、交通相关报销审核", + "", + "## 目标", + "", + "校验差旅行程闭环、酒店地点一致性、住宿标准、飞机舱位和火车席别是否符合制度,并对例外情况保留人工复核入口。", + "", + "## 适用范围", + "", + "适用于差旅费、住宿费和交通费相关报销单,重点覆盖跨城市出差、改签、中转和超标说明场景。", + "", + "## 输入字段", + "", + "- expense_type", + "- attachments / OCR routes", + "- location", + "- employee_grade", + "- reason", + "", + "## 判断规则", + "", + "- 两段及以上长途交通票据必须首尾衔接。", + "- 最终终点应与申报目的地一致,或返回首段出发城市。", + "- 检测到多城市行程但无说明时,按高风险退回待补充。", + "- 酒店城市必须落在目的地或交通链路停留城市中。", + "- 住宿标准、飞机舱位和火车席别按职级与城市分级执行。", + "- 超标但有说明时记为中风险;超标且无说明时记为高风险。", + "", + "## 输出", + "", + "- 行程异常时输出高风险退回。", + "- 差标超限但有合理说明时输出中风险提醒。", + "- 命中差旅制度规则时,保留 `rule_code` 和 `rule_version` 供审批链追踪。", + "", + "## 来源依据", + "", + "- 公司差旅制度关于行程闭环、酒店地点一致性、职级差标和例外说明的规定。", + "", + "## 审核约束", + "", + "- 当前规则为系统内置真实运行规则,修改前需确认差旅制度版本与灰度回滚方案。", + "- 规则 JSON 与 Markdown 说明必须保持一致。", + "", + "## 管理员备注", + "", + "如制度调整职级带、城市分级或交通等级,应先更新运行时 JSON,再同步修改本说明。", + "", + "```expense-rule", + json.dumps(DEFAULT_TRAVEL_POLICY_CONFIG, ensure_ascii=False, indent=2), + "```", + ] + ) diff --git a/server/src/app/services/knowledge.py b/server/src/app/services/knowledge.py index afea60f..8dc4a93 100644 --- a/server/src/app/services/knowledge.py +++ b/server/src/app/services/knowledge.py @@ -1,220 
+1,187 @@ -from __future__ import annotations - -import hashlib -import json -import mimetypes -import re -import shutil -import subprocess -from dataclasses import dataclass -from datetime import UTC, datetime -from pathlib import Path -from typing import Any -from urllib.request import Request, urlopen -from uuid import uuid4 -from xml.etree import ElementTree -from zipfile import BadZipFile, ZipFile - -import jwt -from sqlalchemy import select -from sqlalchemy.orm import Session - -from app.api.deps import CurrentUserContext -from app.core.agent_enums import AgentRunStatus -from app.core.config import get_settings -from app.core.logging import get_logger -from app.models.agent_run import AgentRun -from app.schemas.knowledge import ( - KnowledgeDocumentDetailRead, - KnowledgeDocumentRead, - KnowledgeFolderRead, - KnowledgeLibraryRead, - KnowledgeOnlyOfficeConfigRead, - KnowledgePreviewBlockRead, - KnowledgePreviewPageRead, - KnowledgePreviewStatRead, -) -from app.services.knowledge_rag import KnowledgeRagService -from app.services.settings import resolve_onlyoffice_settings - -logger = get_logger("app.services.knowledge") - -FIXED_KNOWLEDGE_FOLDERS = [ - "财务知识库", - "制度政策", - "报销制度", - "差旅规范", - "发票管理", - "税务合规", - "预算管理", - "财务共享", - "培训资料", - "常见问答", -] - -ICON_BY_TYPE = { - "pdf": "mdi mdi-file-document-outline-pdf pdf", - "word": "mdi mdi-file-document-outline-word word", - "excel": "mdi mdi-file-document-outline-excel excel", - "ppt": "mdi mdi-file-powerpoint-box ppt", - "image": "mdi mdi-file-image-outline image", - "text": "mdi mdi-file-document-outline text", - "archive": "mdi mdi-folder-zip-outline archive", - "binary": "mdi mdi-file-outline", -} - -TEXT_EXTENSIONS = {"txt", "md", "csv", "json", "xml", "yml", "yaml", "log"} -WORD_EXTENSIONS = {"doc", "docx"} -EXCEL_EXTENSIONS = {"xls", "xlsx", "csv"} -PPT_EXTENSIONS = {"ppt", "pptx"} -IMAGE_EXTENSIONS = {"png", "jpg", "jpeg", "gif", "bmp", "webp", "svg"} -ARCHIVE_EXTENSIONS = {"zip", "rar", "7z"} 
-STRUCTURED_PREVIEW_EXTENSIONS = {"docx", "xlsx", "pptx"} | TEXT_EXTENSIONS -INLINE_PREVIEW_EXTENSIONS = {"pdf"} | IMAGE_EXTENSIONS -ONLYOFFICE_EDITABLE_EXTENSIONS = {"docx", "xlsx", "pptx"} -KNOWLEDGE_INGEST_SYNC_STALE_SECONDS = 90 -KNOWLEDGE_SEARCH_RESULT_LIMIT = 3 -KNOWLEDGE_SEARCH_STOP_TERMS = { - "什么", - "怎么", - "如何", - "多少", - "是否", - "可以", - "一下", - "请问", - "帮我", - "一下子", - "这个", - "那个", - "哪些", - "一下吧", -} - -KNOWLEDGE_INGEST_STATUS_PUBLISHED = 1 -KNOWLEDGE_INGEST_STATUS_SYNCING = 2 -KNOWLEDGE_INGEST_STATUS_INGESTED = 3 -KNOWLEDGE_INGEST_STATUS_FAILED = 4 - -KNOWLEDGE_INGEST_STATUS_META = { - KNOWLEDGE_INGEST_STATUS_PUBLISHED: ("待归纳", "muted"), - KNOWLEDGE_INGEST_STATUS_SYNCING: ("正归纳", "warning"), - KNOWLEDGE_INGEST_STATUS_INGESTED: ("已归纳", "success"), - KNOWLEDGE_INGEST_STATUS_FAILED: ("归纳失败", "danger"), -} - - -@dataclass(slots=True) -class OnlyOfficeCallbackPayload: - status: int - download_url: str - users: list[str] - - -def prepare_knowledge_library() -> None: - KnowledgeService().ensure_library_ready() - - -class KnowledgeService: - def __init__(self, storage_root: Path | None = None, db: Session | None = None) -> None: - settings = get_settings() - self.db = db - self.storage_root = Path(storage_root or settings.resolved_storage_root_dir) - self.library_root = self.storage_root / "knowledge" - self.index_path = self.library_root / ".index.json" - - def ensure_library_ready(self) -> None: - self.library_root.mkdir(parents=True, exist_ok=True) - for folder_name in FIXED_KNOWLEDGE_FOLDERS: - (self.library_root / folder_name).mkdir(parents=True, exist_ok=True) - - if not self.index_path.exists(): - self._save_index({"version": 1, "documents": []}) - - index = self._load_index() - if self._reconcile_index(index): - self._save_index(index) - - def list_library(self) -> KnowledgeLibraryRead: - documents = self._load_documents() - folders = [ - KnowledgeFolderRead( - name=folder_name, - count=sum(1 for item in documents if item.folder == folder_name), - 
icon="mdi mdi-folder-open" if folder_name == "差旅规范" else "mdi mdi-folder", - ) - for folder_name in FIXED_KNOWLEDGE_FOLDERS - ] - return KnowledgeLibraryRead(folders=folders, documents=documents) - - def get_document_detail(self, document_id: str) -> KnowledgeDocumentDetailRead: - self.ensure_library_ready() - index = self._load_index() - if self._reconcile_document_ingest_statuses(index, document_ids=[document_id]): - self._save_index(index) - entry = self._require_entry(index, document_id) - preview_kind, preview_pages = self._build_preview(entry) - document = self._serialize_document(entry) - return KnowledgeDocumentDetailRead( - **document.model_dump(), - previewKind=preview_kind, - previewPages=preview_pages, - ) - - def upload_document( - self, - folder: str, - filename: str, - content: bytes, - current_user: CurrentUserContext, - ) -> KnowledgeDocumentDetailRead: - self.ensure_library_ready() - normalized_folder = self._normalize_folder(folder) - normalized_name = self._normalize_filename(filename) - - if not content: - raise ValueError("上传文件不能为空。") - - rag_service = KnowledgeRagService(db=self.db, storage_root=self.storage_root) - index = self._load_index() - existing_entry = next( - ( - item - for item in index["documents"] - if item["folder"] == normalized_folder - and item["original_name"].lower() == normalized_name.lower() - ), - None, - ) - - document_id = existing_entry["id"] if existing_entry else uuid4().hex - stored_name = f"{document_id}__{normalized_name}" - target_path = self.library_root / normalized_folder / stored_name - - if existing_entry is not None: - rag_service.delete_document(document_id) - if existing_entry["stored_name"] != stored_name: - old_path = self.library_root / existing_entry["folder"] / existing_entry["stored_name"] - if old_path.exists(): - old_path.unlink() - - target_path.write_bytes(content) - - now = datetime.now(UTC).isoformat() - mime_type = mimetypes.guess_type(normalized_name)[0] or "application/octet-stream" - 
checksum = hashlib.sha256(content).hexdigest() - extension = self._extract_extension(normalized_name) - - if existing_entry is None: - entry = { - "id": document_id, - "folder": normalized_folder, - "original_name": normalized_name, - "stored_name": stored_name, - "mime_type": mime_type, - "extension": extension, - "size_bytes": len(content), - "sha256": checksum, +from __future__ import annotations + +import hashlib +import json +import mimetypes +from datetime import UTC, datetime +from pathlib import Path +from typing import Any +from urllib.request import Request, urlopen +from uuid import uuid4 + +from sqlalchemy.orm import Session + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentRunStatus +from app.core.config import get_settings +from app.core.logging import get_logger +from app.schemas.knowledge import ( + KnowledgeDocumentDetailRead, + KnowledgeDocumentRead, + KnowledgeFolderRead, + KnowledgeLibraryRead, + KnowledgeOnlyOfficeConfigRead, + KnowledgePreviewPageRead, +) +from app.services.knowledge_rag import KnowledgeRagService + +logger = get_logger("app.services.knowledge") + +from app.services.knowledge_constants import ( + FIXED_KNOWLEDGE_FOLDERS, + ICON_BY_TYPE, + KNOWLEDGE_INGEST_STATUS_FAILED, + KNOWLEDGE_INGEST_STATUS_INGESTED, + KNOWLEDGE_INGEST_STATUS_META, + KNOWLEDGE_INGEST_STATUS_PUBLISHED, + KNOWLEDGE_INGEST_STATUS_SYNCING, + KNOWLEDGE_SEARCH_RESULT_LIMIT, +) +from app.services.knowledge_document_extractors import ( + _extract_docx_text, + _extract_document_text_from_path, + _extract_pdf_text, + _extract_pptx_slides, + _extract_text_with_ocr, + _extract_xlsx_sheets, + _normalize_extracted_text, + _read_text_preview, +) +from app.services.knowledge_file_utils import ( + can_preview, + extract_extension, + format_size, + format_time, + normalize_filename, + normalize_folder, + parse_stored_name, + resolve_file_type, + resolve_file_type_label, +) +from app.services.knowledge_onlyoffice import ( + 
OnlyOfficeCallbackPayload, + build_onlyoffice_config as build_onlyoffice_config_payload, + build_onlyoffice_access_token, + build_onlyoffice_document_key, + parse_onlyoffice_callback, + resolve_onlyoffice_document_type, + validate_onlyoffice_access_token, +) +from app.services.knowledge_ingest_status import ( + is_syncing_status_stale, + normalize_ingest_status_code, + resolve_linked_ingest_run_status, + should_preserve_syncing_status, +) +from app.services.knowledge_preview import build_preview + + +def prepare_knowledge_library() -> None: + KnowledgeService().ensure_library_ready() + + +class KnowledgeService: + def __init__(self, storage_root: Path | None = None, db: Session | None = None) -> None: + settings = get_settings() + self.db = db + self.storage_root = Path(storage_root or settings.resolved_storage_root_dir) + self.library_root = self.storage_root / "knowledge" + self.index_path = self.library_root / ".index.json" + + def ensure_library_ready(self) -> None: + self.library_root.mkdir(parents=True, exist_ok=True) + for folder_name in FIXED_KNOWLEDGE_FOLDERS: + (self.library_root / folder_name).mkdir(parents=True, exist_ok=True) + + if not self.index_path.exists(): + self._save_index({"version": 1, "documents": []}) + + index = self._load_index() + if self._reconcile_index(index): + self._save_index(index) + + def list_library(self) -> KnowledgeLibraryRead: + documents = self._load_documents() + folders = [ + KnowledgeFolderRead( + name=folder_name, + count=sum(1 for item in documents if item.folder == folder_name), + icon="mdi mdi-folder-open" if folder_name == "差旅规范" else "mdi mdi-folder", + ) + for folder_name in FIXED_KNOWLEDGE_FOLDERS + ] + return KnowledgeLibraryRead(folders=folders, documents=documents) + + def get_document_detail(self, document_id: str) -> KnowledgeDocumentDetailRead: + self.ensure_library_ready() + index = self._load_index() + if self._reconcile_document_ingest_statuses(index, document_ids=[document_id]): + 
self._save_index(index) + entry = self._require_entry(index, document_id) + preview_kind, preview_pages = self._build_preview(entry) + document = self._serialize_document(entry) + return KnowledgeDocumentDetailRead( + **document.model_dump(), + previewKind=preview_kind, + previewPages=preview_pages, + ) + + def upload_document( + self, + folder: str, + filename: str, + content: bytes, + current_user: CurrentUserContext, + ) -> KnowledgeDocumentDetailRead: + self.ensure_library_ready() + normalized_folder = self._normalize_folder(folder) + normalized_name = self._normalize_filename(filename) + + if not content: + raise ValueError("上传文件不能为空。") + + rag_service = KnowledgeRagService(db=self.db, storage_root=self.storage_root) + index = self._load_index() + existing_entry = next( + ( + item + for item in index["documents"] + if item["folder"] == normalized_folder + and item["original_name"].lower() == normalized_name.lower() + ), + None, + ) + + document_id = existing_entry["id"] if existing_entry else uuid4().hex + stored_name = f"{document_id}__{normalized_name}" + target_path = self.library_root / normalized_folder / stored_name + + if existing_entry is not None: + rag_service.delete_document(document_id) + if existing_entry["stored_name"] != stored_name: + old_path = ( + self.library_root / existing_entry["folder"] / existing_entry["stored_name"] + ) + if old_path.exists(): + old_path.unlink() + + target_path.write_bytes(content) + + now = datetime.now(UTC).isoformat() + mime_type = mimetypes.guess_type(normalized_name)[0] or "application/octet-stream" + checksum = hashlib.sha256(content).hexdigest() + extension = self._extract_extension(normalized_name) + + if existing_entry is None: + entry = { + "id": document_id, + "folder": normalized_folder, + "original_name": normalized_name, + "stored_name": stored_name, + "mime_type": mime_type, + "extension": extension, + "size_bytes": len(content), + "sha256": checksum, "created_at": now, "updated_at": now, "uploaded_by": 
current_user.name, @@ -228,22 +195,22 @@ class KnowledgeService: "ingest_agent_run_id": "", } index["documents"].append(entry) - logger.info( - "Knowledge document uploaded id=%s folder=%s filename=%s by=%s", - document_id, - normalized_folder, - normalized_name, - current_user.name, - ) - else: - existing_entry.update( - { - "stored_name": stored_name, - "mime_type": mime_type, - "extension": extension, - "size_bytes": len(content), - "sha256": checksum, - "updated_at": now, + logger.info( + "Knowledge document uploaded id=%s folder=%s filename=%s by=%s", + document_id, + normalized_folder, + normalized_name, + current_user.name, + ) + else: + existing_entry.update( + { + "stored_name": stored_name, + "mime_type": mime_type, + "extension": extension, + "size_bytes": len(content), + "sha256": checksum, + "updated_at": now, "uploaded_by": current_user.name, "version_number": int(existing_entry.get("version_number", 1)) + 1, "ingest_status": KNOWLEDGE_INGEST_STATUS_PUBLISHED, @@ -255,48 +222,50 @@ class KnowledgeService: "ingest_agent_run_id": "", } ) - entry = existing_entry - logger.info( - "Knowledge document updated id=%s folder=%s filename=%s by=%s", - document_id, - normalized_folder, - normalized_name, - current_user.name, - ) - - self._save_index(index) - return self.get_document_detail(document_id) - - def delete_document(self, document_id: str) -> None: - self.ensure_library_ready() - index = self._load_index() - entry = self._require_entry(index, document_id) - file_path = self._resolve_document_path(entry) - if file_path.exists(): - file_path.unlink() - - index["documents"] = [item for item in index["documents"] if item["id"] != document_id] - self._save_index(index) - KnowledgeRagService(db=self.db, storage_root=self.storage_root).delete_document(document_id) - logger.info("Knowledge document deleted id=%s filename=%s", document_id, entry["original_name"]) - - def get_document_content(self, document_id: str) -> tuple[Path, str, str]: - 
self.ensure_library_ready() - index = self._load_index() - entry = self._require_entry(index, document_id) - file_path = self._resolve_document_path(entry) - - if not file_path.exists(): - raise FileNotFoundError(entry["original_name"]) - - return file_path, entry["mime_type"], entry["original_name"] - + entry = existing_entry + logger.info( + "Knowledge document updated id=%s folder=%s filename=%s by=%s", + document_id, + normalized_folder, + normalized_name, + current_user.name, + ) + + self._save_index(index) + return self.get_document_detail(document_id) + + def delete_document(self, document_id: str) -> None: + self.ensure_library_ready() + index = self._load_index() + entry = self._require_entry(index, document_id) + file_path = self._resolve_document_path(entry) + if file_path.exists(): + file_path.unlink() + + index["documents"] = [item for item in index["documents"] if item["id"] != document_id] + self._save_index(index) + KnowledgeRagService(db=self.db, storage_root=self.storage_root).delete_document(document_id) + logger.info( + "Knowledge document deleted id=%s filename=%s", document_id, entry["original_name"] + ) + + def get_document_content(self, document_id: str) -> tuple[Path, str, str]: + self.ensure_library_ready() + index = self._load_index() + entry = self._require_entry(index, document_id) + file_path = self._resolve_document_path(entry) + + if not file_path.exists(): + raise FileNotFoundError(entry["original_name"]) + + return file_path, entry["mime_type"], entry["original_name"] + def list_folder_documents(self, folder: str | None = None) -> list[dict[str, Any]]: self.ensure_library_ready() index = self._load_index() if self._reconcile_document_ingest_statuses(index): self._save_index(index) - documents = list(index.get("documents") or []) + documents = list(index.get("documents") or []) if folder is None: return documents normalized_folder = self._normalize_folder(folder) @@ -313,9 +282,7 @@ class KnowledgeService: requested_ids = 
{str(item).strip() for item in document_ids or [] if str(item).strip()} if requested_ids: documents = [ - item - for item in documents - if str(item.get("id") or "").strip() in requested_ids + item for item in documents if str(item.get("id") or "").strip() in requested_ids ] if changed_only: documents = [item for item in documents if self._should_index_document(item)] @@ -325,238 +292,146 @@ class KnowledgeService: self.ensure_library_ready() index = self._load_index() if self._reconcile_document_ingest_statuses(index, document_ids=[document_id]): - self._save_index(index) - return dict(self._require_entry(index, document_id)) - - def set_document_ingest_statuses( - self, - document_ids: list[str], - status_code: int, - *, - agent_run_id: str | None = None, - ) -> None: - self.ensure_library_ready() - normalized_ids = {str(item).strip() for item in document_ids if str(item).strip()} - if not normalized_ids: - return - - index = self._load_index() + self._save_index(index) + return dict(self._require_entry(index, document_id)) + + def set_document_ingest_statuses( + self, + document_ids: list[str], + status_code: int, + *, + agent_run_id: str | None = None, + ) -> None: + self.ensure_library_ready() + normalized_ids = {str(item).strip() for item in document_ids if str(item).strip()} + if not normalized_ids: + return + + index = self._load_index() changed = False updated_at = datetime.now(UTC).isoformat() for entry in index.get("documents", []): if str(entry.get("id") or "").strip() not in normalized_ids: continue - changed = self._apply_ingest_status_to_entry( - entry, - status_code=status_code, - updated_at=updated_at, - agent_run_id=agent_run_id, - ) or changed + changed = ( + self._apply_ingest_status_to_entry( + entry, + status_code=status_code, + updated_at=updated_at, + agent_run_id=agent_run_id, + ) + or changed + ) if changed: self._save_index(index) - - def refresh_document_ingest_statuses( - self, - document_ids: list[str] | None = None, - *, - 
preserve_syncing: bool = True, - ) -> None: - self.ensure_library_ready() - index = self._load_index() - if self._reconcile_document_ingest_statuses( - index, - document_ids=document_ids, - preserve_syncing=preserve_syncing, - ): - self._save_index(index) - - def search_knowledge( - self, - query: str, - *, - conversation_history: list[dict[str, str]] | None = None, - limit: int = KNOWLEDGE_SEARCH_RESULT_LIMIT, - ) -> dict[str, Any]: - self.ensure_library_ready() - return KnowledgeRagService(db=self.db, storage_root=self.storage_root).query_knowledge( - query, - conversation_history=conversation_history, - limit=limit, - ) - - def extract_document_text(self, document_id: str) -> str: - self.ensure_library_ready() - entry = self.get_document_entry(document_id) - file_path = self._resolve_document_path(entry) - if not file_path.exists(): - raise FileNotFoundError(entry["original_name"]) - return self._extract_document_text_from_path( - file_path=file_path, - original_name=str(entry.get("original_name") or file_path.name), - mime_type=str(entry.get("mime_type") or "application/octet-stream"), - ) - - def build_onlyoffice_config( - self, - document_id: str, - current_user: CurrentUserContext, - ) -> KnowledgeOnlyOfficeConfigRead: - self.ensure_library_ready() - settings = get_settings() - onlyoffice_settings = resolve_onlyoffice_settings() - if not onlyoffice_settings.enabled: - logger.warning( - "ONLYOFFICE disabled in runtime config doc=%s enabled=%s public_url=%s backend_url=%s jwt_set=%s", - document_id, - onlyoffice_settings.enabled, - onlyoffice_settings.public_url, - onlyoffice_settings.backend_url, - bool(onlyoffice_settings.jwt_secret), - ) - raise ValueError("ONLYOFFICE 预览未启用。") - if not onlyoffice_settings.public_url or not onlyoffice_settings.backend_url: - logger.warning( - "ONLYOFFICE config incomplete doc=%s enabled=%s public_url=%s backend_url=%s jwt_set=%s", - document_id, - onlyoffice_settings.enabled, - onlyoffice_settings.public_url, - 
onlyoffice_settings.backend_url, - bool(onlyoffice_settings.jwt_secret), - ) - raise ValueError("ONLYOFFICE 地址配置不完整。") - if not onlyoffice_settings.jwt_secret: - logger.warning( - "ONLYOFFICE JWT missing doc=%s enabled=%s public_url=%s backend_url=%s jwt_set=%s", - document_id, - onlyoffice_settings.enabled, - onlyoffice_settings.public_url, - onlyoffice_settings.backend_url, - bool(onlyoffice_settings.jwt_secret), - ) - raise ValueError("ONLYOFFICE JWT 密钥未配置。") - - index = self._load_index() - entry = self._require_entry(index, document_id) - extension = self._extract_extension(entry["original_name"]) - if extension not in ONLYOFFICE_EDITABLE_EXTENSIONS: - raise ValueError("当前文件格式不支持 ONLYOFFICE 预览。") - - document_type = self._resolve_onlyoffice_document_type(extension) - backend_base_url = onlyoffice_settings.backend_url.rstrip("/") - public_url = onlyoffice_settings.public_url.rstrip("/") - access_token = self._build_onlyoffice_access_token(document_id) - document_url = ( - f"{backend_base_url}{settings.api_v1_prefix}/knowledge/documents/{document_id}/onlyoffice/content" - f"?access_token={access_token}" - ) - callback_url = ( - f"{backend_base_url}{settings.api_v1_prefix}/knowledge/documents/{document_id}/onlyoffice/callback" - ) - document_key = self._build_onlyoffice_document_key(entry) - - config: dict[str, Any] = { - "documentType": document_type, - "document": { - "fileType": extension, - "key": document_key, - "title": entry["original_name"], - "url": document_url, - "permissions": { - "download": True, - "edit": False, - "print": True, - "copy": True, - }, - }, - "editorConfig": { - "mode": "view", - "lang": "zh-CN", - "callbackUrl": callback_url, - "user": { - "id": current_user.username, - "name": current_user.name, - }, - "customization": { - "compactHeader": True, - "compactToolbar": True, - "toolbarNoTabs": False, - "autosave": False, - "forcesave": False, - }, - }, - "width": "100%", - "height": "100%", - } - config["token"] = jwt.encode(config, 
onlyoffice_settings.jwt_secret, algorithm="HS256") - - return KnowledgeOnlyOfficeConfigRead( - documentServerUrl=public_url, - config=config, - ) - - def validate_onlyoffice_access_token(self, document_id: str, access_token: str) -> None: - onlyoffice_settings = resolve_onlyoffice_settings() - try: - payload = jwt.decode( - access_token, - onlyoffice_settings.jwt_secret, - algorithms=["HS256"], - ) - except jwt.PyJWTError as exc: - raise ValueError("ONLYOFFICE 文件访问令牌无效。") from exc - - if payload.get("scope") != "onlyoffice-content" or payload.get("document_id") != document_id: - raise ValueError("ONLYOFFICE 文件访问令牌无效。") - - def handle_onlyoffice_callback(self, document_id: str, payload: dict[str, Any]) -> None: - self.ensure_library_ready() - callback = self._parse_onlyoffice_callback(payload) - if callback.status not in {2, 6} or not callback.download_url: - return - - logger.info( - "ONLYOFFICE callback received id=%s status=%s users=%s", - document_id, - callback.status, - ",".join(callback.users) if callback.users else "-", - ) - - request = Request(callback.download_url, headers={"User-Agent": "x-financial-onlyoffice"}) - with urlopen(request, timeout=30) as response: # noqa: S310 - content = response.read() - - actor_name = callback.users[0] if callback.users else "ONLYOFFICE" - self._replace_document_content(document_id, content, actor_name=actor_name) - - def _load_documents(self) -> list[KnowledgeDocumentRead]: - self.ensure_library_ready() - index = self._load_index() - changed = self._reconcile_index(index) - changed = self._reconcile_document_ingest_statuses(index) or changed - if changed: - self._save_index(index) - - documents = [self._serialize_document(entry) for entry in index["documents"]] - return sorted(documents, key=lambda item: item.time, reverse=True) - - def _serialize_document( - self, - entry: dict[str, Any], - ) -> KnowledgeDocumentRead: - extension = entry.get("extension") or self._extract_extension(entry["original_name"]) - file_type = 
self._resolve_file_type(extension) - size_bytes = int(entry.get("size_bytes") or 0) + + def refresh_document_ingest_statuses( + self, + document_ids: list[str] | None = None, + *, + preserve_syncing: bool = True, + ) -> None: + self.ensure_library_ready() + index = self._load_index() + if self._reconcile_document_ingest_statuses( + index, + document_ids=document_ids, + preserve_syncing=preserve_syncing, + ): + self._save_index(index) + + def search_knowledge( + self, + query: str, + *, + conversation_history: list[dict[str, str]] | None = None, + limit: int = KNOWLEDGE_SEARCH_RESULT_LIMIT, + ) -> dict[str, Any]: + self.ensure_library_ready() + return KnowledgeRagService(db=self.db, storage_root=self.storage_root).query_knowledge( + query, + conversation_history=conversation_history, + limit=limit, + ) + + def extract_document_text(self, document_id: str) -> str: + self.ensure_library_ready() + entry = self.get_document_entry(document_id) + file_path = self._resolve_document_path(entry) + if not file_path.exists(): + raise FileNotFoundError(entry["original_name"]) + return self._extract_document_text_from_path( + file_path=file_path, + original_name=str(entry.get("original_name") or file_path.name), + mime_type=str(entry.get("mime_type") or "application/octet-stream"), + ) + + def build_onlyoffice_config( + self, + document_id: str, + current_user: CurrentUserContext, + ) -> KnowledgeOnlyOfficeConfigRead: + self.ensure_library_ready() + index = self._load_index() + entry = self._require_entry(index, document_id) + return build_onlyoffice_config_payload( + document_id=document_id, + entry=entry, + current_user=current_user, + ) + + def validate_onlyoffice_access_token(self, document_id: str, access_token: str) -> None: + validate_onlyoffice_access_token(document_id, access_token) + + def handle_onlyoffice_callback(self, document_id: str, payload: dict[str, Any]) -> None: + self.ensure_library_ready() + callback = self._parse_onlyoffice_callback(payload) + if 
callback.status not in {2, 6} or not callback.download_url: + return + + logger.info( + "ONLYOFFICE callback received id=%s status=%s users=%s", + document_id, + callback.status, + ",".join(callback.users) if callback.users else "-", + ) + + request = Request(callback.download_url, headers={"User-Agent": "x-financial-onlyoffice"}) + with urlopen(request, timeout=30) as response: # noqa: S310 + content = response.read() + + actor_name = callback.users[0] if callback.users else "ONLYOFFICE" + self._replace_document_content(document_id, content, actor_name=actor_name) + + def _load_documents(self) -> list[KnowledgeDocumentRead]: + self.ensure_library_ready() + index = self._load_index() + changed = self._reconcile_index(index) + changed = self._reconcile_document_ingest_statuses(index) or changed + if changed: + self._save_index(index) + + documents = [self._serialize_document(entry) for entry in index["documents"]] + return sorted(documents, key=lambda item: item.time, reverse=True) + + def _serialize_document( + self, + entry: dict[str, Any], + ) -> KnowledgeDocumentRead: + extension = entry.get("extension") or self._extract_extension(entry["original_name"]) + file_type = self._resolve_file_type(extension) + size_bytes = int(entry.get("size_bytes") or 0) updated_at = self._format_time(entry.get("updated_at") or entry.get("created_at")) ingest_time = self._format_time(entry.get("ingest_completed_at")) - state_code = self._normalize_ingest_status_code(entry.get("ingest_status")) + state_code = normalize_ingest_status_code(entry.get("ingest_status")) state_label, state_tone = KNOWLEDGE_INGEST_STATUS_META.get( state_code, KNOWLEDGE_INGEST_STATUS_META[KNOWLEDGE_INGEST_STATUS_PUBLISHED], ) - - return KnowledgeDocumentRead( - id=entry["id"], + + return KnowledgeDocumentRead( + id=entry["id"], name=entry["original_name"], folder=entry["folder"], tag=f"{entry['folder']} / {extension.upper() or 'FILE'}", @@ -564,200 +439,68 @@ class KnowledgeService: ingestTime=ingest_time if 
state_code == KNOWLEDGE_INGEST_STATUS_INGESTED else "", version=f"v{int(entry.get('version_number', 1))}.0", stateCode=state_code, - state=state_label, - stateTone=state_tone, - owner=entry.get("uploaded_by") or "????", - icon=ICON_BY_TYPE.get(file_type, ICON_BY_TYPE["binary"]), - fileType=file_type, - fileTypeLabel=self._resolve_file_type_label(file_type), - summary=f"{entry['folder']} ? {extension.upper() or 'FILE'} ? {self._format_size(size_bytes)}", - mimeType=entry.get("mime_type") or "application/octet-stream", - extension=extension, - sizeBytes=size_bytes, - canPreview=self._can_preview(extension), - llmWikiAvailable=False, - llmWikiQualityStatus="", - llmWikiQualityNote="", - ) - - def _build_preview( - self, entry: dict[str, Any] - ) -> tuple[str, list[KnowledgePreviewPageRead]]: - extension = self._extract_extension(entry["original_name"]) - file_path = self._resolve_document_path(entry) - - if extension == "pdf": - return "pdf", [] - - if extension in IMAGE_EXTENSIONS: - return "image", [] - - if extension in TEXT_EXTENSIONS: - text = self._read_text_preview(file_path) - return "text", [self._build_text_preview_page(entry, text)] - - if extension == "docx": - text = self._extract_docx_text(file_path) - return "text", [self._build_text_preview_page(entry, text)] - - if extension == "xlsx": - return "table", self._build_xlsx_preview_pages(entry, file_path) - - if extension == "pptx": - return "slides", self._build_pptx_preview_pages(entry, file_path) - - return ( - "unsupported", - [ - KnowledgePreviewPageRead( - title=entry["original_name"], - subtitle="当前格式暂不支持在线解析预览。", - stats=[ - KnowledgePreviewStatRead(label="文件格式", value=extension.upper() or "FILE"), - KnowledgePreviewStatRead(label="文件大小", value=self._format_size(entry["size_bytes"])), - KnowledgePreviewStatRead(label="建议操作", value="下载后查看"), - ], - blocks=[ - KnowledgePreviewBlockRead( - heading="预览说明", - lines=[ - "当前系统已支持该文件的上传、下载和权限控制。", - "如需在线预览,可后续接入专门的文档转换服务。", - ], - ) - ], - ) - ], - ) - - 
def _build_text_preview_page( - self, entry: dict[str, Any], text: str - ) -> KnowledgePreviewPageRead: - lines = [line.strip() for line in text.splitlines() if line.strip()] - if not lines: - lines = ["文件内容为空,或当前文档未提取到可展示文本。"] - - groups = [lines[index : index + 8] for index in range(0, min(len(lines), 24), 8)] - blocks = [ - KnowledgePreviewBlockRead(heading=f"内容片段 {index + 1}", lines=group) - for index, group in enumerate(groups) - ] - - return KnowledgePreviewPageRead( - title=entry["original_name"], - subtitle="文本提取预览", - stats=[ - KnowledgePreviewStatRead(label="文件格式", value=entry["extension"].upper() or "TEXT"), - KnowledgePreviewStatRead(label="可见行数", value=str(len(lines))), - KnowledgePreviewStatRead(label="文件大小", value=self._format_size(entry["size_bytes"])), - ], - blocks=blocks, - ) - - def _build_xlsx_preview_pages( - self, entry: dict[str, Any], file_path: Path - ) -> list[KnowledgePreviewPageRead]: - sheets = self._extract_xlsx_sheets(file_path) - if not sheets: - sheets = [("Sheet 1", [["未提取到表格内容。"]])] - - preview_pages: list[KnowledgePreviewPageRead] = [] - sheet_count = len(sheets) - for sheet_name, rows in sheets[:8]: - visible_rows = rows[:12] if rows else [["未提取到表格内容。"]] - blocks = [ - KnowledgePreviewBlockRead( - heading=f"第 {index + 1} 行", - lines=[" | ".join((cell or "") for cell in row)], - ) - for index, row in enumerate(visible_rows) - ] - - preview_pages.append( - KnowledgePreviewPageRead( - title=sheet_name, - subtitle="表格内容预览", - stats=[ - KnowledgePreviewStatRead(label="工作表数量", value=str(sheet_count)), - KnowledgePreviewStatRead(label="预览行数", value=str(len(visible_rows))), - KnowledgePreviewStatRead(label="文件大小", value=self._format_size(entry["size_bytes"])), - ], - blocks=blocks, - ) - ) - - return preview_pages - - def _build_pptx_preview_pages( - self, entry: dict[str, Any], file_path: Path - ) -> list[KnowledgePreviewPageRead]: - slides = self._extract_pptx_slides(file_path) - if not slides: - slides = [["未提取到幻灯片文本。"]] - - pages: 
list[KnowledgePreviewPageRead] = [] - for index, slide_lines in enumerate(slides[:8]): - pages.append( - KnowledgePreviewPageRead( - title=entry["original_name"], - subtitle=f"幻灯片 {index + 1}", - stats=[ - KnowledgePreviewStatRead(label="页码", value=str(index + 1)), - KnowledgePreviewStatRead(label="文本条数", value=str(len(slide_lines))), - KnowledgePreviewStatRead(label="文件格式", value="PPTX"), - ], - blocks=[ - KnowledgePreviewBlockRead( - heading="幻灯片内容", - lines=slide_lines or ["该页未提取到文本内容。"], - ) - ], - ) - ) - - return pages - - def _load_index(self) -> dict[str, Any]: - try: - payload = json.loads(self.index_path.read_text(encoding="utf-8")) - except (FileNotFoundError, json.JSONDecodeError): - payload = {"version": 1, "documents": []} - payload.setdefault("documents", []) - return payload - - def _save_index(self, index: dict[str, Any]) -> None: - self.index_path.write_text( - json.dumps(index, ensure_ascii=False, indent=2), - encoding="utf-8", - ) - - def _reconcile_index(self, index: dict[str, Any]) -> bool: - changed = False - documents = index.setdefault("documents", []) - known_by_stored = { - (item["folder"], item["stored_name"]): item - for item in documents - if item.get("folder") and item.get("stored_name") - } - - existing_items: list[dict[str, Any]] = [] - for item in documents: - file_path = self._resolve_document_path(item) - if file_path.exists(): - item["size_bytes"] = file_path.stat().st_size - item["extension"] = self._extract_extension(item["original_name"]) - item["mime_type"] = item.get("mime_type") or ( - mimetypes.guess_type(item["original_name"])[0] or "application/octet-stream" - ) - normalized_status = self._normalize_ingest_status_code(item.get("ingest_status")) - if item.get("ingest_status") != normalized_status: - item["ingest_status"] = normalized_status - changed = True + state=state_label, + stateTone=state_tone, + owner=entry.get("uploaded_by") or "????", + icon=ICON_BY_TYPE.get(file_type, ICON_BY_TYPE["binary"]), + 
fileType=file_type, + fileTypeLabel=self._resolve_file_type_label(file_type), + summary=f"{entry['folder']} ? {extension.upper() or 'FILE'} ? {self._format_size(size_bytes)}", + mimeType=entry.get("mime_type") or "application/octet-stream", + extension=extension, + sizeBytes=size_bytes, + canPreview=self._can_preview(extension), + llmWikiAvailable=False, + llmWikiQualityStatus="", + llmWikiQualityNote="", + ) + + def _build_preview(self, entry: dict[str, Any]) -> tuple[str, list[KnowledgePreviewPageRead]]: + return build_preview(entry, resolve_document_path=self._resolve_document_path) + + def _load_index(self) -> dict[str, Any]: + try: + payload = json.loads(self.index_path.read_text(encoding="utf-8")) + except (FileNotFoundError, json.JSONDecodeError): + payload = {"version": 1, "documents": []} + payload.setdefault("documents", []) + return payload + + def _save_index(self, index: dict[str, Any]) -> None: + self.index_path.write_text( + json.dumps(index, ensure_ascii=False, indent=2), + encoding="utf-8", + ) + + def _reconcile_index(self, index: dict[str, Any]) -> bool: + changed = False + documents = index.setdefault("documents", []) + known_by_stored = { + (item["folder"], item["stored_name"]): item + for item in documents + if item.get("folder") and item.get("stored_name") + } + + existing_items: list[dict[str, Any]] = [] + for item in documents: + file_path = self._resolve_document_path(item) + if file_path.exists(): + item["size_bytes"] = file_path.stat().st_size + item["extension"] = self._extract_extension(item["original_name"]) + item["mime_type"] = item.get("mime_type") or ( + mimetypes.guess_type(item["original_name"])[0] or "application/octet-stream" + ) + normalized_status = normalize_ingest_status_code(item.get("ingest_status")) + if item.get("ingest_status") != normalized_status: + item["ingest_status"] = normalized_status + changed = True if "ingest_agent_run_id" not in item: item["ingest_agent_run_id"] = "" changed = True if 
"ingest_status_updated_at" not in item: - item["ingest_status_updated_at"] = item.get("updated_at") or item.get("created_at") or "" + item["ingest_status_updated_at"] = ( + item.get("updated_at") or item.get("created_at") or "" + ) changed = True if "ingest_completed_at" not in item: item["ingest_completed_at"] = "" @@ -772,34 +515,34 @@ class KnowledgeService: item["ingest_document_sha256"] = "" changed = True existing_items.append(item) - else: - changed = True - - for folder_name in FIXED_KNOWLEDGE_FOLDERS: - folder_path = self.library_root / folder_name - for file_path in folder_path.iterdir(): - if not file_path.is_file() or file_path.name.startswith("."): - continue - - key = (folder_name, file_path.name) - if key in known_by_stored: - continue - - document_id, original_name = self._parse_stored_name(file_path.name) - stat = file_path.stat() - existing_items.append( - { - "id": document_id, - "folder": folder_name, - "original_name": original_name, - "stored_name": file_path.name, - "mime_type": mimetypes.guess_type(original_name)[0] - or "application/octet-stream", - "extension": self._extract_extension(original_name), - "size_bytes": stat.st_size, - "sha256": "", - "created_at": datetime.fromtimestamp(stat.st_ctime, tz=UTC).isoformat(), - "updated_at": datetime.fromtimestamp(stat.st_mtime, tz=UTC).isoformat(), + else: + changed = True + + for folder_name in FIXED_KNOWLEDGE_FOLDERS: + folder_path = self.library_root / folder_name + for file_path in folder_path.iterdir(): + if not file_path.is_file() or file_path.name.startswith("."): + continue + + key = (folder_name, file_path.name) + if key in known_by_stored: + continue + + document_id, original_name = self._parse_stored_name(file_path.name) + stat = file_path.stat() + existing_items.append( + { + "id": document_id, + "folder": folder_name, + "original_name": original_name, + "stored_name": file_path.name, + "mime_type": mimetypes.guess_type(original_name)[0] + or "application/octet-stream", + 
"extension": self._extract_extension(original_name), + "size_bytes": stat.st_size, + "sha256": "", + "created_at": datetime.fromtimestamp(stat.st_ctime, tz=UTC).isoformat(), + "updated_at": datetime.fromtimestamp(stat.st_mtime, tz=UTC).isoformat(), "uploaded_by": "系统导入", "version_number": 1, "ingest_status": KNOWLEDGE_INGEST_STATUS_PUBLISHED, @@ -811,56 +554,59 @@ class KnowledgeService: "ingest_agent_run_id": "", } ) - changed = True - - if changed or len(existing_items) != len(documents): - index["documents"] = existing_items - return True - return False - - def _reconcile_document_ingest_statuses( - self, - index: dict[str, Any], - *, - document_ids: list[str] | None = None, - preserve_syncing: bool = True, - ) -> bool: - changed = False - target_ids = {str(item).strip() for item in document_ids or [] if str(item).strip()} - status_map = KnowledgeRagService(db=self.db, storage_root=self.storage_root).get_document_status_map( - list(target_ids) - if target_ids - else [ - str(item.get("id") or "").strip() - for item in index.get("documents", []) - if str(item.get("id") or "").strip() - ] - ) - - for entry in index.get("documents", []): - document_id = str(entry.get("id") or "").strip() - if target_ids and document_id not in target_ids: - continue - - current_status = self._normalize_ingest_status_code(entry.get("ingest_status")) - if entry.get("ingest_status") != current_status: - entry["ingest_status"] = current_status - changed = True - - if ( - current_status == KNOWLEDGE_INGEST_STATUS_SYNCING - and preserve_syncing - and self._should_preserve_syncing_status(entry) - ): - continue - + changed = True + + if changed or len(existing_items) != len(documents): + index["documents"] = existing_items + return True + return False + + def _reconcile_document_ingest_statuses( + self, + index: dict[str, Any], + *, + document_ids: list[str] | None = None, + preserve_syncing: bool = True, + ) -> bool: + changed = False + target_ids = {str(item).strip() for item in 
document_ids or [] if str(item).strip()} + status_map = KnowledgeRagService( + db=self.db, storage_root=self.storage_root + ).get_document_status_map( + list(target_ids) + if target_ids + else [ + str(item.get("id") or "").strip() + for item in index.get("documents", []) + if str(item.get("id") or "").strip() + ] + ) + + for entry in index.get("documents", []): + document_id = str(entry.get("id") or "").strip() + if target_ids and document_id not in target_ids: + continue + + current_status = normalize_ingest_status_code(entry.get("ingest_status")) + if entry.get("ingest_status") != current_status: + entry["ingest_status"] = current_status + changed = True + + if ( + current_status == KNOWLEDGE_INGEST_STATUS_SYNCING + and preserve_syncing + and should_preserve_syncing_status(entry, db=self.db) + ): + continue + status_payload = status_map.get(document_id) or {} rag_status = str(status_payload.get("status") or "").strip().lower() - linked_run_status = self._resolve_linked_ingest_run_status(entry) - if ( - linked_run_status == AgentRunStatus.FAILED.value - and rag_status in {"pending", "processing", "preprocessed"} - ): + linked_run_status = resolve_linked_ingest_run_status(entry, db=self.db) + if linked_run_status == AgentRunStatus.FAILED.value and rag_status in { + "pending", + "processing", + "preprocessed", + }: desired_status = KNOWLEDGE_INGEST_STATUS_FAILED elif bool(status_payload.get("query_ready")): desired_status = KNOWLEDGE_INGEST_STATUS_INGESTED @@ -868,10 +614,13 @@ class KnowledgeService: desired_status = KNOWLEDGE_INGEST_STATUS_SYNCING elif rag_status == "failed": desired_status = KNOWLEDGE_INGEST_STATUS_FAILED - else: - desired_status = KNOWLEDGE_INGEST_STATUS_PUBLISHED - - if current_status == KNOWLEDGE_INGEST_STATUS_FAILED and desired_status == KNOWLEDGE_INGEST_STATUS_PUBLISHED: + else: + desired_status = KNOWLEDGE_INGEST_STATUS_PUBLISHED + + if ( + current_status == KNOWLEDGE_INGEST_STATUS_FAILED + and desired_status == 
KNOWLEDGE_INGEST_STATUS_PUBLISHED + ): continue if current_status != desired_status: entry["ingest_status"] = desired_status @@ -882,7 +631,8 @@ class KnowledgeService: if desired_status == KNOWLEDGE_INGEST_STATUS_INGESTED: self._mark_entry_ingested( entry, - completed_at=entry.get("ingest_status_updated_at") or datetime.now(UTC).isoformat(), + completed_at=entry.get("ingest_status_updated_at") + or datetime.now(UTC).isoformat(), ) changed = True elif desired_status == KNOWLEDGE_INGEST_STATUS_INGESTED: @@ -899,7 +649,7 @@ class KnowledgeService: agent_run_id: str | None, ) -> bool: changed = False - current_status = self._normalize_ingest_status_code(entry.get("ingest_status")) + current_status = normalize_ingest_status_code(entry.get("ingest_status")) if current_status != status_code: entry["ingest_status"] = status_code changed = True @@ -923,7 +673,10 @@ class KnowledgeService: *, completed_at: str | None = None, ) -> bool: - completed_value = str(completed_at or entry.get("ingest_completed_at") or "").strip() or datetime.now(UTC).isoformat() + completed_value = ( + str(completed_at or entry.get("ingest_completed_at") or "").strip() + or datetime.now(UTC).isoformat() + ) expected_values = { "ingest_completed_at": completed_value, "ingest_document_name": str(entry.get("original_name") or "").strip(), @@ -938,14 +691,14 @@ class KnowledgeService: return changed def _should_index_document(self, entry: dict[str, Any]) -> bool: - status_code = self._normalize_ingest_status_code(entry.get("ingest_status")) + status_code = normalize_ingest_status_code(entry.get("ingest_status")) if status_code in { KNOWLEDGE_INGEST_STATUS_PUBLISHED, KNOWLEDGE_INGEST_STATUS_FAILED, }: return True if status_code == KNOWLEDGE_INGEST_STATUS_SYNCING: - return self._is_syncing_status_stale(entry) + return is_syncing_status_stale(entry) return any( [ @@ -963,421 +716,77 @@ class KnowledgeService: def _load_json_file(path: Path, *, default: Any) -> Any: try: return 
json.loads(path.read_text(encoding="utf-8")) - except (FileNotFoundError, json.JSONDecodeError): - return default - - @staticmethod - def _load_text_file(path: Path) -> str: - try: - return path.read_text(encoding="utf-8").strip() - except FileNotFoundError: - return "" - - @staticmethod - def _normalize_ingest_status_code(value: Any) -> int: - try: - status_code = int(value) - except (TypeError, ValueError): - return KNOWLEDGE_INGEST_STATUS_PUBLISHED - if status_code not in KNOWLEDGE_INGEST_STATUS_META: - return KNOWLEDGE_INGEST_STATUS_PUBLISHED - return status_code - - @staticmethod - def _is_syncing_status_stale(entry: dict[str, Any]) -> bool: - raw_value = str(entry.get("ingest_status_updated_at") or "").strip() - if not raw_value: - return True - try: - updated_at = datetime.fromisoformat(raw_value) - except ValueError: - return True - if updated_at.tzinfo is None: - updated_at = updated_at.replace(tzinfo=UTC) - age_seconds = (datetime.now(UTC) - updated_at.astimezone(UTC)).total_seconds() - return age_seconds >= KNOWLEDGE_INGEST_SYNC_STALE_SECONDS - - def _should_preserve_syncing_status(self, entry: dict[str, Any]) -> bool: - agent_run_id = str(entry.get("ingest_agent_run_id") or "").strip() - if not agent_run_id or self.db is None: - return not self._is_syncing_status_stale(entry) - - run = self.db.scalar(select(AgentRun).where(AgentRun.run_id == agent_run_id)) - if run is None: - return not self._is_syncing_status_stale(entry) - if run.status != AgentRunStatus.RUNNING.value: - return False - - heartbeat_at = str((run.route_json or {}).get("heartbeat_at") or "").strip() - if heartbeat_at: - probe_entry = {"ingest_status_updated_at": heartbeat_at} - return not self._is_syncing_status_stale(probe_entry) - - return not self._is_syncing_status_stale(entry) + except (FileNotFoundError, json.JSONDecodeError): + return default - def _resolve_linked_ingest_run_status(self, entry: dict[str, Any]) -> str: - agent_run_id = str(entry.get("ingest_agent_run_id") or 
"").strip() - if not agent_run_id or self.db is None: + @staticmethod + def _load_text_file(path: Path) -> str: + try: + return path.read_text(encoding="utf-8").strip() + except FileNotFoundError: return "" - run = self.db.scalar(select(AgentRun).where(AgentRun.run_id == agent_run_id)) - if run is None: - return "" - return str(run.status or "").strip() - def _require_entry(self, index: dict[str, Any], document_id: str) -> dict[str, Any]: for entry in index["documents"]: if entry["id"] == document_id: return entry - raise FileNotFoundError(document_id) - - def _resolve_document_path(self, entry: dict[str, Any]) -> Path: - return self.library_root / entry["folder"] / entry["stored_name"] - - def _replace_document_content(self, document_id: str, content: bytes, actor_name: str) -> KnowledgeDocumentDetailRead: - index = self._load_index() - entry = self._require_entry(index, document_id) - current_user = CurrentUserContext( - username="onlyoffice", - name=actor_name or "ONLYOFFICE", - role_codes=["manager"], - is_admin=True, - ) - return self.upload_document( - folder=entry["folder"], - filename=entry["original_name"], - content=content, - current_user=current_user, - ) - - @staticmethod - def _parse_onlyoffice_callback(payload: dict[str, Any]) -> OnlyOfficeCallbackPayload: - status = int(payload.get("status") or 0) - download_url = str(payload.get("url") or "").strip() - users = [str(item).strip() for item in payload.get("users") or [] if str(item).strip()] - return OnlyOfficeCallbackPayload(status=status, download_url=download_url, users=users) - - @staticmethod - def _normalize_filename(filename: str) -> str: - normalized = Path(str(filename or "").strip()).name.strip() - normalized = normalized.replace("/", "_").replace("\\", "_") - if not normalized: - raise ValueError("文件名不能为空。") - return normalized - - @staticmethod - def _normalize_folder(folder: str) -> str: - normalized = str(folder or "").strip() - if normalized not in FIXED_KNOWLEDGE_FOLDERS: - raise 
ValueError("只能上传到预设知识库文件夹。") - return normalized - - @staticmethod - def _extract_extension(filename: str) -> str: - suffix = Path(filename).suffix.lower().lstrip(".") - return suffix - - @staticmethod - def _build_onlyoffice_document_key(entry: dict[str, Any]) -> str: - version = int(entry.get("version_number", 1)) - checksum = str(entry.get("sha256") or "")[:12] - return f"{entry['id']}-v{version}-{checksum or 'nochecksum'}" - - def _build_onlyoffice_access_token(self, document_id: str) -> str: - onlyoffice_settings = resolve_onlyoffice_settings() - payload = { - "scope": "onlyoffice-content", - "document_id": document_id, - } - return jwt.encode(payload, onlyoffice_settings.jwt_secret, algorithm="HS256") - - @staticmethod - def _resolve_onlyoffice_document_type(extension: str) -> str: - if extension in WORD_EXTENSIONS: - return "word" - if extension in EXCEL_EXTENSIONS: - return "cell" - if extension in PPT_EXTENSIONS: - return "slide" - raise ValueError("当前文件格式不支持 ONLYOFFICE 预览。") - - @staticmethod - def _parse_stored_name(stored_name: str) -> tuple[str, str]: - if "__" not in stored_name: - return uuid4().hex, stored_name - document_id, original_name = stored_name.split("__", 1) - return document_id or uuid4().hex, original_name or stored_name - - @staticmethod - def _format_time(value: str | None) -> str: - if not value: - return "" - try: - parsed = datetime.fromisoformat(value) - except ValueError: - return value - return parsed.astimezone(UTC).strftime("%Y-%m-%d %H:%M") - - @staticmethod - def _format_size(size_bytes: int) -> str: - if size_bytes < 1024: - return f"{size_bytes} B" - if size_bytes < 1024 * 1024: - return f"{size_bytes / 1024:.1f} KB" - return f"{size_bytes / (1024 * 1024):.1f} MB" - - @staticmethod - def _resolve_file_type(extension: str) -> str: - if extension == "pdf": - return "pdf" - if extension in WORD_EXTENSIONS: - return "word" - if extension in EXCEL_EXTENSIONS: - return "excel" - if extension in PPT_EXTENSIONS: - return "ppt" - if 
extension in IMAGE_EXTENSIONS: - return "image" - if extension in TEXT_EXTENSIONS: - return "text" - if extension in ARCHIVE_EXTENSIONS: - return "archive" - return "binary" - - @staticmethod - def _resolve_file_type_label(file_type: str) -> str: - mapping = { - "pdf": "PDF 预览", - "word": "Word 预览", - "excel": "Excel 预览", - "ppt": "PPT 预览", - "image": "图片预览", - "text": "文本预览", - "archive": "压缩包", - "binary": "文件预览", - } - return mapping.get(file_type, "文件预览") - - @staticmethod - def _can_preview(extension: str) -> bool: - return extension in INLINE_PREVIEW_EXTENSIONS or extension in STRUCTURED_PREVIEW_EXTENSIONS - - @staticmethod - def _read_text_preview(file_path: Path) -> str: - encodings = ("utf-8", "utf-8-sig", "gbk") - for encoding in encodings: - try: - return file_path.read_text(encoding=encoding) - except UnicodeDecodeError: - continue - return "当前文本文件编码暂不支持在线解析。" - - @staticmethod - def _extract_docx_text(file_path: Path) -> str: - try: - with ZipFile(file_path) as archive: - xml_content = archive.read("word/document.xml") - except (BadZipFile, KeyError): - return "当前 Word 文件解析失败。" - - root = ElementTree.fromstring(xml_content) - texts = [node.text.strip() for node in root.iter() if node.tag.endswith("}t") and node.text] - return "\n".join(texts) - - def _extract_document_text_from_path( - self, - *, - file_path: Path, - original_name: str, - mime_type: str, - ) -> str: - extension = self._extract_extension(original_name) - if extension in TEXT_EXTENSIONS: - return self._normalize_extracted_text(self._read_text_preview(file_path)) - if extension == "docx": - return self._normalize_extracted_text(self._extract_docx_text(file_path)) - if extension == "pdf": - text = self._normalize_extracted_text(self._extract_pdf_text(file_path)) - if text: - return text - return self._normalize_extracted_text( - self._extract_text_with_ocr( - file_path=file_path, - original_name=original_name, - mime_type=mime_type, - ) - ) - if extension in IMAGE_EXTENSIONS: - return 
self._normalize_extracted_text( - self._extract_text_with_ocr( - file_path=file_path, - original_name=original_name, - mime_type=mime_type, - ) - ) - return "" - - @staticmethod - def _normalize_extracted_text(text: str) -> str: - normalized = str(text or "").replace("\r\n", "\n").replace("\r", "\n") - normalized = re.sub(r"\n{3,}", "\n\n", normalized) - return normalized.strip() - - @staticmethod - def _extract_pdf_text(file_path: Path) -> str: - pdftotext_bin = shutil.which("pdftotext") - if not pdftotext_bin: - return "" - - completed = subprocess.run( - [pdftotext_bin, "-layout", str(file_path), "-"], - capture_output=True, - text=True, - timeout=40, - check=False, - ) - if completed.returncode != 0: - return "" - return str(completed.stdout or "") - - @staticmethod - def _extract_text_with_ocr( - *, - file_path: Path, - original_name: str, - mime_type: str, - ) -> str: - try: - from app.services.ocr import OcrService - - result = OcrService().recognize_files( - [(original_name, file_path.read_bytes(), mime_type)] - ) - except Exception: - return "" - - parts: list[str] = [] - for document in result.documents: - text = str(getattr(document, "text", "") or "").strip() - summary = str(getattr(document, "summary", "") or "").strip() - if text: - parts.append(text) - elif summary: - parts.append(summary) - return "\n\n".join(part for part in parts if part) - - @staticmethod - def _extract_xlsx_sheets(file_path: Path) -> list[tuple[str, list[list[str]]]]: - try: - with ZipFile(file_path) as archive: - shared_strings: list[str] = [] - if "xl/sharedStrings.xml" in archive.namelist(): - shared_root = ElementTree.fromstring(archive.read("xl/sharedStrings.xml")) - shared_strings = [ - "".join(node.itertext()).strip() - for node in shared_root.iter() - if node.tag.endswith("}si") - ] - - sheet_files = sorted( - name - for name in archive.namelist() - if re.fullmatch(r"xl/worksheets/sheet\d+\.xml", name) - ) - if not sheet_files: - return [] - - relationship_targets: 
dict[str, str] = {} - if "xl/_rels/workbook.xml.rels" in archive.namelist(): - rel_root = ElementTree.fromstring(archive.read("xl/_rels/workbook.xml.rels")) - for node in rel_root.iter(): - if not node.tag.endswith("Relationship"): - continue - rel_id = node.attrib.get("Id") - target = node.attrib.get("Target") - if not rel_id or not target: - continue - normalized = target.lstrip("/") - if not normalized.startswith("xl/"): - normalized = f"xl/{normalized.lstrip('./')}" - relationship_targets[rel_id] = normalized - - ordered_sheets: list[tuple[str, str]] = [] - if "xl/workbook.xml" in archive.namelist(): - workbook_root = ElementTree.fromstring(archive.read("xl/workbook.xml")) - for index, node in enumerate(workbook_root.iter()): - if not node.tag.endswith("sheet"): - continue - sheet_name = node.attrib.get("name") or f"Sheet {index + 1}" - relationship_id = next( - (value for key, value in node.attrib.items() if key.endswith("}id")), - None, - ) - target = relationship_targets.get(relationship_id or "") - if target: - ordered_sheets.append((sheet_name, target)) - - if not ordered_sheets: - ordered_sheets = [ - (f"Sheet {index + 1}", sheet_file) - for index, sheet_file in enumerate(sheet_files) - ] - - preview_sheets: list[tuple[str, list[list[str]]]] = [] - for sheet_name, target in ordered_sheets: - if target not in archive.namelist(): - continue - - sheet_root = ElementTree.fromstring(archive.read(target)) - rows: list[list[str]] = [] - for row in sheet_root.iter(): - if not row.tag.endswith("}row"): - continue - row_values: list[str] = [] - for cell in row: - if not cell.tag.endswith("}c"): - continue - cell_type = cell.attrib.get("t") - value_node = next((item for item in cell if item.tag.endswith("}v")), None) - - if cell_type == "inlineStr": - text_node = next((item for item in cell.iter() if item.tag.endswith("}t")), None) - row_values.append((text_node.text or "").strip() if text_node is not None else "") - continue - - if value_node is None or 
value_node.text is None: - row_values.append("") - continue - - raw_value = value_node.text.strip() - if cell_type == "s" and raw_value.isdigit(): - index = int(raw_value) - row_values.append(shared_strings[index] if index < len(shared_strings) else raw_value) - else: - row_values.append(raw_value) - if row_values: - rows.append(row_values) - - preview_sheets.append((sheet_name, rows)) - - return preview_sheets - except (BadZipFile, ElementTree.ParseError, KeyError, ValueError): - return [] - - @staticmethod - def _extract_pptx_slides(file_path: Path) -> list[list[str]]: - try: - with ZipFile(file_path) as archive: - slide_names = sorted( - name - for name in archive.namelist() - if re.fullmatch(r"ppt/slides/slide\d+\.xml", name) - ) - slides: list[list[str]] = [] - for slide_name in slide_names: - root = ElementTree.fromstring(archive.read(slide_name)) - texts = [node.text.strip() for node in root.iter() if node.tag.endswith("}t") and node.text] - slides.append(texts) - return slides - except (BadZipFile, ElementTree.ParseError, KeyError): - return [] + raise FileNotFoundError(document_id) + + def _resolve_document_path(self, entry: dict[str, Any]) -> Path: + return self.library_root / entry["folder"] / entry["stored_name"] + + def _replace_document_content( + self, document_id: str, content: bytes, actor_name: str + ) -> KnowledgeDocumentDetailRead: + index = self._load_index() + entry = self._require_entry(index, document_id) + current_user = CurrentUserContext( + username="onlyoffice", + name=actor_name or "ONLYOFFICE", + role_codes=["manager"], + is_admin=True, + ) + return self.upload_document( + folder=entry["folder"], + filename=entry["original_name"], + content=content, + current_user=current_user, + ) + + @staticmethod + def _parse_onlyoffice_callback(payload: dict[str, Any]) -> OnlyOfficeCallbackPayload: + return parse_onlyoffice_callback(payload) + + _build_onlyoffice_document_key = staticmethod(build_onlyoffice_document_key) + 
_build_onlyoffice_access_token = staticmethod(build_onlyoffice_access_token) + _resolve_onlyoffice_document_type = staticmethod(resolve_onlyoffice_document_type) + + _normalize_filename = staticmethod(normalize_filename) + _normalize_folder = staticmethod(normalize_folder) + _extract_extension = staticmethod(extract_extension) + _parse_stored_name = staticmethod(parse_stored_name) + _format_time = staticmethod(format_time) + _format_size = staticmethod(format_size) + _resolve_file_type = staticmethod(resolve_file_type) + _resolve_file_type_label = staticmethod(resolve_file_type_label) + _can_preview = staticmethod(can_preview) + _read_text_preview = staticmethod(_read_text_preview) + _extract_docx_text = staticmethod(_extract_docx_text) + _normalize_extracted_text = staticmethod(_normalize_extracted_text) + _extract_pdf_text = staticmethod(_extract_pdf_text) + _extract_text_with_ocr = staticmethod(_extract_text_with_ocr) + _extract_xlsx_sheets = staticmethod(_extract_xlsx_sheets) + _extract_pptx_slides = staticmethod(_extract_pptx_slides) + + def _extract_document_text_from_path( + self, + *, + file_path: Path, + original_name: str, + mime_type: str, + ) -> str: + return _extract_document_text_from_path( + file_path=file_path, + original_name=original_name, + mime_type=mime_type, + ) diff --git a/server/src/app/services/knowledge_constants.py b/server/src/app/services/knowledge_constants.py new file mode 100644 index 0000000..aac9481 --- /dev/null +++ b/server/src/app/services/knowledge_constants.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +FIXED_KNOWLEDGE_FOLDERS = [ + "财务知识库", + "制度政策", + "报销制度", + "差旅规范", + "发票管理", + "税务合规", + "预算管理", + "财务共享", + "培训资料", + "常见问答", +] + +ICON_BY_TYPE = { + "pdf": "mdi mdi-file-document-outline-pdf pdf", + "word": "mdi mdi-file-document-outline-word word", + "excel": "mdi mdi-file-document-outline-excel excel", + "ppt": "mdi mdi-file-powerpoint-box ppt", + "image": "mdi mdi-file-image-outline image", + "text": "mdi 
mdi-file-document-outline text", + "archive": "mdi mdi-folder-zip-outline archive", + "binary": "mdi mdi-file-outline", +} + +TEXT_EXTENSIONS = {"txt", "md", "csv", "json", "xml", "yml", "yaml", "log"} +WORD_EXTENSIONS = {"doc", "docx"} +EXCEL_EXTENSIONS = {"xls", "xlsx", "csv"} +PPT_EXTENSIONS = {"ppt", "pptx"} +IMAGE_EXTENSIONS = {"png", "jpg", "jpeg", "gif", "bmp", "webp", "svg"} +ARCHIVE_EXTENSIONS = {"zip", "rar", "7z"} +STRUCTURED_PREVIEW_EXTENSIONS = {"docx", "xlsx", "pptx"} | TEXT_EXTENSIONS +INLINE_PREVIEW_EXTENSIONS = {"pdf"} | IMAGE_EXTENSIONS +ONLYOFFICE_EDITABLE_EXTENSIONS = {"docx", "xlsx", "pptx"} +KNOWLEDGE_INGEST_SYNC_STALE_SECONDS = 90 +KNOWLEDGE_SEARCH_RESULT_LIMIT = 3 +KNOWLEDGE_SEARCH_STOP_TERMS = { + "什么", + "怎么", + "如何", + "多少", + "是否", + "可以", + "一下", + "请问", + "帮我", + "一下子", + "这个", + "那个", + "哪些", + "一下吧", +} + +KNOWLEDGE_INGEST_STATUS_PUBLISHED = 1 +KNOWLEDGE_INGEST_STATUS_SYNCING = 2 +KNOWLEDGE_INGEST_STATUS_INGESTED = 3 +KNOWLEDGE_INGEST_STATUS_FAILED = 4 + +KNOWLEDGE_INGEST_STATUS_META = { + KNOWLEDGE_INGEST_STATUS_PUBLISHED: ("待归纳", "muted"), + KNOWLEDGE_INGEST_STATUS_SYNCING: ("正归纳", "warning"), + KNOWLEDGE_INGEST_STATUS_INGESTED: ("已归纳", "success"), + KNOWLEDGE_INGEST_STATUS_FAILED: ("归纳失败", "danger"), +} + diff --git a/server/src/app/services/knowledge_document_extractors.py b/server/src/app/services/knowledge_document_extractors.py new file mode 100644 index 0000000..9b57cf1 --- /dev/null +++ b/server/src/app/services/knowledge_document_extractors.py @@ -0,0 +1,223 @@ +from __future__ import annotations + +import re +import shutil +import subprocess +from pathlib import Path +from xml.etree import ElementTree +from zipfile import BadZipFile, ZipFile + +from app.services.knowledge_constants import IMAGE_EXTENSIONS, TEXT_EXTENSIONS +from app.services.knowledge_file_utils import extract_extension + +def _read_text_preview(file_path: Path) -> str: + encodings = ("utf-8", "utf-8-sig", "gbk") + for encoding in encodings: + try: + return 
file_path.read_text(encoding=encoding) + except UnicodeDecodeError: + continue + return "当前文本文件编码暂不支持在线解析。" + +def _extract_docx_text(file_path: Path) -> str: + try: + with ZipFile(file_path) as archive: + xml_content = archive.read("word/document.xml") + except (BadZipFile, KeyError): + return "当前 Word 文件解析失败。" + + root = ElementTree.fromstring(xml_content) + texts = [node.text.strip() for node in root.iter() if node.tag.endswith("}t") and node.text] + return "\n".join(texts) + +def _extract_document_text_from_path( + *, + file_path: Path, + original_name: str, + mime_type: str, +) -> str: + extension = extract_extension(original_name) + if extension in TEXT_EXTENSIONS: + return _normalize_extracted_text(_read_text_preview(file_path)) + if extension == "docx": + return _normalize_extracted_text(_extract_docx_text(file_path)) + if extension == "pdf": + text = _normalize_extracted_text(_extract_pdf_text(file_path)) + if text: + return text + return _normalize_extracted_text( + _extract_text_with_ocr( + file_path=file_path, + original_name=original_name, + mime_type=mime_type, + ) + ) + if extension in IMAGE_EXTENSIONS: + return _normalize_extracted_text( + _extract_text_with_ocr( + file_path=file_path, + original_name=original_name, + mime_type=mime_type, + ) + ) + return "" + +def _normalize_extracted_text(text: str) -> str: + normalized = str(text or "").replace("\r\n", "\n").replace("\r", "\n") + normalized = re.sub(r"\n{3,}", "\n\n", normalized) + return normalized.strip() + +def _extract_pdf_text(file_path: Path) -> str: + pdftotext_bin = shutil.which("pdftotext") + if not pdftotext_bin: + return "" + + completed = subprocess.run( + [pdftotext_bin, "-layout", str(file_path), "-"], + capture_output=True, + text=True, + timeout=40, + check=False, + ) + if completed.returncode != 0: + return "" + return str(completed.stdout or "") + +def _extract_text_with_ocr( + *, + file_path: Path, + original_name: str, + mime_type: str, +) -> str: + try: + from 
app.services.ocr import OcrService + + result = OcrService().recognize_files( + [(original_name, file_path.read_bytes(), mime_type)] + ) + except Exception: + return "" + + parts: list[str] = [] + for document in result.documents: + text = str(getattr(document, "text", "") or "").strip() + summary = str(getattr(document, "summary", "") or "").strip() + if text: + parts.append(text) + elif summary: + parts.append(summary) + return "\n\n".join(part for part in parts if part) + +def _extract_xlsx_sheets(file_path: Path) -> list[tuple[str, list[list[str]]]]: + try: + with ZipFile(file_path) as archive: + shared_strings: list[str] = [] + if "xl/sharedStrings.xml" in archive.namelist(): + shared_root = ElementTree.fromstring(archive.read("xl/sharedStrings.xml")) + shared_strings = [ + "".join(node.itertext()).strip() + for node in shared_root.iter() + if node.tag.endswith("}si") + ] + + sheet_files = sorted( + name + for name in archive.namelist() + if re.fullmatch(r"xl/worksheets/sheet\d+\.xml", name) + ) + if not sheet_files: + return [] + + relationship_targets: dict[str, str] = {} + if "xl/_rels/workbook.xml.rels" in archive.namelist(): + rel_root = ElementTree.fromstring(archive.read("xl/_rels/workbook.xml.rels")) + for node in rel_root.iter(): + if not node.tag.endswith("Relationship"): + continue + rel_id = node.attrib.get("Id") + target = node.attrib.get("Target") + if not rel_id or not target: + continue + normalized = target.lstrip("/") + if not normalized.startswith("xl/"): + normalized = f"xl/{normalized.lstrip('./')}" + relationship_targets[rel_id] = normalized + + ordered_sheets: list[tuple[str, str]] = [] + if "xl/workbook.xml" in archive.namelist(): + workbook_root = ElementTree.fromstring(archive.read("xl/workbook.xml")) + for index, node in enumerate(workbook_root.iter()): + if not node.tag.endswith("sheet"): + continue + sheet_name = node.attrib.get("name") or f"Sheet {index + 1}" + relationship_id = next( + (value for key, value in node.attrib.items() 
if key.endswith("}id")), + None, + ) + target = relationship_targets.get(relationship_id or "") + if target: + ordered_sheets.append((sheet_name, target)) + + if not ordered_sheets: + ordered_sheets = [ + (f"Sheet {index + 1}", sheet_file) + for index, sheet_file in enumerate(sheet_files) + ] + + preview_sheets: list[tuple[str, list[list[str]]]] = [] + for sheet_name, target in ordered_sheets: + if target not in archive.namelist(): + continue + + sheet_root = ElementTree.fromstring(archive.read(target)) + rows: list[list[str]] = [] + for row in sheet_root.iter(): + if not row.tag.endswith("}row"): + continue + row_values: list[str] = [] + for cell in row: + if not cell.tag.endswith("}c"): + continue + cell_type = cell.attrib.get("t") + value_node = next((item for item in cell if item.tag.endswith("}v")), None) + + if cell_type == "inlineStr": + text_node = next((item for item in cell.iter() if item.tag.endswith("}t")), None) + row_values.append((text_node.text or "").strip() if text_node is not None else "") + continue + + if value_node is None or value_node.text is None: + row_values.append("") + continue + + raw_value = value_node.text.strip() + if cell_type == "s" and raw_value.isdigit(): + index = int(raw_value) + row_values.append(shared_strings[index] if index < len(shared_strings) else raw_value) + else: + row_values.append(raw_value) + if row_values: + rows.append(row_values) + + preview_sheets.append((sheet_name, rows)) + + return preview_sheets + except (BadZipFile, ElementTree.ParseError, KeyError, ValueError): + return [] + +def _extract_pptx_slides(file_path: Path) -> list[list[str]]: + try: + with ZipFile(file_path) as archive: + slide_names = sorted( + name + for name in archive.namelist() + if re.fullmatch(r"ppt/slides/slide\d+\.xml", name) + ) + slides: list[list[str]] = [] + for slide_name in slide_names: + root = ElementTree.fromstring(archive.read(slide_name)) + texts = [node.text.strip() for node in root.iter() if node.tag.endswith("}t") and 
node.text] + slides.append(texts) + return slides + except (BadZipFile, ElementTree.ParseError, KeyError): + return [] diff --git a/server/src/app/services/knowledge_file_utils.py b/server/src/app/services/knowledge_file_utils.py new file mode 100644 index 0000000..7a87b03 --- /dev/null +++ b/server/src/app/services/knowledge_file_utils.py @@ -0,0 +1,112 @@ +from __future__ import annotations + +from datetime import UTC, datetime +from pathlib import Path +from uuid import uuid4 + +from app.services.knowledge_constants import ( + ARCHIVE_EXTENSIONS, + EXCEL_EXTENSIONS, + FIXED_KNOWLEDGE_FOLDERS, + IMAGE_EXTENSIONS, + INLINE_PREVIEW_EXTENSIONS, + PPT_EXTENSIONS, + STRUCTURED_PREVIEW_EXTENSIONS, + TEXT_EXTENSIONS, + WORD_EXTENSIONS, +) + +def normalize_filename(filename: str) -> str: + normalized = Path(str(filename or "").strip()).name.strip() + normalized = normalized.replace("/", "_").replace("\\", "_") + if not normalized: + raise ValueError("文件名不能为空。") + return normalized + +def normalize_folder(folder: str) -> str: + normalized = str(folder or "").strip() + if normalized not in FIXED_KNOWLEDGE_FOLDERS: + raise ValueError("只能上传到预设知识库文件夹。") + return normalized + +def extract_extension(filename: str) -> str: + suffix = Path(filename).suffix.lower().lstrip(".") + return suffix + +def _build_onlyoffice_document_key(entry: dict[str, Any]) -> str: + version = int(entry.get("version_number", 1)) + checksum = str(entry.get("sha256") or "")[:12] + return f"{entry['id']}-v{version}-{checksum or 'nochecksum'}" + +def _build_onlyoffice_access_token(self, document_id: str) -> str: + onlyoffice_settings = resolve_onlyoffice_settings() + payload = { + "scope": "onlyoffice-content", + "document_id": document_id, + } + return jwt.encode(payload, onlyoffice_settings.jwt_secret, algorithm="HS256") + +def _resolve_onlyoffice_document_type(extension: str) -> str: + if extension in WORD_EXTENSIONS: + return "word" + if extension in EXCEL_EXTENSIONS: + return "cell" + if extension in 
PPT_EXTENSIONS: + return "slide" + raise ValueError("当前文件格式不支持 ONLYOFFICE 预览。") + +def parse_stored_name(stored_name: str) -> tuple[str, str]: + if "__" not in stored_name: + return uuid4().hex, stored_name + document_id, original_name = stored_name.split("__", 1) + return document_id or uuid4().hex, original_name or stored_name + +def format_time(value: str | None) -> str: + if not value: + return "" + try: + parsed = datetime.fromisoformat(value) + except ValueError: + return value + return parsed.astimezone(UTC).strftime("%Y-%m-%d %H:%M") + +def format_size(size_bytes: int) -> str: + if size_bytes < 1024: + return f"{size_bytes} B" + if size_bytes < 1024 * 1024: + return f"{size_bytes / 1024:.1f} KB" + return f"{size_bytes / (1024 * 1024):.1f} MB" + +def resolve_file_type(extension: str) -> str: + if extension == "pdf": + return "pdf" + if extension in WORD_EXTENSIONS: + return "word" + if extension in EXCEL_EXTENSIONS: + return "excel" + if extension in PPT_EXTENSIONS: + return "ppt" + if extension in IMAGE_EXTENSIONS: + return "image" + if extension in TEXT_EXTENSIONS: + return "text" + if extension in ARCHIVE_EXTENSIONS: + return "archive" + return "binary" + +def resolve_file_type_label(file_type: str) -> str: + mapping = { + "pdf": "PDF 预览", + "word": "Word 预览", + "excel": "Excel 预览", + "ppt": "PPT 预览", + "image": "图片预览", + "text": "文本预览", + "archive": "压缩包", + "binary": "文件预览", + } + return mapping.get(file_type, "文件预览") + +def can_preview(extension: str) -> bool: + return extension in INLINE_PREVIEW_EXTENSIONS or extension in STRUCTURED_PREVIEW_EXTENSIONS + diff --git a/server/src/app/services/knowledge_ingest_status.py b/server/src/app/services/knowledge_ingest_status.py new file mode 100644 index 0000000..ecb2d64 --- /dev/null +++ b/server/src/app/services/knowledge_ingest_status.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any + +from sqlalchemy import select +from sqlalchemy.orm 
import Session + +from app.core.agent_enums import AgentRunStatus +from app.models.agent_run import AgentRun +from app.services.knowledge_constants import ( + KNOWLEDGE_INGEST_STATUS_META, + KNOWLEDGE_INGEST_STATUS_PUBLISHED, + KNOWLEDGE_INGEST_SYNC_STALE_SECONDS, +) + + +def normalize_ingest_status_code(value: Any) -> int: + try: + status_code = int(value) + except (TypeError, ValueError): + return KNOWLEDGE_INGEST_STATUS_PUBLISHED + if status_code not in KNOWLEDGE_INGEST_STATUS_META: + return KNOWLEDGE_INGEST_STATUS_PUBLISHED + return status_code + + +def is_syncing_status_stale(entry: dict[str, Any]) -> bool: + raw_value = str(entry.get("ingest_status_updated_at") or "").strip() + if not raw_value: + return True + try: + updated_at = datetime.fromisoformat(raw_value) + except ValueError: + return True + if updated_at.tzinfo is None: + updated_at = updated_at.replace(tzinfo=UTC) + age_seconds = (datetime.now(UTC) - updated_at.astimezone(UTC)).total_seconds() + return age_seconds >= KNOWLEDGE_INGEST_SYNC_STALE_SECONDS + + +def should_preserve_syncing_status(entry: dict[str, Any], *, db: Session | None) -> bool: + agent_run_id = str(entry.get("ingest_agent_run_id") or "").strip() + if not agent_run_id or db is None: + return not is_syncing_status_stale(entry) + + run = db.scalar(select(AgentRun).where(AgentRun.run_id == agent_run_id)) + if run is None: + return not is_syncing_status_stale(entry) + if run.status != AgentRunStatus.RUNNING.value: + return False + + heartbeat_at = str((run.route_json or {}).get("heartbeat_at") or "").strip() + if heartbeat_at: + probe_entry = {"ingest_status_updated_at": heartbeat_at} + return not is_syncing_status_stale(probe_entry) + + return not is_syncing_status_stale(entry) + + +def resolve_linked_ingest_run_status(entry: dict[str, Any], *, db: Session | None) -> str: + agent_run_id = str(entry.get("ingest_agent_run_id") or "").strip() + if not agent_run_id or db is None: + return "" + + run = 
db.scalar(select(AgentRun).where(AgentRun.run_id == agent_run_id)) + if run is None: + return "" + return str(run.status or "").strip() diff --git a/server/src/app/services/knowledge_onlyoffice.py b/server/src/app/services/knowledge_onlyoffice.py new file mode 100644 index 0000000..8e7e57a --- /dev/null +++ b/server/src/app/services/knowledge_onlyoffice.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any + +import jwt + +from app.api.deps import CurrentUserContext +from app.core.config import get_settings +from app.core.logging import get_logger +from app.schemas.knowledge import KnowledgeOnlyOfficeConfigRead +from app.services.knowledge_constants import ( + EXCEL_EXTENSIONS, + ONLYOFFICE_EDITABLE_EXTENSIONS, + PPT_EXTENSIONS, + WORD_EXTENSIONS, +) +from app.services.knowledge_file_utils import extract_extension +from app.services.settings import resolve_onlyoffice_settings + +logger = get_logger("app.services.knowledge") + + +@dataclass(slots=True) +class OnlyOfficeCallbackPayload: + status: int + download_url: str + users: list[str] + + +def parse_onlyoffice_callback(payload: dict[str, Any]) -> OnlyOfficeCallbackPayload: + status = int(payload.get("status") or 0) + download_url = str(payload.get("url") or "").strip() + users = [str(item).strip() for item in payload.get("users") or [] if str(item).strip()] + return OnlyOfficeCallbackPayload(status=status, download_url=download_url, users=users) + + +def build_onlyoffice_document_key(entry: dict[str, Any]) -> str: + version = int(entry.get("version_number", 1)) + checksum = str(entry.get("sha256") or "")[:12] + return f"{entry['id']}-v{version}-{checksum or 'nochecksum'}" + + +def build_onlyoffice_access_token(document_id: str) -> str: + onlyoffice_settings = resolve_onlyoffice_settings() + payload = { + "scope": "onlyoffice-content", + "document_id": document_id, + } + return jwt.encode(payload, onlyoffice_settings.jwt_secret, algorithm="HS256") + + 
+def build_onlyoffice_config( + *, + document_id: str, + entry: dict[str, Any], + current_user: CurrentUserContext, +) -> KnowledgeOnlyOfficeConfigRead: + settings = get_settings() + onlyoffice_settings = resolve_onlyoffice_settings() + if not onlyoffice_settings.enabled: + logger.warning( + "ONLYOFFICE disabled in runtime config doc=%s enabled=%s public_url=%s backend_url=%s jwt_set=%s", + document_id, + onlyoffice_settings.enabled, + onlyoffice_settings.public_url, + onlyoffice_settings.backend_url, + bool(onlyoffice_settings.jwt_secret), + ) + raise ValueError("ONLYOFFICE 预览未启用。") + if not onlyoffice_settings.public_url or not onlyoffice_settings.backend_url: + logger.warning( + "ONLYOFFICE config incomplete doc=%s enabled=%s public_url=%s backend_url=%s jwt_set=%s", + document_id, + onlyoffice_settings.enabled, + onlyoffice_settings.public_url, + onlyoffice_settings.backend_url, + bool(onlyoffice_settings.jwt_secret), + ) + raise ValueError("ONLYOFFICE 地址配置不完整。") + if not onlyoffice_settings.jwt_secret: + logger.warning( + "ONLYOFFICE JWT missing doc=%s enabled=%s public_url=%s backend_url=%s jwt_set=%s", + document_id, + onlyoffice_settings.enabled, + onlyoffice_settings.public_url, + onlyoffice_settings.backend_url, + bool(onlyoffice_settings.jwt_secret), + ) + raise ValueError("ONLYOFFICE JWT 密钥未配置。") + + extension = extract_extension(entry["original_name"]) + if extension not in ONLYOFFICE_EDITABLE_EXTENSIONS: + raise ValueError("当前文件格式不支持 ONLYOFFICE 预览。") + + backend_base_url = onlyoffice_settings.backend_url.rstrip("/") + public_url = onlyoffice_settings.public_url.rstrip("/") + access_token = build_onlyoffice_access_token(document_id) + document_url = ( + f"{backend_base_url}{settings.api_v1_prefix}/knowledge/documents/{document_id}/onlyoffice/content" + f"?access_token={access_token}" + ) + callback_url = ( + f"{backend_base_url}{settings.api_v1_prefix}/knowledge/documents/{document_id}/onlyoffice/callback" + ) + + config: dict[str, Any] = { + 
"documentType": resolve_onlyoffice_document_type(extension), + "document": { + "fileType": extension, + "key": build_onlyoffice_document_key(entry), + "title": entry["original_name"], + "url": document_url, + "permissions": { + "download": True, + "edit": False, + "print": True, + "copy": True, + }, + }, + "editorConfig": { + "mode": "view", + "lang": "zh-CN", + "callbackUrl": callback_url, + "user": { + "id": current_user.username, + "name": current_user.name, + }, + "customization": { + "compactHeader": True, + "compactToolbar": True, + "toolbarNoTabs": False, + "autosave": False, + "forcesave": False, + }, + }, + "width": "100%", + "height": "100%", + } + config["token"] = jwt.encode(config, onlyoffice_settings.jwt_secret, algorithm="HS256") + return KnowledgeOnlyOfficeConfigRead(documentServerUrl=public_url, config=config) + + +def validate_onlyoffice_access_token(document_id: str, access_token: str) -> None: + onlyoffice_settings = resolve_onlyoffice_settings() + try: + payload = jwt.decode( + access_token, + onlyoffice_settings.jwt_secret, + algorithms=["HS256"], + ) + except jwt.PyJWTError as exc: + raise ValueError("ONLYOFFICE 文件访问令牌无效。") from exc + + if payload.get("scope") != "onlyoffice-content" or payload.get("document_id") != document_id: + raise ValueError("ONLYOFFICE 文件访问令牌无效。") + + +def resolve_onlyoffice_document_type(extension: str) -> str: + if extension in WORD_EXTENSIONS: + return "word" + if extension in EXCEL_EXTENSIONS: + return "cell" + if extension in PPT_EXTENSIONS: + return "slide" + raise ValueError("当前文件格式不支持 ONLYOFFICE 预览。") diff --git a/server/src/app/services/knowledge_preview.py b/server/src/app/services/knowledge_preview.py new file mode 100644 index 0000000..834c526 --- /dev/null +++ b/server/src/app/services/knowledge_preview.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +from typing import Any + +from app.schemas.knowledge import ( + KnowledgePreviewBlockRead, + KnowledgePreviewPageRead, + KnowledgePreviewStatRead, 
+) +from app.services.knowledge_constants import IMAGE_EXTENSIONS, TEXT_EXTENSIONS +from app.services.knowledge_document_extractors import ( + _extract_docx_text, + _extract_pptx_slides, + _extract_xlsx_sheets, + _read_text_preview, +) +from app.services.knowledge_file_utils import extract_extension, format_size + +def build_preview( + entry: dict[str, Any], + *, + resolve_document_path, +) -> tuple[str, list[KnowledgePreviewPageRead]]: + extension = extract_extension(entry["original_name"]) + file_path = resolve_document_path(entry) + + if extension == "pdf": + return "pdf", [] + + if extension in IMAGE_EXTENSIONS: + return "image", [] + + if extension in TEXT_EXTENSIONS: + text = _read_text_preview(file_path) + return "text", [_build_text_preview_page(entry, text)] + + if extension == "docx": + text = _extract_docx_text(file_path) + return "text", [_build_text_preview_page(entry, text)] + + if extension == "xlsx": + return "table", _build_xlsx_preview_pages(entry, file_path) + + if extension == "pptx": + return "slides", _build_pptx_preview_pages(entry, file_path) + + return ( + "unsupported", + [ + KnowledgePreviewPageRead( + title=entry["original_name"], + subtitle="当前格式暂不支持在线解析预览。", + stats=[ + KnowledgePreviewStatRead(label="文件格式", value=extension.upper() or "FILE"), + KnowledgePreviewStatRead(label="文件大小", value=format_size(entry["size_bytes"])), + KnowledgePreviewStatRead(label="建议操作", value="下载后查看"), + ], + blocks=[ + KnowledgePreviewBlockRead( + heading="预览说明", + lines=[ + "当前系统已支持该文件的上传、下载和权限控制。", + "如需在线预览,可后续接入专门的文档转换服务。", + ], + ) + ], + ) + ], + ) + +def _build_text_preview_page( + entry: dict[str, Any], text: str +) -> KnowledgePreviewPageRead: + lines = [line.strip() for line in text.splitlines() if line.strip()] + if not lines: + lines = ["文件内容为空,或当前文档未提取到可展示文本。"] + + groups = [lines[index : index + 8] for index in range(0, min(len(lines), 24), 8)] + blocks = [ + KnowledgePreviewBlockRead(heading=f"内容片段 {index + 1}", lines=group) + for index, 
group in enumerate(groups) + ] + + return KnowledgePreviewPageRead( + title=entry["original_name"], + subtitle="文本提取预览", + stats=[ + KnowledgePreviewStatRead(label="文件格式", value=entry["extension"].upper() or "TEXT"), + KnowledgePreviewStatRead(label="可见行数", value=str(len(lines))), + KnowledgePreviewStatRead(label="文件大小", value=format_size(entry["size_bytes"])), + ], + blocks=blocks, + ) + +def _build_xlsx_preview_pages( + entry: dict[str, Any], file_path +) -> list[KnowledgePreviewPageRead]: + sheets = _extract_xlsx_sheets(file_path) + if not sheets: + sheets = [("Sheet 1", [["未提取到表格内容。"]])] + + preview_pages: list[KnowledgePreviewPageRead] = [] + sheet_count = len(sheets) + for sheet_name, rows in sheets[:8]: + visible_rows = rows[:12] if rows else [["未提取到表格内容。"]] + blocks = [ + KnowledgePreviewBlockRead( + heading=f"第 {index + 1} 行", + lines=[" | ".join((cell or "") for cell in row)], + ) + for index, row in enumerate(visible_rows) + ] + + preview_pages.append( + KnowledgePreviewPageRead( + title=sheet_name, + subtitle="表格内容预览", + stats=[ + KnowledgePreviewStatRead(label="工作表数量", value=str(sheet_count)), + KnowledgePreviewStatRead(label="预览行数", value=str(len(visible_rows))), + KnowledgePreviewStatRead(label="文件大小", value=format_size(entry["size_bytes"])), + ], + blocks=blocks, + ) + ) + + return preview_pages + +def _build_pptx_preview_pages( + entry: dict[str, Any], file_path +) -> list[KnowledgePreviewPageRead]: + slides = _extract_pptx_slides(file_path) + if not slides: + slides = [["未提取到幻灯片文本。"]] + + pages: list[KnowledgePreviewPageRead] = [] + for index, slide_lines in enumerate(slides[:8]): + pages.append( + KnowledgePreviewPageRead( + title=entry["original_name"], + subtitle=f"幻灯片 {index + 1}", + stats=[ + KnowledgePreviewStatRead(label="页码", value=str(index + 1)), + KnowledgePreviewStatRead(label="文本条数", value=str(len(slide_lines))), + KnowledgePreviewStatRead(label="文件格式", value="PPTX"), + ], + blocks=[ + KnowledgePreviewBlockRead( +
heading="幻灯片内容", + lines=slide_lines or ["该页未提取到文本内容。"], + ) + ], + ) + ) + + return pages + diff --git a/server/src/app/services/knowledge_rag.py b/server/src/app/services/knowledge_rag.py index 624d71f..51607d8 100644 --- a/server/src/app/services/knowledge_rag.py +++ b/server/src/app/services/knowledge_rag.py @@ -1,37 +1,43 @@ -from __future__ import annotations - -import asyncio -import json +from __future__ import annotations + import os import re import socket import threading -from dataclasses import dataclass -from datetime import UTC, datetime -from functools import partial -from http import HTTPStatus from pathlib import Path -from time import perf_counter from typing import Any -from urllib.error import HTTPError, URLError -from urllib.parse import quote -from urllib.request import Request, urlopen - -from sqlalchemy.orm import Session - -from app.core.config import get_settings -from app.core.logging import get_logger -from app.db.session import get_session_factory -from app.services.settings import SettingsService - -logger = get_logger("app.services.knowledge_rag") - + +from sqlalchemy.orm import Session + +from app.core.config import get_settings +from app.core.logging import get_logger +from app.db.session import get_session_factory +from app.services.knowledge_rag_runtime import ( + DEFAULT_EMBEDDING_TIMEOUT_SECONDS, + DEFAULT_LIGHTRAG_QUERY_MODE, + DEFAULT_LLM_TIMEOUT_SECONDS, + KnowledgeRagError, + RuntimeModelConfig, + _LightRagRuntime, + _build_ali_rerank_request, + _build_azure_deployment_base, + _build_headers, + _ensure_path, + _extract_chat_text, + _extract_embedding_vectors, + _extract_error_message, + _extract_rerank_results, + _normalize_endpoint, + _parse_json_body, + _send_json_request, +) +from app.services.settings import SettingsService + +logger = get_logger("app.services.knowledge_rag") + DEFAULT_QDRANT_URL = "http://127.0.0.1:6333" CONTAINER_QDRANT_URL = "http://qdrant:6333" DEFAULT_LIGHTRAG_WORKSPACE = "x_financial_knowledge" 
-DEFAULT_LIGHTRAG_QUERY_MODE = "naive" -DEFAULT_LLM_TIMEOUT_SECONDS = 180 -DEFAULT_EMBEDDING_TIMEOUT_SECONDS = 120 MAX_KNOWLEDGE_HIT_CONTENT_LENGTH = 2200 MAX_KNOWLEDGE_HIT_EXCERPT_LENGTH = 220 MAX_QUERY_TERMS = 12 @@ -70,531 +76,93 @@ STRUCTURED_APPENDIX_LEADING_MARKERS = ( "# 结构化表格补充", ) STRUCTURED_APPENDIX_LEADING_WINDOW = 220 - -_runtime_lock = threading.RLock() -_runtime_instance: _LightRagRuntime | None = None -_runtime_signature: tuple[Any, ...] | None = None - - -class KnowledgeRagError(RuntimeError): - pass - - -@dataclass(frozen=True, slots=True) -class RuntimeModelConfig: - slot: str - provider: str - model: str - endpoint: str - api_key: str - capability: str - - -class _LightRagRuntime: - def __init__( - self, - *, - working_dir: Path, - workspace: str, - qdrant_url: str, - qdrant_api_key: str, - primary_chat: RuntimeModelConfig, - backup_chat: RuntimeModelConfig | None, - embedding: RuntimeModelConfig, - reranker: RuntimeModelConfig | None, - ) -> None: - self.working_dir = working_dir - self.workspace = workspace - self.qdrant_url = qdrant_url - self.qdrant_api_key = qdrant_api_key - self.primary_chat = primary_chat - self.backup_chat = backup_chat - self.embedding = embedding - self.reranker = reranker - self._rag = self._build_rag() - self._initialize() - self._graph_has_content_cache: bool | None = None - - @property - def rag(self): - return self._rag - - def _build_rag(self): - try: - from lightrag import LightRAG - from lightrag.utils import EmbeddingFunc - except ImportError as exc: # pragma: no cover - exercised in runtime env - raise KnowledgeRagError( - "LightRAG 依赖未安装,请先在 server 环境执行依赖安装。" - ) from exc - - self.working_dir.mkdir(parents=True, exist_ok=True) - - if self.qdrant_url: - os.environ["QDRANT_URL"] = self.qdrant_url - if self.qdrant_api_key: - os.environ["QDRANT_API_KEY"] = self.qdrant_api_key - - embedding_dim = self._probe_embedding_dimension(self.embedding) - logger.info( - "Initialize LightRAG runtime workspace=%s qdrant=%s 
embedding_model=%s dim=%s", - self.workspace, - self.qdrant_url, - self.embedding.model, - embedding_dim, - ) - - async def embedding_func(texts: list[str]) -> Any: - return await asyncio.to_thread(self._embed_sync, texts) - - async def llm_model_func( - prompt: str, - system_prompt: str | None = None, - history_messages: list[dict[str, Any]] | None = None, - keyword_extraction: bool = False, - **kwargs: Any, - ) -> str: - return await asyncio.to_thread( - self._complete_sync, - prompt, - system_prompt, - history_messages or [], - keyword_extraction, - kwargs, - ) - async def rerank_model_func( - query: str, - documents: list[str], - top_n: int | None = None, - **_kwargs: Any, - ) -> list[dict[str, Any]]: - return await asyncio.to_thread( - self._rerank_sync, - query, - documents, - top_n, - ) - - return LightRAG( - working_dir=str(self.working_dir), - workspace=self.workspace, - kv_storage="JsonKVStorage", - graph_storage="NetworkXStorage", - vector_storage="QdrantVectorDBStorage", - doc_status_storage="JsonDocStatusStorage", - llm_model_name=self.primary_chat.model, - llm_model_func=llm_model_func, - embedding_func=EmbeddingFunc( - embedding_dim=embedding_dim, - func=embedding_func, - max_token_size=8192, - model_name=self.embedding.model, - supports_asymmetric=False, - ), - rerank_model_func=rerank_model_func if self.reranker is not None else None, - enable_llm_cache=False, - enable_llm_cache_for_entity_extract=False, - ) - - def _initialize(self) -> None: - from lightrag.utils import always_get_an_event_loop - - loop = always_get_an_event_loop() - loop.run_until_complete(self._rag.initialize_storages()) - - def finalize(self) -> None: - from lightrag.utils import always_get_an_event_loop - - loop = always_get_an_event_loop() - loop.run_until_complete(self._rag.finalize_storages()) - - def query_data(self, query: str, *, conversation_history: list[dict[str, str]] | None = None) -> dict[str, Any]: - from lightrag import QueryParam - configured_mode = 
os.environ.get("LIGHTRAG_QUERY_MODE", DEFAULT_LIGHTRAG_QUERY_MODE).strip() or DEFAULT_LIGHTRAG_QUERY_MODE - mode = "naive" if configured_mode != "naive" and not self._graph_has_content() else configured_mode - started_at = perf_counter() - param = QueryParam( - mode=mode, - top_k=8, - chunk_top_k=10, - only_need_context=True, - response_type="Multiple Paragraphs", - conversation_history=conversation_history or [], - include_references=True, - ) - try: - result = self._rag.query_data(query, param) - logger.info("LightRAG query completed mode=%s elapsed=%.2fs", mode, perf_counter() - started_at) - return result - except Exception: - if mode == "naive": - raise - logger.warning("LightRAG query mode=%s failed, retry with naive mode", mode) - fallback_param = QueryParam( - mode="naive", - top_k=8, - chunk_top_k=10, - only_need_context=True, - response_type="Multiple Paragraphs", - conversation_history=conversation_history or [], - include_references=True, - ) - result = self._rag.query_data(query, fallback_param) - logger.info("LightRAG query completed mode=naive elapsed=%.2fs", perf_counter() - started_at) - return result +_runtime_lock = threading.RLock() +_runtime_instance: _LightRagRuntime | None = None +_runtime_signature: tuple[Any, ...] 
| None = None - def _graph_has_content(self) -> bool: - if self._graph_has_content_cache is not None: - return self._graph_has_content_cache - graph_path = self.working_dir / self.workspace / "graph_chunk_entity_relation.graphml" - try: - graph_text = graph_path.read_text(encoding="utf-8") - except OSError: - self._graph_has_content_cache = False - return False +class KnowledgeRagService: + def __init__(self, db: Session | None = None, storage_root: Path | None = None) -> None: + self.db = db + self.storage_root = Path(storage_root or get_settings().resolved_storage_root_dir) - self._graph_has_content_cache = " str: - return self._rag.insert(texts, ids=document_ids, file_paths=file_paths) - - def get_document_statuses(self, document_ids: list[str]) -> dict[str, Any]: - from lightrag.utils import always_get_an_event_loop - - loop = always_get_an_event_loop() - return loop.run_until_complete(self._rag.aget_docs_by_ids(document_ids)) - - def delete_document(self, document_id: str) -> None: - from lightrag.utils import always_get_an_event_loop - - loop = always_get_an_event_loop() - result = loop.run_until_complete(self._rag.adelete_by_doc_id(document_id)) - status = str(getattr(result, "status", "") or "") - if status not in {"success", "not_found"}: - raise KnowledgeRagError(str(getattr(result, "message", "") or "LightRAG 删除文档失败。")) - - def _probe_embedding_dimension(self, config: RuntimeModelConfig) -> int: - vectors = self._request_embeddings(config, ["dimension probe"]) - if not vectors or not isinstance(vectors[0], list): - raise KnowledgeRagError("无法从 embedding 模型返回结果中解析向量维度。") - dimension = len(vectors[0]) - if dimension <= 0: - raise KnowledgeRagError("embedding 模型返回了无效的向量维度。") - return dimension - - def _embed_sync(self, texts: list[str]) -> Any: - import numpy as np - - vectors = self._request_embeddings(self.embedding, texts) - return np.array(vectors, dtype=float) - - def _rerank_sync( + def query_knowledge( self, query: str, - documents: list[str], - 
top_n: int | None, - ) -> list[dict[str, Any]]: - if self.reranker is None: - return [] - - status_code, body = self._request_rerank( - self.reranker, - query=query, - documents=documents, - top_n=top_n, - ) - if status_code >= HTTPStatus.BAD_REQUEST: - raise KnowledgeRagError(f"reranker 模型返回异常状态码 {status_code}。") - return _extract_rerank_results(body, provider=self.reranker.provider) - - def _complete_sync( - self, - prompt: str, - system_prompt: str | None, - history_messages: list[dict[str, Any]], - keyword_extraction: bool, - kwargs: dict[str, Any], - ) -> str: - del keyword_extraction - - last_error: Exception | None = None - for config in [self.primary_chat, self.backup_chat]: - if config is None: - continue - try: - return self._request_chat_completion( - config, - prompt=prompt, - system_prompt=system_prompt, - history_messages=history_messages, - max_tokens=int(kwargs.get("max_tokens") or 1200), - temperature=float(kwargs.get("temperature") or 0.1), - ) - except Exception as exc: # pragma: no cover - runtime fallback - last_error = exc - logger.warning( - "LightRAG LLM request failed slot=%s provider=%s model=%s: %s", - config.slot, - config.provider, - config.model, - exc, - ) - continue - - raise KnowledgeRagError(f"LightRAG 调用知识模型失败:{last_error or '没有可用模型配置'}") - - def _request_chat_completion( - self, - config: RuntimeModelConfig, - *, - prompt: str, - system_prompt: str | None, - history_messages: list[dict[str, Any]], - max_tokens: int, - temperature: float, - ) -> str: - messages: list[dict[str, Any]] = [] - if system_prompt: - messages.append({"role": "system", "content": system_prompt}) - messages.extend(history_messages) - messages.append({"role": "user", "content": prompt}) - - if config.provider == "Azure OpenAI": - url = f"{_build_azure_deployment_base(config.endpoint, config.model)}/chat/completions?api-version={AZURE_API_VERSION}" - payload = { - "messages": messages, - "max_tokens": max_tokens, - "temperature": temperature, - } - 
status_code, body = _send_json_request( - "POST", - url, - headers=_build_headers(config.api_key, use_bearer=False, use_api_key=True), - payload=payload, - timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, - ) - elif config.provider == "Ollama": - url = _ensure_path(_normalize_endpoint(config.endpoint), "api/chat") - payload = { - "model": config.model, - "messages": messages, - "stream": False, - "options": { - "num_predict": max_tokens, - "temperature": temperature, - }, - } - status_code, body = _send_json_request( - "POST", - url, - headers={"Content-Type": "application/json", "Accept": "application/json"}, - payload=payload, - timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, - ) - else: - url = _ensure_path(_normalize_endpoint(config.endpoint), "chat/completions") - payload = { - "model": config.model, - "messages": messages, - "max_tokens": max_tokens, - "temperature": temperature, - } - status_code, body = _send_json_request( - "POST", - url, - headers=_build_headers(config.api_key, use_bearer=True), - payload=payload, - timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, - ) - - if status_code >= HTTPStatus.BAD_REQUEST: - raise KnowledgeRagError(f"知识模型返回异常状态码 {status_code}。") - - return _extract_chat_text(body, provider=config.provider) - - def _request_embeddings(self, config: RuntimeModelConfig, texts: list[str]) -> list[list[float]]: - if config.provider == "Azure OpenAI": - url = f"{_build_azure_deployment_base(config.endpoint, config.model)}/embeddings?api-version={AZURE_API_VERSION}" - payload = {"input": texts} - status_code, body = _send_json_request( - "POST", - url, - headers=_build_headers(config.api_key, use_bearer=False, use_api_key=True), - payload=payload, - timeout_seconds=DEFAULT_EMBEDDING_TIMEOUT_SECONDS, - ) - elif config.provider == "Ollama": - url = _ensure_path(_normalize_endpoint(config.endpoint), "api/embed") - payload = {"model": config.model, "input": texts} - status_code, body = _send_json_request( - "POST", - url, - headers={"Content-Type": 
"application/json", "Accept": "application/json"}, - payload=payload, - timeout_seconds=DEFAULT_EMBEDDING_TIMEOUT_SECONDS, - ) - else: - url = _ensure_path(_normalize_endpoint(config.endpoint), "embeddings") - payload = {"model": config.model, "input": texts} - status_code, body = _send_json_request( - "POST", - url, - headers=_build_headers(config.api_key, use_bearer=True), - payload=payload, - timeout_seconds=DEFAULT_EMBEDDING_TIMEOUT_SECONDS, - ) - - if status_code >= HTTPStatus.BAD_REQUEST: - raise KnowledgeRagError(f"embedding 模型返回异常状态码 {status_code}。") - - return _extract_embedding_vectors(body, provider=config.provider) - - def _request_rerank( - self, - config: RuntimeModelConfig, *, - query: str, - documents: list[str], - top_n: int | None, - ) -> tuple[int, Any]: - if config.provider == "Azure OpenAI": - url = f"{_build_azure_deployment_base(config.endpoint, config.model)}/rerank?api-version={AZURE_API_VERSION}" - payload: dict[str, Any] = { - "query": query, - "documents": documents, + conversation_history: list[dict[str, str]] | None = None, + limit: int = 5, + ) -> dict[str, Any]: + normalized_query = str(query or "").strip() + if not normalized_query: + return { + "result_type": "knowledge_search", + "query": "", + "record_count": 0, + "hits": [], + "references": [], + "message": "请先输入要检索的知识库问题。", } - if top_n is not None: - payload["top_n"] = top_n - return _send_json_request( - "POST", - url, - headers=_build_headers(config.api_key, use_bearer=False, use_api_key=True), - payload=payload, - timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, - ) - if config.provider == "Ali": - url, payload = _build_ali_rerank_request( - config.model, - query=query, - documents=documents, - top_n=top_n, - ) - return _send_json_request( - "POST", - url, - headers=_build_headers(config.api_key, use_bearer=True), - payload=payload, - timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, - ) + try: + runtime = self._get_runtime() + raw = runtime.query_data(normalized_query, 
conversation_history=conversation_history) + except Exception as exc: + logger.warning("Knowledge query failed: %s", exc) + return { + "result_type": "knowledge_search", + "query": normalized_query, + "record_count": 0, + "hits": [], + "references": [], + "message": f"知识库检索暂不可用:{exc}", + } - url = _ensure_path(_normalize_endpoint(config.endpoint), "rerank") - payload = { - "model": config.model, - "query": query, - "documents": documents, - } - if top_n is not None: - payload["top_n"] = top_n - return _send_json_request( - "POST", - url, - headers=_build_headers(config.api_key, use_bearer=True), - payload=payload, - timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, - ) - - -class KnowledgeRagService: - def __init__(self, db: Session | None = None, storage_root: Path | None = None) -> None: - self.db = db - self.storage_root = Path(storage_root or get_settings().resolved_storage_root_dir) - - def query_knowledge( - self, - query: str, - *, - conversation_history: list[dict[str, str]] | None = None, - limit: int = 5, - ) -> dict[str, Any]: - normalized_query = str(query or "").strip() - if not normalized_query: - return { - "result_type": "knowledge_search", - "query": "", - "record_count": 0, - "hits": [], - "references": [], - "message": "请先输入要检索的知识库问题。", - } - - try: - runtime = self._get_runtime() - raw = runtime.query_data(normalized_query, conversation_history=conversation_history) - except Exception as exc: - logger.warning("Knowledge query failed: %s", exc) - return { - "result_type": "knowledge_search", - "query": normalized_query, - "record_count": 0, - "hits": [], - "references": [], - "message": f"知识库检索暂不可用:{exc}", - } - - data = raw.get("data") if isinstance(raw, dict) else {} - chunks = list(data.get("chunks") or []) if isinstance(data, dict) else [] - entities = list(data.get("entities") or []) if isinstance(data, dict) else [] - references = list(data.get("references") or []) if isinstance(data, dict) else [] + data = raw.get("data") if isinstance(raw, 
dict) else {} + chunks = list(data.get("chunks") or []) if isinstance(data, dict) else [] + entities = list(data.get("entities") or []) if isinstance(data, dict) else [] + references = list(data.get("references") or []) if isinstance(data, dict) else [] hits = self._build_hits_from_query_data( query=normalized_query, chunks=chunks, entities=entities, limit=limit, ) - - if not hits: - return { - "result_type": "knowledge_search", - "query": normalized_query, - "record_count": 0, - "hits": [], - "references": [], - "raw_references": references, - "message": "当前知识库中没有检索到与本次问题直接匹配的内容。", - } - - return { - "result_type": "knowledge_search", - "query": normalized_query, - "record_count": len(hits), - "hits": hits, - "references": [str(item.get("code") or "").strip() for item in hits if str(item.get("code") or "").strip()], - "raw_references": references, - "metadata": raw.get("metadata") if isinstance(raw, dict) else {}, - "message": f"已从知识库中检索到 {len(hits)} 条相关内容。", - } - - def index_documents( - self, - *, - document_ids: list[str], - force: bool = False, - ) -> dict[str, Any]: - normalized_ids = [str(item).strip() for item in document_ids if str(item).strip()] - if not normalized_ids: - raise ValueError("没有可供索引的知识文档。") - + + if not hits: + return { + "result_type": "knowledge_search", + "query": normalized_query, + "record_count": 0, + "hits": [], + "references": [], + "raw_references": references, + "message": "当前知识库中没有检索到与本次问题直接匹配的内容。", + } + + return { + "result_type": "knowledge_search", + "query": normalized_query, + "record_count": len(hits), + "hits": hits, + "references": [str(item.get("code") or "").strip() for item in hits if str(item.get("code") or "").strip()], + "raw_references": references, + "metadata": raw.get("metadata") if isinstance(raw, dict) else {}, + "message": f"已从知识库中检索到 {len(hits)} 条相关内容。", + } + + def index_documents( + self, + *, + document_ids: list[str], + force: bool = False, + ) -> dict[str, Any]: + normalized_ids = [str(item).strip() 
for item in document_ids if str(item).strip()] + if not normalized_ids: + raise ValueError("没有可供索引的知识文档。") + from app.services.knowledge import KnowledgeService from app.services.knowledge_normalizer import KnowledgeNormalizationService @@ -603,34 +171,34 @@ class KnowledgeRagService: KnowledgeNormalizationService(self.db) if self.db is not None else None ) texts: list[str] = [] - file_paths: list[str] = [] - - runtime = self._get_runtime() - existing_statuses = runtime.get_document_statuses(normalized_ids) - - for document_id in normalized_ids: - entry = knowledge_service.get_document_entry(document_id) - if force and document_id in existing_statuses: - try: - runtime.delete_document(document_id) - except Exception as exc: - logger.warning("Delete existing LightRAG document failed doc_id=%s: %s", document_id, exc) + file_paths: list[str] = [] + + runtime = self._get_runtime() + existing_statuses = runtime.get_document_statuses(normalized_ids) + + for document_id in normalized_ids: + entry = knowledge_service.get_document_entry(document_id) + if force and document_id in existing_statuses: + try: + runtime.delete_document(document_id) + except Exception as exc: + logger.warning("Delete existing LightRAG document failed doc_id=%s: %s", document_id, exc) text = knowledge_service.extract_document_text(document_id) if normalization_service is not None: text = normalization_service.build_enriched_text(text) texts.append(text) - file_paths.append(str((knowledge_service.library_root / entry["folder"] / entry["stored_name"]).resolve())) - - track_id = runtime.insert_documents( - texts=texts, - document_ids=normalized_ids, - file_paths=file_paths, - ) - - statuses = runtime.get_document_statuses(normalized_ids) - succeeded_document_ids: list[str] = [] - failed_documents: list[dict[str, str]] = [] - + file_paths.append(str((knowledge_service.library_root / entry["folder"] / entry["stored_name"]).resolve())) + + track_id = runtime.insert_documents( + texts=texts, + 
document_ids=normalized_ids, + file_paths=file_paths, + ) + + statuses = runtime.get_document_statuses(normalized_ids) + succeeded_document_ids: list[str] = [] + failed_documents: list[dict[str, str]] = [] + for document_id in normalized_ids: status_obj = statuses.get(document_id) status_text = self._status_value(status_obj) @@ -640,63 +208,63 @@ class KnowledgeRagService: failed_documents.append( { "document_id": document_id, - "status": status_text or "unknown", - "error": self._status_error(status_obj), - } - ) - - return { - "track_id": track_id, - "requested_document_ids": normalized_ids, - "succeeded_document_ids": succeeded_document_ids, - "failed_documents": failed_documents, - "status_snapshot": { - document_id: self._serialize_status(status_obj) - for document_id, status_obj in statuses.items() - }, - } - - def get_document_status_map(self, document_ids: list[str] | None = None) -> dict[str, dict[str, Any]]: - target_ids = [str(item).strip() for item in document_ids or [] if str(item).strip()] - if not target_ids: - return {} - try: - statuses = self._get_runtime().get_document_statuses(target_ids) - except Exception as exc: - logger.warning("Load LightRAG document statuses failed: %s", exc) - return {} - return { - document_id: self._serialize_status(status_obj) - for document_id, status_obj in statuses.items() - } - - def delete_document(self, document_id: str) -> None: - normalized_id = str(document_id or "").strip() - if not normalized_id: - return - try: - self._get_runtime().delete_document(normalized_id) - except Exception as exc: - logger.warning("Delete LightRAG document ignored doc_id=%s: %s", normalized_id, exc) - - def _get_runtime(self) -> _LightRagRuntime: - global _runtime_instance, _runtime_signature - - signature, runtime_kwargs = self._build_runtime_signature() - with _runtime_lock: - if _runtime_instance is not None and _runtime_signature == signature: - return _runtime_instance - - if _runtime_instance is not None: - try: - 
_runtime_instance.finalize() - except Exception as exc: # pragma: no cover - best effort cleanup - logger.warning("Finalize previous LightRAG runtime failed: %s", exc) - - _runtime_instance = _LightRagRuntime(**runtime_kwargs) - _runtime_signature = signature - return _runtime_instance - + "status": status_text or "unknown", + "error": self._status_error(status_obj), + } + ) + + return { + "track_id": track_id, + "requested_document_ids": normalized_ids, + "succeeded_document_ids": succeeded_document_ids, + "failed_documents": failed_documents, + "status_snapshot": { + document_id: self._serialize_status(status_obj) + for document_id, status_obj in statuses.items() + }, + } + + def get_document_status_map(self, document_ids: list[str] | None = None) -> dict[str, dict[str, Any]]: + target_ids = [str(item).strip() for item in document_ids or [] if str(item).strip()] + if not target_ids: + return {} + try: + statuses = self._get_runtime().get_document_statuses(target_ids) + except Exception as exc: + logger.warning("Load LightRAG document statuses failed: %s", exc) + return {} + return { + document_id: self._serialize_status(status_obj) + for document_id, status_obj in statuses.items() + } + + def delete_document(self, document_id: str) -> None: + normalized_id = str(document_id or "").strip() + if not normalized_id: + return + try: + self._get_runtime().delete_document(normalized_id) + except Exception as exc: + logger.warning("Delete LightRAG document ignored doc_id=%s: %s", normalized_id, exc) + + def _get_runtime(self) -> _LightRagRuntime: + global _runtime_instance, _runtime_signature + + signature, runtime_kwargs = self._build_runtime_signature() + with _runtime_lock: + if _runtime_instance is not None and _runtime_signature == signature: + return _runtime_instance + + if _runtime_instance is not None: + try: + _runtime_instance.finalize() + except Exception as exc: # pragma: no cover - best effort cleanup + logger.warning("Finalize previous LightRAG runtime 
failed: %s", exc) + + _runtime_instance = _LightRagRuntime(**runtime_kwargs) + _runtime_signature = signature + return _runtime_instance + def _build_runtime_signature(self) -> tuple[tuple[Any, ...], dict[str, Any]]: configs = self._load_runtime_configs() settings = get_settings() @@ -704,20 +272,20 @@ class KnowledgeRagService: workspace = os.environ.get("LIGHTRAG_WORKSPACE", DEFAULT_LIGHTRAG_WORKSPACE).strip() or DEFAULT_LIGHTRAG_WORKSPACE qdrant_url = os.environ.get("QDRANT_URL", "").strip() or _resolve_default_qdrant_url() qdrant_api_key = os.environ.get("QDRANT_API_KEY", "").strip() - - signature = ( - str(working_dir), - workspace, - qdrant_url, - qdrant_api_key, - configs["main"].provider, - configs["main"].model, - configs["main"].endpoint, - configs["main"].api_key, - configs["backup"].provider if configs["backup"] else "", - configs["backup"].model if configs["backup"] else "", - configs["backup"].endpoint if configs["backup"] else "", - configs["backup"].api_key if configs["backup"] else "", + + signature = ( + str(working_dir), + workspace, + qdrant_url, + qdrant_api_key, + configs["main"].provider, + configs["main"].model, + configs["main"].endpoint, + configs["main"].api_key, + configs["backup"].provider if configs["backup"] else "", + configs["backup"].model if configs["backup"] else "", + configs["backup"].endpoint if configs["backup"] else "", + configs["backup"].api_key if configs["backup"] else "", configs["embedding"].provider, configs["embedding"].model, configs["embedding"].endpoint, @@ -728,27 +296,27 @@ class KnowledgeRagService: configs["reranker"].api_key if configs["reranker"] else "", str(settings.resolved_storage_root_dir), ) - - return signature, { - "working_dir": working_dir, - "workspace": workspace, - "qdrant_url": qdrant_url, - "qdrant_api_key": qdrant_api_key, + + return signature, { + "working_dir": working_dir, + "workspace": workspace, + "qdrant_url": qdrant_url, + "qdrant_api_key": qdrant_api_key, "primary_chat": 
configs["main"], "backup_chat": configs["backup"], "embedding": configs["embedding"], "reranker": configs["reranker"], } - - def _load_runtime_configs(self) -> dict[str, RuntimeModelConfig | None]: - owned_session = False - session = self.db - if session is None: - session = get_session_factory()() - owned_session = True - - try: - settings_service = SettingsService(session) + + def _load_runtime_configs(self) -> dict[str, RuntimeModelConfig | None]: + owned_session = False + session = self.db + if session is None: + session = get_session_factory()() + owned_session = True + + try: + settings_service = SettingsService(session) main = self._normalize_runtime_model(settings_service.get_runtime_model_config("main")) embedding = self._normalize_runtime_model(settings_service.get_runtime_model_config("embedding")) try: @@ -773,36 +341,36 @@ class KnowledgeRagService: or (reranker.provider != "Ollama" and not reranker.api_key) ): reranker = None - if not main.endpoint or not main.model: - raise KnowledgeRagError("主对话模型未配置,无法初始化 LightRAG。") - if main.provider != "Ollama" and not main.api_key: - raise KnowledgeRagError("主对话模型缺少 API Key,无法初始化 LightRAG。") - if not embedding.endpoint or not embedding.model: - raise KnowledgeRagError("Embedding 模型未配置,无法初始化 LightRAG。") - if embedding.provider != "Ollama" and not embedding.api_key: - raise KnowledgeRagError("Embedding 模型缺少 API Key,无法初始化 LightRAG。") - return { + if not main.endpoint or not main.model: + raise KnowledgeRagError("主对话模型未配置,无法初始化 LightRAG。") + if main.provider != "Ollama" and not main.api_key: + raise KnowledgeRagError("主对话模型缺少 API Key,无法初始化 LightRAG。") + if not embedding.endpoint or not embedding.model: + raise KnowledgeRagError("Embedding 模型未配置,无法初始化 LightRAG。") + if embedding.provider != "Ollama" and not embedding.api_key: + raise KnowledgeRagError("Embedding 模型缺少 API Key,无法初始化 LightRAG。") + return { "main": main, "backup": backup, "embedding": embedding, "reranker": reranker, } - finally: - if owned_session and 
session is not None: - session.close() - - @staticmethod - def _normalize_runtime_model(payload: dict[str, str]) -> RuntimeModelConfig: - return RuntimeModelConfig( - slot=str(payload.get("slot") or "").strip(), - provider=str(payload.get("provider") or "").strip(), - model=str(payload.get("model") or "").strip(), - endpoint=str(payload.get("endpoint") or "").strip(), - api_key=str(payload.get("apiKey") or "").strip(), - capability=str(payload.get("capability") or "").strip(), - ) - - @staticmethod + finally: + if owned_session and session is not None: + session.close() + + @staticmethod + def _normalize_runtime_model(payload: dict[str, str]) -> RuntimeModelConfig: + return RuntimeModelConfig( + slot=str(payload.get("slot") or "").strip(), + provider=str(payload.get("provider") or "").strip(), + model=str(payload.get("model") or "").strip(), + endpoint=str(payload.get("endpoint") or "").strip(), + api_key=str(payload.get("apiKey") or "").strip(), + capability=str(payload.get("capability") or "").strip(), + ) + + @staticmethod def _build_hits_from_query_data( *, query: str, @@ -815,10 +383,10 @@ class KnowledgeRagService: for entity in entities: if not isinstance(entity, dict): continue - file_path = str(entity.get("file_path") or "").strip() - entity_name = str(entity.get("entity_name") or "").strip() - if not file_path or not entity_name: - continue + file_path = str(entity.get("file_path") or "").strip() + entity_name = str(entity.get("entity_name") or "").strip() + if not file_path or not entity_name: + continue entity_tags_by_path.setdefault(file_path, []) if entity_name not in entity_tags_by_path[file_path]: entity_tags_by_path[file_path].append(entity_name) @@ -881,35 +449,35 @@ class KnowledgeRagService: normalized.pop("_rank", None) hits.append(normalized) return hits - - @staticmethod - def _serialize_status(status_obj: Any) -> dict[str, Any]: - if status_obj is None: - return {} - if hasattr(status_obj, "__dict__"): - payload = dict(status_obj.__dict__) - 
elif isinstance(status_obj, dict): - payload = dict(status_obj) + + @staticmethod + def _serialize_status(status_obj: Any) -> dict[str, Any]: + if status_obj is None: + return {} + if hasattr(status_obj, "__dict__"): + payload = dict(status_obj.__dict__) + elif isinstance(status_obj, dict): + payload = dict(status_obj) else: payload = {} payload["status"] = KnowledgeRagService._status_value(status_obj) payload["error_msg"] = KnowledgeRagService._status_error(status_obj) payload["query_ready"] = KnowledgeRagService.is_query_ready_status(status_obj) return payload - - @staticmethod - def _status_value(status_obj: Any) -> str: - raw_status = getattr(status_obj, "status", None) - if raw_status is None and isinstance(status_obj, dict): - raw_status = status_obj.get("status") - normalized = str(raw_status or "").strip().lower() - if "." in normalized: - normalized = normalized.split(".")[-1].strip() - if ":" in normalized and normalized.endswith(">"): - normalized = normalized.split(":")[0].strip("<> '\"") - return normalized - - @staticmethod + + @staticmethod + def _status_value(status_obj: Any) -> str: + raw_status = getattr(status_obj, "status", None) + if raw_status is None and isinstance(status_obj, dict): + raw_status = status_obj.get("status") + normalized = str(raw_status or "").strip().lower() + if "." 
in normalized: + normalized = normalized.split(".")[-1].strip() + if ":" in normalized and normalized.endswith(">"): + normalized = normalized.split(":")[0].strip("<> '\"") + return normalized + + @staticmethod def _status_error(status_obj: Any) -> str: value = getattr(status_obj, "error_msg", None) if value is None and isinstance(status_obj, dict): @@ -939,244 +507,31 @@ class KnowledgeRagService: if chunks_list is None and isinstance(status_obj, dict): chunks_list = status_obj.get("chunks_list") return bool(chunks_list) - - -def shutdown_knowledge_rag_runtime() -> None: - global _runtime_instance, _runtime_signature - - with _runtime_lock: - if _runtime_instance is None: - return - try: - _runtime_instance.finalize() - except Exception as exc: # pragma: no cover - best effort cleanup - logger.warning("Finalize LightRAG runtime failed during shutdown: %s", exc) - _runtime_instance = None - _runtime_signature = None - - -def _normalize_endpoint(endpoint: str) -> str: - normalized = str(endpoint or "").strip() - if not normalized: - raise KnowledgeRagError("模型 endpoint 不能为空。") - return normalized.rstrip("/") - - -def _ensure_path(endpoint: str, suffix: str) -> str: - suffix = suffix.lstrip("/") - if endpoint.endswith(suffix): - return endpoint - return f"{endpoint}/{suffix}" - - -def _build_azure_deployment_base(endpoint: str, model: str) -> str: - normalized_endpoint = _normalize_endpoint(endpoint) - quoted_model = quote(model, safe="") - if "/openai/deployments/" in normalized_endpoint: - return normalized_endpoint - if "/openai/v1" in normalized_endpoint: - resource_root = normalized_endpoint.split("/openai/v1", maxsplit=1)[0] - return f"{resource_root}/openai/deployments/{quoted_model}" - if normalized_endpoint.endswith("/openai"): - return f"{normalized_endpoint}/deployments/{quoted_model}" - return f"{normalized_endpoint}/openai/deployments/{quoted_model}" - - -def _build_headers( - api_key: str, - *, - use_bearer: bool, - use_api_key: bool = False, -) -> 
dict[str, str]: - headers = { - "Content-Type": "application/json", - "Accept": "application/json", - } - normalized_key = str(api_key or "").strip() - if normalized_key: - if use_api_key: - headers["api-key"] = normalized_key - elif use_bearer: - headers["Authorization"] = f"Bearer {normalized_key}" - return headers - - -def _send_json_request( - method: str, - url: str, - *, - headers: dict[str, str], - payload: dict[str, Any], - timeout_seconds: int, -) -> tuple[int, Any]: - data = json.dumps(payload).encode("utf-8") - request = Request(url=url, data=data, headers=headers, method=method) - - try: - with urlopen(request, timeout=timeout_seconds) as response: # noqa: S310 - body = response.read().decode("utf-8") if response.length != 0 else "" - return response.status, _parse_json_body(body) - except HTTPError as exc: # pragma: no cover - runtime path - body = exc.read().decode("utf-8", errors="ignore") - detail = _extract_error_message(_parse_json_body(body)) or f"接口返回 {exc.code}" - raise KnowledgeRagError(detail) from exc - except URLError as exc: # pragma: no cover - runtime path - raise KnowledgeRagError(f"无法连接模型接口:{getattr(exc, 'reason', exc)}") from exc - except TimeoutError as exc: # pragma: no cover - runtime path - raise KnowledgeRagError("模型接口调用超时。") from exc - - -def _parse_json_body(body: str) -> Any: - if not body: - return None - try: - return json.loads(body) - except json.JSONDecodeError: - return {"message": body} - - -def _extract_error_message(payload: Any) -> str | None: - if payload is None: - return None - if isinstance(payload, dict): - if isinstance(payload.get("detail"), str): - return payload["detail"] - if isinstance(payload.get("message"), str): - return payload["message"] - error_payload = payload.get("error") - if isinstance(error_payload, dict) and isinstance(error_payload.get("message"), str): - return error_payload["message"] - if isinstance(payload, str): - return payload - return None - - -def _extract_chat_text(payload: Any, *, 
provider: str) -> str: - if provider == "Ollama": - message = payload.get("message") if isinstance(payload, dict) else None - if isinstance(message, dict): - return str(message.get("content") or "").strip() - return "" - - if not isinstance(payload, dict): - return "" - choices = payload.get("choices") - if not isinstance(choices, list) or not choices: - return "" - first_choice = choices[0] - if not isinstance(first_choice, dict): - return "" - message = first_choice.get("message") - if isinstance(message, dict): - content = message.get("content") - if isinstance(content, str): - return content.strip() - if isinstance(content, list): - parts: list[str] = [] - for item in content: - if isinstance(item, dict) and item.get("type") == "text": - parts.append(str(item.get("text") or "").strip()) - return "\n".join(part for part in parts if part).strip() - text = first_choice.get("text") - if isinstance(text, str): - return text.strip() - return "" - - -def _extract_embedding_vectors(payload: Any, *, provider: str) -> list[list[float]]: - if provider == "Ollama": - embeddings = payload.get("embeddings") if isinstance(payload, dict) else None - if isinstance(embeddings, list): - return [[float(value) for value in item] for item in embeddings if isinstance(item, list)] - embedding = payload.get("embedding") if isinstance(payload, dict) else None - if isinstance(embedding, list): - return [[float(value) for value in embedding]] - raise KnowledgeRagError("Ollama embedding 返回格式无法识别。") - - if not isinstance(payload, dict): - raise KnowledgeRagError("embedding 接口返回格式无效。") - data = payload.get("data") - if not isinstance(data, list) or not data: - raise KnowledgeRagError("embedding 接口没有返回 data。") - vectors: list[list[float]] = [] - for item in data: - if not isinstance(item, dict): - continue - embedding = item.get("embedding") - if isinstance(embedding, list): - vectors.append([float(value) for value in embedding]) - if not vectors: - raise KnowledgeRagError("embedding 
接口返回中未找到向量数据。") - return vectors -def _build_ali_rerank_request( - model: str, - *, - query: str, - documents: list[str], - top_n: int | None, -) -> tuple[str, dict[str, Any]]: - normalized_model = str(model or "").strip() - if normalized_model == "qwen3-rerank": - payload: dict[str, Any] = { - "model": normalized_model, - "query": query, - "documents": documents, - } - if top_n is not None: - payload["top_n"] = top_n - return "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", payload +def shutdown_knowledge_rag_runtime() -> None: + global _runtime_instance, _runtime_signature - payload = { - "model": normalized_model, - "input": { - "query": query, - "documents": documents, - }, - "parameters": { - "return_documents": False, - }, - } - if top_n is not None: - payload["parameters"]["top_n"] = top_n - return "https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank", payload - - -def _extract_rerank_results(payload: Any, *, provider: str) -> list[dict[str, Any]]: - if not isinstance(payload, dict): - return [] - if provider == "Ali" and isinstance(payload.get("output"), dict): - results = payload["output"].get("results") - else: - results = payload.get("results") - if not isinstance(results, list): - return [] - normalized: list[dict[str, Any]] = [] - for item in results: - if not isinstance(item, dict): - continue + with _runtime_lock: + if _runtime_instance is None: + return try: - normalized.append( - { - "index": int(item["index"]), - "relevance_score": float(item["relevance_score"]), - } - ) - except (KeyError, TypeError, ValueError): - continue - return normalized - - -def _parse_document_identity(file_path: str) -> tuple[str, str]: - path = Path(str(file_path or "").strip()) - name = path.name - if "__" not in name: - return "", name - document_id, document_name = name.split("__", maxsplit=1) - return document_id.strip(), document_name.strip() - - + _runtime_instance.finalize() + except Exception as exc: # pragma: no cover - best 
effort cleanup + logger.warning("Finalize LightRAG runtime failed during shutdown: %s", exc) + _runtime_instance = None + _runtime_signature = None + + +def _parse_document_identity(file_path: str) -> tuple[str, str]: + path = Path(str(file_path or "").strip()) + name = path.name + if "__" not in name: + return "", name + document_id, document_name = name.split("__", maxsplit=1) + return document_id.strip(), document_name.strip() + + def _build_excerpt(text: str, *, max_length: int = 180) -> str: normalized = " ".join(str(text or "").split()).strip() if len(normalized) <= max_length: diff --git a/server/src/app/services/knowledge_rag_runtime.py b/server/src/app/services/knowledge_rag_runtime.py new file mode 100644 index 0000000..dea2222 --- /dev/null +++ b/server/src/app/services/knowledge_rag_runtime.py @@ -0,0 +1,672 @@ +from __future__ import annotations + +import asyncio +import json +import os +from dataclasses import dataclass +from http import HTTPStatus +from pathlib import Path +from time import perf_counter +from typing import Any +from urllib.error import HTTPError, URLError +from urllib.parse import quote +from urllib.request import Request, urlopen + +from app.core.logging import get_logger +from app.services.model_connectivity import AZURE_API_VERSION + +logger = get_logger("app.services.knowledge_rag") + +DEFAULT_LIGHTRAG_QUERY_MODE = "naive" +DEFAULT_LLM_TIMEOUT_SECONDS = 180 +DEFAULT_EMBEDDING_TIMEOUT_SECONDS = 120 + +class KnowledgeRagError(RuntimeError): + pass + + +@dataclass(frozen=True, slots=True) +class RuntimeModelConfig: + slot: str + provider: str + model: str + endpoint: str + api_key: str + capability: str + + +class _LightRagRuntime: + def __init__( + self, + *, + working_dir: Path, + workspace: str, + qdrant_url: str, + qdrant_api_key: str, + primary_chat: RuntimeModelConfig, + backup_chat: RuntimeModelConfig | None, + embedding: RuntimeModelConfig, + reranker: RuntimeModelConfig | None, + ) -> None: + self.working_dir = working_dir 
+ self.workspace = workspace + self.qdrant_url = qdrant_url + self.qdrant_api_key = qdrant_api_key + self.primary_chat = primary_chat + self.backup_chat = backup_chat + self.embedding = embedding + self.reranker = reranker + self._rag = self._build_rag() + self._initialize() + self._graph_has_content_cache: bool | None = None + + @property + def rag(self): + return self._rag + + def _build_rag(self): + try: + from lightrag import LightRAG + from lightrag.utils import EmbeddingFunc + except ImportError as exc: # pragma: no cover - exercised in runtime env + raise KnowledgeRagError( + "LightRAG 依赖未安装,请先在 server 环境执行依赖安装。" + ) from exc + + self.working_dir.mkdir(parents=True, exist_ok=True) + + if self.qdrant_url: + os.environ["QDRANT_URL"] = self.qdrant_url + if self.qdrant_api_key: + os.environ["QDRANT_API_KEY"] = self.qdrant_api_key + + embedding_dim = self._probe_embedding_dimension(self.embedding) + logger.info( + "Initialize LightRAG runtime workspace=%s qdrant=%s embedding_model=%s dim=%s", + self.workspace, + self.qdrant_url, + self.embedding.model, + embedding_dim, + ) + + async def embedding_func(texts: list[str]) -> Any: + return await asyncio.to_thread(self._embed_sync, texts) + + async def llm_model_func( + prompt: str, + system_prompt: str | None = None, + history_messages: list[dict[str, Any]] | None = None, + keyword_extraction: bool = False, + **kwargs: Any, + ) -> str: + return await asyncio.to_thread( + self._complete_sync, + prompt, + system_prompt, + history_messages or [], + keyword_extraction, + kwargs, + ) + + async def rerank_model_func( + query: str, + documents: list[str], + top_n: int | None = None, + **_kwargs: Any, + ) -> list[dict[str, Any]]: + return await asyncio.to_thread( + self._rerank_sync, + query, + documents, + top_n, + ) + + return LightRAG( + working_dir=str(self.working_dir), + workspace=self.workspace, + kv_storage="JsonKVStorage", + graph_storage="NetworkXStorage", + vector_storage="QdrantVectorDBStorage", + 
doc_status_storage="JsonDocStatusStorage", + llm_model_name=self.primary_chat.model, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=embedding_dim, + func=embedding_func, + max_token_size=8192, + model_name=self.embedding.model, + supports_asymmetric=False, + ), + rerank_model_func=rerank_model_func if self.reranker is not None else None, + enable_llm_cache=False, + enable_llm_cache_for_entity_extract=False, + ) + + def _initialize(self) -> None: + from lightrag.utils import always_get_an_event_loop + + loop = always_get_an_event_loop() + loop.run_until_complete(self._rag.initialize_storages()) + + def finalize(self) -> None: + from lightrag.utils import always_get_an_event_loop + + loop = always_get_an_event_loop() + loop.run_until_complete(self._rag.finalize_storages()) + + def query_data(self, query: str, *, conversation_history: list[dict[str, str]] | None = None) -> dict[str, Any]: + from lightrag import QueryParam + + configured_mode = os.environ.get("LIGHTRAG_QUERY_MODE", DEFAULT_LIGHTRAG_QUERY_MODE).strip() or DEFAULT_LIGHTRAG_QUERY_MODE + mode = "naive" if configured_mode != "naive" and not self._graph_has_content() else configured_mode + started_at = perf_counter() + param = QueryParam( + mode=mode, + top_k=8, + chunk_top_k=10, + only_need_context=True, + response_type="Multiple Paragraphs", + conversation_history=conversation_history or [], + include_references=True, + ) + try: + result = self._rag.query_data(query, param) + logger.info("LightRAG query completed mode=%s elapsed=%.2fs", mode, perf_counter() - started_at) + return result + except Exception: + if mode == "naive": + raise + logger.warning("LightRAG query mode=%s failed, retry with naive mode", mode) + fallback_param = QueryParam( + mode="naive", + top_k=8, + chunk_top_k=10, + only_need_context=True, + response_type="Multiple Paragraphs", + conversation_history=conversation_history or [], + include_references=True, + ) + result = self._rag.query_data(query, 
fallback_param) + logger.info("LightRAG query completed mode=naive elapsed=%.2fs", perf_counter() - started_at) + return result + + def _graph_has_content(self) -> bool: + if self._graph_has_content_cache is not None: + return self._graph_has_content_cache + + graph_path = self.working_dir / self.workspace / "graph_chunk_entity_relation.graphml" + try: + graph_text = graph_path.read_text(encoding="utf-8") + except OSError: + self._graph_has_content_cache = False + return False + + self._graph_has_content_cache = " str: + return self._rag.insert(texts, ids=document_ids, file_paths=file_paths) + + def get_document_statuses(self, document_ids: list[str]) -> dict[str, Any]: + from lightrag.utils import always_get_an_event_loop + + loop = always_get_an_event_loop() + return loop.run_until_complete(self._rag.aget_docs_by_ids(document_ids)) + + def delete_document(self, document_id: str) -> None: + from lightrag.utils import always_get_an_event_loop + + loop = always_get_an_event_loop() + result = loop.run_until_complete(self._rag.adelete_by_doc_id(document_id)) + status = str(getattr(result, "status", "") or "") + if status not in {"success", "not_found"}: + raise KnowledgeRagError(str(getattr(result, "message", "") or "LightRAG 删除文档失败。")) + + def _probe_embedding_dimension(self, config: RuntimeModelConfig) -> int: + vectors = self._request_embeddings(config, ["dimension probe"]) + if not vectors or not isinstance(vectors[0], list): + raise KnowledgeRagError("无法从 embedding 模型返回结果中解析向量维度。") + dimension = len(vectors[0]) + if dimension <= 0: + raise KnowledgeRagError("embedding 模型返回了无效的向量维度。") + return dimension + + def _embed_sync(self, texts: list[str]) -> Any: + import numpy as np + + vectors = self._request_embeddings(self.embedding, texts) + return np.array(vectors, dtype=float) + + def _rerank_sync( + self, + query: str, + documents: list[str], + top_n: int | None, + ) -> list[dict[str, Any]]: + if self.reranker is None: + return [] + + status_code, body = 
self._request_rerank( + self.reranker, + query=query, + documents=documents, + top_n=top_n, + ) + if status_code >= HTTPStatus.BAD_REQUEST: + raise KnowledgeRagError(f"reranker 模型返回异常状态码 {status_code}。") + return _extract_rerank_results(body, provider=self.reranker.provider) + + def _complete_sync( + self, + prompt: str, + system_prompt: str | None, + history_messages: list[dict[str, Any]], + keyword_extraction: bool, + kwargs: dict[str, Any], + ) -> str: + del keyword_extraction + + last_error: Exception | None = None + for config in [self.primary_chat, self.backup_chat]: + if config is None: + continue + try: + return self._request_chat_completion( + config, + prompt=prompt, + system_prompt=system_prompt, + history_messages=history_messages, + max_tokens=int(kwargs.get("max_tokens") or 1200), + temperature=float(kwargs.get("temperature") or 0.1), + ) + except Exception as exc: # pragma: no cover - runtime fallback + last_error = exc + logger.warning( + "LightRAG LLM request failed slot=%s provider=%s model=%s: %s", + config.slot, + config.provider, + config.model, + exc, + ) + continue + + raise KnowledgeRagError(f"LightRAG 调用知识模型失败:{last_error or '没有可用模型配置'}") + + def _request_chat_completion( + self, + config: RuntimeModelConfig, + *, + prompt: str, + system_prompt: str | None, + history_messages: list[dict[str, Any]], + max_tokens: int, + temperature: float, + ) -> str: + messages: list[dict[str, Any]] = [] + if system_prompt: + messages.append({"role": "system", "content": system_prompt}) + messages.extend(history_messages) + messages.append({"role": "user", "content": prompt}) + + if config.provider == "Azure OpenAI": + url = f"{_build_azure_deployment_base(config.endpoint, config.model)}/chat/completions?api-version={AZURE_API_VERSION}" + payload = { + "messages": messages, + "max_tokens": max_tokens, + "temperature": temperature, + } + status_code, body = _send_json_request( + "POST", + url, + headers=_build_headers(config.api_key, use_bearer=False, 
use_api_key=True), + payload=payload, + timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, + ) + elif config.provider == "Ollama": + url = _ensure_path(_normalize_endpoint(config.endpoint), "api/chat") + payload = { + "model": config.model, + "messages": messages, + "stream": False, + "options": { + "num_predict": max_tokens, + "temperature": temperature, + }, + } + status_code, body = _send_json_request( + "POST", + url, + headers={"Content-Type": "application/json", "Accept": "application/json"}, + payload=payload, + timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, + ) + else: + url = _ensure_path(_normalize_endpoint(config.endpoint), "chat/completions") + payload = { + "model": config.model, + "messages": messages, + "max_tokens": max_tokens, + "temperature": temperature, + } + status_code, body = _send_json_request( + "POST", + url, + headers=_build_headers(config.api_key, use_bearer=True), + payload=payload, + timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, + ) + + if status_code >= HTTPStatus.BAD_REQUEST: + raise KnowledgeRagError(f"知识模型返回异常状态码 {status_code}。") + + return _extract_chat_text(body, provider=config.provider) + + def _request_embeddings(self, config: RuntimeModelConfig, texts: list[str]) -> list[list[float]]: + if config.provider == "Azure OpenAI": + url = f"{_build_azure_deployment_base(config.endpoint, config.model)}/embeddings?api-version={AZURE_API_VERSION}" + payload = {"input": texts} + status_code, body = _send_json_request( + "POST", + url, + headers=_build_headers(config.api_key, use_bearer=False, use_api_key=True), + payload=payload, + timeout_seconds=DEFAULT_EMBEDDING_TIMEOUT_SECONDS, + ) + elif config.provider == "Ollama": + url = _ensure_path(_normalize_endpoint(config.endpoint), "api/embed") + payload = {"model": config.model, "input": texts} + status_code, body = _send_json_request( + "POST", + url, + headers={"Content-Type": "application/json", "Accept": "application/json"}, + payload=payload, + 
timeout_seconds=DEFAULT_EMBEDDING_TIMEOUT_SECONDS, + ) + else: + url = _ensure_path(_normalize_endpoint(config.endpoint), "embeddings") + payload = {"model": config.model, "input": texts} + status_code, body = _send_json_request( + "POST", + url, + headers=_build_headers(config.api_key, use_bearer=True), + payload=payload, + timeout_seconds=DEFAULT_EMBEDDING_TIMEOUT_SECONDS, + ) + + if status_code >= HTTPStatus.BAD_REQUEST: + raise KnowledgeRagError(f"embedding 模型返回异常状态码 {status_code}。") + + return _extract_embedding_vectors(body, provider=config.provider) + + def _request_rerank( + self, + config: RuntimeModelConfig, + *, + query: str, + documents: list[str], + top_n: int | None, + ) -> tuple[int, Any]: + if config.provider == "Azure OpenAI": + url = f"{_build_azure_deployment_base(config.endpoint, config.model)}/rerank?api-version={AZURE_API_VERSION}" + payload: dict[str, Any] = { + "query": query, + "documents": documents, + } + if top_n is not None: + payload["top_n"] = top_n + return _send_json_request( + "POST", + url, + headers=_build_headers(config.api_key, use_bearer=False, use_api_key=True), + payload=payload, + timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, + ) + + if config.provider == "Ali": + url, payload = _build_ali_rerank_request( + config.model, + query=query, + documents=documents, + top_n=top_n, + ) + return _send_json_request( + "POST", + url, + headers=_build_headers(config.api_key, use_bearer=True), + payload=payload, + timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, + ) + + url = _ensure_path(_normalize_endpoint(config.endpoint), "rerank") + payload = { + "model": config.model, + "query": query, + "documents": documents, + } + if top_n is not None: + payload["top_n"] = top_n + return _send_json_request( + "POST", + url, + headers=_build_headers(config.api_key, use_bearer=True), + payload=payload, + timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS, + ) + +def _normalize_endpoint(endpoint: str) -> str: + normalized = str(endpoint or "").strip() + if not 
normalized: + raise KnowledgeRagError("模型 endpoint 不能为空。") + return normalized.rstrip("/") + + +def _ensure_path(endpoint: str, suffix: str) -> str: + suffix = suffix.lstrip("/") + if endpoint.endswith(suffix): + return endpoint + return f"{endpoint}/{suffix}" + + +def _build_azure_deployment_base(endpoint: str, model: str) -> str: + normalized_endpoint = _normalize_endpoint(endpoint) + quoted_model = quote(model, safe="") + if "/openai/deployments/" in normalized_endpoint: + return normalized_endpoint + if "/openai/v1" in normalized_endpoint: + resource_root = normalized_endpoint.split("/openai/v1", maxsplit=1)[0] + return f"{resource_root}/openai/deployments/{quoted_model}" + if normalized_endpoint.endswith("/openai"): + return f"{normalized_endpoint}/deployments/{quoted_model}" + return f"{normalized_endpoint}/openai/deployments/{quoted_model}" + + +def _build_headers( + api_key: str, + *, + use_bearer: bool, + use_api_key: bool = False, +) -> dict[str, str]: + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + } + normalized_key = str(api_key or "").strip() + if normalized_key: + if use_api_key: + headers["api-key"] = normalized_key + elif use_bearer: + headers["Authorization"] = f"Bearer {normalized_key}" + return headers + + +def _send_json_request( + method: str, + url: str, + *, + headers: dict[str, str], + payload: dict[str, Any], + timeout_seconds: int, +) -> tuple[int, Any]: + data = json.dumps(payload).encode("utf-8") + request = Request(url=url, data=data, headers=headers, method=method) + + try: + with urlopen(request, timeout=timeout_seconds) as response: # noqa: S310 + body = response.read().decode("utf-8") if response.length != 0 else "" + return response.status, _parse_json_body(body) + except HTTPError as exc: # pragma: no cover - runtime path + body = exc.read().decode("utf-8", errors="ignore") + detail = _extract_error_message(_parse_json_body(body)) or f"接口返回 {exc.code}" + raise KnowledgeRagError(detail) from 
exc + except URLError as exc: # pragma: no cover - runtime path + raise KnowledgeRagError(f"无法连接模型接口:{getattr(exc, 'reason', exc)}") from exc + except TimeoutError as exc: # pragma: no cover - runtime path + raise KnowledgeRagError("模型接口调用超时。") from exc + + +def _parse_json_body(body: str) -> Any: + if not body: + return None + try: + return json.loads(body) + except json.JSONDecodeError: + return {"message": body} + + +def _extract_error_message(payload: Any) -> str | None: + if payload is None: + return None + if isinstance(payload, dict): + if isinstance(payload.get("detail"), str): + return payload["detail"] + if isinstance(payload.get("message"), str): + return payload["message"] + error_payload = payload.get("error") + if isinstance(error_payload, dict) and isinstance(error_payload.get("message"), str): + return error_payload["message"] + if isinstance(payload, str): + return payload + return None + + +def _extract_chat_text(payload: Any, *, provider: str) -> str: + if provider == "Ollama": + message = payload.get("message") if isinstance(payload, dict) else None + if isinstance(message, dict): + return str(message.get("content") or "").strip() + return "" + + if not isinstance(payload, dict): + return "" + choices = payload.get("choices") + if not isinstance(choices, list) or not choices: + return "" + first_choice = choices[0] + if not isinstance(first_choice, dict): + return "" + message = first_choice.get("message") + if isinstance(message, dict): + content = message.get("content") + if isinstance(content, str): + return content.strip() + if isinstance(content, list): + parts: list[str] = [] + for item in content: + if isinstance(item, dict) and item.get("type") == "text": + parts.append(str(item.get("text") or "").strip()) + return "\n".join(part for part in parts if part).strip() + text = first_choice.get("text") + if isinstance(text, str): + return text.strip() + return "" + + +def _extract_embedding_vectors(payload: Any, *, provider: str) -> 
list[list[float]]: + if provider == "Ollama": + embeddings = payload.get("embeddings") if isinstance(payload, dict) else None + if isinstance(embeddings, list): + return [[float(value) for value in item] for item in embeddings if isinstance(item, list)] + embedding = payload.get("embedding") if isinstance(payload, dict) else None + if isinstance(embedding, list): + return [[float(value) for value in embedding]] + raise KnowledgeRagError("Ollama embedding 返回格式无法识别。") + + if not isinstance(payload, dict): + raise KnowledgeRagError("embedding 接口返回格式无效。") + data = payload.get("data") + if not isinstance(data, list) or not data: + raise KnowledgeRagError("embedding 接口没有返回 data。") + vectors: list[list[float]] = [] + for item in data: + if not isinstance(item, dict): + continue + embedding = item.get("embedding") + if isinstance(embedding, list): + vectors.append([float(value) for value in embedding]) + if not vectors: + raise KnowledgeRagError("embedding 接口返回中未找到向量数据。") + return vectors + + +def _build_ali_rerank_request( + model: str, + *, + query: str, + documents: list[str], + top_n: int | None, +) -> tuple[str, dict[str, Any]]: + normalized_model = str(model or "").strip() + if normalized_model == "qwen3-rerank": + payload: dict[str, Any] = { + "model": normalized_model, + "query": query, + "documents": documents, + } + if top_n is not None: + payload["top_n"] = top_n + return "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", payload + + payload = { + "model": normalized_model, + "input": { + "query": query, + "documents": documents, + }, + "parameters": { + "return_documents": False, + }, + } + if top_n is not None: + payload["parameters"]["top_n"] = top_n + return "https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank", payload + + +def _extract_rerank_results(payload: Any, *, provider: str) -> list[dict[str, Any]]: + if not isinstance(payload, dict): + return [] + if provider == "Ali" and isinstance(payload.get("output"), dict): + 
results = payload["output"].get("results") + else: + results = payload.get("results") + if not isinstance(results, list): + return [] + normalized: list[dict[str, Any]] = [] + for item in results: + if not isinstance(item, dict): + continue + try: + normalized.append( + { + "index": int(item["index"]), + "relevance_score": float(item["relevance_score"]), + } + ) + except (KeyError, TypeError, ValueError): + continue + return normalized diff --git a/server/src/app/services/ontology.py b/server/src/app/services/ontology.py index 03b44b7..65522ac 100644 --- a/server/src/app/services/ontology.py +++ b/server/src/app/services/ontology.py @@ -1,697 +1,407 @@ -from __future__ import annotations - -import calendar -import json -import re -from dataclasses import dataclass -from datetime import UTC, date, datetime, timedelta -from typing import Any - -from pydantic import BaseModel, ConfigDict, Field, ValidationError -from sqlalchemy import select -from sqlalchemy.orm import Session - -from app.core.agent_enums import ( - AgentName, - AgentPermissionLevel, - AgentRunSource, - AgentRunStatus, -) -from app.core.logging import get_logger -from app.models.employee import Employee -from app.models.financial_record import ( - AccountsPayableRecord, - AccountsReceivableRecord, - ExpenseClaim, -) -from app.models.organization import OrganizationUnit -from app.schemas.ontology import ( - OntologyConstraint, - OntologyEntity, - OntologyFieldError, - OntologyIntent, - OntologyMetric, - OntologyParseRequest, - OntologyParseResult, - OntologyPermission, - OntologyScenario, - OntologyTimeRange, -) -from app.services.agent_foundation import AgentFoundationService -from app.services.agent_runs import AgentRunService -from app.services.runtime_chat import RuntimeChatService - -logger = get_logger("app.services.ontology") - -DATE_RANGE_PATTERN = re.compile( - r"(?P\d{4}-\d{1,2}-\d{1,2})\s*(?:到|至|~|-)\s*(?P\d{4}-\d{1,2}-\d{1,2})" -) -EXPLICIT_MONTH_PATTERN = 
re.compile(r"(?P\d{4})年(?P\d{1,2})月") -EXPLICIT_DATE_PATTERN = re.compile( - r"(?P\d{4})[年/-](?P\d{1,2})[月/-](?P\d{1,2})日?" -) -MONTH_DAY_RANGE_PATTERN = re.compile( - r"(?P\d{1,2})月(?P\d{1,2})日?\s*(?:到|至|~|-)\s*" - r"(?P\d{1,2})月(?P\d{1,2})日?" -) -MONTH_DAY_PATTERN = re.compile(r"(?P\d{1,2})月(?P\d{1,2})日?") -AMOUNT_PATTERN = re.compile( - r"(?P超过|大于|高于|不少于|不低于|小于|低于|少于|至多|不超过|<=|>=|<|>|=|=)?\s*" - r"(?P\d+(?:\.\d+)?)\s*(?P万元|万|元)?" -) -TOP_N_PATTERN = re.compile(r"(?:top|TOP|前|最高的?|最低的?)\s*(?P\d+)") - -SCENARIO_KEYWORDS = { - "expense": ( - ("报销", 0.20), - ("报销单", 0.20), - ("单据报销", 0.18), - ("报账", 0.20), - ("差旅", 0.20), - ("费用", 0.14), - ("发票", 0.14), - ("票据", 0.12), - ("借款", 0.12), - ("住宿", 0.10), - ("餐费", 0.10), - ("招待", 0.18), - ("招待费", 0.18), - ("花销", 0.16), - ("花了", 0.14), - ("支出", 0.14), - ("垫付", 0.14), - ), - "accounts_receivable": ( - ("应收", 0.22), - ("回款", 0.20), - ("收款", 0.18), - ("账龄", 0.18), - ("客户欠款", 0.22), - ), - "accounts_payable": ( - ("应付", 0.22), - ("付款", 0.20), - ("请款", 0.18), - ("供应商", 0.20), - ("待付", 0.16), - ("打款", 0.18), - ), - "knowledge": ( - ("制度", 0.20), - ("规则", 0.20), - ("办法", 0.18), - ("依据", 0.18), - ("政策", 0.16), - ("知识库", 0.18), - ), -} - -QUERY_KEYWORDS = ( - "查", - "查询", - "查看", - "列出", - "统计", - "汇总", - "多少", - "几笔", - "金额", - "明细", -) -EXPLAIN_KEYWORDS = ("为什么", "依据", "原因", "怎么处理", "是否可以", "能不能", "按什么规则") -COMPARE_KEYWORDS = ("对比", "比较", "相比", "差异", "变化") -RISK_KEYWORDS = ("风险", "异常", "重复", "超标", "超预算", "逾期", "验真", "巡检") -DRAFT_KEYWORDS = ("生成", "草稿", "起草", "拟一份", "创建", "发起", "准备") -DRAFT_FOLLOW_UP_KEYWORDS = ( - "继续", - "下一步", - "核对", - "补充", - "补一下", - "修改", - "改成", - "改为", - "换成", - "更新", - "确认", - "提交", - "保存", - "客户是", - "地点是", - "金额是", - "日期是", - "时间是", -) -EXPENSE_REVIEW_ACTIONS = { - "save_draft", - "next_step", - "edit_review", - "link_to_existing_draft", - "create_new_claim_from_documents", -} -OPERATE_KEYWORDS = ( - "直接付款", - "帮我付款", - "安排付款", - "发起付款", - "直接审批", - "审批通过", - "帮我审批", - "驳回", - "上线", - "激活", - "停用", - 
"删除", -) - -EXPENSE_TYPE_KEYWORDS = { - "差旅": "travel", - "出差": "travel", - "住宿": "hotel", - "酒店": "hotel", - "交通": "transport", - "打车": "transport", - "网约车": "transport", - "出租车": "transport", - "乘车": "transport", - "乘车费": "transport", - "用车": "transport", - "叫车": "transport", - "车资": "transport", - "停车费": "transport", - "餐费": "meal", - "用餐": "meal", - "会务": "meeting", - "招待费": "entertainment", - "招待": "entertainment", - "宴请": "entertainment", - "办公费": "office", - "办公用品": "office", - "文具": "office", - "耗材": "office", - "办公耗材": "office", - "打印纸": "office", - "办公设备": "office", - "培训费": "training", - "培训": "training", - "通讯费": "communication", - "话费": "communication", - "福利费": "welfare", - "团建": "welfare", -} - -EXPENSE_NARRATIVE_KEYWORDS = ( - "报销", - "报账", - "招待", - "招待费", - "花销", - "花了", - "支出", - "垫付", - "打车", - "车费", - "乘车", - "乘车费", - "用车", - "叫车", - "车资", - "餐费", - "吃饭", - "用餐", - "宴请", - "请客", - "住宿", - "发票", - "票据", - "差旅", - "客户现场", -) - -AR_CORE_KEYWORDS = ("应收", "回款", "收款", "账龄", "欠款", "未回款") -AP_CORE_KEYWORDS = ("应付", "付款", "请款", "待付", "打款", "未付款") -GENERIC_EXPENSE_PROMPTS = { - "报销", - "我要报销", - "我想报销", - "帮我报销", - "我要申请报销", - "发起报销", - "提交报销", -} -MISSING_SLOT_LABELS = { - "expense_type": "费用类型", - "amount": "金额", - "customer_name": "客户单位", - "vendor_name": "供应商", - "participants": "参与人员", - "attachments": "票据附件", - "time_range": "发生时间", - "reason": "事由说明", - "document_id": "单据号", -} - -STATUS_KEYWORDS = { - "草稿": "draft", - "待提交": "draft", - "待补充": "supplement", - "退回": "returned", - "已退回": "returned", - "进行中": "review", - "审批中": "review", - "审核中": "review", - "流转中": "review", - "已提交": "submitted", - "逾期": "overdue", - "待审批": "pending", - "待审": "pending", - "已审批": "approved", - "已通过": "approved", - "已审核": "approved", - "已入账": "paid", - "已付款": "paid", - "未付款": "unpaid", - "未回款": "unreceived", -} +from __future__ import annotations -LOCATION_KEYWORDS = ( - "北京", - "上海", - "广州", - "深圳", - "杭州", - "南京", - "苏州", - "成都", - "重庆", - "天津", - "武汉", - "西安", - 
"郑州", - "长沙", - "青岛", - "厦门", - "宁波", - "合肥", - "济南", - "福州", +import re +from typing import Any + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from app.core.agent_enums import ( + AgentName, + AgentPermissionLevel, + AgentRunSource, + AgentRunStatus, ) - -PRIVILEGED_ROLE_CODES = {"manager", "finance", "approver", "executive"} -CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "knowledge"} -KNOWLEDGE_INTENTS = {"query", "explain", "compare"} - - -@dataclass(slots=True) -class ReferenceCatalog: - employees: list[str] - departments: list[str] - customers: list[str] - vendors: list[str] - projects: list[str] - - -class LlmOntologyEntityHint(BaseModel): - model_config = ConfigDict(extra="ignore") - - type: str - value: str - normalized_value: str | None = None - role: str = "target" - confidence: float = Field(default=0.72, ge=0.0, le=1.0) - - -class LlmOntologyParseResult(BaseModel): - model_config = ConfigDict(extra="ignore") - - scenario: OntologyScenario = Field(default="unknown") - intent: OntologyIntent = Field(default="query") - confidence: float = Field(default=0.0, ge=0.0, le=1.0) - clarification_required: bool = False - clarification_question: str | None = None - missing_slots: list[str] = Field(default_factory=list) - ambiguity: list[str] = Field(default_factory=list) - entity_hints: list[LlmOntologyEntityHint] = Field(default_factory=list) - - -class SemanticOntologyService: - def __init__(self, db: Session) -> None: - self.db = db - self.run_service = AgentRunService(db) - self.runtime_chat_service = RuntimeChatService(db) - - def parse(self, payload: OntologyParseRequest) -> OntologyParseResult: - analyzed = self._analyze(payload) - run = self.run_service.create_run( - agent=AgentName.ORCHESTRATOR.value, - source=AgentRunSource.USER_MESSAGE.value, - user_id=payload.user_id, - ontology_json=self._build_ontology_json(analyzed), - route_json={ - "stage": "semantic_parse", - "clarification_required": 
analyzed["clarification_required"], - "field_error_count": len(analyzed["field_errors"]), - }, - permission_level=analyzed["permission"].level, - status=( - AgentRunStatus.BLOCKED.value - if analyzed["clarification_required"] - or analyzed["permission"].level == AgentPermissionLevel.FORBIDDEN.value - else AgentRunStatus.SUCCEEDED.value - ), - result_summary=self._build_result_summary( - analyzed["scenario"], - analyzed["intent"], - analyzed["permission"].level, - analyzed["confidence"], - ), - error_message=( - analyzed["permission"].reason - if analyzed["permission"].level == AgentPermissionLevel.FORBIDDEN.value - else None - ), - ) - self._record_semantic_parse( - run_id=run.run_id, - payload=payload, - analyzed=analyzed, - ) - return self._build_result(analyzed, run.run_id) - - def parse_for_run(self, payload: OntologyParseRequest, *, run_id: str) -> OntologyParseResult: - analyzed = self._analyze(payload) - self._record_semantic_parse(run_id=run_id, payload=payload, analyzed=analyzed) - return self._build_result(analyzed, run_id) - - def _analyze(self, payload: OntologyParseRequest) -> dict[str, object]: - query = payload.query.strip() - if not query: - raise ValueError("query 不能为空。") - - AgentFoundationService(self.db).ensure_foundation_ready() - context_json = payload.context_json or {} - reference = self._load_reference_catalog() - compact_query = self._compact(query) - entities = self._extract_entities(query, compact_query, reference) - rule_scenario, scenario_score = self._detect_scenario(compact_query) - time_range, _time_score = self._extract_time_range( - query, - compact_query, - context_json=context_json, - ) - session_scenario = self._resolve_session_type_scenario(context_json) - context_scenario = self._resolve_context_scenario(context_json) - if session_scenario == "knowledge": - rule_scenario = "knowledge" - scenario_score = max(scenario_score, 0.34) - if rule_scenario == "unknown" and context_scenario is not None: - rule_scenario = 
context_scenario - scenario_score = max(scenario_score, 0.14) - if rule_scenario == "unknown": - inferred_scenario = self._infer_scenario_from_entities(entities) - if inferred_scenario is not None: - rule_scenario = inferred_scenario - scenario_score = 0.18 - - if session_scenario != "knowledge" and self._looks_like_expense_narrative( - compact_query, - scenario=rule_scenario, - entities=entities, - time_range=time_range, - ): - rule_scenario = "expense" - scenario_score = max(scenario_score, 0.24) - - rule_intent, intent_score = self._detect_intent( - compact_query, - scenario=rule_scenario, - entities=entities, - time_range=time_range, - ) - if session_scenario != "knowledge" and self._should_inherit_expense_draft( - compact_query, - scenario=rule_scenario, - entities=entities, - time_range=time_range, - context_json=context_json, - ): - rule_scenario = "expense" - rule_intent = "draft" - scenario_score = max(scenario_score, 0.18) - intent_score = max(intent_score, 0.18) - metrics = self._extract_metrics(compact_query) - constraints = self._extract_constraints(compact_query, entities) - model_parse = None - if session_scenario != "knowledge": - model_parse = self._parse_with_model( - payload=payload, - query=query, - compact_query=compact_query, - fallback_scenario=rule_scenario, - fallback_intent=rule_intent, - entities=entities, - time_range=time_range, - metrics=metrics, - constraints=constraints, - ) - scenario = self._resolve_scenario(rule_scenario, model_parse) - if session_scenario == "knowledge": - scenario = "knowledge" - entities = self._merge_entities( - entities, - model_parse.entity_hints if model_parse is not None else [], - ) - intent = self._resolve_intent( - compact_query, - fallback_intent=rule_intent, - scenario=scenario, - entities=entities, - time_range=time_range, - model_parse=model_parse, - ) - missing_slots = self._normalize_short_text_list( - model_parse.missing_slots if model_parse is not None else [] - ) - missing_slots = 
self._normalize_short_text_list( - missing_slots - + self._infer_default_missing_slots( - compact_query, - scenario=scenario, - intent=intent, - entities=entities, - time_range=time_range, - context_json=context_json, - ) - ) - relax_knowledge_follow_up = self._should_relax_knowledge_follow_up_clarification( - compact_query=compact_query, - scenario=scenario, - context_json=context_json, - missing_slots=missing_slots, - ) - if relax_knowledge_follow_up: - missing_slots = [item for item in missing_slots if item != "expense_type"] - ambiguity = self._normalize_short_text_list( - model_parse.ambiguity if model_parse is not None else [] - ) - risk_flags = self._extract_risk_flags(compact_query, scenario) - permission = self._resolve_permission( - compact_query, - context_json, - intent, - ) - - field_errors = self._build_field_errors( - scenario=scenario, - intent=intent, - entities=entities, - permission=permission, - missing_slots=missing_slots, - ambiguity=ambiguity, - ) - clarification_required, clarification_question = self._build_clarification( - scenario=scenario, - intent=intent, - entities=entities, - permission=permission, - missing_slots=missing_slots, - ambiguity=ambiguity, - allow_incomplete_draft=self._allow_incomplete_draft( - context_json, - scenario=scenario, - intent=intent, - ), - model_clarification_required=bool( - model_parse is not None - and model_parse.clarification_required - ), - model_clarification_question=( - model_parse.clarification_question if model_parse is not None else None - ), - ) - if relax_knowledge_follow_up: - clarification_required = False - clarification_question = None - fallback_confidence = self._compute_confidence( - scenario=scenario, - scenario_score=scenario_score, - intent_score=intent_score, - entities=entities, - time_range=time_range, - metrics=metrics, - constraints=constraints, - risk_flags=risk_flags, - clarification_required=clarification_required, - permission=permission, - ) - confidence = 
self._resolve_confidence( - model_confidence=( - model_parse.confidence - if model_parse is not None - else None - ), - fallback_confidence=fallback_confidence, - clarification_required=clarification_required, - permission=permission, - ) - return { - "scenario": scenario, - "intent": intent, - "entities": entities, - "time_range": time_range, - "metrics": metrics, - "constraints": constraints, - "risk_flags": risk_flags, - "permission": permission, - "confidence": confidence, - "missing_slots": missing_slots, - "ambiguity": ambiguity, - "parse_strategy": "llm_primary" if model_parse is not None else "rule_fallback", - "clarification_required": clarification_required, - "clarification_question": clarification_question, - "field_errors": field_errors, - } - - @staticmethod - def _should_relax_knowledge_follow_up_clarification( - *, - compact_query: str, - scenario: str, - context_json: dict[str, Any], - missing_slots: list[str], - ) -> bool: - if scenario != "knowledge" or "expense_type" not in missing_slots: - return False - history = context_json.get("conversation_history") - if not isinstance(history, list): - return False - has_previous_user_turn = any( - isinstance(item, dict) - and str(item.get("role") or "").strip() == "user" - and str(item.get("content") or "").strip() - for item in history - ) - if not has_previous_user_turn: - return False - follow_up_markers = ("那", "那么", "这个", "这种", "呢", "的话", "p", "P") - return any(marker in compact_query for marker in follow_up_markers) - - def _record_semantic_parse( - self, - *, - run_id: str, - payload: OntologyParseRequest, - analyzed: dict[str, object], - ) -> None: - self.run_service.record_semantic_parse( - run_id=run_id, - user_id=payload.user_id, - raw_query=payload.query.strip(), - scenario=str(analyzed["scenario"]), - intent=str(analyzed["intent"]), - entities_json=[item.model_dump() for item in analyzed["entities"]], - time_range_json=analyzed["time_range"].model_dump(), - metrics_json=[item.model_dump() 
for item in analyzed["metrics"]], - constraints_json=[item.model_dump() for item in analyzed["constraints"]], - risk_flags_json=list(analyzed["risk_flags"]), - permission_json=analyzed["permission"].model_dump(), - confidence=float(analyzed["confidence"]), - ) - logger.info( - "Parsed ontology run_id=%s scenario=%s intent=%s permission=%s", - run_id, - analyzed["scenario"], - analyzed["intent"], - analyzed["permission"].level, - ) - - @staticmethod - def _build_ontology_json(analyzed: dict[str, object]) -> dict[str, object]: - return { - "scenario": analyzed["scenario"], - "intent": analyzed["intent"], - "entities": [item.model_dump() for item in analyzed["entities"]], - "time_range": analyzed["time_range"].model_dump(), - "metrics": [item.model_dump() for item in analyzed["metrics"]], - "constraints": [item.model_dump() for item in analyzed["constraints"]], - "risk_flags": list(analyzed["risk_flags"]), - "permission": analyzed["permission"].model_dump(), - "missing_slots": list(analyzed["missing_slots"]), - "ambiguity": list(analyzed["ambiguity"]), - "parse_strategy": analyzed["parse_strategy"], - "confidence": analyzed["confidence"], - } - - @staticmethod - def _build_result(analyzed: dict[str, object], run_id: str) -> OntologyParseResult: - return OntologyParseResult( - scenario=analyzed["scenario"], - intent=analyzed["intent"], - entities=analyzed["entities"], - time_range=analyzed["time_range"], - metrics=analyzed["metrics"], - constraints=analyzed["constraints"], - risk_flags=analyzed["risk_flags"], - permission=analyzed["permission"], - confidence=analyzed["confidence"], - missing_slots=analyzed["missing_slots"], - ambiguity=analyzed["ambiguity"], - parse_strategy=analyzed["parse_strategy"], - clarification_required=analyzed["clarification_required"], - clarification_question=analyzed["clarification_question"], - run_id=run_id, - field_errors=analyzed["field_errors"], - ) - - def _load_reference_catalog(self) -> ReferenceCatalog: - employees = 
self._read_distinct_values(select(Employee.name)) - departments = self._read_distinct_values(select(OrganizationUnit.name)) - departments += self._read_distinct_values(select(ExpenseClaim.department_name)) - customers = self._read_distinct_values(select(AccountsReceivableRecord.customer_name)) - vendors = self._read_distinct_values(select(AccountsPayableRecord.vendor_name)) - projects = self._read_distinct_values(select(ExpenseClaim.project_code)) - - return ReferenceCatalog( - employees=self._dedupe_and_sort(employees), - departments=self._dedupe_and_sort(departments), - customers=self._dedupe_and_sort(customers), - vendors=self._dedupe_and_sort(vendors), - projects=self._dedupe_and_sort(projects), - ) - - def _read_distinct_values(self, stmt) -> list[str]: - values = self.db.scalars(stmt.distinct()).all() - return [str(item).strip() for item in values if item] - - @staticmethod - def _dedupe_and_sort(values: list[str]) -> list[str]: - items = {str(item).strip() for item in values if str(item).strip()} - return sorted(items, key=lambda item: (-len(item), item)) - - @staticmethod - def _compact(text: str) -> str: - return re.sub(r"\s+", "", text).lower() - +from app.core.logging import get_logger +from app.models.employee import Employee +from app.models.financial_record import ( + AccountsPayableRecord, + AccountsReceivableRecord, + ExpenseClaim, +) +from app.models.organization import OrganizationUnit +from app.schemas.ontology import ( + OntologyParseRequest, + OntologyParseResult, +) +from app.services.agent_foundation import AgentFoundationService +from app.services.agent_runs import AgentRunService +from app.services.ontology_detection import OntologyDetectionMixin +from app.services.ontology_extraction import OntologyExtractionMixin +from app.services.ontology_rules import ( + CONTEXTUAL_SCENARIOS, + EXPENSE_REVIEW_ACTIONS, + LlmOntologyEntityHint, + LlmOntologyParseResult, + ReferenceCatalog, +) +from app.services.ontology_validation import 
OntologyValidationMixin +from app.services.runtime_chat import RuntimeChatService + +logger = get_logger("app.services.ontology") + +class SemanticOntologyService( + OntologyDetectionMixin, + OntologyExtractionMixin, + OntologyValidationMixin, +): + def __init__(self, db: Session) -> None: + self.db = db + self.run_service = AgentRunService(db) + self.runtime_chat_service = RuntimeChatService(db) + + def parse(self, payload: OntologyParseRequest) -> OntologyParseResult: + analyzed = self._analyze(payload) + run = self.run_service.create_run( + agent=AgentName.ORCHESTRATOR.value, + source=AgentRunSource.USER_MESSAGE.value, + user_id=payload.user_id, + ontology_json=self._build_ontology_json(analyzed), + route_json={ + "stage": "semantic_parse", + "clarification_required": analyzed["clarification_required"], + "field_error_count": len(analyzed["field_errors"]), + }, + permission_level=analyzed["permission"].level, + status=( + AgentRunStatus.BLOCKED.value + if analyzed["clarification_required"] + or analyzed["permission"].level == AgentPermissionLevel.FORBIDDEN.value + else AgentRunStatus.SUCCEEDED.value + ), + result_summary=self._build_result_summary( + analyzed["scenario"], + analyzed["intent"], + analyzed["permission"].level, + analyzed["confidence"], + ), + error_message=( + analyzed["permission"].reason + if analyzed["permission"].level == AgentPermissionLevel.FORBIDDEN.value + else None + ), + ) + self._record_semantic_parse( + run_id=run.run_id, + payload=payload, + analyzed=analyzed, + ) + return self._build_result(analyzed, run.run_id) + + def parse_for_run(self, payload: OntologyParseRequest, *, run_id: str) -> OntologyParseResult: + analyzed = self._analyze(payload) + self._record_semantic_parse(run_id=run_id, payload=payload, analyzed=analyzed) + return self._build_result(analyzed, run_id) + + def _analyze(self, payload: OntologyParseRequest) -> dict[str, object]: + query = payload.query.strip() + if not query: + raise ValueError("query 不能为空。") + + 
AgentFoundationService(self.db).ensure_foundation_ready() + context_json = payload.context_json or {} + reference = self._load_reference_catalog() + compact_query = self._compact(query) + entities = self._extract_entities(query, compact_query, reference) + rule_scenario, scenario_score = self._detect_scenario(compact_query) + time_range, _time_score = self._extract_time_range( + query, + compact_query, + context_json=context_json, + ) + session_scenario = self._resolve_session_type_scenario(context_json) + context_scenario = self._resolve_context_scenario(context_json) + if session_scenario == "knowledge": + rule_scenario = "knowledge" + scenario_score = max(scenario_score, 0.34) + if rule_scenario == "unknown" and context_scenario is not None: + rule_scenario = context_scenario + scenario_score = max(scenario_score, 0.14) + if rule_scenario == "unknown": + inferred_scenario = self._infer_scenario_from_entities(entities) + if inferred_scenario is not None: + rule_scenario = inferred_scenario + scenario_score = 0.18 + + if session_scenario != "knowledge" and self._looks_like_expense_narrative( + compact_query, + scenario=rule_scenario, + entities=entities, + time_range=time_range, + ): + rule_scenario = "expense" + scenario_score = max(scenario_score, 0.24) + + rule_intent, intent_score = self._detect_intent( + compact_query, + scenario=rule_scenario, + entities=entities, + time_range=time_range, + ) + if session_scenario != "knowledge" and self._should_inherit_expense_draft( + compact_query, + scenario=rule_scenario, + entities=entities, + time_range=time_range, + context_json=context_json, + ): + rule_scenario = "expense" + rule_intent = "draft" + scenario_score = max(scenario_score, 0.18) + intent_score = max(intent_score, 0.18) + metrics = self._extract_metrics(compact_query) + constraints = self._extract_constraints(compact_query, entities) + model_parse = None + if session_scenario != "knowledge": + model_parse = self._parse_with_model( + payload=payload, + 
query=query, + compact_query=compact_query, + fallback_scenario=rule_scenario, + fallback_intent=rule_intent, + entities=entities, + time_range=time_range, + metrics=metrics, + constraints=constraints, + ) + scenario = self._resolve_scenario(rule_scenario, model_parse) + if session_scenario == "knowledge": + scenario = "knowledge" + entities = self._merge_entities( + entities, + model_parse.entity_hints if model_parse is not None else [], + ) + intent = self._resolve_intent( + compact_query, + fallback_intent=rule_intent, + scenario=scenario, + entities=entities, + time_range=time_range, + model_parse=model_parse, + ) + missing_slots = self._normalize_short_text_list( + model_parse.missing_slots if model_parse is not None else [] + ) + missing_slots = self._normalize_short_text_list( + missing_slots + + self._infer_default_missing_slots( + compact_query, + scenario=scenario, + intent=intent, + entities=entities, + time_range=time_range, + context_json=context_json, + ) + ) + relax_knowledge_follow_up = self._should_relax_knowledge_follow_up_clarification( + compact_query=compact_query, + scenario=scenario, + context_json=context_json, + missing_slots=missing_slots, + ) + if relax_knowledge_follow_up: + missing_slots = [item for item in missing_slots if item != "expense_type"] + ambiguity = self._normalize_short_text_list( + model_parse.ambiguity if model_parse is not None else [] + ) + risk_flags = self._extract_risk_flags(compact_query, scenario) + permission = self._resolve_permission( + compact_query, + context_json, + intent, + ) + + field_errors = self._build_field_errors( + scenario=scenario, + intent=intent, + entities=entities, + permission=permission, + missing_slots=missing_slots, + ambiguity=ambiguity, + ) + clarification_required, clarification_question = self._build_clarification( + scenario=scenario, + intent=intent, + entities=entities, + permission=permission, + missing_slots=missing_slots, + ambiguity=ambiguity, + 
allow_incomplete_draft=self._allow_incomplete_draft( + context_json, + scenario=scenario, + intent=intent, + ), + model_clarification_required=bool( + model_parse is not None + and model_parse.clarification_required + ), + model_clarification_question=( + model_parse.clarification_question if model_parse is not None else None + ), + ) + if relax_knowledge_follow_up: + clarification_required = False + clarification_question = None + fallback_confidence = self._compute_confidence( + scenario=scenario, + scenario_score=scenario_score, + intent_score=intent_score, + entities=entities, + time_range=time_range, + metrics=metrics, + constraints=constraints, + risk_flags=risk_flags, + clarification_required=clarification_required, + permission=permission, + ) + confidence = self._resolve_confidence( + model_confidence=( + model_parse.confidence + if model_parse is not None + else None + ), + fallback_confidence=fallback_confidence, + clarification_required=clarification_required, + permission=permission, + ) + return { + "scenario": scenario, + "intent": intent, + "entities": entities, + "time_range": time_range, + "metrics": metrics, + "constraints": constraints, + "risk_flags": risk_flags, + "permission": permission, + "confidence": confidence, + "missing_slots": missing_slots, + "ambiguity": ambiguity, + "parse_strategy": "llm_primary" if model_parse is not None else "rule_fallback", + "clarification_required": clarification_required, + "clarification_question": clarification_question, + "field_errors": field_errors, + } + + @staticmethod + def _should_relax_knowledge_follow_up_clarification( + *, + compact_query: str, + scenario: str, + context_json: dict[str, Any], + missing_slots: list[str], + ) -> bool: + if scenario != "knowledge" or "expense_type" not in missing_slots: + return False + history = context_json.get("conversation_history") + if not isinstance(history, list): + return False + has_previous_user_turn = any( + isinstance(item, dict) + and 
str(item.get("role") or "").strip() == "user" + and str(item.get("content") or "").strip() + for item in history + ) + if not has_previous_user_turn: + return False + follow_up_markers = ("那", "那么", "这个", "这种", "呢", "的话", "p", "P") + return any(marker in compact_query for marker in follow_up_markers) + + def _record_semantic_parse( + self, + *, + run_id: str, + payload: OntologyParseRequest, + analyzed: dict[str, object], + ) -> None: + self.run_service.record_semantic_parse( + run_id=run_id, + user_id=payload.user_id, + raw_query=payload.query.strip(), + scenario=str(analyzed["scenario"]), + intent=str(analyzed["intent"]), + entities_json=[item.model_dump() for item in analyzed["entities"]], + time_range_json=analyzed["time_range"].model_dump(), + metrics_json=[item.model_dump() for item in analyzed["metrics"]], + constraints_json=[item.model_dump() for item in analyzed["constraints"]], + risk_flags_json=list(analyzed["risk_flags"]), + permission_json=analyzed["permission"].model_dump(), + confidence=float(analyzed["confidence"]), + ) + logger.info( + "Parsed ontology run_id=%s scenario=%s intent=%s permission=%s", + run_id, + analyzed["scenario"], + analyzed["intent"], + analyzed["permission"].level, + ) + + @staticmethod + def _build_ontology_json(analyzed: dict[str, object]) -> dict[str, object]: + return { + "scenario": analyzed["scenario"], + "intent": analyzed["intent"], + "entities": [item.model_dump() for item in analyzed["entities"]], + "time_range": analyzed["time_range"].model_dump(), + "metrics": [item.model_dump() for item in analyzed["metrics"]], + "constraints": [item.model_dump() for item in analyzed["constraints"]], + "risk_flags": list(analyzed["risk_flags"]), + "permission": analyzed["permission"].model_dump(), + "missing_slots": list(analyzed["missing_slots"]), + "ambiguity": list(analyzed["ambiguity"]), + "parse_strategy": analyzed["parse_strategy"], + "confidence": analyzed["confidence"], + } + + @staticmethod + def _build_result(analyzed: 
dict[str, object], run_id: str) -> OntologyParseResult: + return OntologyParseResult( + scenario=analyzed["scenario"], + intent=analyzed["intent"], + entities=analyzed["entities"], + time_range=analyzed["time_range"], + metrics=analyzed["metrics"], + constraints=analyzed["constraints"], + risk_flags=analyzed["risk_flags"], + permission=analyzed["permission"], + confidence=analyzed["confidence"], + missing_slots=analyzed["missing_slots"], + ambiguity=analyzed["ambiguity"], + parse_strategy=analyzed["parse_strategy"], + clarification_required=analyzed["clarification_required"], + clarification_question=analyzed["clarification_question"], + run_id=run_id, + field_errors=analyzed["field_errors"], + ) + + def _load_reference_catalog(self) -> ReferenceCatalog: + employees = self._read_distinct_values(select(Employee.name)) + departments = self._read_distinct_values(select(OrganizationUnit.name)) + departments += self._read_distinct_values(select(ExpenseClaim.department_name)) + customers = self._read_distinct_values(select(AccountsReceivableRecord.customer_name)) + vendors = self._read_distinct_values(select(AccountsPayableRecord.vendor_name)) + projects = self._read_distinct_values(select(ExpenseClaim.project_code)) + + return ReferenceCatalog( + employees=self._dedupe_and_sort(employees), + departments=self._dedupe_and_sort(departments), + customers=self._dedupe_and_sort(customers), + vendors=self._dedupe_and_sort(vendors), + projects=self._dedupe_and_sort(projects), + ) + + def _read_distinct_values(self, stmt) -> list[str]: + values = self.db.scalars(stmt.distinct()).all() + return [str(item).strip() for item in values if item] + + @staticmethod + def _dedupe_and_sort(values: list[str]) -> list[str]: + items = {str(item).strip() for item in values if str(item).strip()} + return sorted(items, key=lambda item: (-len(item), item)) + + @staticmethod + def _compact(text: str) -> str: + return re.sub(r"\s+", "", text).lower() + @staticmethod def 
_resolve_context_scenario(context_json: dict[str, Any]) -> str | None: value = str(context_json.get("conversation_scenario") or "").strip() @@ -703,1184 +413,10 @@ class SemanticOntologyService: if str(context_json.get("draft_claim_id") or "").strip(): return "expense" return None - - @staticmethod - def _resolve_session_type_scenario(context_json: dict[str, Any]) -> str | None: - value = str(context_json.get("session_type") or "").strip() - if value == "knowledge": - return "knowledge" - return None - - - def _detect_scenario(self, compact_query: str) -> tuple[str, float]: - scores = {key: 0.0 for key in SCENARIO_KEYWORDS} - for scenario, keywords in SCENARIO_KEYWORDS.items(): - for keyword, weight in keywords: - if keyword in compact_query: - scores[scenario] += weight - - best_scenario = max(scores, key=scores.get) - best_score = scores[best_scenario] - if best_score <= 0: - if "单据" in compact_query and any( - keyword in compact_query for keyword in STATUS_KEYWORDS - ): - return "expense", 0.14 - return "unknown", 0.0 - - if best_scenario == "knowledge": - business_scores = [ - scores["expense"], - scores["accounts_receivable"], - scores["accounts_payable"], - ] - if max(business_scores) > 0: - best_scenario = ("expense", "accounts_receivable", "accounts_payable")[ - business_scores.index(max(business_scores)) - ] - best_score = max(business_scores) - - return best_scenario, round(min(best_score, 0.34), 2) - - def _detect_intent( - self, - compact_query: str, - *, - scenario: str, - entities: list[OntologyEntity], - time_range: OntologyTimeRange, - ) -> tuple[str, float]: - if any(keyword in compact_query for keyword in OPERATE_KEYWORDS): - return "operate", 0.30 - status_document_query = ( - "单据" in compact_query - and any(keyword in compact_query for keyword in STATUS_KEYWORDS) - and not any(keyword in compact_query for keyword in DRAFT_KEYWORDS if keyword != "草稿") - ) - historical_document_query = any( - keyword in compact_query - for keyword in ("报销的单据", 
"报销单据", "报销过的单据", "报销记录") - ) - if scenario == "expense" and any( - keyword in compact_query - for keyword in ( - "报销了吗", - "报销了么", - "报销了没", - "报销了没有", - "报销没", - "单据状态", - "审批状态", - "报销进度", - "到哪了", - "到了哪", - "有没有报销", - "是否报销", - "进行中的单据", - "草稿单据", - "草稿的单据", - "待补充单据", - "审批中的单据", - "已提交单据", - "已入账单据", - ) - ) or (scenario == "expense" and (status_document_query or historical_document_query)): - return "query", 0.24 - if any(keyword in compact_query for keyword in DRAFT_KEYWORDS): - return "draft", 0.26 - if scenario == "expense" and self._is_generic_expense_prompt(compact_query): - return "draft", 0.24 - if any(keyword in compact_query for keyword in COMPARE_KEYWORDS): - return "compare", 0.24 - if any(keyword in compact_query for keyword in EXPLAIN_KEYWORDS): - return "explain", 0.22 - if any(keyword in compact_query for keyword in RISK_KEYWORDS): - return "risk_check", 0.24 - if any(keyword in compact_query for keyword in QUERY_KEYWORDS): - return "query", 0.20 - if self._looks_like_expense_narrative( - compact_query, - scenario=scenario, - entities=entities, - time_range=time_range, - ): - return "draft", 0.22 - return "query", 0.10 - - @staticmethod - def _looks_like_follow_up_message(compact_query: str) -> bool: - if not compact_query: - return False - if any(keyword in compact_query for keyword in DRAFT_FOLLOW_UP_KEYWORDS): - return True - if compact_query.startswith(("那", "这", "它", "这个", "那个")): - return True - - has_domain_keyword = any( - keyword in compact_query - for keyword, _weight in ( - *SCENARIO_KEYWORDS["expense"], - *SCENARIO_KEYWORDS["accounts_receivable"], - *SCENARIO_KEYWORDS["accounts_payable"], - *SCENARIO_KEYWORDS["knowledge"], - ) - ) - return len(compact_query) <= 12 and not has_domain_keyword - - def _should_inherit_expense_draft( - self, - compact_query: str, - *, - scenario: str, - entities: list[OntologyEntity], - time_range: OntologyTimeRange, - context_json: dict[str, Any], - ) -> bool: - context_scenario = 
self._resolve_context_scenario(context_json) - draft_claim_id = str(context_json.get("draft_claim_id") or "").strip() - review_action = str(context_json.get("review_action") or "").strip() - if review_action in EXPENSE_REVIEW_ACTIONS: - return True - if context_scenario != "expense" and not draft_claim_id: - return False - - if any(keyword in compact_query for keyword in DRAFT_FOLLOW_UP_KEYWORDS): - return True - if self._looks_like_expense_narrative( - compact_query, - scenario="expense", - entities=entities, - time_range=time_range, - ): - return True - if self._looks_like_follow_up_message(compact_query): - return True - - if any(keyword in compact_query for keyword in OPERATE_KEYWORDS): - return False - if any(keyword in compact_query for keyword in COMPARE_KEYWORDS + RISK_KEYWORDS): - return False - if any(keyword in compact_query for keyword in QUERY_KEYWORDS): - return False - - return bool( - draft_claim_id - and any( - item.type - in {"amount", "customer", "employee", "expense_type", "project", "invoice"} - for item in entities - ) - ) - - @staticmethod - def _is_generic_expense_prompt(compact_query: str) -> bool: - return compact_query in GENERIC_EXPENSE_PROMPTS - - @staticmethod - def _looks_like_expense_narrative( - compact_query: str, - *, - scenario: str, - entities: list[OntologyEntity], - time_range: OntologyTimeRange, - ) -> bool: - if scenario not in {"expense", "accounts_receivable", "accounts_payable", "unknown"}: - return False - - if any(keyword in compact_query for keyword in AR_CORE_KEYWORDS + AP_CORE_KEYWORDS): - return False - - entity_types = {item.type for item in entities} - has_expense_signal = any( - keyword in compact_query for keyword in EXPENSE_NARRATIVE_KEYWORDS - ) or "expense_type" in entity_types - has_context_signal = bool(time_range.start_date) or "amount" in entity_types - - return has_expense_signal and has_context_signal - - def _parse_with_model( - self, - *, - payload: OntologyParseRequest, - query: str, - compact_query: 
str, - fallback_scenario: str, - fallback_intent: str, - entities: list[OntologyEntity], - time_range: OntologyTimeRange, - metrics: list[OntologyMetric], - constraints: list[OntologyConstraint], - ) -> LlmOntologyParseResult | None: - messages = self._build_model_messages( - payload=payload, - query=query, - compact_query=compact_query, - fallback_scenario=fallback_scenario, - fallback_intent=fallback_intent, - entities=entities, - time_range=time_range, - metrics=metrics, - constraints=constraints, - ) - response_text = self.runtime_chat_service.complete( - messages, - max_tokens=600, - temperature=0.0, - ) - payload_json = self._extract_json_payload(response_text) - if payload_json is None: - return None - - try: - return LlmOntologyParseResult.model_validate(payload_json) - except ValidationError as exc: - logger.warning("Semantic model output validation failed: %s", exc) - return None - - @staticmethod - def _build_model_messages( - *, - payload: OntologyParseRequest, - query: str, - compact_query: str, - fallback_scenario: str, - fallback_intent: str, - entities: list[OntologyEntity], - time_range: OntologyTimeRange, - metrics: list[OntologyMetric], - constraints: list[OntologyConstraint], - ) -> list[dict[str, str]]: - facts = { - "query": query, - "compact_query": compact_query, - "context": { - "entry_source": payload.context_json.get("entry_source"), - "attachment_names": payload.context_json.get("attachment_names", []), - "attachment_count": payload.context_json.get("attachment_count", 0), - "ocr_summary": payload.context_json.get("ocr_summary", ""), - "ocr_documents": payload.context_json.get("ocr_documents", []), - "request_context": payload.context_json.get("request_context"), - "role_codes": payload.context_json.get("role_codes", []), - "conversation_id": payload.context_json.get("conversation_id"), - "conversation_scenario": payload.context_json.get("conversation_scenario"), - "conversation_intent": payload.context_json.get("conversation_intent"), - 
"draft_claim_id": payload.context_json.get("draft_claim_id"), - "review_action": payload.context_json.get("review_action"), - "review_form_values": payload.context_json.get("review_form_values"), - "conversation_history": payload.context_json.get("conversation_history", []), - }, - "rule_candidates": { - "scenario": fallback_scenario, - "intent": fallback_intent, - "entities": [item.model_dump(mode="json") for item in entities], - "time_range": time_range.model_dump(mode="json"), - "metrics": [item.model_dump(mode="json") for item in metrics], - "constraints": [item.model_dump(mode="json") for item in constraints], - }, - } - - system_prompt = ( - "你是企业财务共享平台的语义解析器。" - "你的任务是把用户输入解析为固定 JSON,用于后续路由、追问和权限判断。" - "只输出 JSON 对象,不要输出 Markdown、代码块、解释、标题或 。" - "场景 scenario 只能是:expense, accounts_receivable, " - "accounts_payable, knowledge, unknown。" - "意图 intent 只能是:query, explain, compare, risk_check, draft, operate。" - "如果用户是在描述一笔待处理费用、待报销事项、上传票据或希望整理报销," - "即使没有明确说“生成草稿”,也优先使用 expense + draft。" - "如果提供了 conversation_history,必须把最近轮次作为当前追问的上下文," - "正确理解“这个”“那笔”“改成 800”“继续补充”这类省略表达。" - "出现“客户”不等于应收,出现“供应商”不等于应付,必须结合动作词和业务目标判断。" - "只有明确查询、统计、列出、多少、明细、对比时才优先使用 query 或 compare。" - "附件名称和 OCR 摘要只作为辅助证据,不能编造未出现的事实。" - "信息不足时 clarification_required=true,并给出一句简短中文追问。" - "missing_slots 使用简短 snake_case,例如 expense_type, amount, " - "customer_name, participants, attachments。" - "entity_hints 只填写你比较确定的业务对象;如果不确定,可以返回空数组。" - ) - user_prompt = ( - "请根据以下事实输出 JSON:\n" - f"{json.dumps(facts, ensure_ascii=False, indent=2, default=str)}\n\n" - "输出格式:\n" - "{\n" - ' "scenario": "expense",\n' - ' "intent": "draft",\n' - ' "confidence": 0.88,\n' - ' "clarification_required": true,\n' - ' "clarification_question": "请补充客户单位、参与人员和票据附件。",\n' - ' "missing_slots": ["customer_name", "participants", "attachments"],\n' - ' "ambiguity": [],\n' - ' "entity_hints": [\n' - ' {"type": "expense_type", "value": "招待", ' - '"normalized_value": "entertainment", "role": "filter", ' - '"confidence": 0.86}\n' - " 
]\n" - "}" - ) - return [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt}, - ] - - @staticmethod - def _extract_json_payload(response_text: str | None) -> dict[str, Any] | None: - if not response_text: - return None - - cleaned = re.sub(r".*?", "", response_text, flags=re.DOTALL | re.IGNORECASE) - cleaned = cleaned.strip() - if not cleaned: - return None - - fenced_match = re.search(r"```(?:json)?\s*(\{.*\})\s*```", cleaned, flags=re.DOTALL) - candidates = [fenced_match.group(1)] if fenced_match else [] - candidates.extend([cleaned]) - - start = cleaned.find("{") - end = cleaned.rfind("}") - if start != -1 and end != -1 and end > start: - candidates.append(cleaned[start : end + 1]) - - for candidate in candidates: - try: - parsed = json.loads(candidate) - except json.JSONDecodeError: - continue - if isinstance(parsed, dict): - return parsed - - return None - - @staticmethod - def _resolve_scenario( - fallback_scenario: str, - model_parse: LlmOntologyParseResult | None, - ) -> str: - if model_parse is None: - return fallback_scenario - if model_parse.scenario == "unknown" and fallback_scenario != "unknown": - return fallback_scenario - return model_parse.scenario - - def _resolve_intent( - self, - compact_query: str, - *, - fallback_intent: str, - scenario: str, - entities: list[OntologyEntity], - time_range: OntologyTimeRange, - model_parse: LlmOntologyParseResult | None, - ) -> str: - candidate = model_parse.intent if model_parse is not None else fallback_intent - if scenario == "knowledge": - if candidate in KNOWLEDGE_INTENTS: - return candidate - if fallback_intent in KNOWLEDGE_INTENTS: - return fallback_intent - return "query" - if candidate == "query" and scenario == "expense": - if self._is_generic_expense_prompt(compact_query) or fallback_intent == "draft": - return "draft" - return candidate - - @staticmethod - def _merge_entities( - base_entities: list[OntologyEntity], - entity_hints: list[LlmOntologyEntityHint], 
- ) -> list[OntologyEntity]: - merged: dict[tuple[str, str], OntologyEntity] = { - (item.type, item.normalized_value): item for item in base_entities - } - - for hint in entity_hints: - value = str(hint.value or "").strip() - if not value: - continue - normalized_value = str(hint.normalized_value or value).strip() - key = (str(hint.type).strip(), normalized_value) - candidate = OntologyEntity( - type=str(hint.type).strip(), - value=value, - normalized_value=normalized_value, - role=str(hint.role or "target").strip() or "target", - confidence=float(hint.confidence), - ) - existing = merged.get(key) - if existing is None or existing.confidence < candidate.confidence: - merged[key] = candidate - - return list(merged.values()) - - @staticmethod - def _normalize_short_text_list(values: list[str]) -> list[str]: - normalized: list[str] = [] - seen: set[str] = set() - for value in values: - cleaned = str(value or "").strip() - if not cleaned or cleaned in seen: - continue - normalized.append(cleaned) - seen.add(cleaned) - return normalized[:6] - - def _infer_default_missing_slots( - self, - compact_query: str, - *, - scenario: str, - intent: str, - entities: list[OntologyEntity], - time_range: OntologyTimeRange, - context_json: dict[str, Any], - ) -> list[str]: - if scenario != "expense" or intent != "draft": - return [] - - entity_types = {item.type for item in entities} - attachment_count = int(context_json.get("attachment_count") or 0) - missing_slots: list[str] = [] - - if self._is_generic_expense_prompt(compact_query): - if "expense_type" not in entity_types: - missing_slots.append("expense_type") - if "amount" not in entity_types: - missing_slots.append("amount") - if not time_range.start_date: - missing_slots.append("time_range") - missing_slots.append("reason") - if attachment_count <= 0: - missing_slots.append("attachments") - return missing_slots - - if any( - item.normalized_value == "entertainment" - for item in entities - if item.type == "expense_type" - ): - 
if "customer" not in entity_types: - missing_slots.append("customer_name") - missing_slots.append("participants") - if attachment_count <= 0: - missing_slots.append("attachments") - - return missing_slots - - @staticmethod - def _resolve_confidence( - *, - model_confidence: float | None, - fallback_confidence: float, - clarification_required: bool, - permission: OntologyPermission, - ) -> float: - confidence = fallback_confidence if model_confidence is None else float(model_confidence) - confidence = max(0.0, min(confidence, 0.98)) - if permission.level == AgentPermissionLevel.FORBIDDEN.value: - confidence = max(confidence, 0.86) - if clarification_required and permission.level != AgentPermissionLevel.FORBIDDEN.value: - confidence = min(confidence, 0.58) - return round(confidence, 2) - - def _extract_entities( - self, - query: str, - compact_query: str, - reference: ReferenceCatalog, - ) -> list[OntologyEntity]: - entities: dict[tuple[str, str], OntologyEntity] = {} - - def upsert(entity: OntologyEntity) -> None: - key = (entity.type, entity.normalized_value) - if key not in entities: - entities[key] = entity - - for match in re.finditer(r"客户\s*([A-Za-z0-9一二三四五六七八九十]+)", query): - suffix = match.group(1).strip() - normalized = f"客户{suffix}".replace(" ", "") - upsert(self._make_entity("customer", match.group(0).strip(), normalized, role="filter")) - labeled_customer_match = re.search(r"客户名称[::]\s*(?P[^\n,。;]+)", query) - if labeled_customer_match: - customer_name = labeled_customer_match.group("name").strip() - upsert(self._make_entity("customer", customer_name, customer_name, role="filter")) - - for match in re.finditer(r"供应商\s*([A-Za-z0-9一二三四五六七八九十]+)", query): - suffix = match.group(1).strip() - normalized = f"供应商{suffix}".replace(" ", "") - upsert(self._make_entity("vendor", match.group(0).strip(), normalized, role="filter")) - - employee_match = re.search( - r"(?P[赵钱孙李周吴郑王冯陈褚卫蒋沈韩杨朱秦许何吕施张孔曹严华金魏陶姜" - r"戚谢邹喻柏水窦章云苏潘葛范彭郎鲁韦昌马苗凤花方俞任袁柳鲍史唐费廉岑" - 
r"薛雷贺倪汤滕殷罗毕郝邬安常乐于时傅卞康伍余元卜顾孟平黄和穆萧尹姚邵" - r"湛汪祁毛禹狄米贝明臧计成戴宋庞熊纪舒屈项祝董梁杜阮蓝闵席季强贾路江" - r"童颜郭梅盛林钟徐邱骆高夏蔡田樊胡凌霍虞万支柯管卢莫房裘缪解应宗丁宣" - r"邓洪包左石崔吉龚程嵇邢裴陆荣翁荀羊惠甄曲家封芮储靳汲邴糜松井段富巫" - r"乌焦巴弓牧隗山谷车侯伊宫宁仇栾刘景詹束龙叶司黎薄印白怀蒲邰从鄂索咸" - r"籍卓蔺屠蒙池乔阴胥能苍双闻莘党翟谭贡姬申扶堵冉宰郦雍桑桂牛寿通边扈" - r"燕冀浦尚农温别庄晏柴瞿阎连茹习艾容向古易慎戈廖庾终暨居衡步都耿满弘" - r"匡国文寇广禄阙东欧殳沃利蔚越夔隆师巩聂晁勾敖融冷辛阚那简饶曾关蒯相" - r"查后荆游竺权盖益桓公][\u4e00-\u9fa5]{1,2})(?=\s*(?:\d{4}年|\d{1,2}月|本月|" - r"上月|本周|报销|差旅|费用|申请))", - query, - ) - if employee_match: - name = employee_match.group("name") - upsert(self._make_entity("employee", name, name, role="filter")) - - for name in reference.employees: - if self._compact(name) in compact_query: - upsert(self._make_entity("employee", name, name, role="filter")) - for name in reference.departments: - if self._compact(name) in compact_query: - upsert(self._make_entity("department", name, name, role="filter")) - for name in reference.customers: - if self._compact(name) in compact_query: - upsert(self._make_entity("customer", name, name, role="filter")) - for name in reference.vendors: - if self._compact(name) in compact_query: - upsert(self._make_entity("vendor", name, name, role="filter")) - for code in reference.projects: - if self._compact(code) in compact_query: - upsert(self._make_entity("project", code, code, role="filter")) - - for code in re.findall(r"PRJ-[A-Z]+-\d+", query, flags=re.IGNORECASE): - upsert(self._make_entity("project", code, code.upper(), role="filter")) - for code in re.findall(r"EXP-\d{6}-\d{3}", query, flags=re.IGNORECASE): - upsert(self._make_entity("expense_claim", code, code.upper())) - for code in re.findall(r"AR-\d{6}-\d{3}", query, flags=re.IGNORECASE): - upsert(self._make_entity("receivable", code, code.upper())) - for code in re.findall(r"AP-\d{6}-\d{3}", query, flags=re.IGNORECASE): - upsert(self._make_entity("payable", code, code.upper())) - for code in re.findall(r"INV-[A-Z]+-\d+", query, flags=re.IGNORECASE): - upsert(self._make_entity("invoice", code, code.upper())) - for code in re.findall(r"CTR-[A-Z]+-\d+", 
query, flags=re.IGNORECASE): - upsert(self._make_entity("contract", code, code.upper())) - for location in LOCATION_KEYWORDS: - if location in query: - upsert(self._make_entity("location", location, location, role="filter", confidence=0.86)) - for label, normalized in EXPENSE_TYPE_KEYWORDS.items(): - if label in query: - upsert(self._make_entity("expense_type", label, normalized, role="filter")) - - has_customer_entertainment_signal = "客户" in query and any( - keyword in query for keyword in ("吃饭", "用餐", "餐饮", "宴请", "请客", "招待") - ) - if has_customer_entertainment_signal: - upsert( - self._make_entity( - "expense_type", - "客户招待", - "entertainment", - role="filter", - confidence=0.96, - ) - ) - - if any( - keyword in query - for keyword in ("打车", "网约车", "出租车", "车费", "乘车", "用车", "叫车", "车资", "停车费", "过路费") - ): - upsert(self._make_entity("expense_type", "交通", "transport", role="filter", confidence=0.9)) - - if any(keyword in query for keyword in ("出差", "机票", "火车", "高铁", "行程单")): - upsert(self._make_entity("expense_type", "差旅", "travel", role="filter", confidence=0.88)) - - if any(keyword in query for keyword in ("酒店", "住宿", "宾馆")): - upsert(self._make_entity("expense_type", "住宿", "hotel", role="filter", confidence=0.86)) - - if ( - not has_customer_entertainment_signal - and any(keyword in query for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "餐饮")) - ): - upsert(self._make_entity("expense_type", "餐费", "meal", role="filter", confidence=0.84)) - - if any( - keyword in query - for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板") - ): - upsert(self._make_entity("expense_type", "办公费", "office", role="filter", confidence=0.87)) - - if any(keyword in query for keyword in ("培训", "讲师费", "课时费", "课程费")): - upsert(self._make_entity("expense_type", "培训费", "training", role="filter", confidence=0.84)) - - if any(keyword in query for keyword in ("通讯费", "话费", "流量费", "宽带费")): - upsert(self._make_entity("expense_type", "通讯费", "communication", role="filter", 
confidence=0.84)) - - if any(keyword in query for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")): - upsert(self._make_entity("expense_type", "福利费", "welfare", role="filter", confidence=0.84)) - - for amount in self._extract_amount_entities(query): - upsert(amount) - - return list(entities.values()) - - def _extract_amount_entities(self, query: str) -> list[OntologyEntity]: - entities: list[OntologyEntity] = [] - for match in AMOUNT_PATTERN.finditer(query): - raw_value = match.group("value") - unit = match.group("unit") - prefix = match.group("prefix") - if raw_value is None: - continue - if prefix is None and unit is None: - continue - - amount_value = self._normalize_amount(raw_value, unit) - display_value = f"{raw_value}{unit or ''}" - role = "threshold" if prefix else "target" - entities.append( - self._make_entity( - "amount", - display_value, - str(amount_value), - role=role, - confidence=0.9, - ) - ) - return entities - - @staticmethod - def _make_entity( - entity_type: str, - value: str, - normalized_value: str, - *, - role: str = "target", - confidence: float = 0.92, - ) -> OntologyEntity: - return OntologyEntity( - type=entity_type, - value=value, - normalized_value=normalized_value, - role=role, - confidence=confidence, - ) - - @staticmethod - def _infer_scenario_from_entities(entities: list[OntologyEntity]) -> str | None: - entity_types = {item.type for item in entities} - if entity_types & {"vendor", "payable"}: - return "accounts_payable" - if entity_types & {"customer", "receivable", "contract"}: - return "accounts_receivable" - if entity_types & {"employee", "expense_claim", "expense_type"}: - return "expense" - return None - - def _extract_time_range( - self, - query: str, - compact_query: str, - *, - context_json: dict[str, Any], - ) -> tuple[OntologyTimeRange, float]: - today = self._resolve_reference_today(context_json) - - direct_mappings = [ - ("大前天", self._single_day_range(today - timedelta(days=3), "大前天", "day")), - ("前天", 
self._single_day_range(today - timedelta(days=2), "前天", "day")), - ("昨日", self._single_day_range(today - timedelta(days=1), "昨日", "day")), - ("昨天", self._single_day_range(today - timedelta(days=1), "昨天", "day")), - ("今天", self._single_day_range(today, "今天", "day")), - ("明天", self._single_day_range(today + timedelta(days=1), "明天", "day")), - ("后天", self._single_day_range(today + timedelta(days=2), "后天", "day")), - ("大后天", self._single_day_range(today + timedelta(days=3), "大后天", "day")), - ] - for keyword, value in direct_mappings: - if keyword in query: - return value, 0.10 - - if "本周" in query or "这周" in query or "本星期" in query: - start = today - timedelta(days=today.weekday()) - end = start + timedelta(days=6) - return self._range(start, end, "本周", "week"), 0.10 - if "上周" in query: - end = today - timedelta(days=today.weekday() + 1) - start = end - timedelta(days=6) - return self._range(start, end, "上周", "week"), 0.10 - if "本月" in query or "这个月" in query: - start = date(today.year, today.month, 1) - end = date(today.year, today.month, calendar.monthrange(today.year, today.month)[1]) - return self._range(start, end, "本月", "month"), 0.10 - if "上月" in query: - year = today.year if today.month > 1 else today.year - 1 - month = today.month - 1 if today.month > 1 else 12 - start = date(year, month, 1) - end = date(year, month, calendar.monthrange(year, month)[1]) - return self._range(start, end, "上月", "month"), 0.10 - if "本季度" in query or "这个季度" in query: - quarter = (today.month - 1) // 3 - start_month = quarter * 3 + 1 - end_month = start_month + 2 - start = date(today.year, start_month, 1) - end = date(today.year, end_month, calendar.monthrange(today.year, end_month)[1]) - return self._range(start, end, "本季度", "quarter"), 0.10 - if "今年" in query: - return ( - self._range(date(today.year, 1, 1), date(today.year, 12, 31), "今年", "year"), - 0.10, - ) - if "去年" in query or "上一年" in query: - year = today.year - 1 - return ( - self._range(date(year, 1, 1), date(year, 12, 
31), "去年", "year"), - 0.10, - ) - - match = DATE_RANGE_PATTERN.search(query) - if match: - start = self._parse_iso_date(match.group("start")) - end = self._parse_iso_date(match.group("end")) - if start and end: - return self._range(start, end, match.group(0), "custom"), 0.10 - - match = EXPLICIT_DATE_PATTERN.search(query) - if match: - explicit = date( - int(match.group("year")), - int(match.group("month")), - int(match.group("day")), - ) - return self._single_day_range(explicit, match.group(0), "day"), 0.10 - - match = EXPLICIT_MONTH_PATTERN.search(query) - if match: - year = int(match.group("year")) - month = int(match.group("month")) - start = date(year, month, 1) - end = date(year, month, calendar.monthrange(year, month)[1]) - return self._range(start, end, match.group(0), "month"), 0.10 - - match = MONTH_DAY_RANGE_PATTERN.search(query) - if match: - start = date(today.year, int(match.group("start_month")), int(match.group("start_day"))) - end = date(today.year, int(match.group("end_month")), int(match.group("end_day"))) - return self._range(start, end, match.group(0), "custom"), 0.10 - - match = MONTH_DAY_PATTERN.search(compact_query) - if match: - explicit = date(today.year, int(match.group("month")), int(match.group("day"))) - return self._single_day_range(explicit, match.group(0), "day"), 0.08 - - month_match = re.search(r"(?P\d{1,2})月", compact_query) - if month_match: - month = int(month_match.group("month")) - start = date(today.year, month, 1) - end = date(today.year, month, calendar.monthrange(today.year, month)[1]) - return self._range(start, end, month_match.group(0), "month"), 0.08 - - return OntologyTimeRange(), 0.0 - - @staticmethod - def _resolve_reference_today(context_json: dict[str, Any]) -> date: - client_now_iso = str(context_json.get("client_now_iso") or "").strip() - if not client_now_iso: - return datetime.now(UTC).date() - - normalized = client_now_iso.replace("Z", "+00:00") - try: - client_now = datetime.fromisoformat(normalized) - 
except ValueError: - return datetime.now(UTC).date() - - if client_now.tzinfo is None: - client_now = client_now.replace(tzinfo=UTC) - - try: - offset_minutes = int(context_json.get("client_timezone_offset_minutes") or 0) - except (TypeError, ValueError): - offset_minutes = 0 - - local_now = client_now - timedelta(minutes=offset_minutes) - return local_now.date() - - @staticmethod - def _single_day_range(target: date, raw: str, granularity: str) -> OntologyTimeRange: - return OntologyTimeRange( - raw=raw, - start_date=target.isoformat(), - end_date=target.isoformat(), - granularity=granularity, - ) - - @staticmethod - def _range(start: date, end: date, raw: str, granularity: str) -> OntologyTimeRange: - return OntologyTimeRange( - raw=raw, - start_date=start.isoformat(), - end_date=end.isoformat(), - granularity=granularity, - ) - - @staticmethod - def _parse_iso_date(value: str) -> date | None: - try: - return date.fromisoformat(value) - except ValueError: - return None - - def _extract_metrics(self, compact_query: str) -> list[OntologyMetric]: - metrics: dict[str, OntologyMetric] = {} - - def upsert(metric: OntologyMetric) -> None: - metrics[metric.name] = metric - - if any( - keyword in compact_query - for keyword in ("多少钱", "金额", "总额", "支出", "回款", "应收", "应付") - ): - upsert(OntologyMetric(name="amount", aggregation="sum", unit="CNY")) - if any(keyword in compact_query for keyword in ("多少笔", "几笔", "数量", "条数", "单数")): - upsert(OntologyMetric(name="count", aggregation="count", unit="records")) - if "超标" in compact_query or "超预算" in compact_query: - upsert(OntologyMetric(name="amount_over_limit")) - if "逾期" in compact_query or "账龄" in compact_query: - upsert(OntologyMetric(name="overdue")) - if "重复" in compact_query: - upsert(OntologyMetric(name="duplicate_expense")) - - top_match = TOP_N_PATTERN.search(compact_query) - if top_match: - metrics["amount"] = OntologyMetric( - name="amount", - aggregation="sum", - unit="CNY", - sort="desc" if "最低" not in compact_query 
else "asc", - top_n=int(top_match.group("top")), - ) - - return list(metrics.values()) - - def _extract_constraints( - self, - compact_query: str, - entities: list[OntologyEntity], - ) -> list[OntologyConstraint]: - constraints: dict[tuple[str, str, str, str | None], OntologyConstraint] = {} - - def upsert(constraint: OntologyConstraint) -> None: - key = ( - constraint.field, - constraint.operator, - str(constraint.value), - constraint.currency, - ) - if key not in constraints: - constraints[key] = constraint - - for entity in entities: - if entity.type in { - "employee", - "department", - "customer", - "vendor", - "project", - "location", - "expense_type", - }: - upsert( - OntologyConstraint( - field=entity.type, - operator="=", - value=entity.normalized_value, - ) - ) - - for keyword, normalized in STATUS_KEYWORDS.items(): - if keyword in compact_query: - upsert(OntologyConstraint(field="status", operator="=", value=normalized)) - - for amount_match in AMOUNT_PATTERN.finditer(compact_query): - if not amount_match.group("prefix"): - continue - - operator = self._normalize_operator(amount_match.group("prefix")) - value = self._normalize_amount(amount_match.group("value"), amount_match.group("unit")) - upsert( - OntologyConstraint( - field="amount", - operator=operator, - value=value, - currency="CNY", - ) - ) - break - - top_match = TOP_N_PATTERN.search(compact_query) - if top_match: - top_n = int(top_match.group("top")) - upsert(OntologyConstraint(field="top_n", operator="=", value=top_n)) - upsert( - OntologyConstraint( - field="sort_by", - operator="desc" if "最低" not in compact_query else "asc", - value="amount", - ) - ) - - return list(constraints.values()) - - def _extract_risk_flags(self, compact_query: str, scenario: str) -> list[str]: - risk_flags: list[str] = [] - - def append(flag: str) -> None: - if flag not in risk_flags: - risk_flags.append(flag) - - if "重复" in compact_query: - append("duplicate_expense") - if any( - keyword in compact_query - for 
keyword in ("发票异常", "票据异常", "验真失败", "附件缺失", "补件") - ): - append("invoice_anomaly") - if any(keyword in compact_query for keyword in ("超标", "超预算", "超限")): - append("amount_over_limit") - if scenario == "accounts_receivable" and any( - keyword in compact_query for keyword in ("逾期", "账龄", "欠款", "未回款") - ): - append("ar_overdue") - if scenario == "accounts_payable" and any( - keyword in compact_query for keyword in ("逾期", "待付", "付款风险", "未付款") - ): - append("ap_overdue") - - return risk_flags - - def _resolve_permission( - self, - compact_query: str, - context_json: dict, - intent: str, - ) -> OntologyPermission: - role_codes = { - str(item).strip().lower() - for item in context_json.get("role_codes", []) - if str(item).strip() - } - is_admin = bool(context_json.get("is_admin")) - privileged = is_admin or bool(role_codes & PRIVILEGED_ROLE_CODES) - - if intent in {"query", "explain", "compare", "risk_check"}: - return OntologyPermission( - level=AgentPermissionLevel.READ.value, - allowed=True, - reason="只读查询。", - ) - if intent == "draft": - return OntologyPermission( - level=AgentPermissionLevel.DRAFT_WRITE.value, - allowed=True, - reason="允许生成草稿,但不会直接提交业务动作。", - ) - - if any(keyword in compact_query for keyword in OPERATE_KEYWORDS) or "付款" in compact_query: - if privileged: - return OntologyPermission( - level=AgentPermissionLevel.APPROVAL_REQUIRED.value, - allowed=False, - reason="涉及付款、审批或上线动作,必须进入人工审批链。", - ) - return OntologyPermission( - level=AgentPermissionLevel.FORBIDDEN.value, - allowed=False, - reason="当前账号缺少财务或审批权限,只能查看结果或生成草稿。", - ) - - return OntologyPermission( - level=AgentPermissionLevel.APPROVAL_REQUIRED.value, - allowed=False, - reason="操作类请求需要人工审批确认。", - ) - - def _build_field_errors( - self, - *, - scenario: str, - intent: str, - entities: list[OntologyEntity], - permission: OntologyPermission, - missing_slots: list[str], - ambiguity: list[str], - ) -> list[OntologyFieldError]: - errors: list[OntologyFieldError] = [] - if scenario == "unknown": - 
errors.append( - OntologyFieldError( - field="scenario", - code="scenario_unknown", - message="未识别出明确业务场景,请补充是报销、应收、应付还是制度问题。", - ) - ) - if intent == "compare" and len([item for item in entities if item.type != "amount"]) < 2: - errors.append( - OntologyFieldError( - field="entities", - code="compare_target_missing", - message="对比类问题请至少给出两个对象,或给出更明确的对比范围。", - ) - ) - if missing_slots: - errors.append( - OntologyFieldError( - field="missing_slots", - code="required_slot_missing", - message=( - "继续处理前还缺少关键信息:" - f"{'、'.join(self._display_slot_label(item) for item in missing_slots)}。" - ), - ) - ) - if ambiguity: - errors.append( - OntologyFieldError( - field="ambiguity", - code="ambiguity_detected", - message=f"当前问题存在歧义:{';'.join(ambiguity)}。", - ) - ) - if permission.level == AgentPermissionLevel.FORBIDDEN.value: - errors.append( - OntologyFieldError( - field="permission", - code="permission_forbidden", - message=permission.reason, - ) - ) - return errors - - def _build_clarification( - self, - *, - scenario: str, - intent: str, - entities: list[OntologyEntity], - permission: OntologyPermission, - missing_slots: list[str], - ambiguity: list[str], - allow_incomplete_draft: bool, - model_clarification_required: bool, - model_clarification_question: str | None, - ) -> tuple[bool, str | None]: - if permission.level == AgentPermissionLevel.FORBIDDEN.value: - return True, "当前动作超出权限范围。是否改为生成草稿或建议?" - if scenario == "knowledge" and intent in {"query", "explain"}: - return False, None - if model_clarification_required: - question = str(model_clarification_question or "").strip() - if question: - return True, question - if missing_slots: - return True, self._build_missing_slot_question(missing_slots) - if ambiguity: - return True, f"当前问题存在歧义,请进一步说明:{';'.join(ambiguity)}。" - if scenario == "unknown": - return True, "请说明这是报销、应收、应付,还是制度知识问题?" 
- if intent == "compare" and len([item for item in entities if item.type != "amount"]) < 2: - return True, "请补充需要对比的两个对象,例如两个客户、两个供应商或两个员工。" - if allow_incomplete_draft and scenario == "expense" and intent == "draft": - return False, None - if missing_slots: - return True, self._build_missing_slot_question(missing_slots) - if ambiguity: - return True, f"当前问题存在歧义,请进一步说明:{';'.join(ambiguity)}。" - return False, None - - @staticmethod - def _allow_incomplete_draft( - context_json: dict[str, Any], - *, - scenario: str, - intent: str, - ) -> bool: - if scenario != "expense" or intent != "draft": - return False - review_action = str(context_json.get("review_action") or "").strip() - return review_action in EXPENSE_REVIEW_ACTIONS - - @staticmethod - def _display_slot_label(slot: str) -> str: - return MISSING_SLOT_LABELS.get(slot, slot) - - def _build_missing_slot_question(self, missing_slots: list[str]) -> str: - labels = [self._display_slot_label(item) for item in missing_slots[:4]] - if not labels: - return "请补充更多上下文后再继续。" - return f"请补充{'、'.join(labels)},我再继续帮你解析和处理。" - - @staticmethod - def _compute_confidence( - *, - scenario: str, - scenario_score: float, - intent_score: float, - entities: list[OntologyEntity], - time_range: OntologyTimeRange, - metrics: list[OntologyMetric], - constraints: list[OntologyConstraint], - risk_flags: list[str], - clarification_required: bool, - permission: OntologyPermission, - ) -> float: - confidence = 0.18 + scenario_score + intent_score - confidence += min(0.16, len(entities) * 0.04) - if time_range.start_date: - confidence += 0.10 - if metrics: - confidence += 0.06 - if constraints: - confidence += 0.06 - if risk_flags: - confidence += 0.08 - if permission.level == AgentPermissionLevel.FORBIDDEN.value: - confidence = max(confidence, 0.86) - - if scenario == "unknown": - confidence = min(confidence, 0.45) - if clarification_required and permission.level != AgentPermissionLevel.FORBIDDEN.value: - confidence = min(confidence, 0.58) - - 
return round(min(confidence, 0.98), 2) - - @staticmethod - def _build_result_summary( - scenario: str, - intent: str, - permission_level: str, - confidence: float, - ) -> str: - return ( - f"语义解析完成:scenario={scenario}, intent={intent}, " - f"permission={permission_level}, confidence={confidence:.2f}" - ) - - @staticmethod - def _normalize_operator(value: str) -> str: - mapping = { - "超过": ">", - "大于": ">", - "高于": ">", - ">": ">", - ">=": ">=", - "不少于": ">=", - "不低于": ">=", - "小于": "<", - "低于": "<", - "少于": "<", - "<": "<", - "<=": "<=", - "至多": "<=", - "不超过": "<=", - "=": "=", - "=": "=", - } - return mapping.get(value, value) - - @staticmethod - def _normalize_amount(raw_value: str | None, unit: str | None) -> int | float: - numeric = float(raw_value or 0) - if unit in {"万", "万元"}: - numeric *= 10000 - return int(numeric) if numeric.is_integer() else round(numeric, 2) + @staticmethod + def _resolve_session_type_scenario(context_json: dict[str, Any]) -> str | None: + value = str(context_json.get("session_type") or "").strip() + if value == "knowledge": + return "knowledge" + return None diff --git a/server/src/app/services/ontology_detection.py b/server/src/app/services/ontology_detection.py new file mode 100644 index 0000000..72749b9 --- /dev/null +++ b/server/src/app/services/ontology_detection.py @@ -0,0 +1,451 @@ +from __future__ import annotations + +import json +import re +from typing import Any + +from pydantic import ValidationError + +from app.core.logging import get_logger +from app.schemas.ontology import ( + OntologyConstraint, + OntologyEntity, + OntologyMetric, + OntologyParseRequest, + OntologyTimeRange, +) +from app.services.ontology_rules import ( + AR_CORE_KEYWORDS, + AP_CORE_KEYWORDS, + COMPARE_KEYWORDS, + DRAFT_FOLLOW_UP_KEYWORDS, + DRAFT_KEYWORDS, + EXPENSE_NARRATIVE_KEYWORDS, + EXPENSE_REVIEW_ACTIONS, + EXPLAIN_KEYWORDS, + GENERIC_EXPENSE_PROMPTS, + KNOWLEDGE_INTENTS, + LlmOntologyEntityHint, + LlmOntologyParseResult, + OPERATE_KEYWORDS, + 
QUERY_KEYWORDS, + RISK_KEYWORDS, + SCENARIO_KEYWORDS, + STATUS_KEYWORDS, +) + +logger = get_logger("app.services.ontology") + + +class OntologyDetectionMixin: + def _detect_scenario(self, compact_query: str) -> tuple[str, float]: + scores = {key: 0.0 for key in SCENARIO_KEYWORDS} + for scenario, keywords in SCENARIO_KEYWORDS.items(): + for keyword, weight in keywords: + if keyword in compact_query: + scores[scenario] += weight + + best_scenario = max(scores, key=scores.get) + best_score = scores[best_scenario] + if best_score <= 0: + if "单据" in compact_query and any( + keyword in compact_query for keyword in STATUS_KEYWORDS + ): + return "expense", 0.14 + return "unknown", 0.0 + + if best_scenario == "knowledge": + business_scores = [ + scores["expense"], + scores["accounts_receivable"], + scores["accounts_payable"], + ] + if max(business_scores) > 0: + best_scenario = ("expense", "accounts_receivable", "accounts_payable")[ + business_scores.index(max(business_scores)) + ] + best_score = max(business_scores) + + return best_scenario, round(min(best_score, 0.34), 2) + + def _detect_intent( + self, + compact_query: str, + *, + scenario: str, + entities: list[OntologyEntity], + time_range: OntologyTimeRange, + ) -> tuple[str, float]: + if any(keyword in compact_query for keyword in OPERATE_KEYWORDS): + return "operate", 0.30 + status_document_query = ( + "单据" in compact_query + and any(keyword in compact_query for keyword in STATUS_KEYWORDS) + and not any(keyword in compact_query for keyword in DRAFT_KEYWORDS if keyword != "草稿") + ) + historical_document_query = any( + keyword in compact_query + for keyword in ("报销的单据", "报销单据", "报销过的单据", "报销记录") + ) + if scenario == "expense" and any( + keyword in compact_query + for keyword in ( + "报销了吗", + "报销了么", + "报销了没", + "报销了没有", + "报销没", + "单据状态", + "审批状态", + "报销进度", + "到哪了", + "到了哪", + "有没有报销", + "是否报销", + "进行中的单据", + "草稿单据", + "草稿的单据", + "待补充单据", + "审批中的单据", + "已提交单据", + "已入账单据", + ) + ) or (scenario == "expense" and 
(status_document_query or historical_document_query)): + return "query", 0.24 + if any(keyword in compact_query for keyword in DRAFT_KEYWORDS): + return "draft", 0.26 + if scenario == "expense" and self._is_generic_expense_prompt(compact_query): + return "draft", 0.24 + if any(keyword in compact_query for keyword in COMPARE_KEYWORDS): + return "compare", 0.24 + if any(keyword in compact_query for keyword in EXPLAIN_KEYWORDS): + return "explain", 0.22 + if any(keyword in compact_query for keyword in RISK_KEYWORDS): + return "risk_check", 0.24 + if any(keyword in compact_query for keyword in QUERY_KEYWORDS): + return "query", 0.20 + if self._looks_like_expense_narrative( + compact_query, + scenario=scenario, + entities=entities, + time_range=time_range, + ): + return "draft", 0.22 + return "query", 0.10 + + @staticmethod + def _looks_like_follow_up_message(compact_query: str) -> bool: + if not compact_query: + return False + if any(keyword in compact_query for keyword in DRAFT_FOLLOW_UP_KEYWORDS): + return True + if compact_query.startswith(("那", "这", "它", "这个", "那个")): + return True + + has_domain_keyword = any( + keyword in compact_query + for keyword, _weight in ( + *SCENARIO_KEYWORDS["expense"], + *SCENARIO_KEYWORDS["accounts_receivable"], + *SCENARIO_KEYWORDS["accounts_payable"], + *SCENARIO_KEYWORDS["knowledge"], + ) + ) + return len(compact_query) <= 12 and not has_domain_keyword + + def _should_inherit_expense_draft( + self, + compact_query: str, + *, + scenario: str, + entities: list[OntologyEntity], + time_range: OntologyTimeRange, + context_json: dict[str, Any], + ) -> bool: + context_scenario = self._resolve_context_scenario(context_json) + draft_claim_id = str(context_json.get("draft_claim_id") or "").strip() + review_action = str(context_json.get("review_action") or "").strip() + if review_action in EXPENSE_REVIEW_ACTIONS: + return True + if context_scenario != "expense" and not draft_claim_id: + return False + + if any(keyword in compact_query for 
keyword in DRAFT_FOLLOW_UP_KEYWORDS): + return True + if self._looks_like_expense_narrative( + compact_query, + scenario="expense", + entities=entities, + time_range=time_range, + ): + return True + if self._looks_like_follow_up_message(compact_query): + return True + + if any(keyword in compact_query for keyword in OPERATE_KEYWORDS): + return False + if any(keyword in compact_query for keyword in COMPARE_KEYWORDS + RISK_KEYWORDS): + return False + if any(keyword in compact_query for keyword in QUERY_KEYWORDS): + return False + + return bool( + draft_claim_id + and any( + item.type + in {"amount", "customer", "employee", "expense_type", "project", "invoice"} + for item in entities + ) + ) + + @staticmethod + def _is_generic_expense_prompt(compact_query: str) -> bool: + return compact_query in GENERIC_EXPENSE_PROMPTS + + @staticmethod + def _looks_like_expense_narrative( + compact_query: str, + *, + scenario: str, + entities: list[OntologyEntity], + time_range: OntologyTimeRange, + ) -> bool: + if scenario not in {"expense", "accounts_receivable", "accounts_payable", "unknown"}: + return False + + if any(keyword in compact_query for keyword in AR_CORE_KEYWORDS + AP_CORE_KEYWORDS): + return False + + entity_types = {item.type for item in entities} + has_expense_signal = any( + keyword in compact_query for keyword in EXPENSE_NARRATIVE_KEYWORDS + ) or "expense_type" in entity_types + has_context_signal = bool(time_range.start_date) or "amount" in entity_types + + return has_expense_signal and has_context_signal + + def _parse_with_model( + self, + *, + payload: OntologyParseRequest, + query: str, + compact_query: str, + fallback_scenario: str, + fallback_intent: str, + entities: list[OntologyEntity], + time_range: OntologyTimeRange, + metrics: list[OntologyMetric], + constraints: list[OntologyConstraint], + ) -> LlmOntologyParseResult | None: + messages = self._build_model_messages( + payload=payload, + query=query, + compact_query=compact_query, + 
fallback_scenario=fallback_scenario, + fallback_intent=fallback_intent, + entities=entities, + time_range=time_range, + metrics=metrics, + constraints=constraints, + ) + response_text = self.runtime_chat_service.complete( + messages, + max_tokens=600, + temperature=0.0, + ) + payload_json = self._extract_json_payload(response_text) + if payload_json is None: + return None + + try: + return LlmOntologyParseResult.model_validate(payload_json) + except ValidationError as exc: + logger.warning("Semantic model output validation failed: %s", exc) + return None + + @staticmethod + def _build_model_messages( + *, + payload: OntologyParseRequest, + query: str, + compact_query: str, + fallback_scenario: str, + fallback_intent: str, + entities: list[OntologyEntity], + time_range: OntologyTimeRange, + metrics: list[OntologyMetric], + constraints: list[OntologyConstraint], + ) -> list[dict[str, str]]: + facts = { + "query": query, + "compact_query": compact_query, + "context": { + "entry_source": payload.context_json.get("entry_source"), + "attachment_names": payload.context_json.get("attachment_names", []), + "attachment_count": payload.context_json.get("attachment_count", 0), + "ocr_summary": payload.context_json.get("ocr_summary", ""), + "ocr_documents": payload.context_json.get("ocr_documents", []), + "request_context": payload.context_json.get("request_context"), + "role_codes": payload.context_json.get("role_codes", []), + "conversation_id": payload.context_json.get("conversation_id"), + "conversation_scenario": payload.context_json.get("conversation_scenario"), + "conversation_intent": payload.context_json.get("conversation_intent"), + "draft_claim_id": payload.context_json.get("draft_claim_id"), + "review_action": payload.context_json.get("review_action"), + "review_form_values": payload.context_json.get("review_form_values"), + "conversation_history": payload.context_json.get("conversation_history", []), + }, + "rule_candidates": { + "scenario": fallback_scenario, + 
"intent": fallback_intent, + "entities": [item.model_dump(mode="json") for item in entities], + "time_range": time_range.model_dump(mode="json"), + "metrics": [item.model_dump(mode="json") for item in metrics], + "constraints": [item.model_dump(mode="json") for item in constraints], + }, + } + + system_prompt = ( + "你是企业财务共享平台的语义解析器。" + "你的任务是把用户输入解析为固定 JSON,用于后续路由、追问和权限判断。" + "只输出 JSON 对象,不要输出 Markdown、代码块、解释、标题或 。" + "场景 scenario 只能是:expense, accounts_receivable, " + "accounts_payable, knowledge, unknown。" + "意图 intent 只能是:query, explain, compare, risk_check, draft, operate。" + "如果用户是在描述一笔待处理费用、待报销事项、上传票据或希望整理报销," + "即使没有明确说“生成草稿”,也优先使用 expense + draft。" + "如果提供了 conversation_history,必须把最近轮次作为当前追问的上下文," + "正确理解“这个”“那笔”“改成 800”“继续补充”这类省略表达。" + "出现“客户”不等于应收,出现“供应商”不等于应付,必须结合动作词和业务目标判断。" + "只有明确查询、统计、列出、多少、明细、对比时才优先使用 query 或 compare。" + "附件名称和 OCR 摘要只作为辅助证据,不能编造未出现的事实。" + "信息不足时 clarification_required=true,并给出一句简短中文追问。" + "missing_slots 使用简短 snake_case,例如 expense_type, amount, " + "customer_name, participants, attachments。" + "entity_hints 只填写你比较确定的业务对象;如果不确定,可以返回空数组。" + ) + user_prompt = ( + "请根据以下事实输出 JSON:\n" + f"{json.dumps(facts, ensure_ascii=False, indent=2, default=str)}\n\n" + "输出格式:\n" + "{\n" + ' "scenario": "expense",\n' + ' "intent": "draft",\n' + ' "confidence": 0.88,\n' + ' "clarification_required": true,\n' + ' "clarification_question": "请补充客户单位、参与人员和票据附件。",\n' + ' "missing_slots": ["customer_name", "participants", "attachments"],\n' + ' "ambiguity": [],\n' + ' "entity_hints": [\n' + ' {"type": "expense_type", "value": "招待", ' + '"normalized_value": "entertainment", "role": "filter", ' + '"confidence": 0.86}\n' + " ]\n" + "}" + ) + return [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ] + + @staticmethod + def _extract_json_payload(response_text: str | None) -> dict[str, Any] | None: + if not response_text: + return None + + cleaned = re.sub(r".*?", "", response_text, flags=re.DOTALL | re.IGNORECASE) + 
cleaned = cleaned.strip() + if not cleaned: + return None + + fenced_match = re.search(r"```(?:json)?\s*(\{.*\})\s*```", cleaned, flags=re.DOTALL) + candidates = [fenced_match.group(1)] if fenced_match else [] + candidates.extend([cleaned]) + + start = cleaned.find("{") + end = cleaned.rfind("}") + if start != -1 and end != -1 and end > start: + candidates.append(cleaned[start : end + 1]) + + for candidate in candidates: + try: + parsed = json.loads(candidate) + except json.JSONDecodeError: + continue + if isinstance(parsed, dict): + return parsed + + return None + + @staticmethod + def _resolve_scenario( + fallback_scenario: str, + model_parse: LlmOntologyParseResult | None, + ) -> str: + if model_parse is None: + return fallback_scenario + if model_parse.scenario == "unknown" and fallback_scenario != "unknown": + return fallback_scenario + return model_parse.scenario + + def _resolve_intent( + self, + compact_query: str, + *, + fallback_intent: str, + scenario: str, + entities: list[OntologyEntity], + time_range: OntologyTimeRange, + model_parse: LlmOntologyParseResult | None, + ) -> str: + candidate = model_parse.intent if model_parse is not None else fallback_intent + if scenario == "knowledge": + if candidate in KNOWLEDGE_INTENTS: + return candidate + if fallback_intent in KNOWLEDGE_INTENTS: + return fallback_intent + return "query" + if candidate == "query" and scenario == "expense": + if self._is_generic_expense_prompt(compact_query) or fallback_intent == "draft": + return "draft" + return candidate + + @staticmethod + def _merge_entities( + base_entities: list[OntologyEntity], + entity_hints: list[LlmOntologyEntityHint], + ) -> list[OntologyEntity]: + merged: dict[tuple[str, str], OntologyEntity] = { + (item.type, item.normalized_value): item for item in base_entities + } + + for hint in entity_hints: + value = str(hint.value or "").strip() + if not value: + continue + normalized_value = str(hint.normalized_value or value).strip() + key = 
(str(hint.type).strip(), normalized_value) + candidate = OntologyEntity( + type=str(hint.type).strip(), + value=value, + normalized_value=normalized_value, + role=str(hint.role or "target").strip() or "target", + confidence=float(hint.confidence), + ) + existing = merged.get(key) + if existing is None or existing.confidence < candidate.confidence: + merged[key] = candidate + + return list(merged.values()) + + @staticmethod + def _normalize_short_text_list(values: list[str]) -> list[str]: + normalized: list[str] = [] + seen: set[str] = set() + for value in values: + cleaned = str(value or "").strip() + if not cleaned or cleaned in seen: + continue + normalized.append(cleaned) + seen.add(cleaned) + return normalized[:6] diff --git a/server/src/app/services/ontology_extraction.py b/server/src/app/services/ontology_extraction.py new file mode 100644 index 0000000..8789fa6 --- /dev/null +++ b/server/src/app/services/ontology_extraction.py @@ -0,0 +1,529 @@ +from __future__ import annotations + +import calendar +import re +from datetime import UTC, date, datetime, timedelta +from typing import Any + +from app.core.agent_enums import AgentPermissionLevel +from app.schemas.ontology import ( + OntologyConstraint, + OntologyEntity, + OntologyMetric, + OntologyPermission, + OntologyTimeRange, +) +from app.services.ontology_rules import ( + AMOUNT_PATTERN, + DATE_RANGE_PATTERN, + EXPLICIT_DATE_PATTERN, + EXPLICIT_MONTH_PATTERN, + EXPENSE_TYPE_KEYWORDS, + GENERIC_EXPENSE_PROMPTS, + LOCATION_KEYWORDS, + MONTH_DAY_PATTERN, + MONTH_DAY_RANGE_PATTERN, + ReferenceCatalog, + STATUS_KEYWORDS, + TOP_N_PATTERN, +) + + +class OntologyExtractionMixin: + def _infer_default_missing_slots( + self, + compact_query: str, + *, + scenario: str, + intent: str, + entities: list[OntologyEntity], + time_range: OntologyTimeRange, + context_json: dict[str, Any], + ) -> list[str]: + if scenario != "expense" or intent != "draft": + return [] + + entity_types = {item.type for item in entities} + 
attachment_count = int(context_json.get("attachment_count") or 0) + missing_slots: list[str] = [] + + if self._is_generic_expense_prompt(compact_query): + if "expense_type" not in entity_types: + missing_slots.append("expense_type") + if "amount" not in entity_types: + missing_slots.append("amount") + if not time_range.start_date: + missing_slots.append("time_range") + missing_slots.append("reason") + if attachment_count <= 0: + missing_slots.append("attachments") + return missing_slots + + if any( + item.normalized_value == "entertainment" + for item in entities + if item.type == "expense_type" + ): + if "customer" not in entity_types: + missing_slots.append("customer_name") + missing_slots.append("participants") + if attachment_count <= 0: + missing_slots.append("attachments") + + return missing_slots + + @staticmethod + def _resolve_confidence( + *, + model_confidence: float | None, + fallback_confidence: float, + clarification_required: bool, + permission: OntologyPermission, + ) -> float: + confidence = fallback_confidence if model_confidence is None else float(model_confidence) + confidence = max(0.0, min(confidence, 0.98)) + if permission.level == AgentPermissionLevel.FORBIDDEN.value: + confidence = max(confidence, 0.86) + if clarification_required and permission.level != AgentPermissionLevel.FORBIDDEN.value: + confidence = min(confidence, 0.58) + return round(confidence, 2) + + def _extract_entities( + self, + query: str, + compact_query: str, + reference: ReferenceCatalog, + ) -> list[OntologyEntity]: + entities: dict[tuple[str, str], OntologyEntity] = {} + + def upsert(entity: OntologyEntity) -> None: + key = (entity.type, entity.normalized_value) + if key not in entities: + entities[key] = entity + + for match in re.finditer(r"客户\s*([A-Za-z0-9一二三四五六七八九十]+)", query): + suffix = match.group(1).strip() + normalized = f"客户{suffix}".replace(" ", "") + upsert(self._make_entity("customer", match.group(0).strip(), normalized, role="filter")) + 
labeled_customer_match = re.search(r"客户名称[::]\s*(?P[^\n,。;]+)", query) + if labeled_customer_match: + customer_name = labeled_customer_match.group("name").strip() + upsert(self._make_entity("customer", customer_name, customer_name, role="filter")) + + for match in re.finditer(r"供应商\s*([A-Za-z0-9一二三四五六七八九十]+)", query): + suffix = match.group(1).strip() + normalized = f"供应商{suffix}".replace(" ", "") + upsert(self._make_entity("vendor", match.group(0).strip(), normalized, role="filter")) + + employee_match = re.search( + r"(?P[赵钱孙李周吴郑王冯陈褚卫蒋沈韩杨朱秦许何吕施张孔曹严华金魏陶姜" + r"戚谢邹喻柏水窦章云苏潘葛范彭郎鲁韦昌马苗凤花方俞任袁柳鲍史唐费廉岑" + r"薛雷贺倪汤滕殷罗毕郝邬安常乐于时傅卞康伍余元卜顾孟平黄和穆萧尹姚邵" + r"湛汪祁毛禹狄米贝明臧计成戴宋庞熊纪舒屈项祝董梁杜阮蓝闵席季强贾路江" + r"童颜郭梅盛林钟徐邱骆高夏蔡田樊胡凌霍虞万支柯管卢莫房裘缪解应宗丁宣" + r"邓洪包左石崔吉龚程嵇邢裴陆荣翁荀羊惠甄曲家封芮储靳汲邴糜松井段富巫" + r"乌焦巴弓牧隗山谷车侯伊宫宁仇栾刘景詹束龙叶司黎薄印白怀蒲邰从鄂索咸" + r"籍卓蔺屠蒙池乔阴胥能苍双闻莘党翟谭贡姬申扶堵冉宰郦雍桑桂牛寿通边扈" + r"燕冀浦尚农温别庄晏柴瞿阎连茹习艾容向古易慎戈廖庾终暨居衡步都耿满弘" + r"匡国文寇广禄阙东欧殳沃利蔚越夔隆师巩聂晁勾敖融冷辛阚那简饶曾关蒯相" + r"查后荆游竺权盖益桓公][\u4e00-\u9fa5]{1,2})(?=\s*(?:\d{4}年|\d{1,2}月|本月|" + r"上月|本周|报销|差旅|费用|申请))", + query, + ) + if employee_match: + name = employee_match.group("name") + upsert(self._make_entity("employee", name, name, role="filter")) + + for name in reference.employees: + if self._compact(name) in compact_query: + upsert(self._make_entity("employee", name, name, role="filter")) + for name in reference.departments: + if self._compact(name) in compact_query: + upsert(self._make_entity("department", name, name, role="filter")) + for name in reference.customers: + if self._compact(name) in compact_query: + upsert(self._make_entity("customer", name, name, role="filter")) + for name in reference.vendors: + if self._compact(name) in compact_query: + upsert(self._make_entity("vendor", name, name, role="filter")) + for code in reference.projects: + if self._compact(code) in compact_query: + upsert(self._make_entity("project", code, code, role="filter")) + + for code in re.findall(r"PRJ-[A-Z]+-\d+", query, flags=re.IGNORECASE): + upsert(self._make_entity("project", code, 
code.upper(), role="filter")) + for code in re.findall(r"EXP-\d{6}-\d{3}", query, flags=re.IGNORECASE): + upsert(self._make_entity("expense_claim", code, code.upper())) + for code in re.findall(r"AR-\d{6}-\d{3}", query, flags=re.IGNORECASE): + upsert(self._make_entity("receivable", code, code.upper())) + for code in re.findall(r"AP-\d{6}-\d{3}", query, flags=re.IGNORECASE): + upsert(self._make_entity("payable", code, code.upper())) + for code in re.findall(r"INV-[A-Z]+-\d+", query, flags=re.IGNORECASE): + upsert(self._make_entity("invoice", code, code.upper())) + for code in re.findall(r"CTR-[A-Z]+-\d+", query, flags=re.IGNORECASE): + upsert(self._make_entity("contract", code, code.upper())) + for location in LOCATION_KEYWORDS: + if location in query: + upsert(self._make_entity("location", location, location, role="filter", confidence=0.86)) + + for label, normalized in EXPENSE_TYPE_KEYWORDS.items(): + if label in query: + upsert(self._make_entity("expense_type", label, normalized, role="filter")) + + has_customer_entertainment_signal = "客户" in query and any( + keyword in query for keyword in ("吃饭", "用餐", "餐饮", "宴请", "请客", "招待") + ) + if has_customer_entertainment_signal: + upsert( + self._make_entity( + "expense_type", + "客户招待", + "entertainment", + role="filter", + confidence=0.96, + ) + ) + + if any( + keyword in query + for keyword in ("打车", "网约车", "出租车", "车费", "乘车", "用车", "叫车", "车资", "停车费", "过路费") + ): + upsert(self._make_entity("expense_type", "交通", "transport", role="filter", confidence=0.9)) + + if any(keyword in query for keyword in ("出差", "机票", "火车", "高铁", "行程单")): + upsert(self._make_entity("expense_type", "差旅", "travel", role="filter", confidence=0.88)) + + if any(keyword in query for keyword in ("酒店", "住宿", "宾馆")): + upsert(self._make_entity("expense_type", "住宿", "hotel", role="filter", confidence=0.86)) + + if ( + not has_customer_entertainment_signal + and any(keyword in query for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "餐饮")) + ): + 
upsert(self._make_entity("expense_type", "餐费", "meal", role="filter", confidence=0.84)) + + if any( + keyword in query + for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板") + ): + upsert(self._make_entity("expense_type", "办公费", "office", role="filter", confidence=0.87)) + + if any(keyword in query for keyword in ("培训", "讲师费", "课时费", "课程费")): + upsert(self._make_entity("expense_type", "培训费", "training", role="filter", confidence=0.84)) + + if any(keyword in query for keyword in ("通讯费", "话费", "流量费", "宽带费")): + upsert(self._make_entity("expense_type", "通讯费", "communication", role="filter", confidence=0.84)) + + if any(keyword in query for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")): + upsert(self._make_entity("expense_type", "福利费", "welfare", role="filter", confidence=0.84)) + + for amount in self._extract_amount_entities(query): + upsert(amount) + + return list(entities.values()) + + def _extract_amount_entities(self, query: str) -> list[OntologyEntity]: + entities: list[OntologyEntity] = [] + for match in AMOUNT_PATTERN.finditer(query): + raw_value = match.group("value") + unit = match.group("unit") + prefix = match.group("prefix") + if raw_value is None: + continue + if prefix is None and unit is None: + continue + + amount_value = self._normalize_amount(raw_value, unit) + display_value = f"{raw_value}{unit or ''}" + role = "threshold" if prefix else "target" + entities.append( + self._make_entity( + "amount", + display_value, + str(amount_value), + role=role, + confidence=0.9, + ) + ) + return entities + + @staticmethod + def _make_entity( + entity_type: str, + value: str, + normalized_value: str, + *, + role: str = "target", + confidence: float = 0.92, + ) -> OntologyEntity: + return OntologyEntity( + type=entity_type, + value=value, + normalized_value=normalized_value, + role=role, + confidence=confidence, + ) + + @staticmethod + def _infer_scenario_from_entities(entities: list[OntologyEntity]) -> str | None: + entity_types = 
{item.type for item in entities} + if entity_types & {"vendor", "payable"}: + return "accounts_payable" + if entity_types & {"customer", "receivable", "contract"}: + return "accounts_receivable" + if entity_types & {"employee", "expense_claim", "expense_type"}: + return "expense" + return None + + def _extract_time_range( + self, + query: str, + compact_query: str, + *, + context_json: dict[str, Any], + ) -> tuple[OntologyTimeRange, float]: + today = self._resolve_reference_today(context_json) + + direct_mappings = [ + ("大前天", self._single_day_range(today - timedelta(days=3), "大前天", "day")), + ("前天", self._single_day_range(today - timedelta(days=2), "前天", "day")), + ("昨日", self._single_day_range(today - timedelta(days=1), "昨日", "day")), + ("昨天", self._single_day_range(today - timedelta(days=1), "昨天", "day")), + ("今天", self._single_day_range(today, "今天", "day")), + ("明天", self._single_day_range(today + timedelta(days=1), "明天", "day")), + ("后天", self._single_day_range(today + timedelta(days=2), "后天", "day")), + ("大后天", self._single_day_range(today + timedelta(days=3), "大后天", "day")), + ] + for keyword, value in direct_mappings: + if keyword in query: + return value, 0.10 + + if "本周" in query or "这周" in query or "本星期" in query: + start = today - timedelta(days=today.weekday()) + end = start + timedelta(days=6) + return self._range(start, end, "本周", "week"), 0.10 + if "上周" in query: + end = today - timedelta(days=today.weekday() + 1) + start = end - timedelta(days=6) + return self._range(start, end, "上周", "week"), 0.10 + if "本月" in query or "这个月" in query: + start = date(today.year, today.month, 1) + end = date(today.year, today.month, calendar.monthrange(today.year, today.month)[1]) + return self._range(start, end, "本月", "month"), 0.10 + if "上月" in query: + year = today.year if today.month > 1 else today.year - 1 + month = today.month - 1 if today.month > 1 else 12 + start = date(year, month, 1) + end = date(year, month, calendar.monthrange(year, month)[1]) + return 
self._range(start, end, "上月", "month"), 0.10 + if "本季度" in query or "这个季度" in query: + quarter = (today.month - 1) // 3 + start_month = quarter * 3 + 1 + end_month = start_month + 2 + start = date(today.year, start_month, 1) + end = date(today.year, end_month, calendar.monthrange(today.year, end_month)[1]) + return self._range(start, end, "本季度", "quarter"), 0.10 + if "今年" in query: + return ( + self._range(date(today.year, 1, 1), date(today.year, 12, 31), "今年", "year"), + 0.10, + ) + if "去年" in query or "上一年" in query: + year = today.year - 1 + return ( + self._range(date(year, 1, 1), date(year, 12, 31), "去年", "year"), + 0.10, + ) + + match = DATE_RANGE_PATTERN.search(query) + if match: + start = self._parse_iso_date(match.group("start")) + end = self._parse_iso_date(match.group("end")) + if start and end: + return self._range(start, end, match.group(0), "custom"), 0.10 + + match = EXPLICIT_DATE_PATTERN.search(query) + if match: + explicit = date( + int(match.group("year")), + int(match.group("month")), + int(match.group("day")), + ) + return self._single_day_range(explicit, match.group(0), "day"), 0.10 + + match = EXPLICIT_MONTH_PATTERN.search(query) + if match: + year = int(match.group("year")) + month = int(match.group("month")) + start = date(year, month, 1) + end = date(year, month, calendar.monthrange(year, month)[1]) + return self._range(start, end, match.group(0), "month"), 0.10 + + match = MONTH_DAY_RANGE_PATTERN.search(query) + if match: + start = date(today.year, int(match.group("start_month")), int(match.group("start_day"))) + end = date(today.year, int(match.group("end_month")), int(match.group("end_day"))) + return self._range(start, end, match.group(0), "custom"), 0.10 + + match = MONTH_DAY_PATTERN.search(compact_query) + if match: + explicit = date(today.year, int(match.group("month")), int(match.group("day"))) + return self._single_day_range(explicit, match.group(0), "day"), 0.08 + + month_match = re.search(r"(?P\d{1,2})月", compact_query) + if 
month_match: + month = int(month_match.group("month")) + start = date(today.year, month, 1) + end = date(today.year, month, calendar.monthrange(today.year, month)[1]) + return self._range(start, end, month_match.group(0), "month"), 0.08 + + return OntologyTimeRange(), 0.0 + + @staticmethod + def _resolve_reference_today(context_json: dict[str, Any]) -> date: + client_now_iso = str(context_json.get("client_now_iso") or "").strip() + if not client_now_iso: + return datetime.now(UTC).date() + + normalized = client_now_iso.replace("Z", "+00:00") + try: + client_now = datetime.fromisoformat(normalized) + except ValueError: + return datetime.now(UTC).date() + + if client_now.tzinfo is None: + client_now = client_now.replace(tzinfo=UTC) + + try: + offset_minutes = int(context_json.get("client_timezone_offset_minutes") or 0) + except (TypeError, ValueError): + offset_minutes = 0 + + local_now = client_now - timedelta(minutes=offset_minutes) + return local_now.date() + + @staticmethod + def _single_day_range(target: date, raw: str, granularity: str) -> OntologyTimeRange: + return OntologyTimeRange( + raw=raw, + start_date=target.isoformat(), + end_date=target.isoformat(), + granularity=granularity, + ) + + @staticmethod + def _range(start: date, end: date, raw: str, granularity: str) -> OntologyTimeRange: + return OntologyTimeRange( + raw=raw, + start_date=start.isoformat(), + end_date=end.isoformat(), + granularity=granularity, + ) + + @staticmethod + def _parse_iso_date(value: str) -> date | None: + try: + return date.fromisoformat(value) + except ValueError: + return None + + def _extract_metrics(self, compact_query: str) -> list[OntologyMetric]: + metrics: dict[str, OntologyMetric] = {} + + def upsert(metric: OntologyMetric) -> None: + metrics[metric.name] = metric + + if any( + keyword in compact_query + for keyword in ("多少钱", "金额", "总额", "支出", "回款", "应收", "应付") + ): + upsert(OntologyMetric(name="amount", aggregation="sum", unit="CNY")) + if any(keyword in 
compact_query for keyword in ("多少笔", "几笔", "数量", "条数", "单数")): + upsert(OntologyMetric(name="count", aggregation="count", unit="records")) + if "超标" in compact_query or "超预算" in compact_query: + upsert(OntologyMetric(name="amount_over_limit")) + if "逾期" in compact_query or "账龄" in compact_query: + upsert(OntologyMetric(name="overdue")) + if "重复" in compact_query: + upsert(OntologyMetric(name="duplicate_expense")) + + top_match = TOP_N_PATTERN.search(compact_query) + if top_match: + metrics["amount"] = OntologyMetric( + name="amount", + aggregation="sum", + unit="CNY", + sort="desc" if "最低" not in compact_query else "asc", + top_n=int(top_match.group("top")), + ) + + return list(metrics.values()) + + def _extract_constraints( + self, + compact_query: str, + entities: list[OntologyEntity], + ) -> list[OntologyConstraint]: + constraints: dict[tuple[str, str, str, str | None], OntologyConstraint] = {} + + def upsert(constraint: OntologyConstraint) -> None: + key = ( + constraint.field, + constraint.operator, + str(constraint.value), + constraint.currency, + ) + if key not in constraints: + constraints[key] = constraint + + for entity in entities: + if entity.type in { + "employee", + "department", + "customer", + "vendor", + "project", + "location", + "expense_type", + }: + upsert( + OntologyConstraint( + field=entity.type, + operator="=", + value=entity.normalized_value, + ) + ) + + for keyword, normalized in STATUS_KEYWORDS.items(): + if keyword in compact_query: + upsert(OntologyConstraint(field="status", operator="=", value=normalized)) + + for amount_match in AMOUNT_PATTERN.finditer(compact_query): + if not amount_match.group("prefix"): + continue + + operator = self._normalize_operator(amount_match.group("prefix")) + value = self._normalize_amount(amount_match.group("value"), amount_match.group("unit")) + upsert( + OntologyConstraint( + field="amount", + operator=operator, + value=value, + currency="CNY", + ) + ) + break + + top_match = 
TOP_N_PATTERN.search(compact_query) + if top_match: + top_n = int(top_match.group("top")) + upsert(OntologyConstraint(field="top_n", operator="=", value=top_n)) + upsert( + OntologyConstraint( + field="sort_by", + operator="desc" if "最低" not in compact_query else "asc", + value="amount", + ) + ) + + return list(constraints.values()) diff --git a/server/src/app/services/ontology_rules.py b/server/src/app/services/ontology_rules.py new file mode 100644 index 0000000..1cc1956 --- /dev/null +++ b/server/src/app/services/ontology_rules.py @@ -0,0 +1,298 @@ +from __future__ import annotations + +import re +from dataclasses import dataclass + +from pydantic import BaseModel, ConfigDict, Field + +from app.schemas.ontology import OntologyIntent, OntologyScenario + +DATE_RANGE_PATTERN = re.compile( + r"(?P\d{4}-\d{1,2}-\d{1,2})\s*(?:到|至|~|-)\s*(?P\d{4}-\d{1,2}-\d{1,2})" +) +EXPLICIT_MONTH_PATTERN = re.compile(r"(?P\d{4})年(?P\d{1,2})月") +EXPLICIT_DATE_PATTERN = re.compile( + r"(?P\d{4})[年/-](?P\d{1,2})[月/-](?P\d{1,2})日?" +) +MONTH_DAY_RANGE_PATTERN = re.compile( + r"(?P\d{1,2})月(?P\d{1,2})日?\s*(?:到|至|~|-)\s*" + r"(?P\d{1,2})月(?P\d{1,2})日?" +) +MONTH_DAY_PATTERN = re.compile(r"(?P\d{1,2})月(?P\d{1,2})日?") +AMOUNT_PATTERN = re.compile( + r"(?P超过|大于|高于|不少于|不低于|小于|低于|少于|至多|不超过|<=|>=|<|>|=|=)?\s*" + r"(?P\d+(?:\.\d+)?)\s*(?P万元|万|元)?" 
+) +TOP_N_PATTERN = re.compile(r"(?:top|TOP|前|最高的?|最低的?)\s*(?P\d+)") + +SCENARIO_KEYWORDS = { + "expense": ( + ("报销", 0.20), + ("报销单", 0.20), + ("单据报销", 0.18), + ("报账", 0.20), + ("差旅", 0.20), + ("费用", 0.14), + ("发票", 0.14), + ("票据", 0.12), + ("借款", 0.12), + ("住宿", 0.10), + ("餐费", 0.10), + ("招待", 0.18), + ("招待费", 0.18), + ("花销", 0.16), + ("花了", 0.14), + ("支出", 0.14), + ("垫付", 0.14), + ), + "accounts_receivable": ( + ("应收", 0.22), + ("回款", 0.20), + ("收款", 0.18), + ("账龄", 0.18), + ("客户欠款", 0.22), + ), + "accounts_payable": ( + ("应付", 0.22), + ("付款", 0.20), + ("请款", 0.18), + ("供应商", 0.20), + ("待付", 0.16), + ("打款", 0.18), + ), + "knowledge": ( + ("制度", 0.20), + ("规则", 0.20), + ("办法", 0.18), + ("依据", 0.18), + ("政策", 0.16), + ("知识库", 0.18), + ), +} + +QUERY_KEYWORDS = ( + "查", + "查询", + "查看", + "列出", + "统计", + "汇总", + "多少", + "几笔", + "金额", + "明细", +) +EXPLAIN_KEYWORDS = ("为什么", "依据", "原因", "怎么处理", "是否可以", "能不能", "按什么规则") +COMPARE_KEYWORDS = ("对比", "比较", "相比", "差异", "变化") +RISK_KEYWORDS = ("风险", "异常", "重复", "超标", "超预算", "逾期", "验真", "巡检") +DRAFT_KEYWORDS = ("生成", "草稿", "起草", "拟一份", "创建", "发起", "准备") +DRAFT_FOLLOW_UP_KEYWORDS = ( + "继续", + "下一步", + "核对", + "补充", + "补一下", + "修改", + "改成", + "改为", + "换成", + "更新", + "确认", + "提交", + "保存", + "客户是", + "地点是", + "金额是", + "日期是", + "时间是", +) +EXPENSE_REVIEW_ACTIONS = { + "save_draft", + "next_step", + "edit_review", + "link_to_existing_draft", + "create_new_claim_from_documents", +} +OPERATE_KEYWORDS = ( + "直接付款", + "帮我付款", + "安排付款", + "发起付款", + "直接审批", + "审批通过", + "帮我审批", + "驳回", + "上线", + "激活", + "停用", + "删除", +) + +EXPENSE_TYPE_KEYWORDS = { + "差旅": "travel", + "出差": "travel", + "住宿": "hotel", + "酒店": "hotel", + "交通": "transport", + "打车": "transport", + "网约车": "transport", + "出租车": "transport", + "乘车": "transport", + "乘车费": "transport", + "用车": "transport", + "叫车": "transport", + "车资": "transport", + "停车费": "transport", + "餐费": "meal", + "用餐": "meal", + "会务": "meeting", + "招待费": "entertainment", + "招待": "entertainment", + "宴请": 
"entertainment", + "办公费": "office", + "办公用品": "office", + "文具": "office", + "耗材": "office", + "办公耗材": "office", + "打印纸": "office", + "办公设备": "office", + "培训费": "training", + "培训": "training", + "通讯费": "communication", + "话费": "communication", + "福利费": "welfare", + "团建": "welfare", +} + +EXPENSE_NARRATIVE_KEYWORDS = ( + "报销", + "报账", + "招待", + "招待费", + "花销", + "花了", + "支出", + "垫付", + "打车", + "车费", + "乘车", + "乘车费", + "用车", + "叫车", + "车资", + "餐费", + "吃饭", + "用餐", + "宴请", + "请客", + "住宿", + "发票", + "票据", + "差旅", + "客户现场", +) + +AR_CORE_KEYWORDS = ("应收", "回款", "收款", "账龄", "欠款", "未回款") +AP_CORE_KEYWORDS = ("应付", "付款", "请款", "待付", "打款", "未付款") +GENERIC_EXPENSE_PROMPTS = { + "报销", + "我要报销", + "我想报销", + "帮我报销", + "我要申请报销", + "发起报销", + "提交报销", +} +MISSING_SLOT_LABELS = { + "expense_type": "费用类型", + "amount": "金额", + "customer_name": "客户单位", + "vendor_name": "供应商", + "participants": "参与人员", + "attachments": "票据附件", + "time_range": "发生时间", + "reason": "事由说明", + "document_id": "单据号", +} + +STATUS_KEYWORDS = { + "草稿": "draft", + "待提交": "draft", + "待补充": "supplement", + "退回": "returned", + "已退回": "returned", + "进行中": "review", + "审批中": "review", + "审核中": "review", + "流转中": "review", + "已提交": "submitted", + "逾期": "overdue", + "待审批": "pending", + "待审": "pending", + "已审批": "approved", + "已通过": "approved", + "已审核": "approved", + "已入账": "paid", + "已付款": "paid", + "未付款": "unpaid", + "未回款": "unreceived", +} + +LOCATION_KEYWORDS = ( + "北京", + "上海", + "广州", + "深圳", + "杭州", + "南京", + "苏州", + "成都", + "重庆", + "天津", + "武汉", + "西安", + "郑州", + "长沙", + "青岛", + "厦门", + "宁波", + "合肥", + "济南", + "福州", +) + +PRIVILEGED_ROLE_CODES = {"manager", "finance", "approver", "executive"} +CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "knowledge"} +KNOWLEDGE_INTENTS = {"query", "explain", "compare"} + + +@dataclass(slots=True) +class ReferenceCatalog: + employees: list[str] + departments: list[str] + customers: list[str] + vendors: list[str] + projects: list[str] + + +class 
LlmOntologyEntityHint(BaseModel): + model_config = ConfigDict(extra="ignore") + + type: str + value: str + normalized_value: str | None = None + role: str = "target" + confidence: float = Field(default=0.72, ge=0.0, le=1.0) + + +class LlmOntologyParseResult(BaseModel): + model_config = ConfigDict(extra="ignore") + + scenario: OntologyScenario = Field(default="unknown") + intent: OntologyIntent = Field(default="query") + confidence: float = Field(default=0.0, ge=0.0, le=1.0) + clarification_required: bool = False + clarification_question: str | None = None + missing_slots: list[str] = Field(default_factory=list) + ambiguity: list[str] = Field(default_factory=list) + entity_hints: list[LlmOntologyEntityHint] = Field(default_factory=list) diff --git a/server/src/app/services/ontology_validation.py b/server/src/app/services/ontology_validation.py new file mode 100644 index 0000000..9f6bc88 --- /dev/null +++ b/server/src/app/services/ontology_validation.py @@ -0,0 +1,285 @@ +from __future__ import annotations + +from typing import Any + +from app.core.agent_enums import AgentPermissionLevel +from app.schemas.ontology import ( + OntologyConstraint, + OntologyEntity, + OntologyFieldError, + OntologyMetric, + OntologyPermission, + OntologyTimeRange, +) +from app.services.ontology_rules import ( + AMOUNT_PATTERN, + EXPENSE_REVIEW_ACTIONS, + MISSING_SLOT_LABELS, + OPERATE_KEYWORDS, + PRIVILEGED_ROLE_CODES, +) + + +class OntologyValidationMixin: + def _extract_risk_flags(self, compact_query: str, scenario: str) -> list[str]: + risk_flags: list[str] = [] + + def append(flag: str) -> None: + if flag not in risk_flags: + risk_flags.append(flag) + + if "重复" in compact_query: + append("duplicate_expense") + if any( + keyword in compact_query + for keyword in ("发票异常", "票据异常", "验真失败", "附件缺失", "补件") + ): + append("invoice_anomaly") + if any(keyword in compact_query for keyword in ("超标", "超预算", "超限")): + append("amount_over_limit") + if scenario == "accounts_receivable" and any( + 
class OntologyValidationMixin:
    """Mixin with the validation and scoring helpers of the ontology parser.

    The helpers derive risk flags, resolve the permission level of a request,
    build field errors and clarification questions, compute the confidence
    score, and normalise comparison operators and amounts.
    """

    def _extract_risk_flags(self, compact_query: str, scenario: str) -> list[str]:
        """Return the risk flags triggered by phrases in ``compact_query``.

        Flags are emitted in a fixed order and each at most once. The overdue
        flags are only produced for the matching receivable/payable scenario.
        """
        risk_flags: list[str] = []

        def mentions(*keywords: str) -> bool:
            return any(keyword in compact_query for keyword in keywords)

        if "重复" in compact_query:
            risk_flags.append("duplicate_expense")
        if mentions("发票异常", "票据异常", "验真失败", "附件缺失", "补件"):
            risk_flags.append("invoice_anomaly")
        if mentions("超标", "超预算", "超限"):
            risk_flags.append("amount_over_limit")
        if scenario == "accounts_receivable" and mentions("逾期", "账龄", "欠款", "未回款"):
            risk_flags.append("ar_overdue")
        if scenario == "accounts_payable" and mentions("逾期", "待付", "付款风险", "未付款"):
            risk_flags.append("ap_overdue")

        return risk_flags

    def _resolve_permission(
        self,
        compact_query: str,
        context_json: dict,
        intent: str,
    ) -> OntologyPermission:
        """Map the intent and the caller's roles to a permission decision.

        Read-style intents are always allowed, drafts get draft-write access,
        and everything else is either routed to approval or forbidden,
        depending on whether the caller is an admin or holds a privileged role.
        """
        # ``or []`` tolerates an explicit ``"role_codes": None`` in the context.
        role_codes = {
            str(item).strip().lower()
            for item in (context_json.get("role_codes") or [])
            if str(item).strip()
        }
        is_admin = bool(context_json.get("is_admin"))
        privileged = is_admin or bool(role_codes & PRIVILEGED_ROLE_CODES)

        if intent in {"query", "explain", "compare", "risk_check"}:
            return OntologyPermission(
                level=AgentPermissionLevel.READ.value,
                allowed=True,
                reason="只读查询。",
            )
        if intent == "draft":
            return OntologyPermission(
                level=AgentPermissionLevel.DRAFT_WRITE.value,
                allowed=True,
                reason="允许生成草稿,但不会直接提交业务动作。",
            )

        if any(keyword in compact_query for keyword in OPERATE_KEYWORDS) or "付款" in compact_query:
            if privileged:
                return OntologyPermission(
                    level=AgentPermissionLevel.APPROVAL_REQUIRED.value,
                    allowed=False,
                    reason="涉及付款、审批或上线动作,必须进入人工审批链。",
                )
            return OntologyPermission(
                level=AgentPermissionLevel.FORBIDDEN.value,
                allowed=False,
                reason="当前账号缺少财务或审批权限,只能查看结果或生成草稿。",
            )

        return OntologyPermission(
            level=AgentPermissionLevel.APPROVAL_REQUIRED.value,
            allowed=False,
            reason="操作类请求需要人工审批确认。",
        )

    def _build_field_errors(
        self,
        *,
        scenario: str,
        intent: str,
        entities: list[OntologyEntity],
        permission: OntologyPermission,
        missing_slots: list[str],
        ambiguity: list[str],
    ) -> list[OntologyFieldError]:
        """Collect every validation problem of a parse result.

        Unlike ``_build_clarification`` this does not stop at the first
        problem: all applicable errors are returned, in a fixed order.
        """
        errors: list[OntologyFieldError] = []
        if scenario == "unknown":
            errors.append(
                OntologyFieldError(
                    field="scenario",
                    code="scenario_unknown",
                    message="未识别出明确业务场景,请补充是报销、应收、应付还是制度问题。",
                )
            )
        # Amount entities do not count as comparison targets.
        compare_targets = sum(1 for item in entities if item.type != "amount")
        if intent == "compare" and compare_targets < 2:
            errors.append(
                OntologyFieldError(
                    field="entities",
                    code="compare_target_missing",
                    message="对比类问题请至少给出两个对象,或给出更明确的对比范围。",
                )
            )
        if missing_slots:
            errors.append(
                OntologyFieldError(
                    field="missing_slots",
                    code="required_slot_missing",
                    message=(
                        "继续处理前还缺少关键信息:"
                        f"{'、'.join(self._display_slot_label(item) for item in missing_slots)}。"
                    ),
                )
            )
        if ambiguity:
            errors.append(
                OntologyFieldError(
                    field="ambiguity",
                    code="ambiguity_detected",
                    message=f"当前问题存在歧义:{';'.join(ambiguity)}。",
                )
            )
        if permission.level == AgentPermissionLevel.FORBIDDEN.value:
            errors.append(
                OntologyFieldError(
                    field="permission",
                    code="permission_forbidden",
                    message=permission.reason,
                )
            )
        return errors

    def _build_clarification(
        self,
        *,
        scenario: str,
        intent: str,
        entities: list[OntologyEntity],
        permission: OntologyPermission,
        missing_slots: list[str],
        ambiguity: list[str],
        allow_incomplete_draft: bool,
        model_clarification_required: bool,
        model_clarification_question: str | None,
    ) -> tuple[bool, str | None]:
        """Decide whether the user must clarify, and with which question.

        Returns ``(required, question)``. The checks run in priority order and
        the first one that applies wins.
        """
        if permission.level == AgentPermissionLevel.FORBIDDEN.value:
            return True, "当前动作超出权限范围。是否改为生成草稿或建议?"
        if scenario == "knowledge" and intent in {"query", "explain"}:
            return False, None
        if model_clarification_required:
            # A model-requested clarification only counts when it carries a question.
            question = str(model_clarification_question or "").strip()
            if question:
                return True, question
        if missing_slots:
            return True, self._build_missing_slot_question(missing_slots)
        if ambiguity:
            return True, f"当前问题存在歧义,请进一步说明:{';'.join(ambiguity)}。"
        if scenario == "unknown":
            return True, "请说明这是报销、应收、应付,还是制度知识问题?"
        if intent == "compare" and sum(1 for item in entities if item.type != "amount") < 2:
            return True, "请补充需要对比的两个对象,例如两个客户、两个供应商或两个员工。"
        # NOTE(review): this guard cannot change the outcome. Missing slots and
        # ambiguity have already returned above, and the fall-through result is
        # the same ``(False, None)``. A second, unreachable pair of
        # missing-slot/ambiguity checks used to follow it, which suggests the
        # guard was meant to run before those checks. Confirm the intended
        # order before moving it; behaviour is deliberately left unchanged.
        if allow_incomplete_draft and scenario == "expense" and intent == "draft":
            return False, None
        return False, None

    @staticmethod
    def _allow_incomplete_draft(
        context_json: dict[str, Any],
        *,
        scenario: str,
        intent: str,
    ) -> bool:
        """Return True for an expense draft whose context carries a review action."""
        if scenario != "expense" or intent != "draft":
            return False
        review_action = str(context_json.get("review_action") or "").strip()
        return review_action in EXPENSE_REVIEW_ACTIONS

    @staticmethod
    def _display_slot_label(slot: str) -> str:
        """Return the user-facing label of ``slot``, or the code itself if unmapped."""
        return MISSING_SLOT_LABELS.get(slot, slot)

    def _build_missing_slot_question(self, missing_slots: list[str]) -> str:
        """Build the question that asks for the missing slots (at most four are named)."""
        labels = [self._display_slot_label(item) for item in missing_slots[:4]]
        if not labels:
            return "请补充更多上下文后再继续。"
        return f"请补充{'、'.join(labels)},我再继续帮你解析和处理。"

    @staticmethod
    def _compute_confidence(
        *,
        scenario: str,
        scenario_score: float,
        intent_score: float,
        entities: list[OntologyEntity],
        time_range: OntologyTimeRange,
        metrics: list[OntologyMetric],
        constraints: list[OntologyConstraint],
        risk_flags: list[str],
        clarification_required: bool,
        permission: OntologyPermission,
    ) -> float:
        """Combine the parse signals into a confidence rounded to two decimals.

        Starts from a base of 0.18 plus the scenario and intent scores, adds a
        bonus per kind of extracted signal, then applies floors and caps. The
        result never exceeds 0.98.
        """
        confidence = 0.18 + scenario_score + intent_score
        # Entities add 0.04 each, up to 0.16 in total.
        confidence += min(0.16, len(entities) * 0.04)
        if time_range.start_date:
            confidence += 0.10
        if metrics:
            confidence += 0.06
        if constraints:
            confidence += 0.06
        if risk_flags:
            confidence += 0.08

        forbidden = permission.level == AgentPermissionLevel.FORBIDDEN.value
        if forbidden:
            # A forbidden request is floored at 0.86.
            confidence = max(confidence, 0.86)

        if scenario == "unknown":
            confidence = min(confidence, 0.45)
        if clarification_required and not forbidden:
            confidence = min(confidence, 0.58)

        return round(min(confidence, 0.98), 2)

    @staticmethod
    def _build_result_summary(
        scenario: str,
        intent: str,
        permission_level: str,
        confidence: float,
    ) -> str:
        """Format the one-line summary of a finished parse."""
        return (
            f"语义解析完成:scenario={scenario}, intent={intent}, "
            f"permission={permission_level}, confidence={confidence:.2f}"
        )

    @staticmethod
    def _normalize_operator(value: str) -> str:
        """Translate a comparison phrase or symbol to its ASCII operator.

        Unknown values are returned unchanged.
        """
        mapping = {
            "超过": ">",
            "大于": ">",
            "高于": ">",
            ">": ">",
            ">=": ">=",
            "不少于": ">=",
            "不低于": ">=",
            "小于": "<",
            "低于": "<",
            "少于": "<",
            "<": "<",
            "<=": "<=",
            "至多": "<=",
            "不超过": "<=",
            "=": "=",
            # The table listed the ASCII "=" key twice, which is a no-op. The
            # second entry is presumed to have been the full-width sign.
            # NOTE(review): confirm against the amount pattern's alternatives.
            "＝": "=",
        }
        return mapping.get(value, value)

    @staticmethod
    def _normalize_amount(raw_value: str | None, unit: str | None) -> int | float:
        """Convert a captured amount to a number, scaling 万/万元 by 10,000.

        Whole amounts are returned as ``int``; anything else is a ``float``
        rounded to two decimals. A missing value counts as 0.
        """
        numeric = float(raw_value or 0)
        if unit in {"万", "万元"}:
            numeric *= 10000
        # Round before the integer test: scaling can leave float noise
        # (0.07 * 10000 == 700.0000000000001), which would otherwise make a
        # whole amount come back as a float.
        rounded = round(numeric, 2)
        return int(rounded) if rounded.is_integer() else rounded
OrchestratorTraceSummary, -) -from app.schemas.user_agent import UserAgentRequest, UserAgentResponse -from app.services.agent_assets import AgentAssetService -from app.services.agent_conversations import AgentConversationService -from app.services.expense_claims import ExpenseClaimService -from app.services.agent_foundation import AgentFoundationService -from app.services.agent_runs import AgentRunService -from app.services.knowledge import KnowledgeService -from app.services.ontology import SemanticOntologyService -from app.services.user_agent import UserAgentService - -logger = get_logger("app.services.orchestrator") - -SCENARIO_TO_DOMAIN = { - "expense": "expense", - "accounts_receivable": "ar", - "accounts_payable": "ap", - "knowledge": "knowledge", - "unknown": "system", -} - - -@dataclass(slots=True) -class ExecutionOutcome: - status: str - result: dict[str, Any] - degraded: bool - tool_count: int - failed_tool_count: int - - -PRIVILEGED_EXPENSE_QUERY_ROLE_CODES = {"finance"} -SELF_REFERENCE_KEYWORDS = ("我的", "我自己", "本人", "我名下", "给我查", "我提交", "我申请") -EXPENSE_QUERY_RECENT_WINDOW_DAYS = 10 -EXPENSE_QUERY_PREVIEW_LIMIT = 20 -EXPENSE_STATUS_LABELS = { - "draft": "草稿", - "submitted": "已提交", - "review": "审核中", - "approved": "已通过", - "rejected": "已驳回", - "paid": "已付款", -} -EXPENSE_STATUS_GROUP_LABELS = { - "draft": "草稿", - "in_progress": "审批中", - "completed": "审批完成", - "other": "其他状态", -} -EXPENSE_STATUS_GROUP_ORDER = ("draft", "in_progress", "completed", "other") -EXPENSE_TYPE_LABELS = { - "travel": "差旅费", - "hotel": "住宿费", - "transport": "交通费", - "meal": "餐费", - "meeting": "会务费", - "entertainment": "业务招待费", - "office": "办公费", - "training": "培训费", - "communication": "通讯费", - "welfare": "福利费", - "other": "其他费用", -} - - -class OrchestratorService: - def __init__(self, db: Session) -> None: - self.db = db - self.asset_service = AgentAssetService(db) - self.conversation_service = AgentConversationService(db) - self.expense_claim_service = ExpenseClaimService(db) - 
self.knowledge_service = KnowledgeService(db=db) - self.run_service = AgentRunService(db) - self.ontology_service = SemanticOntologyService(db) - self.user_agent_service = UserAgentService(db) - - def run(self, payload: OrchestratorRequest) -> OrchestratorResponse: - AgentFoundationService(self.db).ensure_foundation_ready() - context_json = dict(payload.context_json or {}) - conversation_id = str(payload.conversation_id or "").strip() or None - conversation = None - if payload.source == AgentRunSource.USER_MESSAGE.value: - conversation = self.conversation_service.get_or_create_conversation( - conversation_id=conversation_id, - user_id=payload.user_id, - source=payload.source, - context_json=context_json, - ) - conversation_id = conversation.conversation_id +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any + +from sqlalchemy.orm import Session + +from app.core.agent_enums import ( + AgentAssetStatus, + AgentAssetType, + AgentName, + AgentPermissionLevel, + AgentRunSource, + AgentRunStatus, +) +from app.core.logging import get_logger +from app.schemas.agent_asset import AgentAssetListItem, AgentAssetRead +from app.schemas.ontology import OntologyParseRequest, OntologyParseResult +from app.schemas.orchestrator import ( + OrchestratorRequest, + OrchestratorResponse, + OrchestratorTraceSummary, +) +from app.schemas.user_agent import UserAgentRequest +from app.services.agent_assets import AgentAssetService +from app.services.agent_conversations import AgentConversationService +from app.services.expense_claims import ExpenseClaimService +from app.services.agent_foundation import AgentFoundationService +from app.services.agent_runs import AgentRunService +from app.services.knowledge import KnowledgeService +from app.services.ontology import SemanticOntologyService +from app.services.orchestrator_execution import ExecutionOutcome, OrchestratorExecutionEngine +from app.services.orchestrator_expense_query import 
OrchestratorDatabaseQueryBuilder +from app.services.user_agent import UserAgentService + +logger = get_logger("app.services.orchestrator") + +SCENARIO_TO_DOMAIN = { + "expense": "expense", + "accounts_receivable": "ar", + "accounts_payable": "ap", + "knowledge": "knowledge", + "unknown": "system", +} + +class OrchestratorService: + def __init__(self, db: Session) -> None: + self.db = db + self.asset_service = AgentAssetService(db) + self.conversation_service = AgentConversationService(db) + self.expense_claim_service = ExpenseClaimService(db) + self.knowledge_service = KnowledgeService(db=db) + self.run_service = AgentRunService(db) + self.ontology_service = SemanticOntologyService(db) + self.user_agent_service = UserAgentService(db) + self.database_query_builder = OrchestratorDatabaseQueryBuilder(db) + self.execution_engine = OrchestratorExecutionEngine( + run_service=self.run_service, + expense_claim_service=self.expense_claim_service, + knowledge_service=self.knowledge_service, + user_agent_service=self.user_agent_service, + database_query_builder=self.database_query_builder, + ) + + def run(self, payload: OrchestratorRequest) -> OrchestratorResponse: + AgentFoundationService(self.db).ensure_foundation_ready() + context_json = dict(payload.context_json or {}) + conversation_id = str(payload.conversation_id or "").strip() or None + conversation = None + if payload.source == AgentRunSource.USER_MESSAGE.value: + conversation = self.conversation_service.get_or_create_conversation( + conversation_id=conversation_id, + user_id=payload.user_id, + source=payload.source, + context_json=context_json, + ) + conversation_id = conversation.conversation_id context_json = self.conversation_service.hydrate_context_json( conversation=conversation, context_json=context_json, message=payload.message, ) - - route_json: dict[str, Any] = { - "orchestrated_by": AgentName.ORCHESTRATOR.value, - "stage": "created", - } - if conversation_id: - route_json["conversation_id"] = 
conversation_id - run = self.run_service.create_run( - agent=AgentName.ORCHESTRATOR.value, - source=payload.source, - user_id=payload.user_id, - task_id=payload.task_id, - ontology_json={}, - route_json=route_json, - permission_level=AgentPermissionLevel.READ.value, - status=AgentRunStatus.RUNNING.value, - result_summary="Orchestrator 已接收请求。", - ) - - try: - message, task_asset = self._resolve_message(payload) - if conversation is not None: - self.conversation_service.append_message( - conversation_id=conversation.conversation_id, - role="user", - content=message, - run_id=run.run_id, - message_json={ - "attachment_names": context_json.get("attachment_names", []), - "attachment_count": context_json.get("attachment_count", 0), - "ocr_summary": context_json.get("ocr_summary", ""), - }, - ) - ontology = self.ontology_service.parse_for_run( - OntologyParseRequest( - query=message, - user_id=payload.user_id, - context_json=context_json, - ), - run_id=run.run_id, - ) - if context_json.get("simulate_orchestrator_exception"): - raise RuntimeError("simulated orchestrator exception") - selected_agent, route_reason = self._select_agent(payload, ontology) - capabilities = self._select_capabilities( - payload=payload, - ontology=ontology, - task_asset=task_asset, - ) - selected_capability_codes = self._flatten_capability_codes(capabilities) - is_expense_review_action = self._is_expense_review_action(context_json) + + route_json: dict[str, Any] = { + "orchestrated_by": AgentName.ORCHESTRATOR.value, + "stage": "created", + } + if conversation_id: + route_json["conversation_id"] = conversation_id + run = self.run_service.create_run( + agent=AgentName.ORCHESTRATOR.value, + source=payload.source, + user_id=payload.user_id, + task_id=payload.task_id, + ontology_json={}, + route_json=route_json, + permission_level=AgentPermissionLevel.READ.value, + status=AgentRunStatus.RUNNING.value, + result_summary="Orchestrator 已接收请求。", + ) + + try: + message, task_asset = 
self._resolve_message(payload) + if conversation is not None: + self.conversation_service.append_message( + conversation_id=conversation.conversation_id, + role="user", + content=message, + run_id=run.run_id, + message_json={ + "attachment_names": context_json.get("attachment_names", []), + "attachment_count": context_json.get("attachment_count", 0), + "ocr_summary": context_json.get("ocr_summary", ""), + }, + ) + ontology = self.ontology_service.parse_for_run( + OntologyParseRequest( + query=message, + user_id=payload.user_id, + context_json=context_json, + ), + run_id=run.run_id, + ) + if context_json.get("simulate_orchestrator_exception"): + raise RuntimeError("simulated orchestrator exception") + selected_agent, route_reason = self._select_agent(payload, ontology) + capabilities = self._select_capabilities( + payload=payload, + ontology=ontology, + task_asset=task_asset, + ) + selected_capability_codes = self.execution_engine._flatten_capability_codes(capabilities) + is_expense_review_action = self.execution_engine._is_expense_review_action(context_json) requires_confirmation = ( ontology.permission.level == AgentPermissionLevel.APPROVAL_REQUIRED.value and not is_expense_review_action ) - - route_json = { - "orchestrated_by": AgentName.ORCHESTRATOR.value, - "stage": "routed", - "selected_agent": selected_agent, - "route_reason": route_reason, - "selected_capability_codes": selected_capability_codes, - "ontology_run_id": ontology.run_id, - } - - if ontology.permission.level == AgentPermissionLevel.FORBIDDEN.value: - outcome = ExecutionOutcome( - status=AgentRunStatus.BLOCKED.value, - result={ - "message": ontology.permission.reason, - "clarification_question": ontology.clarification_question, - "degraded": False, - }, - degraded=False, - tool_count=0, - failed_tool_count=0, - ) - selected_agent = None - route_reason = "permission_forbidden" - route_json["stage"] = "blocked" - route_json["route_reason"] = route_reason - elif ontology.clarification_required: - if 
selected_agent == AgentName.USER_AGENT.value and ontology.scenario == "expense": - clarification_response = self.user_agent_service.respond( - UserAgentRequest( - run_id=run.run_id, - user_id=payload.user_id, - message=payload.message or "", - ontology=ontology, - context_json=context_json, - tool_payload={"clarification_required": True}, - selected_capability_codes=selected_capability_codes, - degraded=False, - requires_confirmation=requires_confirmation, - ) - ) - clarification_result = self._build_user_agent_result( - clarification_response, - degraded=False, - ) - clarification_result.update( - { - "clarification_required": True, - "missing_slots": ontology.missing_slots, - "ambiguity": ontology.ambiguity, - "parse_strategy": ontology.parse_strategy, - } - ) - outcome = ExecutionOutcome( - status=AgentRunStatus.BLOCKED.value, - result=clarification_result, - degraded=False, - tool_count=0, - failed_tool_count=0, - ) - else: - outcome = ExecutionOutcome( - status=AgentRunStatus.BLOCKED.value, - result={ - "message": ontology.clarification_question or "需要补充更多上下文。", - "clarification_required": True, - "missing_slots": ontology.missing_slots, - "ambiguity": ontology.ambiguity, - "parse_strategy": ontology.parse_strategy, - "degraded": False, - }, - degraded=False, - tool_count=0, - failed_tool_count=0, - ) - route_reason = "clarification_required" - route_json["stage"] = "clarification" - route_json["route_reason"] = route_reason - elif selected_agent == AgentName.HERMES.value: - outcome = self._execute_hermes( - payload=payload, - run_id=run.run_id, - ontology=ontology, - capabilities=capabilities, - requires_confirmation=requires_confirmation, - task_asset=task_asset, - context_json=context_json, - ) - else: - outcome = self._execute_user_agent( - payload=payload, - run_id=run.run_id, - ontology=ontology, - capabilities=capabilities, - requires_confirmation=requires_confirmation, - context_json=context_json, - ) - - final_status = ( - AgentRunStatus.BLOCKED.value 
- if requires_confirmation - and outcome.status == AgentRunStatus.SUCCEEDED.value - and ontology.permission.level == AgentPermissionLevel.APPROVAL_REQUIRED.value - else outcome.status - ) - response_status = self._normalize_response_status(final_status) - result_message = ( - str(outcome.result.get("message", "")).strip() - or "Orchestrator 执行完成。" - ) - trace_summary = OrchestratorTraceSummary( - scenario=ontology.scenario, - intent=ontology.intent, - tool_count=outcome.tool_count, - failed_tool_count=outcome.failed_tool_count, - selected_capability_codes=selected_capability_codes, - degraded=outcome.degraded, - ) - self.run_service.update_run( - run.run_id, - agent=selected_agent or AgentName.ORCHESTRATOR.value, - ontology_json=self._build_ontology_json(ontology), - route_json={ - **route_json, - "requires_confirmation": requires_confirmation, - "degraded": outcome.degraded, - }, - permission_level=ontology.permission.level, - status=final_status, - result_summary=result_message, - error_message=None, - finished_at=datetime.now(UTC), - ) - if conversation is not None and conversation_id: - draft_payload = outcome.result.get("draft_payload") - self.conversation_service.update_state( - conversation_id=conversation_id, - run_id=run.run_id, - scenario=ontology.scenario, - intent=ontology.intent, - context_json=context_json, - draft_payload=draft_payload if isinstance(draft_payload, dict) else None, - ) - self.conversation_service.append_message( - conversation_id=conversation_id, - role="assistant", - content=result_message, - run_id=run.run_id, - message_json={ - "status": final_status, - "scenario": ontology.scenario, - "intent": ontology.intent, - "attachment_names": context_json.get("attachment_names", []), - "attachment_count": context_json.get("attachment_count", 0), - "draft_payload": draft_payload if isinstance(draft_payload, dict) else None, - "orchestrator_payload": { - "run_id": run.run_id, - "conversation_id": conversation_id, - "selected_agent": 
selected_agent, - "route_reason": route_reason, - "permission_level": ontology.permission.level, - "status": response_status, - "requires_confirmation": requires_confirmation, - "trace_summary": trace_summary.model_dump(), - "result": outcome.result, - }, - }, - ) - return OrchestratorResponse( - run_id=run.run_id, - conversation_id=conversation_id, - selected_agent=selected_agent, - route_reason=route_reason, - permission_level=ontology.permission.level, - status=response_status, - result=outcome.result, - requires_confirmation=requires_confirmation, - trace_summary=trace_summary, - ) - except Exception as exc: - logger.exception("Orchestrator run failed run_id=%s", run.run_id) - self.run_service.update_run( - run.run_id, - agent=AgentName.ORCHESTRATOR.value, - route_json={**route_json, "stage": "failed"}, - status=AgentRunStatus.FAILED.value, - result_summary="Orchestrator 执行失败。", - error_message=str(exc), - finished_at=datetime.now(UTC), - ) - if conversation is not None and conversation_id: - self.conversation_service.update_state( - conversation_id=conversation_id, - run_id=run.run_id, - scenario=None, - intent=None, - context_json=context_json, - draft_payload=None, - ) - self.conversation_service.append_message( - conversation_id=conversation_id, - role="assistant", - content=f"Orchestrator 执行失败:{exc}", - run_id=run.run_id, - message_json={"status": AgentRunStatus.FAILED.value}, - ) - return OrchestratorResponse( - run_id=run.run_id, - conversation_id=conversation_id, - selected_agent=None, - route_reason="orchestrator_exception", - permission_level=AgentPermissionLevel.READ.value, - status="failed", - result={"message": f"Orchestrator 执行失败:{exc}"}, - requires_confirmation=False, - trace_summary=OrchestratorTraceSummary( - scenario="unknown", - intent="query", - tool_count=0, - failed_tool_count=0, - selected_capability_codes=[], - degraded=False, - ), - ) - - def _resolve_message( - self, - payload: OrchestratorRequest, - ) -> tuple[str, AgentAssetRead | 
None]: - task_asset = None - if payload.task_id: - task_asset = self.asset_service.get_asset(payload.task_id) - - if payload.message and payload.message.strip(): - return payload.message.strip(), task_asset - - if task_asset is not None: - description = str(task_asset.description or "").strip() - scenario_text = " ".join(str(item) for item in task_asset.scenario_json) - message = f"{task_asset.name} {description} {scenario_text}".strip() - return message, task_asset - - if payload.source == AgentRunSource.SCHEDULE.value: - return "定时风险巡检任务", task_asset - - raise ValueError("message 或 task_id 至少需要提供一个。") - - @staticmethod - def _select_agent( - payload: OrchestratorRequest, - ontology: OntologyParseResult, - ) -> tuple[str, str]: - if payload.source == AgentRunSource.SCHEDULE.value: - return AgentName.HERMES.value, "schedule_source_defaults_to_hermes" - if payload.source == AgentRunSource.SYSTEM_EVENT.value and ontology.intent == "risk_check": - return AgentName.HERMES.value, "system_event_risk_check_routes_to_hermes" - if ontology.intent == "risk_check" and payload.source == AgentRunSource.SCHEDULE.value: - return AgentName.HERMES.value, "scheduled_risk_check_routes_to_hermes" - if ontology.intent in {"query", "explain", "draft", "compare", "operate"}: - return AgentName.USER_AGENT.value, f"{ontology.intent}_routes_to_user_agent" - return AgentName.USER_AGENT.value, "user_message_defaults_to_user_agent" - - def _select_capabilities( - self, - *, - payload: OrchestratorRequest, - ontology: OntologyParseResult, - task_asset: AgentAssetRead | None, - ) -> dict[str, list[AgentAssetListItem | AgentAssetRead]]: - domain_value = SCENARIO_TO_DOMAIN.get(ontology.scenario) - rules = self._rank_assets( - self.asset_service.list_assets( - asset_type=AgentAssetType.RULE.value, - status=AgentAssetStatus.ACTIVE.value, - domain=domain_value if domain_value not in {"knowledge", "system"} else None, - ), - ontology, - ) - skills = self._rank_assets( - self.asset_service.list_assets( 
- asset_type=AgentAssetType.SKILL.value, - status=AgentAssetStatus.ACTIVE.value, - domain=domain_value if domain_value not in {"system"} else None, - ), - ontology, - ) - mcps = self._rank_assets( - self.asset_service.list_assets( - asset_type=AgentAssetType.MCP.value, - status=AgentAssetStatus.ACTIVE.value, - ), - ontology, - ) - tasks: list[AgentAssetListItem | AgentAssetRead] = [] - if task_asset is not None and task_asset.status == AgentAssetStatus.ACTIVE.value: - tasks.append(task_asset) - elif payload.source == AgentRunSource.SCHEDULE.value: - tasks = self._rank_assets( - self.asset_service.list_assets( - asset_type=AgentAssetType.TASK.value, - status=AgentAssetStatus.ACTIVE.value, - ), - ontology, - ) - - return { - "rules": rules, - "skills": skills, - "mcps": mcps, - "tasks": tasks, - } - - def _execute_user_agent( - self, - *, - payload: OrchestratorRequest, - run_id: str, - ontology: OntologyParseResult, - capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], - requires_confirmation: bool, - context_json: dict[str, Any], - ) -> ExecutionOutcome: - selected_capability_codes = self._flatten_capability_codes(capabilities) - if requires_confirmation: - response, degraded = self._invoke_tool( - run_id=run_id, - tool_type=AgentToolType.LLM.value, - tool_name="user_agent.confirmation_placeholder", - request_json={ - "message": payload.message, - "permission_level": ontology.permission.level, - }, - context_json=context_json, - executor=lambda: { - "confirmation_title": "操作需要确认", - "message": f"{ontology.permission.reason} 当前仅返回确认摘要,不直接执行动作。", - }, - fallback_factory=lambda exc: { - "confirmation_title": "操作需要确认", - "message": f"确认摘要生成失败,已阻断自动执行:{exc}", - }, - ) - return ExecutionOutcome( - status=AgentRunStatus.BLOCKED.value, - result={**response, "degraded": degraded}, - degraded=degraded, - tool_count=1, - failed_tool_count=1 if degraded else 0, - ) - - next_step = self._resolve_next_step( - ontology, - payload.source, - 
context_json=context_json, - ) - if next_step == "query_database": - tool_payload, degraded = self._invoke_tool( - run_id=run_id, - tool_type=AgentToolType.DATABASE.value, - tool_name=self._database_tool_name(ontology.scenario), - request_json=self._build_ontology_json(ontology), - context_json=context_json, - executor=lambda: self._build_database_answer( - ontology, - user_id=payload.user_id, - context_json=context_json, - message=payload.message or "", - ), - fallback_factory=lambda exc: { - "message": f"数据库查询暂时不可用,已返回降级说明:{exc}", - "degraded": True, - }, - ) - result = self._build_user_agent_result( - self.user_agent_service.respond( - UserAgentRequest( - run_id=run_id, - user_id=payload.user_id, - message=payload.message or "", - ontology=ontology, - context_json=context_json, - tool_payload=tool_payload, - selected_capability_codes=selected_capability_codes, - degraded=degraded, - requires_confirmation=requires_confirmation, - ) - ), - degraded=degraded, - ) - return ExecutionOutcome( - status=AgentRunStatus.SUCCEEDED.value, - result=result, - degraded=degraded, - tool_count=1, - failed_tool_count=1 if degraded else 0, - ) - - if next_step == "search_knowledge": - tool_payload, degraded = self._invoke_tool( - run_id=run_id, - tool_type=AgentToolType.DATABASE.value, - tool_name="knowledge.search", - request_json=self._build_ontology_json(ontology), - context_json=context_json, - executor=lambda: self._build_knowledge_answer( - message=payload.message or "", - ontology=ontology, - capabilities=capabilities, - context_json=context_json, - ), - fallback_factory=lambda exc: { - "message": f"知识检索暂时不可用,建议稍后重试:{exc}", - "degraded": True, - }, - ) - result = self._build_user_agent_result( - self.user_agent_service.respond( - UserAgentRequest( - run_id=run_id, - user_id=payload.user_id, - message=payload.message or "", - ontology=ontology, - context_json=context_json, - tool_payload=tool_payload, - selected_capability_codes=selected_capability_codes, - 
degraded=degraded, - requires_confirmation=requires_confirmation, - ) - ), - degraded=degraded, - ) - return ExecutionOutcome( - status=AgentRunStatus.SUCCEEDED.value, - result=result, - degraded=degraded, - tool_count=1, - failed_tool_count=1 if degraded else 0, - ) - - if next_step == "run_rule": - tool_payload, degraded = self._invoke_tool( - run_id=run_id, - tool_type=AgentToolType.RULE_ENGINE.value, - tool_name=self._rule_tool_name(capabilities), - request_json=self._build_ontology_json(ontology), - context_json=context_json, - executor=lambda: self._build_rule_answer(ontology), - fallback_factory=lambda exc: { - "message": f"规则检查暂时不可用,已返回人工复核建议:{exc}", - "degraded": True, - }, - ) - result = self._build_user_agent_result( - self.user_agent_service.respond( - UserAgentRequest( - run_id=run_id, - user_id=payload.user_id, - message=payload.message or "", - ontology=ontology, - context_json=context_json, - tool_payload=tool_payload, - selected_capability_codes=selected_capability_codes, - degraded=degraded, - requires_confirmation=requires_confirmation, - ) - ), - degraded=degraded, - ) - return ExecutionOutcome( - status=AgentRunStatus.SUCCEEDED.value, - result=result, - degraded=degraded, - tool_count=1, - failed_tool_count=1 if degraded else 0, - ) - - tool_type = AgentToolType.LLM.value - tool_name = "user_agent.draft_placeholder" - executor = lambda: { - "message": ( - f"已生成 {ontology.scenario} 场景草稿," - "占位能力后续由 Day 5 User Agent 接管。" - ), - "draft_only": True, - } - fallback_factory = lambda exc: { - "message": f"内容整理暂时不可用,请稍后再试:{exc}", - "degraded": True, - } - - if ontology.scenario == "expense" or self._is_expense_review_action(context_json): - is_persistence_action = self._is_expense_persistence_action(context_json) - tool_type = ( - AgentToolType.DATABASE.value - if is_persistence_action - else AgentToolType.LLM.value - ) - tool_name = ( - "database.expense_claims.save_or_submit" - if is_persistence_action - else "user_agent.expense_review_preview" - ) 
- executor = lambda: self.expense_claim_service.save_or_submit_from_ontology( - run_id=run_id, - user_id=payload.user_id, - message=payload.message or "", - ontology=ontology, - context_json=context_json, - ) - fallback_factory = lambda exc: { - "message": ( - f"报销草稿落库失败,请稍后再试:{exc}" - if is_persistence_action - else f"报销内容预览生成失败,请稍后再试:{exc}" - ), - "degraded": True, + + route_json = { + "orchestrated_by": AgentName.ORCHESTRATOR.value, + "stage": "routed", + "selected_agent": selected_agent, + "route_reason": route_reason, + "selected_capability_codes": selected_capability_codes, + "ontology_run_id": ontology.run_id, } - - tool_payload, degraded = self._invoke_tool( - run_id=run_id, - tool_type=tool_type, - tool_name=tool_name, - request_json=self._build_ontology_json(ontology), - context_json=context_json, - executor=executor, - fallback_factory=fallback_factory, - ) - result = self._build_user_agent_result( - self.user_agent_service.respond( - UserAgentRequest( - run_id=run_id, - user_id=payload.user_id, - message=payload.message or "", - ontology=ontology, - context_json=context_json, - tool_payload=tool_payload, - selected_capability_codes=selected_capability_codes, - degraded=degraded, - requires_confirmation=requires_confirmation, - ) - ), - degraded=degraded, - ) - return ExecutionOutcome( - status=AgentRunStatus.SUCCEEDED.value, - result=result, - degraded=degraded, - tool_count=1, - failed_tool_count=1 if degraded else 0, - ) - - def _execute_hermes( - self, - *, - payload: OrchestratorRequest, - run_id: str, - ontology: OntologyParseResult, - capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], - requires_confirmation: bool, - task_asset: AgentAssetRead | None, - context_json: dict[str, Any], - ) -> ExecutionOutcome: - if requires_confirmation: - return ExecutionOutcome( - status=AgentRunStatus.BLOCKED.value, - result={ - "message": "Hermes 不会自动执行需要确认的高风险动作,已阻断。", - "degraded": False, - }, - degraded=False, - tool_count=0, - 
failed_tool_count=0, - ) - - rule_response, rule_degraded = self._invoke_tool( - run_id=run_id, - tool_type=AgentToolType.RULE_ENGINE.value, - tool_name=self._rule_tool_name(capabilities), - request_json=self._build_ontology_json(ontology), - context_json=context_json, - executor=lambda: self._build_rule_answer(ontology), - fallback_factory=lambda exc: { - "message": f"规则巡检失败,已降级为待人工复核:{exc}", - "degraded": True, - }, - ) - mcp_response, mcp_degraded = self._invoke_tool( - run_id=run_id, - tool_type=AgentToolType.MCP.value, - tool_name=self._mcp_tool_name(capabilities), - request_json={ - "task_code": task_asset.code if task_asset is not None else "", - "scenario": ontology.scenario, - }, - context_json=context_json, - executor=lambda: self._build_mcp_answer(task_asset, ontology), - fallback_factory=lambda exc: { - "message": f"MCP 调用失败,已使用缓存快照降级:{exc}", - "fallback": "used_cached_snapshot", - }, - ) - degraded = rule_degraded or mcp_degraded - failed_tool_count = int(rule_degraded) + int(mcp_degraded) - result = { - "message": self._build_hermes_message( - task_asset=task_asset, - ontology=ontology, - rule_response=rule_response, - mcp_response=mcp_response, - degraded=degraded, - ), - "report_type": task_asset.code if task_asset is not None else "hermes_runtime", - "degraded": degraded, - } - return ExecutionOutcome( - status=AgentRunStatus.SUCCEEDED.value, - result=result, - degraded=degraded, - tool_count=2, - failed_tool_count=failed_tool_count, - ) - - @staticmethod - def _resolve_next_step( - ontology: OntologyParseResult, - source: str, - *, - context_json: dict[str, Any] | None = None, - ) -> str: - if OrchestratorService._is_expense_review_action(context_json or {}): - return "create_draft" - if ontology.clarification_required: - return "ask_clarification" - if ontology.intent == "draft": - return "create_draft" - if ontology.scenario == "knowledge" or ontology.intent == "explain": - return "search_knowledge" - if ontology.intent == "risk_check" or source 
== AgentRunSource.SCHEDULE.value: - return "run_rule" - if ontology.intent in {"query", "compare"}: - return "query_database" - return "create_draft" - @staticmethod - def _is_expense_review_action(context_json: dict[str, Any]) -> bool: - review_action = str((context_json or {}).get("review_action") or "").strip() - return review_action in { - "save_draft", - "next_step", - "edit_review", - "link_to_existing_draft", - "create_new_claim_from_documents", - } + if ontology.permission.level == AgentPermissionLevel.FORBIDDEN.value: + outcome = ExecutionOutcome( + status=AgentRunStatus.BLOCKED.value, + result={ + "message": ontology.permission.reason, + "clarification_question": ontology.clarification_question, + "degraded": False, + }, + degraded=False, + tool_count=0, + failed_tool_count=0, + ) + selected_agent = None + route_reason = "permission_forbidden" + route_json["stage"] = "blocked" + route_json["route_reason"] = route_reason + elif ontology.clarification_required: + if selected_agent == AgentName.USER_AGENT.value and ontology.scenario == "expense": + clarification_response = self.user_agent_service.respond( + UserAgentRequest( + run_id=run.run_id, + user_id=payload.user_id, + message=payload.message or "", + ontology=ontology, + context_json=context_json, + tool_payload={"clarification_required": True}, + selected_capability_codes=selected_capability_codes, + degraded=False, + requires_confirmation=requires_confirmation, + ) + ) + clarification_result = self.execution_engine._build_user_agent_result( + clarification_response, + degraded=False, + ) + clarification_result.update( + { + "clarification_required": True, + "missing_slots": ontology.missing_slots, + "ambiguity": ontology.ambiguity, + "parse_strategy": ontology.parse_strategy, + } + ) + outcome = ExecutionOutcome( + status=AgentRunStatus.BLOCKED.value, + result=clarification_result, + degraded=False, + tool_count=0, + failed_tool_count=0, + ) + else: + outcome = ExecutionOutcome( + 
status=AgentRunStatus.BLOCKED.value, + result={ + "message": ontology.clarification_question or "需要补充更多上下文。", + "clarification_required": True, + "missing_slots": ontology.missing_slots, + "ambiguity": ontology.ambiguity, + "parse_strategy": ontology.parse_strategy, + "degraded": False, + }, + degraded=False, + tool_count=0, + failed_tool_count=0, + ) + route_reason = "clarification_required" + route_json["stage"] = "clarification" + route_json["route_reason"] = route_reason + elif selected_agent == AgentName.HERMES.value: + outcome = self.execution_engine._execute_hermes( + payload=payload, + run_id=run.run_id, + ontology=ontology, + capabilities=capabilities, + requires_confirmation=requires_confirmation, + task_asset=task_asset, + context_json=context_json, + ) + else: + outcome = self.execution_engine._execute_user_agent( + payload=payload, + run_id=run.run_id, + ontology=ontology, + capabilities=capabilities, + requires_confirmation=requires_confirmation, + context_json=context_json, + ) - @staticmethod - def _is_expense_persistence_action(context_json: dict[str, Any]) -> bool: - review_action = str((context_json or {}).get("review_action") or "").strip() - return review_action in { - "save_draft", - "next_step", - "link_to_existing_draft", - "create_new_claim_from_documents", - } - - @staticmethod - def _flatten_capability_codes( - capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], - ) -> list[str]: - codes: list[str] = [] - for items in capabilities.values(): - for item in items[:2]: - if item.code not in codes: - codes.append(item.code) - return codes - - def _rank_assets( - self, - items: list[AgentAssetListItem], - ontology: OntologyParseResult, - ) -> list[AgentAssetListItem]: - def score(item: AgentAssetListItem) -> tuple[int, str]: - item_tags = {str(value) for value in item.scenario_json or []} - weight = 0 - if ontology.scenario in item_tags: - weight += 3 - if ontology.intent in item_tags: - weight += 2 - for risk_flag in 
ontology.risk_flags: - if risk_flag in item_tags: - weight += 4 - return weight, item.code - - ranked = sorted(items, key=score, reverse=True) - if not ranked: - return [] - scored = [item for item in ranked if score(item)[0] > 0] - return scored or ranked[:1] - - def _invoke_tool( - self, - *, - run_id: str, - tool_type: str, - tool_name: str, - request_json: dict[str, Any], - context_json: dict[str, Any], - executor, - fallback_factory, - ) -> tuple[dict[str, Any], bool]: - started = perf_counter() - try: - self._maybe_raise_simulated_failure(tool_type, context_json) - response = executor() - duration_ms = int((perf_counter() - started) * 1000) - self.run_service.record_tool_call( - run_id=run_id, - tool_type=tool_type, - tool_name=tool_name, - request_json=request_json, - response_json=response, - status="succeeded", - duration_ms=duration_ms, - ) - return response, False - except Exception as exc: - duration_ms = int((perf_counter() - started) * 1000) - response = fallback_factory(exc) - self.run_service.record_tool_call( - run_id=run_id, - tool_type=tool_type, - tool_name=tool_name, - request_json=request_json, - response_json=response, - status="failed", - duration_ms=duration_ms, - error_message=str(exc), - ) - return response, True - - @staticmethod - def _maybe_raise_simulated_failure(tool_type: str, context_json: dict[str, Any]) -> None: - expected = str(context_json.get("simulate_tool_failure") or "").strip().lower() - if not expected: - return - if expected == tool_type.lower(): - raise RuntimeError(f"simulated {tool_type} failure") - - def _build_database_answer( - self, - ontology: OntologyParseResult, - *, - user_id: str | None, - context_json: dict[str, Any], - message: str, - ) -> dict[str, Any]: - if ontology.scenario == "expense": - conditions, scope_label, scoped_to_current_user = self._build_expense_query_scope( - ontology=ontology, - user_id=user_id, - context_json=context_json, - message=message, - ) - count_stmt = 
select(func.count()).select_from(ExpenseClaim) - amount_stmt = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(ExpenseClaim) - for condition in conditions: - count_stmt = count_stmt.where(condition) - amount_stmt = amount_stmt.where(condition) - total_count = int(self.db.scalar(count_stmt) or 0) - total_amount = float(self.db.scalar(amount_stmt) or 0) - - recent_window_applied = self._should_limit_expense_query_to_recent_window(ontology) - display_count = total_count - display_amount = total_amount - older_record_count = 0 - display_conditions = list(conditions) - window_start_date: str | None = None - window_end_date: str | None = None - - if recent_window_applied: - reference_now = self._resolve_reference_now(context_json) - recent_window_start, recent_window_end = self._resolve_expense_recent_window_bounds(reference_now) - recent_condition = self._build_expense_recent_window_condition( - recent_window_start, - recent_window_end, - ) - display_conditions.append(recent_condition) - window_start_date = recent_window_start.date().isoformat() - window_end_date = (recent_window_end - timedelta(microseconds=1)).date().isoformat() - - recent_count_stmt = select(func.count()).select_from(ExpenseClaim).where(recent_condition) - recent_amount_stmt = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(ExpenseClaim).where( - recent_condition - ) - for condition in conditions: - recent_count_stmt = recent_count_stmt.where(condition) - recent_amount_stmt = recent_amount_stmt.where(condition) - display_count = int(self.db.scalar(recent_count_stmt) or 0) - display_amount = float(self.db.scalar(recent_amount_stmt) or 0) - older_record_count = max(0, total_count - display_count) - - preview_stmt = ( - select(ExpenseClaim) - .order_by( - func.coalesce( - ExpenseClaim.submitted_at, - ExpenseClaim.created_at, - ExpenseClaim.occurred_at, - ).desc(), - ExpenseClaim.occurred_at.desc(), - ) - .limit(EXPENSE_QUERY_PREVIEW_LIMIT) - ) - for condition in 
display_conditions: - preview_stmt = preview_stmt.where(condition) - preview_claims = list(self.db.scalars(preview_stmt).all()) - status_groups = self._build_expense_status_groups(display_conditions) - return { - "result_type": "expense_claim_list", - "record_count": display_count, - "total_amount": round(display_amount, 2), - "scope_label": scope_label, - "scoped_to_current_user": scoped_to_current_user, - "recent_window_applied": recent_window_applied, - "window_days": EXPENSE_QUERY_RECENT_WINDOW_DAYS if recent_window_applied else None, - "window_start_date": window_start_date, - "window_end_date": window_end_date, - "preview_count": len(preview_claims), - "older_record_count": older_record_count, - "records": [ - self._build_expense_query_record(claim) - for claim in preview_claims - ], - "status_groups": status_groups, - "has_more_in_window": display_count > len(preview_claims), - "total_matched_count": total_count, - } - - if ontology.scenario == "accounts_receivable": - total_count = int( - self.db.scalar( - select(func.count()).select_from(AccountsReceivableRecord) - ) - or 0 - ) - total_amount = float( - self.db.scalar( - select(func.coalesce(func.sum(AccountsReceivableRecord.amount_outstanding), 0)) - ) - or 0 - ) - return { - "record_count": total_count, - "outstanding_amount": round(total_amount, 2), - } - - total_count = int( - self.db.scalar(select(func.count()).select_from(AccountsPayableRecord)) - or 0 - ) - total_amount = float( - self.db.scalar( - select(func.coalesce(func.sum(AccountsPayableRecord.amount_outstanding), 0)) - ) - or 0 - ) - return { - "record_count": total_count, - "outstanding_amount": round(total_amount, 2), - } - - @staticmethod - def _should_limit_expense_query_to_recent_window( - ontology: OntologyParseResult, - ) -> bool: - has_explicit_claim_no = any( - item.type == "expense_claim" - and str(item.normalized_value or item.value or "").strip() - for item in ontology.entities - ) - has_explicit_time_range = bool( - 
ontology.time_range.start_date or ontology.time_range.end_date - ) - return not has_explicit_claim_no and not has_explicit_time_range - - @staticmethod - def _resolve_reference_now(context_json: dict[str, Any]) -> datetime: - raw_value = str(context_json.get("client_now_iso") or "").strip() - if raw_value: - normalized = raw_value.replace("Z", "+00:00") - try: - parsed = datetime.fromisoformat(normalized) - if parsed.tzinfo is None: - return parsed.replace(tzinfo=UTC) - return parsed.astimezone(UTC) - except ValueError: - pass - return datetime.now(UTC) - - @staticmethod - def _resolve_expense_recent_window_bounds( - reference_now: datetime, - ) -> tuple[datetime, datetime]: - normalized_now = reference_now.astimezone(UTC) - window_end = normalized_now.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1) - window_start = window_end - timedelta(days=EXPENSE_QUERY_RECENT_WINDOW_DAYS) - return window_start, window_end - - @staticmethod - def _build_expense_recent_window_condition( - window_start: datetime, - window_end: datetime, - ) -> Any: - document_datetime = func.coalesce( - ExpenseClaim.submitted_at, - ExpenseClaim.created_at, - ExpenseClaim.occurred_at, - ) - return and_(document_datetime >= window_start, document_datetime < window_end) - - def _build_expense_status_groups( - self, - conditions: list[Any], - ) -> list[dict[str, Any]]: - stmt = select(ExpenseClaim.status, func.count()).select_from(ExpenseClaim).group_by(ExpenseClaim.status) - for condition in conditions: - stmt = stmt.where(condition) - - grouped_counts = { - key: 0 - for key in EXPENSE_STATUS_GROUP_ORDER - } - for status, count in self.db.execute(stmt).all(): - group_key, _ = self._resolve_expense_status_group(str(status or "").strip()) - grouped_counts[group_key] = grouped_counts.get(group_key, 0) + int(count or 0) - - return [ - { - "key": key, - "label": EXPENSE_STATUS_GROUP_LABELS[key], - "count": grouped_counts.get(key, 0), - } - for key in EXPENSE_STATUS_GROUP_ORDER - if 
grouped_counts.get(key, 0) > 0 - ] - - @staticmethod - def _resolve_expense_status_group(status: str) -> tuple[str, str]: - normalized = str(status or "").strip().lower() - if normalized == "draft": - return "draft", EXPENSE_STATUS_GROUP_LABELS["draft"] - if normalized in {"submitted", "review"}: - return "in_progress", EXPENSE_STATUS_GROUP_LABELS["in_progress"] - if normalized in {"approved", "paid"}: - return "completed", EXPENSE_STATUS_GROUP_LABELS["completed"] - return "other", EXPENSE_STATUS_GROUP_LABELS["other"] - - @staticmethod - def _resolve_expense_query_document_datetime( - claim: ExpenseClaim, - ) -> datetime | None: - return claim.submitted_at or claim.created_at or claim.occurred_at - - def _build_expense_query_record( - self, - claim: ExpenseClaim, - ) -> dict[str, Any]: - status_group, status_group_label = self._resolve_expense_status_group(claim.status) - document_datetime = self._resolve_expense_query_document_datetime(claim) - return { - "claim_id": claim.id, - "claim_no": claim.claim_no, - "employee_name": claim.employee_name, - "expense_type": claim.expense_type, - "expense_type_label": EXPENSE_TYPE_LABELS.get(claim.expense_type, claim.expense_type or "报销"), - "amount": round(float(claim.amount), 2), - "status": claim.status, - "status_label": EXPENSE_STATUS_LABELS.get(claim.status, claim.status or "处理中"), - "status_group": status_group, - "status_group_label": status_group_label, - "approval_stage": claim.approval_stage, - "document_date": document_datetime.date().isoformat() if document_datetime else "", - "occurred_at": claim.occurred_at.date().isoformat() if claim.occurred_at else "", - "reason": claim.reason, - "location": claim.location, - } - - def _build_expense_query_scope( - self, - *, - ontology: OntologyParseResult, - user_id: str | None, - context_json: dict[str, Any], - message: str, - ) -> tuple[list[Any], str, bool]: - conditions: list[Any] = [] - explicit_employee_names = list( - dict.fromkeys( - str(item.value or "").strip() - 
for item in ontology.entities - if item.type == "employee" and str(item.value or "").strip() - ) - ) - expense_claim_nos = list( - dict.fromkeys( - str(item.normalized_value or item.value or "").strip().upper() - for item in ontology.entities - if item.type == "expense_claim" and str(item.normalized_value or item.value or "").strip() - ) - ) - expense_types = list( - dict.fromkeys( - str(item.normalized_value or item.value or "").strip() - for item in ontology.entities - if item.type == "expense_type" and str(item.normalized_value or item.value or "").strip() + final_status = ( + AgentRunStatus.BLOCKED.value + if requires_confirmation + and outcome.status == AgentRunStatus.SUCCEEDED.value + and ontology.permission.level == AgentPermissionLevel.APPROVAL_REQUIRED.value + else outcome.status + ) + response_status = self._normalize_response_status(final_status) + result_message = ( + str(outcome.result.get("message", "")).strip() + or "Orchestrator 执行完成。" + ) + trace_summary = OrchestratorTraceSummary( + scenario=ontology.scenario, + intent=ontology.intent, + tool_count=outcome.tool_count, + failed_tool_count=outcome.failed_tool_count, + selected_capability_codes=selected_capability_codes, + degraded=outcome.degraded, + ) + self.run_service.update_run( + run.run_id, + agent=selected_agent or AgentName.ORCHESTRATOR.value, + ontology_json=self.execution_engine._build_ontology_json(ontology), + route_json={ + **route_json, + "requires_confirmation": requires_confirmation, + "degraded": outcome.degraded, + }, + permission_level=ontology.permission.level, + status=final_status, + result_summary=result_message, + error_message=None, + finished_at=datetime.now(UTC), + ) + if conversation is not None and conversation_id: + draft_payload = outcome.result.get("draft_payload") + self.conversation_service.update_state( + conversation_id=conversation_id, + run_id=run.run_id, + scenario=ontology.scenario, + intent=ontology.intent, + context_json=context_json, + 
draft_payload=draft_payload if isinstance(draft_payload, dict) else None, + ) + self.conversation_service.append_message( + conversation_id=conversation_id, + role="assistant", + content=result_message, + run_id=run.run_id, + message_json={ + "status": final_status, + "scenario": ontology.scenario, + "intent": ontology.intent, + "attachment_names": context_json.get("attachment_names", []), + "attachment_count": context_json.get("attachment_count", 0), + "draft_payload": draft_payload if isinstance(draft_payload, dict) else None, + "orchestrator_payload": { + "run_id": run.run_id, + "conversation_id": conversation_id, + "selected_agent": selected_agent, + "route_reason": route_reason, + "permission_level": ontology.permission.level, + "status": response_status, + "requires_confirmation": requires_confirmation, + "trace_summary": trace_summary.model_dump(), + "result": outcome.result, + }, + }, + ) + return OrchestratorResponse( + run_id=run.run_id, + conversation_id=conversation_id, + selected_agent=selected_agent, + route_reason=route_reason, + permission_level=ontology.permission.level, + status=response_status, + result=outcome.result, + requires_confirmation=requires_confirmation, + trace_summary=trace_summary, + ) + except Exception as exc: + logger.exception("Orchestrator run failed run_id=%s", run.run_id) + self.run_service.update_run( + run.run_id, + agent=AgentName.ORCHESTRATOR.value, + route_json={**route_json, "stage": "failed"}, + status=AgentRunStatus.FAILED.value, + result_summary="Orchestrator 执行失败。", + error_message=str(exc), + finished_at=datetime.now(UTC), + ) + if conversation is not None and conversation_id: + self.conversation_service.update_state( + conversation_id=conversation_id, + run_id=run.run_id, + scenario=None, + intent=None, + context_json=context_json, + draft_payload=None, + ) + self.conversation_service.append_message( + conversation_id=conversation_id, + role="assistant", + content=f"Orchestrator 执行失败:{exc}", + run_id=run.run_id, + 
message_json={"status": AgentRunStatus.FAILED.value}, + ) + return OrchestratorResponse( + run_id=run.run_id, + conversation_id=conversation_id, + selected_agent=None, + route_reason="orchestrator_exception", + permission_level=AgentPermissionLevel.READ.value, + status="failed", + result={"message": f"Orchestrator 执行失败:{exc}"}, + requires_confirmation=False, + trace_summary=OrchestratorTraceSummary( + scenario="unknown", + intent="query", + tool_count=0, + failed_tool_count=0, + selected_capability_codes=[], + degraded=False, + ), ) - ) - project_values = self._collect_expense_query_filter_values(ontology, "project") - location_values = self._collect_expense_query_filter_values(ontology, "location") - status_values = list( - dict.fromkeys( - str(item.value).strip() - for item in ontology.constraints - if item.field == "status" and item.operator == "=" and str(item.value).strip() - ) - ) - amount_constraints = [ - item - for item in ontology.constraints - if item.field == "amount" and item.operator in {">", ">=", "<", "<=", "="} - ] - scope_label = "报销单" - scoped_to_current_user = False - - if expense_claim_nos: - conditions.append(ExpenseClaim.claim_no.in_(expense_claim_nos)) - if expense_types: - conditions.append(ExpenseClaim.expense_type.in_(expense_types)) - if status_values: - conditions.append(ExpenseClaim.status.in_(status_values)) - if project_values: - project_conditions = [] - for value in project_values: - pattern = f"%{value}%" - project_conditions.append(ExpenseClaim.project_code.ilike(pattern)) - project_conditions.append(ExpenseClaim.reason.ilike(pattern)) - conditions.append(or_(*project_conditions)) - if location_values: - location_conditions = [] - for value in location_values: - pattern = f"%{value}%" - location_conditions.append(ExpenseClaim.location.ilike(pattern)) - location_conditions.append(ExpenseClaim.reason.ilike(pattern)) - conditions.append(or_(*location_conditions)) - - for item in amount_constraints: - amount_value = float(item.value) 
- if item.operator == ">": - conditions.append(ExpenseClaim.amount > amount_value) - elif item.operator == ">=": - conditions.append(ExpenseClaim.amount >= amount_value) - elif item.operator == "<": - conditions.append(ExpenseClaim.amount < amount_value) - elif item.operator == "<=": - conditions.append(ExpenseClaim.amount <= amount_value) - else: - conditions.append(ExpenseClaim.amount == amount_value) - - if ontology.time_range.start_date: - conditions.append( - ExpenseClaim.occurred_at - >= datetime.fromisoformat(f"{ontology.time_range.start_date}T00:00:00+00:00") - ) - if ontology.time_range.end_date: - conditions.append( - ExpenseClaim.occurred_at - <= datetime.fromisoformat(f"{ontology.time_range.end_date}T23:59:59.999999+00:00") - ) - - has_privileged_access = self._has_privileged_expense_query_access(context_json) - refers_to_self = self._is_self_expense_query(message) - if not has_privileged_access: - owner_conditions, owner_label = self._build_current_user_claim_conditions( - user_id=user_id, - context_json=context_json, - ) - if owner_conditions: - conditions.append(or_(*owner_conditions)) - scope_label = owner_label - scoped_to_current_user = True - else: - conditions.append(ExpenseClaim.id == "__no_visible_claim__") - scope_label = "你的报销单" - scoped_to_current_user = True - elif explicit_employee_names: - conditions.append(ExpenseClaim.employee_name.in_(explicit_employee_names)) - scope_label = f"{'、'.join(explicit_employee_names)}的报销单" - elif refers_to_self: - owner_conditions, owner_label = self._build_current_user_claim_conditions( - user_id=user_id, - context_json=context_json, - ) - if owner_conditions: - conditions.append(or_(*owner_conditions)) - scope_label = owner_label - scoped_to_current_user = True - else: - conditions.append(ExpenseClaim.id == "__no_visible_claim__") - scope_label = "你的报销单" - scoped_to_current_user = True - else: - scope_label = "全部报销单" - return conditions, scope_label, scoped_to_current_user + def _resolve_message( + self, 
+ payload: OrchestratorRequest, + ) -> tuple[str, AgentAssetRead | None]: + task_asset = None + if payload.task_id: + task_asset = self.asset_service.get_asset(payload.task_id) + + if payload.message and payload.message.strip(): + return payload.message.strip(), task_asset + + if task_asset is not None: + description = str(task_asset.description or "").strip() + scenario_text = " ".join(str(item) for item in task_asset.scenario_json) + message = f"{task_asset.name} {description} {scenario_text}".strip() + return message, task_asset + + if payload.source == AgentRunSource.SCHEDULE.value: + return "定时风险巡检任务", task_asset + + raise ValueError("message 或 task_id 至少需要提供一个。") @staticmethod - def _collect_expense_query_filter_values( + def _select_agent( + payload: OrchestratorRequest, ontology: OntologyParseResult, - field_name: str, - ) -> list[str]: - values: list[str] = [] - for entity in ontology.entities: - if entity.type != field_name: - continue - value = str(entity.normalized_value or entity.value or "").strip() - if value: - values.append(value) - for constraint in ontology.constraints: - if constraint.field != field_name or constraint.operator != "=": - continue - value = str(constraint.value or "").strip() - if value: - values.append(value) - return list(dict.fromkeys(values)) + ) -> tuple[str, str]: + if payload.source == AgentRunSource.SCHEDULE.value: + return AgentName.HERMES.value, "schedule_source_defaults_to_hermes" + if payload.source == AgentRunSource.SYSTEM_EVENT.value and ontology.intent == "risk_check": + return AgentName.HERMES.value, "system_event_risk_check_routes_to_hermes" + if ontology.intent == "risk_check" and payload.source == AgentRunSource.SCHEDULE.value: + return AgentName.HERMES.value, "scheduled_risk_check_routes_to_hermes" + if ontology.intent in {"query", "explain", "draft", "compare", "operate"}: + return AgentName.USER_AGENT.value, f"{ontology.intent}_routes_to_user_agent" + return AgentName.USER_AGENT.value, 
"user_message_defaults_to_user_agent" - def _build_current_user_claim_conditions( + def _select_capabilities( self, - *, - user_id: str | None, - context_json: dict[str, Any], - ) -> tuple[list[Any], str]: - normalized_user_id = str(user_id or "").strip() - employee = None - if normalized_user_id: - employee = self.db.scalar( - select(Employee) - .where(func.lower(Employee.email) == normalized_user_id.lower()) - .limit(1) - ) - - conditions: list[Any] = [] - seen: set[tuple[str, str]] = set() - - def add_condition(field_name: str, value: str | None) -> None: - normalized = str(value or "").strip() - if not normalized: - return - - marker = (field_name, normalized.lower()) - if marker in seen: - return - seen.add(marker) - - if field_name == "employee_id": - conditions.append(ExpenseClaim.employee_id == normalized) - return - conditions.append(ExpenseClaim.employee_name == normalized) - - if employee is not None: - add_condition("employee_id", employee.id) - add_condition("employee_name", employee.email) - if self._employee_name_is_unique(employee): - add_condition("employee_name", employee.name) - else: - add_condition("employee_id", normalized_user_id) - add_condition("employee_name", normalized_user_id) - - subject_name = (employee.name if employee is not None else "") or normalized_user_id - if subject_name: - return conditions, "你的报销单" - return conditions, "当前用户的报销单" - - def _employee_name_is_unique(self, employee: Employee) -> bool: - normalized_name = str(employee.name or "").strip() - if not normalized_name: - return False - - same_name_count = int( - self.db.scalar( - select(func.count()).select_from(Employee).where(Employee.name == normalized_name) - ) - or 0 - ) - return same_name_count == 1 - - @staticmethod - def _has_privileged_expense_query_access(context_json: dict[str, Any]) -> bool: - role_codes = { - str(item).strip().lower() - for item in context_json.get("role_codes", []) - if str(item).strip() - } - return bool(role_codes & 
PRIVILEGED_EXPENSE_QUERY_ROLE_CODES) - - @staticmethod - def _is_self_expense_query(message: str) -> bool: - compact_message = "".join(str(message or "").split()) - return any(keyword in compact_message for keyword in SELF_REFERENCE_KEYWORDS) - - @staticmethod - def _build_user_query_result( - ontology: OntologyParseResult, - response: dict[str, Any], - ) -> dict[str, Any]: - if ontology.scenario == "expense": - return { - "message": ( - f"已路由到 User Agent,占位查询结果:命中 {response['record_count']} 笔报销," - f"金额合计 {response['total_amount']} 元。" - ), - "data": response, - } - if ontology.scenario == "accounts_receivable": - return { - "message": ( - f"已路由到 User Agent,占位查询结果:命中 {response['record_count']} 条应收," - f"未回款金额 {response['outstanding_amount']} 元。" - ), - "data": response, - } - return { - "message": ( - f"已路由到 User Agent,占位查询结果:命中 {response['record_count']} 条应付," - f"待付金额 {response['outstanding_amount']} 元。" - ), - "data": response, - } - - @staticmethod - def _build_user_agent_result( - response: UserAgentResponse, - *, - degraded: bool, - ) -> dict[str, Any]: - result = { - "message": response.answer, - "answer": response.answer, - "citations": [item.model_dump() for item in response.citations], - "suggested_actions": [item.model_dump() for item in response.suggested_actions], - "risk_flags": response.risk_flags, - "requires_confirmation": response.requires_confirmation, - "degraded": degraded, - } - if response.query_payload is not None: - result["query_payload"] = response.query_payload.model_dump() - if response.draft_payload is not None: - result["draft_payload"] = response.draft_payload.model_dump() - if response.review_payload is not None: - result["review_payload"] = response.review_payload.model_dump() - return result - - def _build_knowledge_answer( - self, - *, - message: str, - ontology: OntologyParseResult, - capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], - context_json: dict[str, Any], - ) -> dict[str, Any]: - del ontology, 
capabilities - conversation_history = context_json.get("conversation_history") - if not isinstance(conversation_history, list): - conversation_history = None - payload = self.knowledge_service.search_knowledge( - message, - conversation_history=conversation_history, - limit=8, + *, + payload: OrchestratorRequest, + ontology: OntologyParseResult, + task_asset: AgentAssetRead | None, + ) -> dict[str, list[AgentAssetListItem | AgentAssetRead]]: + domain_value = SCENARIO_TO_DOMAIN.get(ontology.scenario) + rules = self.execution_engine._rank_assets( + self.asset_service.list_assets( + asset_type=AgentAssetType.RULE.value, + status=AgentAssetStatus.ACTIVE.value, + domain=domain_value if domain_value not in {"knowledge", "system"} else None, + ), + ontology, ) - references = [str(item).strip() for item in list(payload.get("references") or []) if str(item).strip()] - if references: - payload["references"] = references - return payload - - @staticmethod - def _build_rule_answer(ontology: OntologyParseResult) -> dict[str, Any]: - risk_text = ( - "、".join(ontology.risk_flags) - if ontology.risk_flags - else "未识别到明确风险标签" - ) - return { - "message": f"已完成占位规则检查,风险标签:{risk_text}。", - "risk_flags": ontology.risk_flags, - } - - @staticmethod - def _build_mcp_answer( - task_asset: AgentAssetRead | None, - ontology: OntologyParseResult, - ) -> dict[str, Any]: - return { - "message": ( - f"已调用占位 MCP 快照,任务={task_asset.code if task_asset else 'none'}," - f"scenario={ontology.scenario}。" - ), - "snapshot": "stubbed", - } - - @staticmethod - def _build_hermes_message( - *, - task_asset: AgentAssetRead | None, - ontology: OntologyParseResult, - rule_response: dict[str, Any], - mcp_response: dict[str, Any], - degraded: bool, - ) -> str: - task_code = task_asset.code if task_asset is not None else "task.unspecified" - suffix = ",其中部分能力已降级。" if degraded else "。" - return ( - f"Hermes 占位执行完成:任务 {task_code}," - f"场景 {ontology.scenario},规则结果={rule_response.get('message', '')}," - f"MCP 
结果={mcp_response.get('message', '')}{suffix}" - ) - - @staticmethod - def _database_tool_name(scenario: str) -> str: - if scenario == "expense": - return "database.expense_claims.lookup" - if scenario == "accounts_receivable": - return "database.accounts_receivable.lookup" - return "database.accounts_payable.lookup" - - @staticmethod - def _rule_tool_name( - capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], - ) -> str: - if capabilities["rules"]: - return capabilities["rules"][0].code - return "rule_engine.default_risk_check" - - @staticmethod - def _mcp_tool_name( - capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], - ) -> str: - if capabilities["mcps"]: - return capabilities["mcps"][0].code - return "mcp.default_snapshot" - - @staticmethod - def _build_ontology_json(ontology: OntologyParseResult) -> dict[str, Any]: - return { - "scenario": ontology.scenario, - "intent": ontology.intent, - "entities": [item.model_dump() for item in ontology.entities], - "time_range": ontology.time_range.model_dump(), - "metrics": [item.model_dump() for item in ontology.metrics], - "constraints": [item.model_dump() for item in ontology.constraints], - "risk_flags": ontology.risk_flags, - "permission": ontology.permission.model_dump(), - } - - @staticmethod - def _normalize_response_status(status: str) -> str: - if status == AgentRunStatus.FAILED.value: - return "failed" - if status == AgentRunStatus.BLOCKED.value: - return "blocked" - return "succeeded" + skills = self.execution_engine._rank_assets( + self.asset_service.list_assets( + asset_type=AgentAssetType.SKILL.value, + status=AgentAssetStatus.ACTIVE.value, + domain=domain_value if domain_value not in {"system"} else None, + ), + ontology, + ) + mcps = self.execution_engine._rank_assets( + self.asset_service.list_assets( + asset_type=AgentAssetType.MCP.value, + status=AgentAssetStatus.ACTIVE.value, + ), + ontology, + ) + tasks: list[AgentAssetListItem | AgentAssetRead] = [] + if task_asset is 
not None and task_asset.status == AgentAssetStatus.ACTIVE.value: + tasks.append(task_asset) + elif payload.source == AgentRunSource.SCHEDULE.value: + tasks = self.execution_engine._rank_assets( + self.asset_service.list_assets( + asset_type=AgentAssetType.TASK.value, + status=AgentAssetStatus.ACTIVE.value, + ), + ontology, + ) + + return { + "rules": rules, + "skills": skills, + "mcps": mcps, + "tasks": tasks, + } + + @staticmethod + def _normalize_response_status(status: str) -> str: + if status == AgentRunStatus.FAILED.value: + return "failed" + if status == AgentRunStatus.BLOCKED.value: + return "blocked" + return "succeeded" diff --git a/server/src/app/services/orchestrator_execution.py b/server/src/app/services/orchestrator_execution.py new file mode 100644 index 0000000..73fe053 --- /dev/null +++ b/server/src/app/services/orchestrator_execution.py @@ -0,0 +1,626 @@ +from __future__ import annotations + +from dataclasses import dataclass +from time import perf_counter +from typing import Any + +from app.core.agent_enums import AgentRunSource, AgentRunStatus, AgentToolType +from app.schemas.agent_asset import AgentAssetListItem, AgentAssetRead +from app.schemas.ontology import OntologyParseResult +from app.schemas.orchestrator import OrchestratorRequest +from app.schemas.user_agent import UserAgentRequest, UserAgentResponse + + +@dataclass(slots=True) +class ExecutionOutcome: + status: str + result: dict[str, Any] + degraded: bool + tool_count: int + failed_tool_count: int + + +class OrchestratorExecutionEngine: + def __init__( + self, + *, + run_service, + expense_claim_service, + knowledge_service, + user_agent_service, + database_query_builder, + ) -> None: + self.run_service = run_service + self.expense_claim_service = expense_claim_service + self.knowledge_service = knowledge_service + self.user_agent_service = user_agent_service + self.database_query_builder = database_query_builder + + def _execute_user_agent( + self, + *, + payload: OrchestratorRequest, 
+ run_id: str, + ontology: OntologyParseResult, + capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], + requires_confirmation: bool, + context_json: dict[str, Any], + ) -> ExecutionOutcome: + selected_capability_codes = self._flatten_capability_codes(capabilities) + if requires_confirmation: + response, degraded = self._invoke_tool( + run_id=run_id, + tool_type=AgentToolType.LLM.value, + tool_name="user_agent.confirmation_placeholder", + request_json={ + "message": payload.message, + "permission_level": ontology.permission.level, + }, + context_json=context_json, + executor=lambda: { + "confirmation_title": "操作需要确认", + "message": f"{ontology.permission.reason} 当前仅返回确认摘要,不直接执行动作。", + }, + fallback_factory=lambda exc: { + "confirmation_title": "操作需要确认", + "message": f"确认摘要生成失败,已阻断自动执行:{exc}", + }, + ) + return ExecutionOutcome( + status=AgentRunStatus.BLOCKED.value, + result={**response, "degraded": degraded}, + degraded=degraded, + tool_count=1, + failed_tool_count=1 if degraded else 0, + ) + + next_step = self._resolve_next_step( + ontology, + payload.source, + context_json=context_json, + ) + if next_step == "query_database": + tool_payload, degraded = self._invoke_tool( + run_id=run_id, + tool_type=AgentToolType.DATABASE.value, + tool_name=self._database_tool_name(ontology.scenario), + request_json=self._build_ontology_json(ontology), + context_json=context_json, + executor=lambda: self.database_query_builder.build_database_answer( + ontology, + user_id=payload.user_id, + context_json=context_json, + message=payload.message or "", + ), + fallback_factory=lambda exc: { + "message": f"数据库查询暂时不可用,已返回降级说明:{exc}", + "degraded": True, + }, + ) + result = self._build_user_agent_result( + self.user_agent_service.respond( + UserAgentRequest( + run_id=run_id, + user_id=payload.user_id, + message=payload.message or "", + ontology=ontology, + context_json=context_json, + tool_payload=tool_payload, + selected_capability_codes=selected_capability_codes, + 
degraded=degraded, + requires_confirmation=requires_confirmation, + ) + ), + degraded=degraded, + ) + return ExecutionOutcome( + status=AgentRunStatus.SUCCEEDED.value, + result=result, + degraded=degraded, + tool_count=1, + failed_tool_count=1 if degraded else 0, + ) + + if next_step == "search_knowledge": + tool_payload, degraded = self._invoke_tool( + run_id=run_id, + tool_type=AgentToolType.DATABASE.value, + tool_name="knowledge.search", + request_json=self._build_ontology_json(ontology), + context_json=context_json, + executor=lambda: self._build_knowledge_answer( + message=payload.message or "", + ontology=ontology, + capabilities=capabilities, + context_json=context_json, + ), + fallback_factory=lambda exc: { + "message": f"知识检索暂时不可用,建议稍后重试:{exc}", + "degraded": True, + }, + ) + result = self._build_user_agent_result( + self.user_agent_service.respond( + UserAgentRequest( + run_id=run_id, + user_id=payload.user_id, + message=payload.message or "", + ontology=ontology, + context_json=context_json, + tool_payload=tool_payload, + selected_capability_codes=selected_capability_codes, + degraded=degraded, + requires_confirmation=requires_confirmation, + ) + ), + degraded=degraded, + ) + return ExecutionOutcome( + status=AgentRunStatus.SUCCEEDED.value, + result=result, + degraded=degraded, + tool_count=1, + failed_tool_count=1 if degraded else 0, + ) + + if next_step == "run_rule": + tool_payload, degraded = self._invoke_tool( + run_id=run_id, + tool_type=AgentToolType.RULE_ENGINE.value, + tool_name=self._rule_tool_name(capabilities), + request_json=self._build_ontology_json(ontology), + context_json=context_json, + executor=lambda: self._build_rule_answer(ontology), + fallback_factory=lambda exc: { + "message": f"规则检查暂时不可用,已返回人工复核建议:{exc}", + "degraded": True, + }, + ) + result = self._build_user_agent_result( + self.user_agent_service.respond( + UserAgentRequest( + run_id=run_id, + user_id=payload.user_id, + message=payload.message or "", + ontology=ontology, + 
context_json=context_json, + tool_payload=tool_payload, + selected_capability_codes=selected_capability_codes, + degraded=degraded, + requires_confirmation=requires_confirmation, + ) + ), + degraded=degraded, + ) + return ExecutionOutcome( + status=AgentRunStatus.SUCCEEDED.value, + result=result, + degraded=degraded, + tool_count=1, + failed_tool_count=1 if degraded else 0, + ) + + tool_type = AgentToolType.LLM.value + tool_name = "user_agent.draft_placeholder" + executor = lambda: { + "message": ( + f"已生成 {ontology.scenario} 场景草稿," + "占位能力后续由 Day 5 User Agent 接管。" + ), + "draft_only": True, + } + fallback_factory = lambda exc: { + "message": f"内容整理暂时不可用,请稍后再试:{exc}", + "degraded": True, + } + + if ontology.scenario == "expense" or self._is_expense_review_action(context_json): + is_persistence_action = self._is_expense_persistence_action(context_json) + tool_type = ( + AgentToolType.DATABASE.value + if is_persistence_action + else AgentToolType.LLM.value + ) + tool_name = ( + "database.expense_claims.save_or_submit" + if is_persistence_action + else "user_agent.expense_review_preview" + ) + executor = lambda: self.expense_claim_service.save_or_submit_from_ontology( + run_id=run_id, + user_id=payload.user_id, + message=payload.message or "", + ontology=ontology, + context_json=context_json, + ) + fallback_factory = lambda exc: { + "message": ( + f"报销草稿落库失败,请稍后再试:{exc}" + if is_persistence_action + else f"报销内容预览生成失败,请稍后再试:{exc}" + ), + "degraded": True, + } + + tool_payload, degraded = self._invoke_tool( + run_id=run_id, + tool_type=tool_type, + tool_name=tool_name, + request_json=self._build_ontology_json(ontology), + context_json=context_json, + executor=executor, + fallback_factory=fallback_factory, + ) + result = self._build_user_agent_result( + self.user_agent_service.respond( + UserAgentRequest( + run_id=run_id, + user_id=payload.user_id, + message=payload.message or "", + ontology=ontology, + context_json=context_json, + tool_payload=tool_payload, + 
selected_capability_codes=selected_capability_codes, + degraded=degraded, + requires_confirmation=requires_confirmation, + ) + ), + degraded=degraded, + ) + return ExecutionOutcome( + status=AgentRunStatus.SUCCEEDED.value, + result=result, + degraded=degraded, + tool_count=1, + failed_tool_count=1 if degraded else 0, + ) + + def _execute_hermes( + self, + *, + payload: OrchestratorRequest, + run_id: str, + ontology: OntologyParseResult, + capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], + requires_confirmation: bool, + task_asset: AgentAssetRead | None, + context_json: dict[str, Any], + ) -> ExecutionOutcome: + if requires_confirmation: + return ExecutionOutcome( + status=AgentRunStatus.BLOCKED.value, + result={ + "message": "Hermes 不会自动执行需要确认的高风险动作,已阻断。", + "degraded": False, + }, + degraded=False, + tool_count=0, + failed_tool_count=0, + ) + + rule_response, rule_degraded = self._invoke_tool( + run_id=run_id, + tool_type=AgentToolType.RULE_ENGINE.value, + tool_name=self._rule_tool_name(capabilities), + request_json=self._build_ontology_json(ontology), + context_json=context_json, + executor=lambda: self._build_rule_answer(ontology), + fallback_factory=lambda exc: { + "message": f"规则巡检失败,已降级为待人工复核:{exc}", + "degraded": True, + }, + ) + mcp_response, mcp_degraded = self._invoke_tool( + run_id=run_id, + tool_type=AgentToolType.MCP.value, + tool_name=self._mcp_tool_name(capabilities), + request_json={ + "task_code": task_asset.code if task_asset is not None else "", + "scenario": ontology.scenario, + }, + context_json=context_json, + executor=lambda: self._build_mcp_answer(task_asset, ontology), + fallback_factory=lambda exc: { + "message": f"MCP 调用失败,已使用缓存快照降级:{exc}", + "fallback": "used_cached_snapshot", + }, + ) + degraded = rule_degraded or mcp_degraded + failed_tool_count = int(rule_degraded) + int(mcp_degraded) + result = { + "message": self._build_hermes_message( + task_asset=task_asset, + ontology=ontology, + rule_response=rule_response, + 
mcp_response=mcp_response, + degraded=degraded, + ), + "report_type": task_asset.code if task_asset is not None else "hermes_runtime", + "degraded": degraded, + } + return ExecutionOutcome( + status=AgentRunStatus.SUCCEEDED.value, + result=result, + degraded=degraded, + tool_count=2, + failed_tool_count=failed_tool_count, + ) + + @staticmethod + def _resolve_next_step( + ontology: OntologyParseResult, + source: str, + *, + context_json: dict[str, Any] | None = None, + ) -> str: + if OrchestratorExecutionEngine._is_expense_review_action(context_json or {}): + return "create_draft" + if ontology.clarification_required: + return "ask_clarification" + if ontology.intent == "draft": + return "create_draft" + if ontology.scenario == "knowledge" or ontology.intent == "explain": + return "search_knowledge" + if ontology.intent == "risk_check" or source == AgentRunSource.SCHEDULE.value: + return "run_rule" + if ontology.intent in {"query", "compare"}: + return "query_database" + return "create_draft" + + @staticmethod + def _is_expense_review_action(context_json: dict[str, Any]) -> bool: + review_action = str((context_json or {}).get("review_action") or "").strip() + return review_action in { + "save_draft", + "next_step", + "edit_review", + "link_to_existing_draft", + "create_new_claim_from_documents", + } + + @staticmethod + def _is_expense_persistence_action(context_json: dict[str, Any]) -> bool: + review_action = str((context_json or {}).get("review_action") or "").strip() + return review_action in { + "save_draft", + "next_step", + "link_to_existing_draft", + "create_new_claim_from_documents", + } + + @staticmethod + def _flatten_capability_codes( + capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], + ) -> list[str]: + codes: list[str] = [] + for items in capabilities.values(): + for item in items[:2]: + if item.code not in codes: + codes.append(item.code) + return codes + + def _rank_assets( + self, + items: list[AgentAssetListItem], + ontology: 
OntologyParseResult, + ) -> list[AgentAssetListItem]: + def score(item: AgentAssetListItem) -> tuple[int, str]: + item_tags = {str(value) for value in item.scenario_json or []} + weight = 0 + if ontology.scenario in item_tags: + weight += 3 + if ontology.intent in item_tags: + weight += 2 + for risk_flag in ontology.risk_flags: + if risk_flag in item_tags: + weight += 4 + return weight, item.code + + ranked = sorted(items, key=score, reverse=True) + if not ranked: + return [] + scored = [item for item in ranked if score(item)[0] > 0] + return scored or ranked[:1] + + def _invoke_tool( + self, + *, + run_id: str, + tool_type: str, + tool_name: str, + request_json: dict[str, Any], + context_json: dict[str, Any], + executor, + fallback_factory, + ) -> tuple[dict[str, Any], bool]: + started = perf_counter() + try: + self._maybe_raise_simulated_failure(tool_type, context_json) + response = executor() + duration_ms = int((perf_counter() - started) * 1000) + self.run_service.record_tool_call( + run_id=run_id, + tool_type=tool_type, + tool_name=tool_name, + request_json=request_json, + response_json=response, + status="succeeded", + duration_ms=duration_ms, + ) + return response, False + except Exception as exc: + duration_ms = int((perf_counter() - started) * 1000) + response = fallback_factory(exc) + self.run_service.record_tool_call( + run_id=run_id, + tool_type=tool_type, + tool_name=tool_name, + request_json=request_json, + response_json=response, + status="failed", + duration_ms=duration_ms, + error_message=str(exc), + ) + return response, True + + @staticmethod + def _maybe_raise_simulated_failure(tool_type: str, context_json: dict[str, Any]) -> None: + expected = str(context_json.get("simulate_tool_failure") or "").strip().lower() + if not expected: + return + if expected == tool_type.lower(): + raise RuntimeError(f"simulated {tool_type} failure") + + @staticmethod + def _build_user_query_result( + ontology: OntologyParseResult, + response: dict[str, Any], + ) -> 
dict[str, Any]: + if ontology.scenario == "expense": + return { + "message": ( + f"已路由到 User Agent,占位查询结果:命中 {response['record_count']} 笔报销," + f"金额合计 {response['total_amount']} 元。" + ), + "data": response, + } + if ontology.scenario == "accounts_receivable": + return { + "message": ( + f"已路由到 User Agent,占位查询结果:命中 {response['record_count']} 条应收," + f"未回款金额 {response['outstanding_amount']} 元。" + ), + "data": response, + } + return { + "message": ( + f"已路由到 User Agent,占位查询结果:命中 {response['record_count']} 条应付," + f"待付金额 {response['outstanding_amount']} 元。" + ), + "data": response, + } + + @staticmethod + def _build_user_agent_result( + response: UserAgentResponse, + *, + degraded: bool, + ) -> dict[str, Any]: + result = { + "message": response.answer, + "answer": response.answer, + "citations": [item.model_dump() for item in response.citations], + "suggested_actions": [item.model_dump() for item in response.suggested_actions], + "risk_flags": response.risk_flags, + "requires_confirmation": response.requires_confirmation, + "degraded": degraded, + } + if response.query_payload is not None: + result["query_payload"] = response.query_payload.model_dump() + if response.draft_payload is not None: + result["draft_payload"] = response.draft_payload.model_dump() + if response.review_payload is not None: + result["review_payload"] = response.review_payload.model_dump() + return result + + def _build_knowledge_answer( + self, + *, + message: str, + ontology: OntologyParseResult, + capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], + context_json: dict[str, Any], + ) -> dict[str, Any]: + del ontology, capabilities + conversation_history = context_json.get("conversation_history") + if not isinstance(conversation_history, list): + conversation_history = None + payload = self.knowledge_service.search_knowledge( + message, + conversation_history=conversation_history, + limit=8, + ) + references = [str(item).strip() for item in list(payload.get("references") or []) 
if str(item).strip()] + if references: + payload["references"] = references + return payload + + @staticmethod + def _build_rule_answer(ontology: OntologyParseResult) -> dict[str, Any]: + risk_text = ( + "、".join(ontology.risk_flags) + if ontology.risk_flags + else "未识别到明确风险标签" + ) + return { + "message": f"已完成占位规则检查,风险标签:{risk_text}。", + "risk_flags": ontology.risk_flags, + } + + @staticmethod + def _build_mcp_answer( + task_asset: AgentAssetRead | None, + ontology: OntologyParseResult, + ) -> dict[str, Any]: + return { + "message": ( + f"已调用占位 MCP 快照,任务={task_asset.code if task_asset else 'none'}," + f"scenario={ontology.scenario}。" + ), + "snapshot": "stubbed", + } + + @staticmethod + def _build_hermes_message( + *, + task_asset: AgentAssetRead | None, + ontology: OntologyParseResult, + rule_response: dict[str, Any], + mcp_response: dict[str, Any], + degraded: bool, + ) -> str: + task_code = task_asset.code if task_asset is not None else "task.unspecified" + suffix = ",其中部分能力已降级。" if degraded else "。" + return ( + f"Hermes 占位执行完成:任务 {task_code}," + f"场景 {ontology.scenario},规则结果={rule_response.get('message', '')}," + f"MCP 结果={mcp_response.get('message', '')}{suffix}" + ) + + @staticmethod + def _database_tool_name(scenario: str) -> str: + if scenario == "expense": + return "database.expense_claims.lookup" + if scenario == "accounts_receivable": + return "database.accounts_receivable.lookup" + return "database.accounts_payable.lookup" + + @staticmethod + def _rule_tool_name( + capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], + ) -> str: + if capabilities["rules"]: + return capabilities["rules"][0].code + return "rule_engine.default_risk_check" + + @staticmethod + def _mcp_tool_name( + capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]], + ) -> str: + if capabilities["mcps"]: + return capabilities["mcps"][0].code + return "mcp.default_snapshot" + + @staticmethod + def _build_ontology_json(ontology: OntologyParseResult) -> 
dict[str, Any]: + return { + "scenario": ontology.scenario, + "intent": ontology.intent, + "entities": [item.model_dump() for item in ontology.entities], + "time_range": ontology.time_range.model_dump(), + "metrics": [item.model_dump() for item in ontology.metrics], + "constraints": [item.model_dump() for item in ontology.constraints], + "risk_flags": ontology.risk_flags, + "permission": ontology.permission.model_dump(), + } + diff --git a/server/src/app/services/orchestrator_expense_query.py b/server/src/app/services/orchestrator_expense_query.py new file mode 100644 index 0000000..d8b39c4 --- /dev/null +++ b/server/src/app/services/orchestrator_expense_query.py @@ -0,0 +1,535 @@ +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from typing import Any + +from sqlalchemy import and_, func, or_, select +from sqlalchemy.orm import Session + +from app.models.employee import Employee +from app.models.financial_record import ( + AccountsPayableRecord, + AccountsReceivableRecord, + ExpenseClaim, +) +from app.schemas.ontology import OntologyParseResult + +PRIVILEGED_EXPENSE_QUERY_ROLE_CODES = {"finance"} +SELF_REFERENCE_KEYWORDS = ("我的", "我自己", "本人", "我名下", "给我查", "我提交", "我申请") +EXPENSE_QUERY_RECENT_WINDOW_DAYS = 10 +EXPENSE_QUERY_PREVIEW_LIMIT = 20 +EXPENSE_STATUS_LABELS = { + "draft": "草稿", + "submitted": "已提交", + "review": "审核中", + "approved": "已通过", + "rejected": "已驳回", + "paid": "已付款", +} +EXPENSE_STATUS_GROUP_LABELS = { + "draft": "草稿", + "in_progress": "审批中", + "completed": "审批完成", + "other": "其他状态", +} +EXPENSE_STATUS_GROUP_ORDER = ("draft", "in_progress", "completed", "other") +EXPENSE_TYPE_LABELS = { + "travel": "差旅费", + "hotel": "住宿费", + "transport": "交通费", + "meal": "餐费", + "meeting": "会务费", + "entertainment": "业务招待费", + "office": "办公费", + "training": "培训费", + "communication": "通讯费", + "welfare": "福利费", + "other": "其他费用", +} + + +class OrchestratorDatabaseQueryBuilder: + def __init__(self, db: Session) -> None: + self.db = 
db + + def build_database_answer( + self, + ontology: OntologyParseResult, + *, + user_id: str | None, + context_json: dict[str, Any], + message: str, + ) -> dict[str, Any]: + if ontology.scenario == "expense": + return self._build_expense_database_answer( + ontology=ontology, + user_id=user_id, + context_json=context_json, + message=message, + ) + + if ontology.scenario == "accounts_receivable": + return self._build_accounts_receivable_answer() + + return self._build_accounts_payable_answer() + + def _build_expense_database_answer( + self, + *, + ontology: OntologyParseResult, + user_id: str | None, + context_json: dict[str, Any], + message: str, + ) -> dict[str, Any]: + conditions, scope_label, scoped_to_current_user = self._build_expense_query_scope( + ontology=ontology, + user_id=user_id, + context_json=context_json, + message=message, + ) + count_stmt = select(func.count()).select_from(ExpenseClaim) + amount_stmt = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(ExpenseClaim) + for condition in conditions: + count_stmt = count_stmt.where(condition) + amount_stmt = amount_stmt.where(condition) + total_count = int(self.db.scalar(count_stmt) or 0) + total_amount = float(self.db.scalar(amount_stmt) or 0) + + recent_window_applied = self._should_limit_expense_query_to_recent_window(ontology) + display_count = total_count + display_amount = total_amount + older_record_count = 0 + display_conditions = list(conditions) + window_start_date: str | None = None + window_end_date: str | None = None + + if recent_window_applied: + reference_now = self._resolve_reference_now(context_json) + recent_window_start, recent_window_end = self._resolve_expense_recent_window_bounds(reference_now) + recent_condition = self._build_expense_recent_window_condition( + recent_window_start, + recent_window_end, + ) + display_conditions.append(recent_condition) + window_start_date = recent_window_start.date().isoformat() + window_end_date = (recent_window_end - 
timedelta(microseconds=1)).date().isoformat() + + recent_count_stmt = select(func.count()).select_from(ExpenseClaim).where(recent_condition) + recent_amount_stmt = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(ExpenseClaim).where( + recent_condition + ) + for condition in conditions: + recent_count_stmt = recent_count_stmt.where(condition) + recent_amount_stmt = recent_amount_stmt.where(condition) + display_count = int(self.db.scalar(recent_count_stmt) or 0) + display_amount = float(self.db.scalar(recent_amount_stmt) or 0) + older_record_count = max(0, total_count - display_count) + + preview_stmt = ( + select(ExpenseClaim) + .order_by( + func.coalesce( + ExpenseClaim.submitted_at, + ExpenseClaim.created_at, + ExpenseClaim.occurred_at, + ).desc(), + ExpenseClaim.occurred_at.desc(), + ) + .limit(EXPENSE_QUERY_PREVIEW_LIMIT) + ) + for condition in display_conditions: + preview_stmt = preview_stmt.where(condition) + preview_claims = list(self.db.scalars(preview_stmt).all()) + status_groups = self._build_expense_status_groups(display_conditions) + return { + "result_type": "expense_claim_list", + "record_count": display_count, + "total_amount": round(display_amount, 2), + "scope_label": scope_label, + "scoped_to_current_user": scoped_to_current_user, + "recent_window_applied": recent_window_applied, + "window_days": EXPENSE_QUERY_RECENT_WINDOW_DAYS if recent_window_applied else None, + "window_start_date": window_start_date, + "window_end_date": window_end_date, + "preview_count": len(preview_claims), + "older_record_count": older_record_count, + "records": [ + self._build_expense_query_record(claim) + for claim in preview_claims + ], + "status_groups": status_groups, + "has_more_in_window": display_count > len(preview_claims), + "total_matched_count": total_count, + } + + def _build_accounts_receivable_answer(self) -> dict[str, Any]: + total_count = int( + self.db.scalar( + select(func.count()).select_from(AccountsReceivableRecord) + ) + or 0 + ) 
+ total_amount = float( + self.db.scalar( + select(func.coalesce(func.sum(AccountsReceivableRecord.amount_outstanding), 0)) + ) + or 0 + ) + return { + "record_count": total_count, + "outstanding_amount": round(total_amount, 2), + } + + def _build_accounts_payable_answer(self) -> dict[str, Any]: + total_count = int( + self.db.scalar(select(func.count()).select_from(AccountsPayableRecord)) + or 0 + ) + total_amount = float( + self.db.scalar( + select(func.coalesce(func.sum(AccountsPayableRecord.amount_outstanding), 0)) + ) + or 0 + ) + return { + "record_count": total_count, + "outstanding_amount": round(total_amount, 2), + } + + @staticmethod + def _should_limit_expense_query_to_recent_window( + ontology: OntologyParseResult, + ) -> bool: + has_explicit_claim_no = any( + item.type == "expense_claim" + and str(item.normalized_value or item.value or "").strip() + for item in ontology.entities + ) + has_explicit_time_range = bool( + ontology.time_range.start_date or ontology.time_range.end_date + ) + return not has_explicit_claim_no and not has_explicit_time_range + + @staticmethod + def _resolve_reference_now(context_json: dict[str, Any]) -> datetime: + raw_value = str(context_json.get("client_now_iso") or "").strip() + if raw_value: + normalized = raw_value.replace("Z", "+00:00") + try: + parsed = datetime.fromisoformat(normalized) + if parsed.tzinfo is None: + return parsed.replace(tzinfo=UTC) + return parsed.astimezone(UTC) + except ValueError: + pass + return datetime.now(UTC) + + @staticmethod + def _resolve_expense_recent_window_bounds( + reference_now: datetime, + ) -> tuple[datetime, datetime]: + normalized_now = reference_now.astimezone(UTC) + window_end = normalized_now.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(days=1) + window_start = window_end - timedelta(days=EXPENSE_QUERY_RECENT_WINDOW_DAYS) + return window_start, window_end + + @staticmethod + def _build_expense_recent_window_condition( + window_start: datetime, + window_end: 
datetime, + ) -> Any: + document_datetime = func.coalesce( + ExpenseClaim.submitted_at, + ExpenseClaim.created_at, + ExpenseClaim.occurred_at, + ) + return and_(document_datetime >= window_start, document_datetime < window_end) + + def _build_expense_status_groups( + self, + conditions: list[Any], + ) -> list[dict[str, Any]]: + stmt = select(ExpenseClaim.status, func.count()).select_from(ExpenseClaim).group_by(ExpenseClaim.status) + for condition in conditions: + stmt = stmt.where(condition) + + grouped_counts = { + key: 0 + for key in EXPENSE_STATUS_GROUP_ORDER + } + for status, count in self.db.execute(stmt).all(): + group_key, _ = self._resolve_expense_status_group(str(status or "").strip()) + grouped_counts[group_key] = grouped_counts.get(group_key, 0) + int(count or 0) + + return [ + { + "key": key, + "label": EXPENSE_STATUS_GROUP_LABELS[key], + "count": grouped_counts.get(key, 0), + } + for key in EXPENSE_STATUS_GROUP_ORDER + if grouped_counts.get(key, 0) > 0 + ] + + @staticmethod + def _resolve_expense_status_group(status: str) -> tuple[str, str]: + normalized = str(status or "").strip().lower() + if normalized == "draft": + return "draft", EXPENSE_STATUS_GROUP_LABELS["draft"] + if normalized in {"submitted", "review"}: + return "in_progress", EXPENSE_STATUS_GROUP_LABELS["in_progress"] + if normalized in {"approved", "paid"}: + return "completed", EXPENSE_STATUS_GROUP_LABELS["completed"] + return "other", EXPENSE_STATUS_GROUP_LABELS["other"] + + @staticmethod + def _resolve_expense_query_document_datetime( + claim: ExpenseClaim, + ) -> datetime | None: + return claim.submitted_at or claim.created_at or claim.occurred_at + + def _build_expense_query_record( + self, + claim: ExpenseClaim, + ) -> dict[str, Any]: + status_group, status_group_label = self._resolve_expense_status_group(claim.status) + document_datetime = self._resolve_expense_query_document_datetime(claim) + return { + "claim_id": claim.id, + "claim_no": claim.claim_no, + "employee_name": 
claim.employee_name, + "expense_type": claim.expense_type, + "expense_type_label": EXPENSE_TYPE_LABELS.get(claim.expense_type, claim.expense_type or "报销"), + "amount": round(float(claim.amount), 2), + "status": claim.status, + "status_label": EXPENSE_STATUS_LABELS.get(claim.status, claim.status or "处理中"), + "status_group": status_group, + "status_group_label": status_group_label, + "approval_stage": claim.approval_stage, + "document_date": document_datetime.date().isoformat() if document_datetime else "", + "occurred_at": claim.occurred_at.date().isoformat() if claim.occurred_at else "", + "reason": claim.reason, + "location": claim.location, + } + + def _build_expense_query_scope( + self, + *, + ontology: OntologyParseResult, + user_id: str | None, + context_json: dict[str, Any], + message: str, + ) -> tuple[list[Any], str, bool]: + conditions: list[Any] = [] + explicit_employee_names = list( + dict.fromkeys( + str(item.value or "").strip() + for item in ontology.entities + if item.type == "employee" and str(item.value or "").strip() + ) + ) + expense_claim_nos = list( + dict.fromkeys( + str(item.normalized_value or item.value or "").strip().upper() + for item in ontology.entities + if item.type == "expense_claim" and str(item.normalized_value or item.value or "").strip() + ) + ) + expense_types = list( + dict.fromkeys( + str(item.normalized_value or item.value or "").strip() + for item in ontology.entities + if item.type == "expense_type" and str(item.normalized_value or item.value or "").strip() + ) + ) + project_values = self._collect_expense_query_filter_values(ontology, "project") + location_values = self._collect_expense_query_filter_values(ontology, "location") + status_values = list( + dict.fromkeys( + str(item.value).strip() + for item in ontology.constraints + if item.field == "status" and item.operator == "=" and str(item.value).strip() + ) + ) + amount_constraints = [ + item + for item in ontology.constraints + if item.field == "amount" and 
item.operator in {">", ">=", "<", "<=", "="} + ] + scope_label = "报销单" + scoped_to_current_user = False + + if expense_claim_nos: + conditions.append(ExpenseClaim.claim_no.in_(expense_claim_nos)) + if expense_types: + conditions.append(ExpenseClaim.expense_type.in_(expense_types)) + if status_values: + conditions.append(ExpenseClaim.status.in_(status_values)) + if project_values: + project_conditions = [] + for value in project_values: + pattern = f"%{value}%" + project_conditions.append(ExpenseClaim.project_code.ilike(pattern)) + project_conditions.append(ExpenseClaim.reason.ilike(pattern)) + conditions.append(or_(*project_conditions)) + if location_values: + location_conditions = [] + for value in location_values: + pattern = f"%{value}%" + location_conditions.append(ExpenseClaim.location.ilike(pattern)) + location_conditions.append(ExpenseClaim.reason.ilike(pattern)) + conditions.append(or_(*location_conditions)) + + for item in amount_constraints: + amount_value = float(item.value) + if item.operator == ">": + conditions.append(ExpenseClaim.amount > amount_value) + elif item.operator == ">=": + conditions.append(ExpenseClaim.amount >= amount_value) + elif item.operator == "<": + conditions.append(ExpenseClaim.amount < amount_value) + elif item.operator == "<=": + conditions.append(ExpenseClaim.amount <= amount_value) + else: + conditions.append(ExpenseClaim.amount == amount_value) + + if ontology.time_range.start_date: + conditions.append( + ExpenseClaim.occurred_at + >= datetime.fromisoformat(f"{ontology.time_range.start_date}T00:00:00+00:00") + ) + if ontology.time_range.end_date: + conditions.append( + ExpenseClaim.occurred_at + <= datetime.fromisoformat(f"{ontology.time_range.end_date}T23:59:59.999999+00:00") + ) + + has_privileged_access = self._has_privileged_expense_query_access(context_json) + refers_to_self = self._is_self_expense_query(message) + if not has_privileged_access: + owner_conditions, owner_label = self._build_current_user_claim_conditions( 
+ user_id=user_id, + context_json=context_json, + ) + if owner_conditions: + conditions.append(or_(*owner_conditions)) + scope_label = owner_label + scoped_to_current_user = True + else: + conditions.append(ExpenseClaim.id == "__no_visible_claim__") + scope_label = "你的报销单" + scoped_to_current_user = True + elif explicit_employee_names: + conditions.append(ExpenseClaim.employee_name.in_(explicit_employee_names)) + scope_label = f"{'、'.join(explicit_employee_names)}的报销单" + elif refers_to_self: + owner_conditions, owner_label = self._build_current_user_claim_conditions( + user_id=user_id, + context_json=context_json, + ) + if owner_conditions: + conditions.append(or_(*owner_conditions)) + scope_label = owner_label + scoped_to_current_user = True + else: + conditions.append(ExpenseClaim.id == "__no_visible_claim__") + scope_label = "你的报销单" + scoped_to_current_user = True + else: + scope_label = "全部报销单" + + return conditions, scope_label, scoped_to_current_user + + @staticmethod + def _collect_expense_query_filter_values( + ontology: OntologyParseResult, + field_name: str, + ) -> list[str]: + values: list[str] = [] + for entity in ontology.entities: + if entity.type != field_name: + continue + value = str(entity.normalized_value or entity.value or "").strip() + if value: + values.append(value) + for constraint in ontology.constraints: + if constraint.field != field_name or constraint.operator != "=": + continue + value = str(constraint.value or "").strip() + if value: + values.append(value) + return list(dict.fromkeys(values)) + + def _build_current_user_claim_conditions( + self, + *, + user_id: str | None, + context_json: dict[str, Any], + ) -> tuple[list[Any], str]: + normalized_user_id = str(user_id or "").strip() + employee = None + if normalized_user_id: + employee = self.db.scalar( + select(Employee) + .where(func.lower(Employee.email) == normalized_user_id.lower()) + .limit(1) + ) + + conditions: list[Any] = [] + seen: set[tuple[str, str]] = set() + + def 
add_condition(field_name: str, value: str | None) -> None: + normalized = str(value or "").strip() + if not normalized: + return + + marker = (field_name, normalized.lower()) + if marker in seen: + return + seen.add(marker) + + if field_name == "employee_id": + conditions.append(ExpenseClaim.employee_id == normalized) + return + conditions.append(ExpenseClaim.employee_name == normalized) + + if employee is not None: + add_condition("employee_id", employee.id) + add_condition("employee_name", employee.email) + if self._employee_name_is_unique(employee): + add_condition("employee_name", employee.name) + else: + add_condition("employee_id", normalized_user_id) + add_condition("employee_name", normalized_user_id) + + subject_name = (employee.name if employee is not None else "") or normalized_user_id + if subject_name: + return conditions, "你的报销单" + return conditions, "当前用户的报销单" + + def _employee_name_is_unique(self, employee: Employee) -> bool: + normalized_name = str(employee.name or "").strip() + if not normalized_name: + return False + + same_name_count = int( + self.db.scalar( + select(func.count()).select_from(Employee).where(Employee.name == normalized_name) + ) + or 0 + ) + return same_name_count == 1 + + @staticmethod + def _has_privileged_expense_query_access(context_json: dict[str, Any]) -> bool: + role_codes = { + str(item).strip().lower() + for item in context_json.get("role_codes", []) + if str(item).strip() + } + return bool(role_codes & PRIVILEGED_EXPENSE_QUERY_ROLE_CODES) + + @staticmethod + def _is_self_expense_query(message: str) -> bool: + compact_message = "".join(str(message or "").split()) + return any(keyword in compact_message for keyword in SELF_REFERENCE_KEYWORDS) diff --git a/server/src/app/services/user_agent.py b/server/src/app/services/user_agent.py index 56cc515..1fe9ce2 100644 --- a/server/src/app/services/user_agent.py +++ b/server/src/app/services/user_agent.py @@ -40,245 +40,34 @@ from app.services.expense_rule_runtime import 
ExpenseRuleRuntimeService, Runtime from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check from app.services.runtime_chat import RuntimeChatService from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService +from app.services.user_agent_documents import UserAgentDocumentService +from app.services.user_agent_knowledge import UserAgentKnowledgeMixin -SCENARIO_LABELS = { - "expense": "报销", - "accounts_receivable": "应收", - "accounts_payable": "应付", - "knowledge": "知识", - "unknown": "通用", -} - -RISK_REASON_MAP = { - "duplicate_expense": "检测到同员工、同金额或近似单据存在重复提交迹象。", - "location_mismatch": "申报出差地点与票据识别地点可能不一致,需要核对行程或补充说明。", - "amount_over_limit": "金额超过当前制度或预算阈值,需要补充例外说明。", - "invoice_anomaly": "票据或附件完整性不满足当前规则要求,需要补件或人工复核。", - "ar_overdue": "应收账款已出现逾期,存在回款延迟风险。", - "ap_overdue": "应付付款已出现逾期,可能影响供应商履约或合作关系。", -} - -GENERIC_EXPENSE_PROMPTS = { - "报销", - "我要报销", - "我想报销", - "帮我报销", - "我要申请报销", - "发起报销", - "提交报销", -} - -EXPLICIT_DRAFT_KEYWORDS = ("生成", "草稿", "起草", "创建", "发起", "准备") - -EXPENSE_TYPE_LABELS = { - "travel": "差旅费", - "hotel": "住宿费", - "transport": "交通费", - "meal": "餐费", - "meeting": "会务费", - "entertainment": "业务招待费", - "office": "办公费", - "training": "培训费", - "communication": "通讯费", - "welfare": "福利费", - "other": "其他费用", -} - -GROUP_SCENE_LABELS = { - "travel": "差旅费", - "entertainment": "业务招待费", - "meal": "伙食费", - "transport": "交通费", - "hotel": "住宿费", - "office": "办公费", - "training": "培训费", - "communication": "通讯费", - "welfare": "福利费", - "other": "其他费用", -} - -EXPENSE_SCENE_SELECTION_OPTIONS = ( - ("travel", "差旅费", "出差、长途交通、住宿、差旅补贴等场景。"), - ("transport", "交通费", "市内打车、停车、过路费等日常交通场景。"), - ("hotel", "住宿费", "单独住宿、酒店发票等场景。"), - ("entertainment", "业务招待费", "客户接待、宴请、招待等场景。"), - ("office", "办公费", "办公用品、耗材、办公设备等采购场景。"), - ("other", "其他费用", "暂不属于以上分类的报销场景。"), -) - -KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS = 3 -KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS = 5 -KNOWLEDGE_MODEL_TIMEOUT_SECONDS = 
KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS - -KNOWLEDGE_DIRECT_ANSWER_HINTS = ( - "是什么", - "标准", - "限额", - "流程", - "条件", - "规则", - "怎么", - "如何", - "哪些", - "需要", - "是否", - "区别", - "范围", - "额度", - "金额", - "多少", - "多少钱", - "上限", -) -KNOWLEDGE_QUERY_STOPWORDS = { - "什么", - "多少", - "哪些", - "怎么", - "如何", - "请问", - "一下", - "关于", - "规定", - "标准", - "可以", - "是否", - "一个", - "哪些人", - "目前", - "当前", - "一下子", -} -MAX_KNOWLEDGE_QUERY_TERMS = 12 -MAX_KNOWLEDGE_DIRECT_EVIDENCE = 4 -MAX_KNOWLEDGE_MODEL_HITS = 5 -KNOWLEDGE_SECTION_HEADING_PATTERN = re.compile( - r"^(#\s*.+|##\s*.+|###\s*.+|第[一二三四五六七八九十百零0-9]+[章节条]\s*.*|[一二三四五六七八九十]+、.*|([一二三四五六七八九十]+).*|\([一二三四五六七八九十]+\).*)$" -) -KNOWLEDGE_LIST_ITEM_PATTERN = re.compile(r"^[-*•]\s+.+$") -KNOWLEDGE_NUMBERED_ITEM_PATTERN = re.compile( - r"^(?:(?:\d+[.)、])|(?:[((][一二三四五六七八九十百零0-9]+[))])|[①②③④⑤⑥⑦⑧⑨⑩])\s*.+$" -) -KNOWLEDGE_ARTICLE_PATTERN = re.compile(r"^(第[一二三四五六七八九十百零0-9]+条)\s*.*$") - -EXPENSE_STATUS_LABELS = { - "draft": "草稿", - "submitted": "已提交", - "review": "审核中", - "approved": "已通过", - "rejected": "已驳回", - "paid": "已付款", -} - -EXPENSE_STATUS_GROUP_LABELS = { - "draft": "草稿", - "in_progress": "审批中", - "completed": "审批完成", - "other": "其他状态", -} - -SLOT_LABELS = { - "expense_type": "报销类型", - "customer_name": "客户名称", - "time_range": "发生时间", - "location": "地点", - "merchant_name": "酒店/商户", - "amount": "金额", - "reason": "事由说明", - "participants": "参与人员", - "attachments": "票据附件", -} - -DATE_TEXT_PATTERN = re.compile( - r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[::][0-5]\d)?)" -) -AMOUNT_TEXT_PATTERN = re.compile( - r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)" -) -DOCUMENT_AMOUNT_PATTERN = re.compile( - r"(?:价税合计|合计金额|费用合计|订单(?:总)?金额|支付(?:金额)?|实付(?:金额)?|实收(?:金额)?|总(?:额|计|价)|票价|金额|车费|消费金额)" - r"[::\s¥¥人民币]*([0-9]+(?:[.,][0-9]{1,2})?)" -) -DOCUMENT_CURRENCY_AMOUNT_PATTERN = re.compile(r"[¥¥]\s*([0-9]+(?:[.,][0-9]{1,2})?)") -TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)") 
-TRAVEL_ROUTE_PATTERN = re.compile(r"([\u4e00-\u9fa5]{2,12})\s*(?:至|→|->|-|—)\s*([\u4e00-\u9fa5]{2,12})") - -SOURCE_LABELS = { - "user_text": "用户描述", - "user_form": "用户修改", - "ocr": "票据识别", - "upload": "上传附件", - "detail_context": "关联单据", - "system_context": "系统上下文", - "inferred": "语义推断", - "system": "系统判断", -} - -DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS = ("历史报销画像", "用户画像", "制度注意事项", "制度注意") - -SCENE_REQUIRED_SLOT_KEYS = { - "hotel": {"merchant_name"}, - "meeting": {"location"}, - "entertainment": {"location", "customer_name", "participants"}, -} -INFERRED_REASON_LABELS = { - "travel": "出差行程", - "hotel": "住宿报销", - "transport": "交通出行", - "meal": "餐饮用餐", - "meeting": "会务活动", - "entertainment": "客户接待", - "office": "办公采购", - "training": "培训学习", - "communication": "通讯使用", - "welfare": "员工福利", - "other": "其他费用", -} -SYSTEM_GENERATED_REASON_PREFIXES = ( - "我上传了", - "请按当前已识别信息", - "请把当前上传的票据", - "请基于当前上传的多张票据", - "我已核对右侧识别结果", - "请同步修正逐票据识别结果", - "我已校正核对信息", - "查看报销草稿", - "请解释一下当前这笔报销的合规风险和待补充项", -) -LEADING_REASON_TIME_PATTERNS = ( - re.compile( - r"^\s*(?:识别事项(?:有)?[::]\s*)?" - r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*" - r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" - r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" - r"\s*[,,。;;、]?\s*" - ), - re.compile( - r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" - r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" 
- r"\s*[,,。;;、]\s*" - ), -) -AMOUNT_UNIT_ALIASES = { - "员": "元", - "圆": "元", - "园": "元", - "块": "元", - "块钱": "元", - "元整": "元", - "万员": "万元", - "万圆": "万元", - "万园": "万元", - "万块": "万元", - "万元整": "万元", -} +from app.services.user_agent_constants import * +from app.services.user_agent_response import UserAgentResponseMixin +from app.services.user_agent_review_core import UserAgentReviewCoreMixin +from app.services.user_agent_review_messages import UserAgentReviewMessageMixin +from app.services.user_agent_review_profile import UserAgentReviewProfileMixin +from app.services.user_agent_review_slots import UserAgentReviewSlotMixin +from app.services.user_agent_review_travel_policy import UserAgentReviewTravelPolicyMixin +from app.services.user_agent_review_travel_receipts import UserAgentReviewTravelReceiptMixin -class UserAgentService: +class UserAgentService( + UserAgentResponseMixin, + UserAgentKnowledgeMixin, + UserAgentReviewCoreMixin, + UserAgentReviewTravelPolicyMixin, + UserAgentReviewTravelReceiptMixin, + UserAgentReviewMessageMixin, + UserAgentReviewProfileMixin, + UserAgentReviewSlotMixin, +): def __init__(self, db: Session) -> None: self.db = db self.asset_service = AgentAssetService(db) self.runtime_chat_service = RuntimeChatService(db) + self._document_service = UserAgentDocumentService(group_scene_labels=GROUP_SCENE_LABELS) def respond(self, payload: UserAgentRequest) -> UserAgentResponse: AgentFoundationService(self.db).ensure_foundation_ready() @@ -395,4701 +184,24 @@ class UserAgentService: requires_confirmation=payload.requires_confirmation, ) - def _build_fallback_answer( - self, - payload: UserAgentRequest, - *, - citations: list[UserAgentCitation], - draft_payload: UserAgentDraftPayload | None, - ) -> str: - if str(payload.tool_payload.get("result_type") or "").strip() == "knowledge_search": - return self._build_explain_answer(payload, citations) - - if payload.ontology.intent in {"query", "compare"}: - return self._build_query_answer(payload) - - if 
payload.ontology.intent == "risk_check": - return self._build_risk_answer(payload, citations) - - if payload.ontology.intent == "draft": - tool_message = str(payload.tool_payload.get("message") or "").strip() - if payload.tool_payload.get("draft_limit_reached"): - return tool_message or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。" - if tool_message and ( - str(payload.tool_payload.get("claim_id") or "").strip() - or str(payload.tool_payload.get("claim_no") or "").strip() - ): - return tool_message - if payload.ontology.intent == "draft" and draft_payload is not None: - return ( - f"已生成 {draft_payload.title},当前仅返回待人工确认的草稿内容," - "仍需人工确认后再进入正式流程。" - ) - - return self._build_explain_answer(payload, citations) - - def _build_guided_answer(self, payload: UserAgentRequest) -> str | None: - if not self._is_generic_expense_prompt(payload): - return self._build_implicit_expense_draft_guidance(payload) - - attachment_names = self._resolve_attachment_names(payload) - ocr_summary = str(payload.context_json.get("ocr_summary") or "").strip() - attachment_hint = "" - if ocr_summary: - attachment_hint = f" 我已读取附件 OCR 摘要:{ocr_summary}" - elif attachment_names: - attachment_hint = ( - f" 我已带入 {len(attachment_names)} 份附件名称,但目前还不能直接读取附件内容," - "仍需要你补充关键信息。" - ) - - return ( - "可以帮你发起报销。请补充费用类型、发生时间、金额、事由和相关对象," - "或者直接上传票据附件,我再继续帮你判断能否报、缺什么材料,并整理待核对信息。" - f"{attachment_hint}" - ) - - def _build_implicit_expense_draft_guidance( - self, - payload: UserAgentRequest, - ) -> str | None: - if not self._is_implicit_expense_draft_request(payload): - return None - - amount_text = next( - (item.value for item in payload.ontology.entities if item.type == "amount"), - "", - ) - expense_type = next( - ( - EXPENSE_TYPE_LABELS.get(item.normalized_value, item.value) - for item in payload.ontology.entities - if item.type == "expense_type" - ), - "报销", - ) - time_text = payload.ontology.time_range.raw or "本次" - amount_hint = f",金额 {amount_text}" if amount_text else "" - - return ( - 
f"已识别到一笔{time_text}的{expense_type}支出{amount_hint}。" - "如果要继续整理报销核对信息,还需要补充客户单位、参与人员、费用明细和票据附件。" - "你也可以继续上传发票或图片,我会把这些信息带入后续对话。" - ) - - def _generate_answer_with_model( - self, - payload: UserAgentRequest, - *, - citations: list[UserAgentCitation], - suggested_actions: list[UserAgentSuggestedAction], - risk_flags: list[str], - draft_payload: UserAgentDraftPayload | None, - fallback_answer: str, - ) -> str | None: - messages = self._build_model_messages( - payload, - citations=citations, - suggested_actions=suggested_actions, - risk_flags=risk_flags, - draft_payload=draft_payload, - fallback_answer=fallback_answer, - ) - answer = self._sanitize_model_answer( - self.runtime_chat_service.complete( - messages, - max_tokens=800 if payload.ontology.scenario == "knowledge" else 420, - temperature=0.2, - timeout_seconds=( - KNOWLEDGE_MODEL_TIMEOUT_SECONDS - if payload.ontology.scenario == "knowledge" - else None - ), - slot_timeouts=( - { - "main": KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS, - "backup": KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS, - } - if payload.ontology.scenario == "knowledge" - else None - ), - max_attempts=1 if payload.ontology.scenario == "knowledge" else None, - ) - ) - return self._reject_unsupported_location_inference(payload, answer) - - def _sanitize_model_answer(self, answer: str | None) -> str | None: - if not answer: - return None - - cleaned = re.sub(r".*?", "", answer, flags=re.DOTALL | re.IGNORECASE) - cleaned = cleaned.strip() - leaked_reasoning_markers = ( - "用户问的是", - "让我分析一下", - "实体识别", - "从对话历史来看", - "从tool_payload来看", - "现在问题是", - "我需要:", - "关键是我", - ) - if any(marker in cleaned[:500] for marker in leaked_reasoning_markers): - return None - return cleaned or None - - @staticmethod - def _extract_query_location(message: str) -> str: - match = re.search(r"(?:去|到|前往)([\u4e00-\u9fff]{2,8})(?:出差|开会|培训)", str(message or "")) - return match.group(1) if match else "" - - def _reject_unsupported_location_inference( - self, - payload: UserAgentRequest, 
- answer: str | None, - ) -> str | None: - del payload - return answer - - def _build_model_messages( - self, - payload: UserAgentRequest, - *, - citations: list[UserAgentCitation], - suggested_actions: list[UserAgentSuggestedAction], - risk_flags: list[str], - draft_payload: UserAgentDraftPayload | None, - fallback_answer: str, - ) -> list[dict[str, str]]: - knowledge_question = ( - self._resolve_knowledge_question(payload) - if payload.ontology.scenario == "knowledge" - else "" - ) - facts = { - "run_id": payload.run_id, - "user_message": payload.message, - "ontology": payload.ontology.model_dump(mode="json"), - "context": { - "entry_source": payload.context_json.get("entry_source"), - "user_name": payload.context_json.get("name"), - "user_role": payload.context_json.get("role"), - "user_department": payload.context_json.get("department_name") - or payload.context_json.get("department"), - "user_position": payload.context_json.get("position"), - "user_grade": payload.context_json.get("grade"), - "employee_no": payload.context_json.get("employee_no"), - "manager_name": payload.context_json.get("manager_name"), - "employee_location": payload.context_json.get("employee_location"), - "cost_center": payload.context_json.get("cost_center"), - "finance_owner_name": payload.context_json.get("finance_owner_name"), - "employee_risk_profile": payload.context_json.get("employee_risk_profile", {}), - "user_role_codes": payload.context_json.get("role_codes", []), - "is_admin": bool(payload.context_json.get("is_admin")), - "request_context": payload.context_json.get("request_context"), - "attachment_count": payload.context_json.get("attachment_count"), - "attachment_names": self._resolve_attachment_names(payload), - "ocr_summary": payload.context_json.get("ocr_summary", ""), - "ocr_documents": payload.context_json.get("ocr_documents", []), - "conversation_id": payload.context_json.get("conversation_id"), - "conversation_scenario": 
payload.context_json.get("conversation_scenario"), - "conversation_intent": payload.context_json.get("conversation_intent"), - "draft_claim_id": payload.context_json.get("draft_claim_id"), - "conversation_history": self._resolve_conversation_history(payload), - }, - "tool_payload": self._build_model_tool_payload( - payload.tool_payload, - question=knowledge_question, - ), - "citations": [item.model_dump(mode="json") for item in citations], - "suggested_actions": [item.model_dump(mode="json") for item in suggested_actions], - "risk_flags": risk_flags, - "draft_payload": draft_payload.model_dump(mode="json") if draft_payload is not None else None, - "selected_capability_codes": payload.selected_capability_codes, - "requires_confirmation": payload.requires_confirmation, - "fallback_answer": fallback_answer, - } - if payload.ontology.scenario == "knowledge": - facts["knowledge_evidence_blocks"] = self._build_knowledge_evidence_blocks( - payload.tool_payload, - question=knowledge_question, - ) - facts["knowledge_answer_evidence"] = [ - { - "title": str(item.get("title") or "").strip(), - "heading": str(item.get("heading") or "").strip(), - "kind": str(item.get("kind") or "").strip(), - "content": str(item.get("content") or "").strip(), - } - for item in self._build_knowledge_answer_evidence(payload) - ] - - if payload.ontology.scenario == "knowledge": - answer_style_instruction = ( - "你是财务制度知识问答助手。只能依据 facts.tool_payload.hits、facts.knowledge_answer_evidence、citations 与 conversation_history 回答," - "不要扩展成通用助手。优先直接回答,不要复述思考过程,不要输出 JSON、代码块或 。" - "回答风格要像一位真正熟悉制度的财务伙伴:先直接回应用户的核心问题,再用一张简洁表格或短段落说明依据," - "最后补充最重要的注意事项。不要写成“已检索到内容”的系统回执,也不要把命中片段连缀成答案。" - "必须优先回答用户当前这句话本身,不能把制度标题、制度全文或完整标准表当成主答案。" - "如果用户问的是某次具体行程“一共能报多少”,就先给“当前已能确认的金额”,再用一张很短的表说明项目、" - "适用标准、计算式和结果;如果总额还缺少住宿晚数、实际票据或其他必要条件,就明确写出“暂不能确认的部分”。" - "只有用户明确在问“标准有哪些”或“制度全文怎么规定”时,才展开完整标准表。" - "如果命中的知识已经足够支持计算、比较或归纳,就直接给出结论;金额、标准、天数、补贴等问题要把计算过程写清楚。" - "适合时请使用 Markdown 二级标题、短段落和表格,让回答更清晰;表格必须保证每一行列数一致,不要出现空白残列。" - "只能陈述 
hits 中明确出现的事实,不能用常识、外部知识或主观推断补齐缺失条件。" - "回答前先在全部 hits 中寻找与问题最直接相关的章节、表格或条目,不能只依赖排在最前面的片段。" - "如果 facts.knowledge_answer_evidence 中已经给出更短的高相关证据,优先基于这些证据组织答案,再回看原始 hits 补上下文。" - "如果某个表格在检索片段中已经被摊平成连续文本,只有在行、列和数值对应关系能够从片段本身明确确认时才能据此计算;" - "如果列对应关系不清楚,必须说明表格结构在当前片段中不够清晰,不能把第一列或相邻数字想当然套给用户。" - "如果 hits 中出现“结构化表格补充”,它表示知识归纳阶段已经把原文表格重新整理过," - "优先使用这类结构化表格来理解行列关系,再回看原文确认上下文。" - "facts.knowledge_evidence_blocks 中保留了原始换行和定宽排版;遇到表格时,优先按这些证据块阅读," - "必须按表头从左到右逐列对应数值,不能把第一列的数值直接套给后面的列名。" - "如果完成计算或归纳仍缺少某个关键映射关系、适用条件或数值依据,必须明确说明当前知识库还缺哪一项信息,再给出已能确认的部分。" - "如果用户问题里没有明确给出某个套用条件,而 hits 或 evidence 里也没有明确出现,就不能自己补一个默认值。" - "当问题涉及追问时,必须结合 conversation_history 延续上一轮上下文,而不是重新泛化成制度全文摘录。" - "不要大段粘贴原始命中文本;只提炼与问题直接相关的规则、条件、金额和注意事项。" - "如果依据仍然不足,明确指出缺少哪一项信息,再给出当前能确认的部分。" - ) - else: - answer_style_instruction = "用 2 到 4 段完成回答,优先给结论,再补充最关键的依据与下一步建议。" - - personalization_instruction = ( - "如果 context.user_name 存在,并且当前问题与员工本人适用标准、报销额度、审批权限、职级待遇有关," - "开头应自然称呼一次用户,例如“曹笑竹,您好”。" - "如果需要根据员工身份判断标准,优先参考 context.user_grade 与 context.user_position。" - "如果问题与用户身份无关,就不要生硬加入姓名、职级或岗位。" - ) - - system_prompt = ( - "你是 X-Financial 的专业财务 AI 助手。" - "回答必须准确、自然、可执行,不要泄露中间推理。" - "当知识问题有命中依据时,先给结论,再给结构化说明。" - "不要把制度全文原样搬出来,不要把检索片段当作最终答案直接粘贴。" - "如果使用表格,确保列名简洁、数值明确。" - f"{personalization_instruction}" - f"{answer_style_instruction}" - ) - user_prompt = ( - "请严格依据下面的 facts 生成最终答复:\n" - f"{json.dumps(facts, ensure_ascii=False, indent=2)}" - ) - return [ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt}, - ] - - @staticmethod - def _build_model_tool_payload( - tool_payload: dict[str, Any], - *, - question: str | None = None, - ) -> dict[str, Any]: - normalized = dict(tool_payload or {}) - hits = [] - for item in UserAgentService._select_knowledge_model_hits( - tool_payload, - question=question, - ): - if not isinstance(item, dict): - continue - hits.append( - { - "title": str(item.get("title") or "").strip(), - "document_name": str(item.get("document_name") or "").strip(), - 
"excerpt": str(item.get("excerpt") or "").strip(), - "content": str(item.get("content") or "").strip()[:1200], - "tags": list(item.get("tags") or [])[:5], - "evidence": list(item.get("evidence") or [])[:3], - "code": str(item.get("code") or "").strip(), - } - ) - normalized["hits"] = hits - return normalized - - @staticmethod - def _build_knowledge_evidence_blocks( - tool_payload: dict[str, Any], - *, - question: str | None = None, - ) -> str: - blocks: list[str] = [] - for index, item in enumerate( - UserAgentService._select_knowledge_model_hits( - tool_payload, - question=question, - )[:3], - start=1, - ): - if not isinstance(item, dict): - continue - title = str(item.get("title") or item.get("document_name") or f"证据 {index}").strip() - code = str(item.get("code") or "").strip() - content = str(item.get("content") or "").strip() - if not content: - continue - blocks.append( - "\n".join( - [ - f"[证据 {index}] {title}" + (f" ({code})" if code else ""), - "```text", - content[:1200], - "```", - ] - ) - ) - return "\n\n".join(blocks) - - @staticmethod - def _select_knowledge_model_hits( - tool_payload: dict[str, Any], - *, - question: str | None = None, - ) -> list[dict[str, Any]]: - raw_hits = [ - item - for item in list(tool_payload.get("hits") or []) - if isinstance(item, dict) - ][: max(MAX_KNOWLEDGE_MODEL_HITS + 1, 6)] - if not raw_hits: - return [] - - query_terms = UserAgentService._extract_knowledge_query_terms(question or "") - if not query_terms: - return raw_hits[:MAX_KNOWLEDGE_MODEL_HITS] - - ranked_hits = sorted( - enumerate(raw_hits), - key=lambda value: ( - UserAgentService._score_knowledge_model_hit( - value[1], - query_terms=query_terms, - rank_index=value[0], - ), - -value[0], - ), - reverse=True, - ) - return [item for _, item in ranked_hits[:MAX_KNOWLEDGE_MODEL_HITS]] - - @staticmethod - def _score_knowledge_model_hit( - item: dict[str, Any], - *, - query_terms: list[str], - rank_index: int, - ) -> int: - title = str(item.get("title") or 
item.get("document_name") or "").lower() - excerpt = str(item.get("excerpt") or "").lower() - content = str(item.get("content") or "").lower() - haystack = "\n".join([title, excerpt, content[:1400]]) - - matched_terms = [term for term in query_terms if term in haystack] - score = max(1, 48 - rank_index * 4) - score += len(matched_terms) * 10 - score += sum(1 for term in matched_terms if term in title) * 8 - - leading_marker = UserAgentService._leading_knowledge_appendix_marker(content) - if leading_marker == "# 章节导航": - score -= 22 - elif leading_marker == "# 问答线索补充": - score += 6 if matched_terms else -8 - elif leading_marker == "# 重点章节摘录": - score += 4 if matched_terms else -4 - elif leading_marker == "# 结构化表格补充": - score += 8 if matched_terms else -3 - - if matched_terms and "|" in content: - score += 8 - if matched_terms and any(marker in content for marker in (":", ":")): - score += 10 - if matched_terms and "\n" in content: - score += 4 - if matched_terms and any(marker in content for marker in ("附表", "第", "条")): - score += 4 - if matched_terms and any(marker in content for marker in ("第", "条", ":", "-", "•")): - score += 4 - if re.search(r"没有.{0,8}(信息|规定|说明|依据)", content): - score -= 12 - return score - - @staticmethod - def _leading_knowledge_appendix_marker(content: str) -> str: - normalized = str(content or "").lstrip() - for marker in ("# 章节导航", "# 重点章节摘录", "# 问答线索补充", "# 结构化表格补充"): - index = normalized.find(marker) - if 0 <= index <= 220: - return marker - return "" - - def _build_query_answer(self, payload: UserAgentRequest) -> str: - scenario = payload.ontology.scenario - data = payload.tool_payload - subject = self._resolve_subject(payload) - - if scenario == "expense": - query_payload = self._build_query_payload(payload) - scope_label = str(data.get("scope_label") or subject).strip() or subject - if query_payload is None: - return f"当前没有查到{scope_label}。你可以补充时间范围、单号或状态继续筛选。" - - window_prefix = ( - f"{query_payload.window_start_date} 至 
{query_payload.window_end_date}" - if query_payload.recent_window_applied - and query_payload.window_start_date - and query_payload.window_end_date - else ( - f"近 {query_payload.window_days} 日内" - if query_payload.recent_window_applied and query_payload.window_days - else "当前条件下" - ) - ) - if query_payload.record_count <= 0: - if query_payload.older_record_count > 0 and query_payload.window_days: - return ( - f"{window_prefix}没有查到{query_payload.scope_label}。" - f"另有 {query_payload.older_record_count} 笔超过 {query_payload.window_days} 日的单据," - "请前往个人报销中心查看。" - ) - return f"{window_prefix}没有查到{query_payload.scope_label}。你可以补充时间范围、单号或状态继续筛选。" - - group_lines = [ - f"{item.label} {item.count} 笔" - for item in query_payload.status_groups - if item.count > 0 - ] - answer_parts = [ - f"我先为你列出{window_prefix}的{query_payload.scope_label}," - f"共 {query_payload.record_count} 笔,金额合计 {query_payload.total_amount:.2f} 元。" - ] - if group_lines: - answer_parts.append(f"其中包括:{'、'.join(group_lines)}。") - - hint_parts: list[str] = [] - if query_payload.has_more_in_window and query_payload.preview_count < query_payload.record_count: - hint_parts.append( - f"下方先展示最近 {query_payload.preview_count} 笔,你可以直接点击单据查看详情。" - ) - elif query_payload.records: - hint_parts.append("下方已列出本次命中的真实单据,可直接点击查看详情。") - - if query_payload.older_record_count > 0 and query_payload.window_days: - hint_parts.append( - f"另有 {query_payload.older_record_count} 笔超过 {query_payload.window_days} 日的单据," - "请前往个人报销中心查看。" - ) - - return " ".join(answer_parts + hint_parts).strip() - - if scenario == "accounts_receivable": - record_count = int(data.get("record_count") or 0) - outstanding_amount = float(data.get("outstanding_amount") or 0) - return ( - f"{subject}共命中 {record_count} 条应收,未回款金额 {outstanding_amount:.2f} 元。" - "建议结合账龄和客户分布继续排查逾期风险。" - ) - - if scenario == "accounts_payable": - record_count = int(data.get("record_count") or 0) - outstanding_amount = float(data.get("outstanding_amount") or 0) - return ( - 
f"{subject}共命中 {record_count} 条应付,待付金额 {outstanding_amount:.2f} 元。" - "如需推进动作,建议先生成付款建议草稿并发起人工确认。" - ) - - return "已完成当前查询,但暂时没有更多结构化结果可展示。" - - def _build_query_payload( - self, - payload: UserAgentRequest, - ) -> UserAgentQueryPayload | None: - if payload.ontology.scenario != "expense" or payload.ontology.intent not in {"query", "compare"}: - return None - - result_type = str(payload.tool_payload.get("result_type") or "").strip() - if result_type and result_type != "expense_claim_list": - return None - - records: list[UserAgentExpenseQueryRecord] = [] - for item in payload.tool_payload.get("records") or []: - if not isinstance(item, dict): - continue - amount = float(item.get("amount") or 0) - records.append( - UserAgentExpenseQueryRecord( - claim_id=str(item.get("claim_id") or "").strip(), - claim_no=str(item.get("claim_no") or "").strip() or "未编号", - employee_name=str(item.get("employee_name") or "").strip(), - expense_type=str(item.get("expense_type") or "").strip(), - expense_type_label=str(item.get("expense_type_label") or "").strip() - or EXPENSE_TYPE_LABELS.get(str(item.get("expense_type") or "").strip(), "报销"), - amount=round(amount, 2), - status=str(item.get("status") or "").strip(), - status_label=str(item.get("status_label") or "").strip() - or EXPENSE_STATUS_LABELS.get(str(item.get("status") or "").strip(), "处理中"), - status_group=str(item.get("status_group") or "").strip() or "other", - status_group_label=str(item.get("status_group_label") or "").strip() - or EXPENSE_STATUS_GROUP_LABELS.get(str(item.get("status_group") or "").strip(), "其他状态"), - approval_stage=str(item.get("approval_stage") or "").strip() or None, - document_date=str(item.get("document_date") or "").strip(), - occurred_at=str(item.get("occurred_at") or "").strip(), - reason=str(item.get("reason") or "").strip(), - location=str(item.get("location") or "").strip(), - ) - ) - - status_groups: list[UserAgentQueryStatusGroup] = [] - for item in payload.tool_payload.get("status_groups") or 
[]: - if not isinstance(item, dict): - continue - status_groups.append( - UserAgentQueryStatusGroup( - key=str(item.get("key") or "").strip() or "other", - label=str(item.get("label") or "").strip() or "其他状态", - count=max(0, int(item.get("count") or 0)), - ) - ) - - return UserAgentQueryPayload( - result_type="expense_claim_list", - scope_label=str(payload.tool_payload.get("scope_label") or self._resolve_subject(payload)).strip() or "报销单", - recent_window_applied=bool(payload.tool_payload.get("recent_window_applied")), - window_days=( - int(payload.tool_payload["window_days"]) - if payload.tool_payload.get("window_days") not in {None, ""} - else None - ), - window_start_date=( - str(payload.tool_payload.get("window_start_date") or "").strip() or None - ), - window_end_date=( - str(payload.tool_payload.get("window_end_date") or "").strip() or None - ), - record_count=max(0, int(payload.tool_payload.get("record_count") or 0)), - preview_count=max(0, int(payload.tool_payload.get("preview_count") or len(records))), - older_record_count=max(0, int(payload.tool_payload.get("older_record_count") or 0)), - has_more_in_window=bool(payload.tool_payload.get("has_more_in_window") or payload.tool_payload.get("has_more")), - total_amount=round(float(payload.tool_payload.get("total_amount") or 0), 2), - status_groups=status_groups, - records=records, - ) - - def _build_fast_knowledge_answer( - self, - payload: UserAgentRequest, - *, - citations: list[UserAgentCitation], - ) -> str | None: - if payload.ontology.scenario != "knowledge": - return None - if str(payload.tool_payload.get("result_type") or "").strip() != "knowledge_search": - return None - - evidence_items = self._build_knowledge_answer_evidence(payload) - if not evidence_items: - return None - - question = self._resolve_knowledge_question(payload) - if not self._should_use_direct_knowledge_answer(question, evidence_items): - return None - - return self._render_knowledge_direct_answer( - payload, - citations=citations, 
- evidence_items=evidence_items, - ) - - def _render_knowledge_direct_answer( - self, - payload: UserAgentRequest, - *, - citations: list[UserAgentCitation], - evidence_items: list[dict[str, Any]], - ) -> str | None: - if not evidence_items: - return None - - title = str( - (citations[0].title if citations else "") - or evidence_items[0].get("title") - or "相关制度" - ).strip() - user_name = str(payload.context_json.get("name") or "").strip() - question = self._resolve_knowledge_question(payload) - query_terms = self._extract_knowledge_query_terms(question) - ordered_evidence_items = self._prioritize_knowledge_evidence_items(question, evidence_items) - primary_item = ordered_evidence_items[0] - primary_heading = self._format_knowledge_heading_label( - str(primary_item.get("heading") or "").strip() - ) - primary_lines = self._collect_direct_knowledge_answer_lines(ordered_evidence_items) - - lines: list[str] = [] - if user_name: - lines.append(f"{user_name},您好。") - source_prefix = f"根据《{title}》" - if primary_heading: - source_prefix = f"{source_prefix}({primary_heading})" - - if str(primary_item.get("kind") or "") == "table": - lines.append(f"{source_prefix},当前能直接确认的是:") - lines.append(self._extract_relevant_table_preview(str(primary_item.get("content") or ""), query_terms)) - else: - if not primary_lines: - lines.append( - f"{source_prefix},当前能直接确认的是:" - f"{self._summarize_knowledge_evidence_content(primary_item, query_terms)}" - ) - elif len(primary_lines) == 1: - lines.append(f"{source_prefix},当前能直接确认的是:{primary_lines[0].strip()}") - else: - lines.append(f"{source_prefix},当前能直接确认的是:") - lines.extend(primary_lines) - - notes: list[str] = [] - location_note = self._build_missing_location_grounding_note(question, evidence_items) - if location_note: - notes.append(location_note) - if self._question_requires_explicit_condition(question) and not self._answer_evidence_has_numeric_or_condition(evidence_items): - notes.append("当前命中的证据更偏规则说明或流程约束,还没有直接给出可立即套用的数值或完整条件。") - - if 
notes: - lines.append("") - lines.append("说明:") - lines.extend(f"- {note}" for note in notes) - - return "\n".join(line for line in lines if line is not None).strip() - - def _prioritize_knowledge_evidence_items( - self, - question: str, - evidence_items: list[dict[str, Any]], - ) -> list[dict[str, Any]]: - if not evidence_items or not self._question_requires_explicit_condition(question): - return evidence_items - - for preferred_kind in ("table", "kv", "clause", "list"): - for index, item in enumerate(evidence_items): - if str(item.get("kind") or "") != preferred_kind: - continue - return [item, *evidence_items[:index], *evidence_items[index + 1 :]] - - for index, item in enumerate(evidence_items): - if re.search(r"\d", str(item.get("content") or "")): - return [item, *evidence_items[:index], *evidence_items[index + 1 :]] - - return evidence_items - - @staticmethod - def _resolve_knowledge_question(payload: UserAgentRequest) -> str: - return str(payload.context_json.get("user_input_text") or payload.message or "").strip() - - @staticmethod - def _looks_like_structured_knowledge_query(question: str) -> bool: - normalized = str(question or "").strip() - if not normalized: - return False - return any(keyword in normalized for keyword in KNOWLEDGE_DIRECT_ANSWER_HINTS) - - def _should_use_direct_knowledge_answer( - self, - question: str, - evidence_items: list[dict[str, Any]], - ) -> bool: - if not evidence_items: - return False - if self._looks_like_structured_knowledge_query(question): - return True - return str(evidence_items[0].get("kind") or "") in {"table", "kv", "list", "clause"} - - def _build_knowledge_answer_evidence( - self, - payload: UserAgentRequest, - ) -> list[dict[str, Any]]: - question = self._resolve_knowledge_question(payload) - query_terms = self._extract_knowledge_query_terms(question) - candidates: list[dict[str, Any]] = [] - - for hit in self._select_knowledge_model_hits( - payload.tool_payload, - question=question, - ): - if not isinstance(hit, 
dict): - continue - candidates.extend(self._extract_knowledge_evidence_candidates(hit, query_terms)) - - deduped: list[dict[str, Any]] = [] - seen: set[tuple[str, str, str]] = set() - ranked_candidates = sorted( - candidates, - key=lambda value: ( - float(value.get("score") or 0), - -len(str(value.get("content") or "")), - ), - reverse=True, - ) - top_score = float(ranked_candidates[0].get("score") or 0) if ranked_candidates else 0.0 - - for item in ranked_candidates: - score = float(item.get("score") or 0) - if deduped and score < max(6.0, top_score - 14): - continue - key = ( - str(item.get("title") or "").strip(), - str(item.get("heading") or "").strip(), - self._clean_knowledge_segment_text(str(item.get("content") or ""))[:180], - ) - if key in seen: - continue - seen.add(key) - deduped.append(item) - if len(deduped) >= MAX_KNOWLEDGE_DIRECT_EVIDENCE: - break - return deduped - - def _extract_knowledge_evidence_candidates( - self, - hit: dict[str, Any], - query_terms: list[str], - ) -> list[dict[str, Any]]: - title = str(hit.get("title") or hit.get("document_name") or "相关制度").strip() - content = str(hit.get("content") or "").strip() - if not content: - return [] - - raw_candidates = self._merge_knowledge_lead_in_segments( - self._split_knowledge_hit_into_segments(content) - ) - candidates: list[dict[str, Any]] = [] - for item in raw_candidates: - score = self._score_knowledge_evidence_candidate(item, query_terms) - if query_terms and score <= 0: - continue - normalized = dict(item) - normalized["title"] = title - normalized["score"] = score - candidates.append(normalized) - - if candidates: - return candidates - - fallback_text = str(hit.get("excerpt") or "").strip() or self._extract_excerpt(content) - if not fallback_text: - return [] - return [ - { - "title": title, - "heading": "", - "kind": "paragraph", - "content": fallback_text, - "score": 1, - } - ] - - @staticmethod - def _is_knowledge_lead_in_segment(item: dict[str, str]) -> bool: - kind = 
str(item.get("kind") or "").strip() - content = str(item.get("content") or "").strip() - return kind in {"kv", "list", "clause"} and content.endswith((":", ":")) - - @staticmethod - def _extract_knowledge_marker_family(content: str) -> str: - normalized = str(content or "").strip() - if not normalized: - return "" - if KNOWLEDGE_ARTICLE_PATTERN.match(normalized): - return "article" - if re.match(r"^\d+[.)、]\s*", normalized): - return "arabic" - if re.match(r"^[((][一二三四五六七八九十百零0-9]+[))]\s*", normalized): - return "paren" - if re.match(r"^[①②③④⑤⑥⑦⑧⑨⑩]\s*", normalized): - return "circled" - if KNOWLEDGE_LIST_ITEM_PATTERN.match(normalized): - return "bullet" - return "" - - @staticmethod - def _format_knowledge_heading_label(heading: str) -> str: - parts = [item.strip() for item in str(heading or "").split(">") if item.strip()] - return " / ".join(parts) - - def _merge_knowledge_lead_in_segments( - self, - segments: list[dict[str, str]], - ) -> list[dict[str, str]]: - if not segments: - return [] - - merged: list[dict[str, str]] = [] - index = 0 - while index < len(segments): - current = dict(segments[index]) - if not self._is_knowledge_lead_in_segment(current): - merged.append(current) - index += 1 - continue - - base_heading = str(current.get("heading") or "").strip() - current_marker = self._extract_knowledge_marker_family(str(current.get("content") or "")) - follow_segments: list[dict[str, str]] = [] - next_index = index + 1 - - while next_index < len(segments): - candidate = segments[next_index] - if str(candidate.get("heading") or "").strip() != base_heading: - break - - candidate_kind = str(candidate.get("kind") or "").strip() - candidate_content = str(candidate.get("content") or "").strip() - candidate_marker = self._extract_knowledge_marker_family(candidate_content) - if not candidate_content or candidate_kind == "table": - break - if current_marker and candidate_marker == current_marker: - break - if self._is_knowledge_lead_in_segment(candidate) and 
follow_segments: - break - if candidate_kind not in {"list", "paragraph", "kv", "clause"}: - break - - follow_segments.append(candidate) - next_index += 1 - if len(follow_segments) >= 4: - break - if candidate_kind == "paragraph" and len(candidate_content) >= 200: - break - - if follow_segments: - current["content"] = "\n".join( - [str(current.get("content") or "").strip()] - + [str(item.get("content") or "").strip() for item in follow_segments] - ) - if any(str(item.get("kind") or "").strip() == "list" for item in follow_segments): - current["kind"] = "list" - merged.append(current) - index = next_index - continue - - merged.append(current) - index += 1 - - return merged - - def _split_knowledge_hit_into_segments(self, content: str) -> list[dict[str, str]]: - segments: list[dict[str, str]] = [] - markdown_headings: list[str] = [] - section_heading = "" - paragraph_lines: list[str] = [] - table_lines: list[str] = [] - - def current_heading() -> str: - heading_parts = [item for item in markdown_headings if item] - if section_heading: - heading_parts.append(section_heading) - return " > ".join(heading_parts) - - def flush_paragraph() -> None: - nonlocal paragraph_lines - if not paragraph_lines: - return - merged = " ".join(line.strip() for line in paragraph_lines if line.strip()).strip() - paragraph_lines = [] - if merged: - segments.append( - { - "heading": current_heading(), - "kind": "paragraph", - "content": merged, - } - ) - - def flush_table() -> None: - nonlocal table_lines - if not table_lines: - return - merged = "\n".join(line.rstrip() for line in table_lines if line.strip()).strip() - table_lines = [] - if merged: - segments.append( - { - "heading": current_heading(), - "kind": "table", - "content": merged, - } - ) - - for raw_line in str(content or "").replace("\r\n", "\n").replace("\r", "\n").splitlines(): - line = raw_line.rstrip() - stripped = line.strip() - - if not stripped: - flush_paragraph() - flush_table() - continue - - markdown_heading_match = 
re.match(r"^(#{1,6})\s+(.+)$", stripped) - if markdown_heading_match: - flush_paragraph() - flush_table() - level = len(markdown_heading_match.group(1)) - heading_text = markdown_heading_match.group(2).strip() - markdown_headings = markdown_headings[: max(0, level - 1)] - markdown_headings.append(heading_text) - section_heading = "" - continue - - if KNOWLEDGE_SECTION_HEADING_PATTERN.match(stripped) and len(stripped) <= 90: - flush_paragraph() - flush_table() - section_heading = stripped.lstrip("#").strip() - continue - - if stripped.count("|") >= 2 and "|" in stripped: - flush_paragraph() - table_lines.append(stripped) - continue - - flush_table() - - if KNOWLEDGE_LIST_ITEM_PATTERN.match(stripped): - flush_paragraph() - segments.append( - { - "heading": current_heading(), - "kind": "list", - "content": stripped, - } - ) - continue - - if KNOWLEDGE_NUMBERED_ITEM_PATTERN.match(stripped): - flush_paragraph() - segments.append( - { - "heading": current_heading(), - "kind": "list", - "content": stripped, - } - ) - continue - - if KNOWLEDGE_ARTICLE_PATTERN.match(stripped): - flush_paragraph() - segments.append( - { - "heading": current_heading(), - "kind": "clause", - "content": stripped, - } - ) - continue - - if (":" in stripped or ":" in stripped) and len(stripped) <= 180: - flush_paragraph() - segments.append( - { - "heading": current_heading(), - "kind": "kv", - "content": stripped, - } - ) - continue - - paragraph_lines.append(stripped) - - flush_paragraph() - flush_table() - return segments - - def _score_knowledge_evidence_candidate( - self, - item: dict[str, str], - query_terms: list[str], - ) -> int: - heading = str(item.get("heading") or "").lower() - content = str(item.get("content") or "").lower() - kind = str(item.get("kind") or "").strip() - haystack = "\n".join([heading, content]) - - matched_terms = [term for term in query_terms if term in haystack] - score = len(matched_terms) * 10 - score += sum(1 for term in matched_terms if term in heading) * 6 - - 
if kind == "table": - score += 10 - elif kind in {"kv", "clause", "list"}: - score += 8 - elif kind == "paragraph": - score += 4 - - if "问答线索补充" in heading or "重点章节摘录" in heading: - score += 8 - if "结构化表格补充" in heading: - score += 10 - if "章节导航" in heading or "目录" in heading: - score -= 16 - if re.search(r"[.。…]{6,}", content): - score -= 12 - if any(hint in content for hint in ("应", "需", "不得", "可以", "标准", "条件", "材料", "审批", "流程", "包括")): - score += 3 - - content_length = len(content) - if content_length > 220: - score -= min(8, (content_length - 220) // 40) - return score - - @staticmethod - def _extract_knowledge_query_terms(question: str) -> list[str]: - normalized_question = str(question or "").strip().lower() - if not normalized_question: - return [] - - terms: list[str] = [] - seen: set[str] = set() - - def remember(term: str) -> None: - normalized = str(term or "").strip().lower() - if ( - not normalized - or normalized in seen - or normalized in KNOWLEDGE_QUERY_STOPWORDS - ): - return - seen.add(normalized) - terms.append(normalized) - - for item in re.findall(r"[a-z0-9][a-z0-9_\-]{1,}", normalized_question): - remember(item) - - for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_question): - if len(block) <= 4: - remember(block) - continue - for size in (4, 3, 2): - for start in range(0, len(block) - size + 1): - remember(block[start : start + size]) - if len(terms) >= MAX_KNOWLEDGE_QUERY_TERMS: - return terms - - return terms[:MAX_KNOWLEDGE_QUERY_TERMS] - - @staticmethod - def _clean_knowledge_segment_text(content: str) -> str: - normalized = str(content or "").strip() - normalized = re.sub(r"^[-*•]\s*", "", normalized) - normalized = re.sub(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*", "", normalized) - normalized = re.sub(r"^[((][一二三四五六七八九十百零0-9]+[))]\s*", "", normalized) - normalized = re.sub(r"\s+", " ", normalized) - if len(normalized) <= 180: - return normalized - return f"{normalized[:177].rstrip()}..." 
- - @staticmethod - def _normalize_knowledge_line(content: str, *, preserve_marker: bool) -> str: - normalized = str(content or "").strip() - normalized = re.sub(r"^[-*•]\s*", "", normalized) - if not preserve_marker: - normalized = re.sub(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*", "", normalized) - normalized = re.sub(r"^[((][一二三四五六七八九十百零0-9]+[))]\s*", "", normalized) - normalized = re.sub(r"\s+", " ", normalized) - return normalized - - def _split_clean_knowledge_lines( - self, - content: str, - *, - preserve_marker: bool, - ) -> list[str]: - return [ - line - for line in ( - self._normalize_knowledge_line(item, preserve_marker=preserve_marker) - for item in str(content or "").splitlines() - ) - if line - ] - - def _render_knowledge_evidence_text(self, item: dict[str, Any]) -> str: - lines = self._split_clean_knowledge_lines( - str(item.get("content") or ""), - preserve_marker=True, - ) - if not lines: - return "" - if len(lines) == 1: - return self._clean_knowledge_segment_text(lines[0]) - return "\n".join(f" {line}" for line in lines) - - def _collect_direct_knowledge_answer_lines( - self, - ordered_evidence_items: list[dict[str, Any]], - ) -> list[str]: - if not ordered_evidence_items: - return [] - - primary_item = ordered_evidence_items[0] - primary_title = str(primary_item.get("title") or "").strip() - primary_heading = str(primary_item.get("heading") or "").strip() - primary_kind = str(primary_item.get("kind") or "").strip() - - related_items = [primary_item] - if primary_kind != "table": - for item in ordered_evidence_items[1:]: - if len(related_items) >= 3: - break - if str(item.get("kind") or "").strip() != primary_kind: - continue - if str(item.get("title") or "").strip() != primary_title: - continue - if str(item.get("heading") or "").strip() != primary_heading: - continue - related_items.append(item) - - lines: list[str] = [] - seen: set[str] = set() - for item in related_items: - rendered = self._render_knowledge_evidence_text(item) - for line in 
rendered.splitlines(): - normalized = str(line or "").strip() - if not normalized or normalized in seen: - continue - seen.add(normalized) - lines.append(line) - return lines - - def _summarize_knowledge_evidence_content( - self, - item: dict[str, Any], - query_terms: list[str], - ) -> str: - kind = str(item.get("kind") or "").strip() - content = str(item.get("content") or "").strip() - if kind == "table": - preview = self._extract_relevant_table_preview(content, query_terms) - preview_rows = [line for line in preview.splitlines() if line.strip()][:4] - if len(preview_rows) >= 3: - return "当前命中的直接依据是一张与问题强相关的标准表,已摘出最相关的表头和行。" - return "当前命中的直接依据是一张与问题强相关的标准表。" - lines = self._split_clean_knowledge_lines(content, preserve_marker=True) - if len(lines) >= 2: - return self._clean_knowledge_segment_text(f"{lines[0]} {' '.join(lines[1:4])}") - return self._clean_knowledge_segment_text(content) - - @staticmethod - def _extract_relevant_table_preview(content: str, query_terms: list[str]) -> str: - lines = [line.strip() for line in str(content or "").splitlines() if line.strip()] - if len(lines) <= 3: - return "\n".join(lines) - - header = lines[0] - divider = lines[1] if len(lines) > 1 else "" - body = lines[2:] if divider.count("|") >= 2 else lines[1:] - - matched_rows = [ - row - for row in body - if any(term in row.lower() for term in query_terms) - ] - selected_rows = matched_rows[:3] or body[:2] - preview_lines = [header] - if divider: - preview_lines.append(divider) - preview_lines.extend(selected_rows) - return "\n".join(preview_lines).strip() - - @staticmethod - def _question_requires_explicit_condition(question: str) -> bool: - normalized = str(question or "").strip() - return any(keyword in normalized for keyword in ("多少", "金额", "上限", "限额", "标准", "条件", "需要")) - - def _build_missing_location_grounding_note( - self, - question: str, - evidence_items: list[dict[str, Any]], - ) -> str: - location = self._extract_query_location(question) - if not location: - return "" 
- - haystack = "\n".join( - str(item.get("heading") or "") + "\n" + str(item.get("content") or "") - for item in evidence_items - ) - if location in haystack: - return "" - return ( - f"当前命中的制度依据没有直接写出“{location}”对应的地区档位或映射关系," - "因此不能直接把它套用到表格中的某一列。" - ) - - @staticmethod - def _answer_evidence_has_numeric_or_condition(evidence_items: list[dict[str, Any]]) -> bool: - for item in evidence_items: - content = str(item.get("content") or "") - if re.search(r"\d", content): - return True - if any( - keyword in content - for keyword in ("应", "需", "不得", "可以", "条件", "材料", "审批", "流程", "标准", "适用") - ): - return True - return False - - def _build_explain_answer( - self, - payload: UserAgentRequest, - citations: list[UserAgentCitation], - ) -> str: - if str(payload.tool_payload.get("result_type") or "").strip() == "knowledge_search": - if citations: - return self._build_knowledge_search_answer(payload, citations) - - tool_message = str(payload.tool_payload.get("message") or "").strip() - if tool_message: - return tool_message - - if citations: - titles = "、".join(item.title for item in citations[:2]) - summary = citations[0].excerpt or "请结合制度全文进一步确认。" - return f"已检索到相关依据:{titles}。核心说明:{summary}" - - return ( - f"当前还没有与“{SCENARIO_LABELS.get(payload.ontology.scenario, '当前问题')}”" - "强匹配的已上线规则引用,建议先人工复核或补充更具体的单据上下文。" - ) - - def _build_knowledge_search_answer( - self, - payload: UserAgentRequest, - citations: list[UserAgentCitation], - ) -> str: - hits = [item for item in list(payload.tool_payload.get("hits") or []) if isinstance(item, dict)] - evidence_items = self._build_knowledge_answer_evidence(payload) - primary_citation = citations[0] if citations else None - title = str( - (primary_citation.title if primary_citation else "") - or (hits[0].get("title") if hits else "") - or "相关制度" - ).strip() - user_name = str(payload.context_json.get("name") or "").strip() - prefix = f"{user_name},您好。\n" if user_name else "" - if not hits: - return ( - 
f"{prefix}我已经从《{title}》中检索到与你这次问题相关的制度依据," - "但本次答案生成环节暂时没有成功返回。请稍后重试一次;如果仍然失败," - "建议先检查主对话模型的连通性。" - ) - - evidence_lines: list[str] = [] - for item in evidence_items[:3]: - heading = str(item.get("heading") or "").strip() - heading_text = f" > {heading}" if heading else "" - if str(item.get("kind") or "") == "table": - preview = self._extract_relevant_table_preview( - str(item.get("content") or ""), - self._extract_knowledge_query_terms(self._resolve_knowledge_question(payload)), - ) - evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:\n{preview}") - continue - rendered = self._render_knowledge_evidence_text(item) - if rendered: - if "\n" in rendered: - evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:\n{rendered}") - else: - evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:{rendered}") - - if not evidence_lines: - for item in hits[:2]: - item_title = str(item.get("title") or item.get("document_name") or "相关制度").strip() - excerpt = ( - str(item.get("excerpt") or "").strip() - or self._extract_excerpt(str(item.get("content") or "")) - ) - if not excerpt: - continue - evidence_lines.append(f"- 《{item_title}》:{excerpt}") - - if not evidence_lines: - return ( - f"{prefix}我已经从《{title}》中检索到与你这次问题相关的制度依据," - "但本次答案生成环节暂时没有成功返回。请稍后重试一次;如果仍然失败," - "建议先检查主对话模型的连通性。" - ) - - return "\n".join( - [ - f"{prefix}我已经命中与你这次问题最相关的制度依据,但答案整理阶段本轮没有及时返回。", - "先给你当前最直接的依据:", - *evidence_lines, - "如果你希望我继续把这些依据整理成更完整的结论、步骤或对比说明,可以继续缩小问题范围后再问一次。", - ] - ).strip() - - def _build_risk_answer( - self, - payload: UserAgentRequest, - citations: list[UserAgentCitation], - ) -> str: - risk_flags = self._resolve_risk_flags(payload) - platform_messages = self._evaluate_platform_risk_messages(payload) - if not risk_flags and not platform_messages: - return "当前未识别到明确风险标签,建议继续查看原始明细或补充更多上下文。" - - reasons = [ - f"{flag}:{RISK_REASON_MAP.get(flag, f'{flag} 需要人工进一步确认。')}" - for flag in risk_flags - ] - if platform_messages: - 
reasons.extend(platform_messages) - citation_text = ( - f" 参考规则:{'、'.join(item.title for item in citations[:2])}。" - if citations - else "" - ) - signal_count = len(risk_flags) + (1 if platform_messages else 0) - return ( - f"本次识别到 {signal_count} 类风险信号。" - f"触发原因:{';'.join(reasons)}。" - "建议先复核明细、附件和审批链,再决定是否继续处理。" - f"{citation_text}" - ) - - def _evaluate_platform_risk_messages(self, payload: UserAgentRequest) -> list[str]: - claim_id = str(payload.tool_payload.get("claim_id") or "").strip() - if not claim_id: - return [] - - claim = self.db.scalar( - select(ExpenseClaim) - .where(ExpenseClaim.id == claim_id) - .options(selectinload(ExpenseClaim.items)) - ) - if claim is None: - return [] - - rule_codes = resolve_rule_codes_for_risk_check( - payload.ontology, - query_text=payload.message, - ) - review = ExpenseClaimService(self.db).evaluate_platform_risk_rules( - claim, - rule_codes=rule_codes, - ) - messages: list[str] = [] - for flag in review.get("flags") or []: - if not isinstance(flag, dict): - continue - message = str(flag.get("message") or "").strip() - if message and message not in messages: - messages.append(message) - return messages - - def _build_draft_payload(self, payload: UserAgentRequest) -> UserAgentDraftPayload: - scenario_label = SCENARIO_LABELS.get(payload.ontology.scenario, "业务") - subject = self._resolve_subject(payload) - claim_no = str(payload.tool_payload.get("claim_no") or "").strip() or None - claim_status = str(payload.tool_payload.get("status") or "").strip() or None - approval_stage = str(payload.tool_payload.get("approval_stage") or "").strip() or None - is_submitted = claim_status == "submitted" - title = f"{scenario_label}处理意见草稿" - if claim_no: - title = f"{scenario_label}{'报销单' if is_submitted else '草稿'} {claim_no}" - if is_submitted: - body = ( - f"主题:{subject}\n" - f"结论:报销单已提交,当前节点为 {approval_stage or '审批中'}。\n" - "建议:后续可在个人报销列表中跟踪审批进度,必要时再补充说明或附件。\n" - f"原始问题:{payload.message}" - ) - else: - body = ( - f"主题:{subject}\n" - 
"结论:已根据当前语义解析结果生成草稿,尚未自动执行。\n" - "建议:请先核对明细、规则命中和所需附件,再由人工确认是否提交正式流程。\n" - f"原始问题:{payload.message}" - ) - return UserAgentDraftPayload( - draft_type=payload.ontology.scenario, - title=title, - body=body, - confirmation_required=not is_submitted, - claim_id=str(payload.tool_payload.get("claim_id") or "").strip() or None, - claim_no=claim_no, - status=claim_status, - approval_stage=approval_stage, - ) - - @staticmethod - def _should_build_draft_payload(payload: UserAgentRequest) -> bool: - if payload.ontology.scenario == "expense" and payload.tool_payload.get("preview_only"): - return any( - str(payload.tool_payload.get(key) or "").strip() - for key in ("claim_id", "claim_no") - ) - if payload.ontology.intent == "draft": - return True - if payload.ontology.scenario != "expense": - return False - return any( - str(payload.tool_payload.get(key) or "").strip() - for key in ("claim_id", "claim_no", "status") - ) - - def _build_suggested_actions( - self, - payload: UserAgentRequest, - ) -> list[UserAgentSuggestedAction]: - if payload.ontology.scenario == "knowledge": - return [] - - if self._should_prompt_expense_scene_selection(payload): - return [ - UserAgentSuggestedAction( - label=label, - action_type="select_expense_type", - description=description, - payload={ - "expense_type": code, - "expense_type_label": label, - "original_message": payload.message, - }, - ) - for code, label, description in EXPENSE_SCENE_SELECTION_OPTIONS - ] - - if self._is_generic_expense_prompt(payload): - return [ - UserAgentSuggestedAction( - label="上传票据", - action_type="ask_clarification", - description="上传发票、行程单或付款截图,继续识别报销内容。", - ), - UserAgentSuggestedAction( - label="补充报销信息", - action_type="ask_clarification", - description="补充费用类型、金额、时间和事由后继续处理。", - ), - ] - - if payload.ontology.intent in {"query", "compare"}: - return [ - UserAgentSuggestedAction( - label="查看明细", - action_type="open_detail", - description="继续查看命中记录和过滤条件。", - ), - UserAgentSuggestedAction( - label="生成处理意见", - 
action_type="create_draft", - description="把当前查询结果整理成可确认草稿。", - ), - ] - - if payload.ontology.intent == "risk_check": - return [ - UserAgentSuggestedAction( - label="人工复核风险", - action_type="manual_review", - description="优先检查明细、附件和规则命中原因。", - ), - UserAgentSuggestedAction( - label="生成整改建议", - action_type="create_draft", - description="把风险说明整理成处理意见草稿。", - ), - ] - - if payload.ontology.intent == "draft": - return [ - UserAgentSuggestedAction( - label="复制草稿", - action_type="copy_draft", - description="复制当前草稿后交由人工确认。", - ), - UserAgentSuggestedAction( - label="补充上下文", - action_type="ask_clarification", - description="补充单据编号、客户或供应商信息以完善草稿。", - ), - ] - - return [ - UserAgentSuggestedAction( - label="查看规则全文", - action_type="open_rule", - description="继续查看引用规则或知识内容。", - ), - UserAgentSuggestedAction( - label="补充问题上下文", - action_type="ask_clarification", - description="补充业务对象、时间或单据范围,提升回答准确度。", - ), - ] - - def _should_prompt_expense_scene_selection(self, payload: UserAgentRequest) -> bool: - if payload.ontology.scenario != "expense": - return False - if payload.ontology.intent not in {"draft", "operate"}: - return False - if str(payload.context_json.get("review_action") or "").strip(): - return False - review_form_values = self._resolve_review_form_values(payload) - if str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip(): - return False - if self._resolve_attachment_count(payload) > 0 or self._resolve_ocr_documents(payload): - return False - return not any( - item.type == "expense_type" and str(item.normalized_value or item.value or "").strip() - for item in payload.ontology.entities - ) - - @staticmethod - def _build_expense_scene_selection_answer(payload: UserAgentRequest) -> str: - has_time = bool(payload.ontology.time_range.start_date or payload.ontology.time_range.raw) - context_hint = "我先识别到这是一次报销申请" - if has_time: - context_hint += ",并看到了业务发生时间" - return ( - f"{context_hint}。但你还没有明确这笔单据属于哪类报销。" - 
"请先在下面选择报销场景,我会按你选择的场景再继续识别时间、地点、事由、金额和所需票据," - "避免系统先入为主把项目支持、部署等描述误判成差旅。" - ) - - def _build_review_payload( - self, - payload: UserAgentRequest, - *, - citations: list[UserAgentCitation], - draft_payload: UserAgentDraftPayload | None, - ) -> UserAgentReviewPayload | None: - attachment_count = self._resolve_attachment_count(payload) - ocr_documents = self._resolve_ocr_documents(payload) - if payload.ontology.scenario != "expense": - return None - if payload.ontology.intent not in {"draft", "operate"} and attachment_count <= 0 and not ocr_documents: - return None - - document_cards = self._build_review_document_cards(payload, ocr_documents=ocr_documents) - claim_groups = self._build_review_claim_groups( - payload, - document_cards=document_cards, - ) - slot_cards = self._build_review_slot_cards( - payload, - ocr_documents=ocr_documents, - claim_groups=claim_groups, - ) - travel_receipt_state = self._build_travel_receipt_state( - payload, - document_cards=document_cards, - claim_groups=claim_groups, - ) - missing_slot_keys = self._resolve_review_missing_slot_keys( - payload, - slot_cards=slot_cards, - ) - submission_blocked = bool(payload.tool_payload.get("submission_blocked")) - risk_briefs = self._build_review_risk_briefs( - payload, - citations=citations, - document_cards=document_cards, - claim_groups=claim_groups, - ) - risk_briefs.extend(self._build_travel_receipt_briefs(travel_receipt_state)) - association_choice_pending = self._is_review_association_choice_pending(payload) - can_proceed = ( - False - if association_choice_pending or submission_blocked or travel_receipt_state.get("blocks_next_step") - else self._can_proceed_review( - payload, - missing_slot_keys=missing_slot_keys, - claim_groups=claim_groups, - ) - ) - confirmation_actions = self._build_review_confirmation_actions( - payload, - can_proceed=can_proceed, - claim_groups=claim_groups, - draft_payload=draft_payload, - missing_slot_keys=missing_slot_keys, - ) - edit_fields = 
self._build_review_edit_fields( - payload, - draft_payload=draft_payload, - slot_cards=slot_cards, - ) - intent_summary = self._build_review_intent_summary( - payload, - slot_cards=slot_cards, - claim_groups=claim_groups, - ) - body_message = self._build_review_body_message( - payload, - slot_cards=slot_cards, - risk_briefs=risk_briefs, - can_proceed=can_proceed, - document_cards=document_cards, - travel_receipt_state=travel_receipt_state, - ) - missing_slot_labels = [SLOT_LABELS.get(key, key) for key in missing_slot_keys] - missing_slot_labels.extend( - str(item) - for item in travel_receipt_state.get("required_missing_labels", []) - if str(item).strip() - ) - missing_slot_labels = list(dict.fromkeys(missing_slot_labels)) - - return UserAgentReviewPayload( - intent_summary=intent_summary, - body_message=body_message, - scenario=payload.ontology.scenario, - intent=payload.ontology.intent, - can_proceed=can_proceed, - missing_slots=missing_slot_labels, - risk_briefs=risk_briefs, - slot_cards=slot_cards, - document_cards=document_cards, - claim_groups=claim_groups, - confirmation_actions=confirmation_actions, - edit_fields=edit_fields, - ) - - def _build_review_slot_cards( - self, - payload: UserAgentRequest, - *, - ocr_documents: list[dict[str, object]], - claim_groups: list[UserAgentReviewClaimGroup], - ) -> list[UserAgentReviewSlotCard]: - entity_map = self._collect_entity_values(payload) - time_slot = self._build_time_slot(payload) - location_slot = self._build_location_slot(payload) - customer_slot = self._build_customer_slot(payload, entity_map=entity_map) - participants_slot = self._build_participants_slot(payload, entity_map=entity_map) - amount_slot = self._build_amount_slot(payload, entity_map=entity_map, ocr_documents=ocr_documents) - expense_type_slot = self._build_expense_type_slot( - payload, - entity_map=entity_map, - ocr_documents=ocr_documents, - ) - merchant_slot = self._build_merchant_slot(payload, ocr_documents=ocr_documents) - reason_slot = 
self._build_reason_slot( - payload, - claim_groups=claim_groups, - ) - attachment_slot = self._build_attachment_slot(payload) - required_keys = self._resolve_required_review_keys( - payload, - primary_expense_type=str(expense_type_slot["normalized_value"] or ""), - claim_groups=claim_groups, - ) - - cards = [ - self._make_slot_card( - key="expense_type", - value=expense_type_slot["value"], - raw_value=expense_type_slot["raw_value"], - normalized_value=expense_type_slot["normalized_value"], - source=expense_type_slot["source"], - confidence=expense_type_slot["confidence"], - evidence=expense_type_slot["evidence"], - required="expense_type" in required_keys, - ), - self._make_slot_card( - key="customer_name", - value=customer_slot["value"], - raw_value=customer_slot["raw_value"], - normalized_value=customer_slot["normalized_value"], - source=customer_slot["source"], - confidence=customer_slot["confidence"], - evidence=customer_slot["evidence"], - required="customer_name" in required_keys, - ), - self._make_slot_card( - key="time_range", - value=time_slot["value"], - raw_value=time_slot["raw_value"], - normalized_value=time_slot["normalized_value"], - source=time_slot["source"], - confidence=time_slot["confidence"], - evidence=time_slot["evidence"], - required="time_range" in required_keys, - ), - self._make_slot_card( - key="location", - value=location_slot["value"], - raw_value=location_slot["raw_value"], - normalized_value=location_slot["normalized_value"], - source=location_slot["source"], - confidence=location_slot["confidence"], - evidence=location_slot["evidence"], - required="location" in required_keys, - ), - self._make_slot_card( - key="merchant_name", - value=merchant_slot["value"], - raw_value=merchant_slot["raw_value"], - normalized_value=merchant_slot["normalized_value"], - source=merchant_slot["source"], - confidence=merchant_slot["confidence"], - evidence=merchant_slot["evidence"], - required="merchant_name" in required_keys, - ), - 
self._make_slot_card( - key="amount", - value=amount_slot["value"], - raw_value=amount_slot["raw_value"], - normalized_value=amount_slot["normalized_value"], - source=amount_slot["source"], - confidence=amount_slot["confidence"], - evidence=amount_slot["evidence"], - required="amount" in required_keys, - ), - self._make_slot_card( - key="reason", - value=reason_slot["value"], - raw_value=reason_slot["raw_value"], - normalized_value=reason_slot["normalized_value"], - source=reason_slot["source"], - confidence=reason_slot["confidence"], - evidence=reason_slot["evidence"], - required="reason" in required_keys, - ), - self._make_slot_card( - key="participants", - value=participants_slot["value"], - raw_value=participants_slot["raw_value"], - normalized_value=participants_slot["normalized_value"], - source=participants_slot["source"], - confidence=participants_slot["confidence"], - evidence=participants_slot["evidence"], - required="participants" in required_keys, - ), - self._make_slot_card( - key="attachments", - value=attachment_slot["value"], - raw_value=attachment_slot["raw_value"], - normalized_value=attachment_slot["normalized_value"], - source=attachment_slot["source"], - confidence=attachment_slot["confidence"], - evidence=attachment_slot["evidence"], - required="attachments" in required_keys, - ), - ] - return cards - - def _build_review_document_cards( - self, - payload: UserAgentRequest, - *, - ocr_documents: list[dict[str, object]], - ) -> list[UserAgentReviewDocumentCard]: - cards: list[UserAgentReviewDocumentCard] = [] - for index, item in enumerate(ocr_documents, start=1): - classified = self._classify_document(item, payload) - fields = self._extract_document_fields(item) - cards.append( - UserAgentReviewDocumentCard( - index=index, - filename=str(item.get("filename") or f"document-{index}"), - document_type=classified["document_type"], - suggested_expense_type=classified["expense_type"], - scene_label=GROUP_SCENE_LABELS.get( - classified["group_code"], 
- classified["scene_label"], - ), - summary=str(item.get("summary") or item.get("text") or "").strip(), - avg_score=float(item.get("avg_score") or 0.0), - preview_kind=str(item.get("preview_kind") or "").strip(), - preview_data_url=str(item.get("preview_data_url") or "").strip(), - warnings=[str(warning) for warning in item.get("warnings", []) if str(warning).strip()], - fields=[ - UserAgentReviewDocumentField( - label=label, - value=value, - source="ocr", - ) - for label, value in fields.items() - if str(value).strip() - ], - ) - ) - return cards - - def _build_review_claim_groups( - self, - payload: UserAgentRequest, - *, - document_cards: list[UserAgentReviewDocumentCard], - ) -> list[UserAgentReviewClaimGroup]: - groups: dict[str, dict[str, object]] = {} - for card in document_cards: - group_code = self._normalize_group_code(card.suggested_expense_type) - bucket = groups.setdefault( - group_code, - { - "document_indexes": [], - "amount_total": 0.0, - "expense_type": str(card.suggested_expense_type or group_code).strip() or group_code, - "scene_label": GROUP_SCENE_LABELS.get( - str(card.suggested_expense_type or group_code).strip() or group_code, - GROUP_SCENE_LABELS.get(group_code, "其他费用"), - ), - "reasons": [], - }, - ) - bucket["document_indexes"].append(card.index) - bucket["amount_total"] = float(bucket["amount_total"]) + self._extract_amount_from_card(card) - bucket["reasons"].append(f"{card.filename} 识别为 {card.scene_label}") - current_expense_type = str(bucket["expense_type"] or "").strip() - current_card_type = str(card.suggested_expense_type or "").strip() - if current_expense_type and current_card_type and current_expense_type != current_card_type: - bucket["expense_type"] = group_code - bucket["scene_label"] = GROUP_SCENE_LABELS.get(group_code, "其他费用") - - if not groups: - expense_type_code = self._collect_entity_values(payload).get("expense_type_code", "other") - group_code = self._normalize_group_code(expense_type_code) - groups[group_code] = { - 
"document_indexes": [], - "amount_total": self._resolve_amount_value(payload), - "expense_type": expense_type_code or "other", - "scene_label": GROUP_SCENE_LABELS.get(group_code, "其他费用"), - "reasons": ["当前主要依据用户文本和页面上下文进行分单建议。"], - } - - claim_groups: list[UserAgentReviewClaimGroup] = [] - for index, (group_code, bucket) in enumerate(groups.items(), start=1): - title = f"建议报销单 {index}:{bucket['scene_label']}" - rationale = ( - ";".join(dict.fromkeys(str(item) for item in bucket["reasons"])) - if bucket["reasons"] - else "当前仅有单一场景,无需拆单。" - ) - claim_groups.append( - UserAgentReviewClaimGroup( - group_code=group_code, - title=title, - expense_type=str(bucket["expense_type"]), - scene_label=str(bucket["scene_label"]), - document_indexes=list(bucket["document_indexes"]), - amount_total=round(float(bucket["amount_total"]), 2), - rationale=rationale, - ) - ) - return claim_groups - - def _build_review_risk_briefs( - self, - payload: UserAgentRequest, - *, - citations: list[UserAgentCitation], - document_cards: list[UserAgentReviewDocumentCard], - claim_groups: list[UserAgentReviewClaimGroup], - ) -> list[UserAgentReviewRiskBrief]: - briefs: list[UserAgentReviewRiskBrief] = [] - for reason in self._resolve_submission_blocked_reasons(payload): - briefs.append( - UserAgentReviewRiskBrief( - title="提交风险提示", - level=self._resolve_submission_blocked_risk_level(reason), - content=reason, - detail=( - "该项属于提交审批前的阻断条件。系统会先要求补齐基础字段、附件或业务说明," - "否则审批人无法判断成本归属、业务真实性或票据有效性。" - ), - suggestion="按提示补齐对应信息;如果业务场景本身合理,请补充说明或佐证附件后再提交。", - ) - ) - - briefs.extend( - self._build_travel_policy_precheck_briefs( - payload, - document_cards=document_cards, - claim_groups=claim_groups, - ) - ) - - employee = self._resolve_employee_profile(payload) - employee_name = ( - str(employee.name).strip() - if employee is not None and employee.name - else self._collect_entity_values(payload).get("employee_name") - or str(payload.context_json.get("name") or "").strip() - ) - current_amount = 
self._resolve_amount_value(payload) or sum( - self._extract_amount_from_card(card) for card in document_cards - ) - if employee_name and current_amount > 0: - since = datetime.now(UTC) - timedelta(days=90) - claim_identity_conditions = [ExpenseClaim.employee_name == employee_name] - if employee is not None: - employee_identifiers = { - str(employee.name or "").strip(), - str(employee.email or "").strip(), - str(employee.employee_no or "").strip(), - } - employee_identifiers.discard("") - claim_identity_conditions = [ - ExpenseClaim.employee_id == employee.id, - ExpenseClaim.employee_name.in_(list(employee_identifiers)), - ] - stmt = select(ExpenseClaim).where(or_(*claim_identity_conditions), ExpenseClaim.occurred_at >= since) - recent_claims = list(self.db.scalars(stmt).all()) - if recent_claims: - duplicate_count = sum( - 1 - for item in recent_claims - if abs(float(item.amount) - current_amount) < 0.01 - ) - if duplicate_count: - briefs.append( - UserAgentReviewRiskBrief( - title="金额重复预警", - level="warning", - content=( - f"近 90 天发现 {duplicate_count} 笔金额相同的报销记录," - "提交前建议核对是否为重复报销或拆分不当。" - ), - detail=( - "系统将当前金额与近 90 天历史报销金额进行比对。金额完全一致不一定违规," - "但在交通、餐饮、办公采购等场景中可能提示重复票据或拆分报销。" - ), - suggestion="核对历史单据与当前票据是否对应同一业务;如不是重复,请在事由中说明差异。", - ) - ) - - warning_count = sum(len(item.warnings) for item in document_cards) - if warning_count: - briefs.append( - UserAgentReviewRiskBrief( - title="票据识别提醒", - level="warning", - content=f"当前共有 {warning_count} 条票据识别提示,建议逐张确认 OCR 识别字段。", - detail="票据 OCR 识别存在字段缺失、置信度偏低或类型判断不稳定时,会生成该提醒。", - suggestion="打开票据明细逐张核对日期、金额、商户和票据类型,必要时更正后再提交。", - ) - ) - - if len(claim_groups) > 1: - briefs.append( - UserAgentReviewRiskBrief( - title="建议拆单", - level="warning", - content=f"系统检测到 {len(claim_groups)} 类费用场景,建议拆成多张报销单后再提交。", - detail="同一批附件中包含多类费用场景时,混在一张报销单里会影响规则匹配、附件核验和审批归口。", - suggestion="按费用场景拆成多张报销单,分别确认金额、事由和附件归属。", - ) - ) - - return self._filter_deprecated_review_risk_briefs(briefs) - - @staticmethod - def 
_resolve_submission_blocked_risk_level(reason: str) -> str: - normalized = re.sub(r"\s+", "", str(reason or "")) - amount_keywords = ("金额", "超标", "费用", "价款", "票面金额", "单价", "合计") - return "high" if any(keyword in normalized for keyword in amount_keywords) else "warning" - - @staticmethod - def _filter_deprecated_review_risk_briefs( - briefs: list[UserAgentReviewRiskBrief], - ) -> list[UserAgentReviewRiskBrief]: - filtered: list[UserAgentReviewRiskBrief] = [] - for brief in briefs: - title = str(brief.title or "").strip() - if any(keyword in title for keyword in DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS): - continue - filtered.append(brief) - return filtered - - def _build_travel_policy_precheck_briefs( - self, - payload: UserAgentRequest, - *, - document_cards: list[UserAgentReviewDocumentCard], - claim_groups: list[UserAgentReviewClaimGroup], - ) -> list[UserAgentReviewRiskBrief]: - if not document_cards or not self._is_travel_review_context(payload, document_cards, claim_groups): - return [] - - rule_catalog = ExpenseRuleRuntimeService(self.db).load_catalog() - policy = rule_catalog.travel_policy - if policy is None: - return [] - - employee = self._resolve_employee_profile(payload) - grade = self._resolve_review_employee_grade(payload, employee=employee) - grade_band = ExpenseClaimService._resolve_travel_policy_band(grade) - band_label = policy.band_labels.get(grade_band or "", grade or "当前职级") - declared_city = self._resolve_declared_travel_city(payload, policy) - reason_corpus = self._build_review_reason_corpus(payload) - has_exception_note = self._text_contains_any(reason_corpus, policy.standard_exception_keywords) - standard_rule_name = str(getattr(policy, "standard_rule_name", "") or policy.rule_name) - standard_rule_version = str(getattr(policy, "standard_rule_version", "") or policy.rule_version) - - briefs: list[UserAgentReviewRiskBrief] = [] - amount_measurement_lines: list[str] = [] - seen_keys: set[str] = set() - - def append_once(key: str, brief: 
UserAgentReviewRiskBrief) -> None: - if key in seen_keys: - return - seen_keys.add(key) - briefs.append(brief) - - for card in document_cards: - document_type = str(card.document_type or "").strip().lower() - suggested_type = str(card.suggested_expense_type or "").strip().lower() - card_text = self._build_review_document_card_text(card) - document_type_label = resolve_document_type_label(document_type) - amount = self._extract_amount_decimal_from_card(card) - - if self._is_review_hotel_card(card): - hotel_city = self._extract_policy_city_from_text(card_text, policy) or declared_city - city_tier = policy.city_tiers.get(hotel_city, "tier_3") - city_tier_label = self._format_travel_city_tier(city_tier) - - if amount is None: - amount_measurement_lines.append( - f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法完成住宿差标测算。" - ) - append_once( - f"hotel-amount-missing-{card.index}", - UserAgentReviewRiskBrief( - title="住宿金额待补充", - level="warning", - content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算的住宿金额。", - detail=( - f"依据《{standard_rule_name}》({standard_rule_version}),住宿票据需要按员工职级、城市级别和每晚金额进行差标核算。" - "当前票据缺少金额,系统无法判断是否超出差旅标准。" - ), - suggestion="请在票据识别结果中补充或更正住宿金额,再继续核对报销单。", - ), - ) - continue - - if grade_band is None: - amount_measurement_lines.append( - f"{card.filename}:识别住宿金额 {amount:.2f} 元,但缺少员工职级,无法匹配住宿标准。" - ) - append_once( - f"hotel-grade-missing-{card.index}", - UserAgentReviewRiskBrief( - title="职级信息待确认", - level="warning", - content=f"{card.filename} 已识别住宿金额 {amount:.2f} 元,但当前员工职级缺失,无法匹配住宿标准。", - detail=( - f"依据《{standard_rule_name}》({standard_rule_version}),住宿标准按职级档位和城市级别配置。" - "当前未能识别员工职级,因此无法完成创建前差标核算。" - ), - suggestion="请确认员工档案或页面上下文中的职级信息,再重新进行差旅规则预检。", - ), - ) - continue - - cap = self._resolve_review_hotel_cap( - policy, - grade_band=grade_band, - city=hotel_city, - city_tier=city_tier, - ) - if cap <= Decimal("0.00"): - continue - night_count = self._extract_review_hotel_night_count(card) - nightly_amount = (amount / 
Decimal(max(night_count, 1))).quantize(Decimal("0.01")) - amount_measurement_lines.append( - f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元," - f"按 {night_count} 晚折算 {nightly_amount:.2f} 元/晚;" - f"适用标准为 {band_label}{city_tier_label} {cap:.2f} 元/晚," - f"{'超出标准' if nightly_amount > cap else '测算通过'}。" - ) - if nightly_amount <= cap: - continue - - basis = ( - f"依据《{standard_rule_name}》({standard_rule_version}),{band_label} 在{city_tier_label}" - f"住宿标准为 {cap:.2f} 元/晚;{card.filename} 识别为{document_type_label}," - f"金额 {amount:.2f} 元,按 {night_count} 晚折算约 {nightly_amount:.2f} 元/晚。" - ) - append_once( - f"hotel-over-limit-{card.index}", - UserAgentReviewRiskBrief( - title="住宿超标待说明" if not has_exception_note else "住宿超标提醒", - level="high", - content=( - f"{card.filename} 住宿金额约 {nightly_amount:.2f} 元/晚," - f"超过 {band_label} {city_tier_label}标准 {cap:.2f} 元/晚。" - ), - detail=( - basis - + ( - "当前未识别到超标说明,创建单据前需要先补充原因。" - if not has_exception_note - else "当前已识别到例外说明,后续仍需审批人重点复核。" - ) - ), - suggestion="补充超标说明、协议酒店满房/会议高峰等原因,或调整住宿金额后再继续。", - ), - ) - continue - - if document_type == "meal_receipt": - allowance = self._resolve_review_travel_allowance_standard( - policy, - declared_city=declared_city, - card_text=card_text, - ) - if allowance is not None: - region_label, standard_amount = allowance - if amount is None: - amount_measurement_lines.append( - f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法按{region_label}伙食补助标准测算。" - ) - append_once( - f"travel-meal-amount-missing-{card.index}", - UserAgentReviewRiskBrief( - title="差旅餐饮金额待补充", - level="high", - content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算金额。", - detail=( - f"依据《{standard_rule_name}》({standard_rule_version}),差旅餐饮票据优先按出差补助标准中的伙食补助进行测算。" - f"当前匹配区域为{region_label},但票据缺少金额,系统无法判断是否超出补助标准。" - ), - suggestion="请在票据识别结果中补充或更正餐饮金额,再继续创建报销单。", - ), - ) - continue - - amount_measurement_lines.append( - f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;" - 
f"适用《{standard_rule_name}》{region_label}伙食补助标准 {standard_amount:.2f} 元/天," - f"{'超出标准' if amount > standard_amount else '测算通过'}。" - ) - if amount > standard_amount: - append_once( - f"travel-meal-allowance-over-limit-{card.index}", - UserAgentReviewRiskBrief( - title="差旅餐饮金额超出伙食补助标准", - level="high", - content=( - f"{card.filename} 识别金额 {amount:.2f} 元," - f"超过{region_label}伙食补助标准 {standard_amount:.2f} 元/天。" - ), - detail=( - f"依据《{standard_rule_name}》({standard_rule_version})的出差补助标准," - f"{region_label}伙食补助为 {standard_amount:.2f} 元/天;" - f"当前票据类型识别为{document_type_label},识别金额 {amount:.2f} 元。" - "首轮上传阶段按单张票据先行测算,后续可结合出差天数和实际餐补口径复核。" - ), - suggestion="如该票据属于差旅餐补,请调整金额或补充超标/拆分说明;如属于业务招待或普通餐费,请改为对应费用类型后再提交。", - ), - ) - continue - - scene_code = self._resolve_review_amount_scene_code(card, payload) - scene_policy = rule_catalog.get_scene_policy(scene_code) - scene_limit = self._resolve_review_scene_amount_limit(scene_policy) - if scene_policy is not None and scene_limit is not None: - metric_label = str(getattr(scene_limit, "metric_label", "") or scene_policy.label or "金额").strip() - standard_amount = self._resolve_scene_standard_amount(scene_limit) - if amount is None: - amount_measurement_lines.append( - f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法按{metric_label}测算。" - ) - append_once( - f"{scene_code}-amount-missing-{card.index}", - UserAgentReviewRiskBrief( - title=f"{scene_policy.label}金额待补充", - level="warning", - content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算金额。", - detail=( - f"依据《{scene_policy.rule_name}》({scene_policy.rule_version})," - f"{scene_policy.label}需要按{metric_label}进行金额审核。当前票据缺少金额,系统无法判断是否合规。" - ), - suggestion="请在票据识别结果中补充或更正金额,再继续核对报销单。", - ), - ) - continue - - if standard_amount is not None: - amount_measurement_lines.append( - f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;" - f"适用《{scene_policy.rule_name}》{metric_label}标准 {standard_amount:.2f} 元," - f"{'超出标准' if amount > standard_amount else '测算通过'}。" - ) 
- - amount_risk = self._evaluate_review_scene_amount( - amount=amount, - limit_config=scene_limit, - reason_text=reason_corpus, - ) - if amount_risk is not None: - severity, threshold = amount_risk - append_once( - f"{scene_code}-amount-over-limit-{card.index}", - UserAgentReviewRiskBrief( - title=f"{scene_policy.label}金额超标待说明", - level="high" if severity == "high" else "warning", - content=( - f"{card.filename} 识别金额 {amount:.2f} 元," - f"超过{metric_label}标准 {threshold:.2f} 元。" - ), - detail=( - f"依据《{scene_policy.rule_name}》({scene_policy.rule_version})," - f"{scene_policy.label}按{metric_label}审核,当前票据类型识别为{document_type_label}," - f"识别金额 {amount:.2f} 元,标准阈值 {threshold:.2f} 元。" - ), - suggestion="请补充超标原因或拆分到更准确的费用类型;如属于例外场景,请在事由中写明业务背景。", - ), - ) - continue - - transport_class = self._detect_review_transport_class(card, policy) - if transport_class and grade_band is not None: - transport_kind, class_label, class_level = transport_class - allowed_level = policy.transport_limits.get(grade_band, {}).get(transport_kind) - if allowed_level is not None and class_level > allowed_level: - append_once( - f"transport-class-over-limit-{card.index}-{class_label}", - UserAgentReviewRiskBrief( - title="交通舱位超标待说明" if not has_exception_note else "交通舱位超标提醒", - level="warning", - content=f"{card.filename} 识别为 {class_label},{band_label} 当前默认不可报销该舱位/席别。", - detail=( - f"依据《{standard_rule_name}》({standard_rule_version}),{band_label} 的交通席别标准" - f"未覆盖 {class_label};票据类型识别为{document_type_label}。" - + ( - "当前未识别到例外说明,创建单据前需要补充原因。" - if not has_exception_note - else "当前已识别到例外说明,后续仍需审批人重点复核。" - ) - ), - suggestion="补充无直达、临时改签、行程变更等例外说明,或更换为符合标准的票据。", - ), - ) - continue - - if document_type == "meal_receipt" and self._is_travel_review_context(payload, document_cards, claim_groups): - if amount is not None: - amount_measurement_lines.append( - f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;需确认按餐补、餐费或业务招待口径归口。" - ) - append_once( - f"travel-meal-card-{card.index}", - 
UserAgentReviewRiskBrief( - title="差旅餐饮票据待归口", - level="warning", - content=f"{card.filename} 已识别为餐饮票据,当前差旅报销单需要确认是否允许并入差旅费用。", - detail=( - f"依据《{standard_rule_name}》({standard_rule_version})的差旅票据预检口径,系统优先核算交通、住宿等差旅核心票据。" - "餐饮票据可能需要按餐费或业务招待场景拆分,并补充同行人员或客户信息。" - ), - suggestion="如属于差旅餐补,请补充制度允许口径;如属于招待或普通餐费,建议拆成对应费用类型单据。", - ), - ) - continue - - if suggested_type in {"travel", "hotel", "transport"} and document_type in {"other", "travel_ticket"}: - append_once( - f"travel-type-uncertain-{card.index}", - UserAgentReviewRiskBrief( - title="差旅票据类型待确认", - level="warning", - content=f"{card.filename} 归入差旅场景,但票据类型仍需确认。", - detail=( - f"依据《{standard_rule_name}》({standard_rule_version}),差旅预检需要先明确票据是机票、火车票、住宿票据、打车票等," - "再匹配对应的金额或舱位规则。当前类型识别不够稳定。" - ), - suggestion="请在附件识别结果中更正票据类型,或重新上传更清晰的附件后再继续。", - ), - ) - - if amount_measurement_lines: - briefs.insert( - 0, - UserAgentReviewRiskBrief( - title="附件金额测算结果", - level="info", - content="系统已根据首轮上传附件识别金额,并匹配当前可执行的报销标准进行测算。", - detail=";".join(dict.fromkeys(amount_measurement_lines)), - suggestion="如测算结果超标,请补充超标说明、调整金额或更正票据类型后再继续。", - ), - ) - - return briefs - - def _is_travel_review_context( - self, - payload: UserAgentRequest, - document_cards: list[UserAgentReviewDocumentCard], - claim_groups: list[UserAgentReviewClaimGroup], - ) -> bool: - entity_expense_type = self._collect_entity_values(payload).get("expense_type_code", "") - review_form_values = self._resolve_review_form_values(payload) - form_expense_type = str(review_form_values.get("expense_type") or "").strip() - message_context = " ".join( - [ - str(payload.message or ""), - str(payload.context_json.get("user_input_text") or ""), - str(payload.context_json.get("expense_type") or ""), - form_expense_type, - ] - ) - if entity_expense_type in {"travel", "hotel", "transport"}: - return True - if any(group.group_code == "travel" or group.expense_type in {"travel", "hotel", "transport"} for group in claim_groups): - return True - if any(card.suggested_expense_type in 
{"travel", "hotel", "transport"} for card in document_cards): - return True - return any(keyword in message_context for keyword in ("差旅", "出差", "机票", "火车", "高铁", "酒店", "住宿")) - - def _build_travel_receipt_state( - self, - payload: UserAgentRequest, - *, - document_cards: list[UserAgentReviewDocumentCard], - claim_groups: list[UserAgentReviewClaimGroup], - ) -> dict[str, Any]: - empty_state: dict[str, Any] = { - "is_travel_context": False, - "has_long_distance_ticket": False, - "ticket_type_label": "", - "ticket_amount": Decimal("0.00"), - "destination": "", - "days": 1, - "has_hotel_invoice": False, - "has_local_transport": False, - "required_missing_labels": [], - "optional_missing_labels": [], - "blocks_next_step": False, - } - if not document_cards or not self._is_travel_review_context(payload, document_cards, claim_groups): - return empty_state - - long_distance_cards = [card for card in document_cards if self._is_long_distance_travel_card(card)] - if not long_distance_cards: - return { - **empty_state, - "is_travel_context": True, - } - - has_hotel_invoice = any(self._is_review_hotel_card(card) for card in document_cards) - has_local_transport = any(self._is_local_transport_receipt_card(card) for card in document_cards) - required_missing_labels = [] if has_hotel_invoice else ["酒店的报销票据待上传(必须)"] - optional_missing_labels = [] if has_local_transport else ["市内交通/乘车票据可继续上传(非必须)"] - ticket_amount = sum( - (self._extract_amount_decimal_from_card(card) or Decimal("0.00")) - for card in long_distance_cards - ).quantize(Decimal("0.01")) - - return { - **empty_state, - "is_travel_context": True, - "has_long_distance_ticket": True, - "ticket_type_label": self._resolve_travel_ticket_type_label(long_distance_cards), - "ticket_amount": ticket_amount, - "destination": self._resolve_travel_receipt_destination(payload, long_distance_cards), - "days": self._resolve_travel_receipt_days(payload, long_distance_cards), - "has_hotel_invoice": has_hotel_invoice, - 
"has_local_transport": has_local_transport, - "required_missing_labels": required_missing_labels, - "optional_missing_labels": optional_missing_labels, - "blocks_next_step": bool(required_missing_labels), - } - - @staticmethod - def _is_long_distance_travel_card(card: UserAgentReviewDocumentCard) -> bool: - document_type = str(card.document_type or "").strip().lower() - return document_type in {"train_ticket", "flight_itinerary"} - - @staticmethod - def _is_local_transport_receipt_card(card: UserAgentReviewDocumentCard) -> bool: - document_type = str(card.document_type or "").strip().lower() - suggested_type = str(card.suggested_expense_type or "").strip().lower() - return document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"} or ( - suggested_type == "transport" and document_type not in {"train_ticket", "flight_itinerary"} - ) - - @staticmethod - def _resolve_travel_ticket_type_label(cards: list[UserAgentReviewDocumentCard]) -> str: - labels: list[str] = [] - for card in cards: - document_type = str(card.document_type or "").strip().lower() - if document_type == "train_ticket" and "火车" not in labels: - labels.append("火车") - if document_type == "flight_itinerary" and "飞机" not in labels: - labels.append("飞机") - return "/".join(labels) if labels else "交通" - - def _resolve_travel_receipt_destination( - self, - payload: UserAgentRequest, - long_distance_cards: list[UserAgentReviewDocumentCard], - ) -> str: - for card in long_distance_cards: - for field in card.fields: - if str(field.label or "").strip() not in {"行程", "路线"}: - continue - destination = self._extract_travel_destination_from_route(field.value) - if destination: - return self._normalize_travel_destination(destination) - - card_text = self._build_review_document_card_text(card) - route_match = TRAVEL_ROUTE_PATTERN.search(card_text) - if route_match: - return self._normalize_travel_destination(route_match.group(2)) - - location = self._resolve_location_value(payload) - if location: - 
return self._normalize_travel_destination(location) - return "" - - @staticmethod - def _extract_travel_destination_from_route(value: str) -> str: - route_text = str(value or "").strip() - if not route_text: - return "" - route_match = TRAVEL_ROUTE_PATTERN.search(route_text) - if route_match: - return route_match.group(2).strip() - parts = [ - item.strip() - for item in re.split(r"\s*(?:至|到|→|->|-|—|~|~)\s*", route_text) - if item.strip() - ] - return parts[-1] if len(parts) >= 2 else "" - - def _normalize_travel_destination(self, value: str) -> str: - candidate = re.sub( - r"(?:火车站|高铁站|动车站|车站|站|机场|航站楼)$", - "", - str(value or "").strip(), - ) - if not candidate: - return "" - try: - policy = ExpenseRuleRuntimeService(self.db).load_catalog().travel_policy - except Exception: - policy = None - if policy is not None: - policy_city = self._extract_policy_city_from_text(candidate, policy) - if policy_city: - return policy_city - return candidate - - def _resolve_travel_receipt_days( - self, - payload: UserAgentRequest, - long_distance_cards: list[UserAgentReviewDocumentCard], - ) -> int: - dates: list[datetime] = [] - for card in long_distance_cards: - card_text = self._build_review_document_card_text(card) - dates.extend(self._extract_dates_from_text(card_text)) - - if dates: - return max(1, (max(dates).date() - min(dates).date()).days + 1) - - start_date = self._parse_date_text(payload.ontology.time_range.start_date or "") - end_date = self._parse_date_text(payload.ontology.time_range.end_date or "") - if start_date and end_date: - return max(1, (end_date.date() - start_date.date()).days + 1) - return 1 - - @staticmethod - def _extract_dates_from_text(text: str) -> list[datetime]: - dates: list[datetime] = [] - for match in DATE_TEXT_PATTERN.finditer(str(text or "")): - parsed = UserAgentService._parse_date_text(match.group(1)) - if parsed is not None: - dates.append(parsed) - return dates - - @staticmethod - def _parse_date_text(value: str) -> datetime | None: - 
raw_value = str(value or "").strip() - if not raw_value: - return None - normalized = ( - raw_value.replace("年", "-") - .replace("月", "-") - .replace("/", "-") - .replace("日", "") - .strip() - ) - parts = [part for part in normalized.split("-") if part] - if len(parts) != 3: - return None - try: - year, month, day = (int(part) for part in parts) - return datetime(year, month, day) - except ValueError: - return None - - def _build_travel_receipt_briefs( - self, - travel_receipt_state: dict[str, Any], - ) -> list[UserAgentReviewRiskBrief]: - if not travel_receipt_state.get("has_long_distance_ticket"): - return [] - - required_labels = [ - str(item).strip() - for item in travel_receipt_state.get("required_missing_labels", []) - if str(item).strip() - ] - optional_labels = [ - str(item).strip() - for item in travel_receipt_state.get("optional_missing_labels", []) - if str(item).strip() - ] - if not required_labels and not optional_labels: - return [] - - content_parts = [*required_labels, *optional_labels] - required_text = ";".join(required_labels) - optional_text = ";".join(optional_labels) - return [ - UserAgentReviewRiskBrief( - title="差旅票据待补充", - level="warning" if required_labels else "info", - content=";".join(content_parts), - detail=( - "系统已识别到长途交通票据,会按差旅报销口径核对住宿、交通等票据完整性。" - + (f"当前必须补充:{required_text}。" if required_text else "") - + (f"当前还可以补充:{optional_text}。" if optional_text else "") - ), - suggestion=( - "请先补充酒店住宿发票或住宿清单;在补齐前只能保存为草稿。" - if required_labels - else "如还有市内交通、打车、地铁或停车等乘车票据,可以继续上传;没有也可以进入下一步或保存草稿。" - ), - ) - ] - - def _resolve_review_travel_allowance_standard( - self, - policy: RuntimeTravelPolicy, - *, - declared_city: str, - card_text: str, - ) -> tuple[str, Decimal] | None: - meal_limits = getattr(policy, "allowance_limits", {}).get("meal", {}) - if not meal_limits: - return None - - region_label = self._resolve_review_travel_allowance_region( - " ".join([declared_city or "", card_text or ""]) - ) - amount = meal_limits.get(region_label) - 
if amount is None and region_label != "其他地区": - amount = meal_limits.get("其他地区") - region_label = "其他地区" - if amount is None: - return None - return region_label, Decimal(amount).quantize(Decimal("0.01")) - - @staticmethod - def _resolve_review_travel_allowance_region(text: str) -> str: - normalized = re.sub(r"\s+", "", str(text or "")) - if not normalized: - return "其他地区" - if any(keyword in normalized for keyword in ("境外", "国外", "海外")): - return "国外" - if any(keyword in normalized for keyword in ("香港", "澳门", "台湾", "港澳台")): - return "港澳台" - if "乌鲁木齐" in normalized: - return "新疆-乌鲁木齐" - if "新疆" in normalized: - return "新疆-其他" - if any(keyword in normalized for keyword in ("西藏", "拉萨")): - return "西藏" - if any(keyword in normalized for keyword in ("北京", "上海", "天津", "重庆", "深圳", "珠海", "汕头", "厦门")): - return "直辖市/特区" - return "其他地区" - - def _resolve_review_amount_scene_code( - self, - card: UserAgentReviewDocumentCard, - payload: UserAgentRequest, - ) -> str: - document_type = str(card.document_type or "").strip().lower() - suggested_type = str(card.suggested_expense_type or "").strip().lower() - if document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"}: - return "transport" - if document_type == "meal_receipt": - entity_values = self._collect_entity_values(payload) - if suggested_type == "entertainment" or entity_values.get("expense_type_code") == "entertainment": - return "entertainment" - return "meal" - if document_type == "hotel_invoice" or suggested_type == "hotel": - return "hotel" - if suggested_type in { - "travel", - "transport", - "meal", - "entertainment", - "office", - "meeting", - "training", - "communication", - "welfare", - "other", - }: - return suggested_type - return self._collect_entity_values(payload).get("expense_type_code") or "other" - - @staticmethod - def _resolve_review_scene_amount_limit(scene_policy: Any | None) -> Any | None: - if scene_policy is None: - return None - return getattr(scene_policy, "item_amount_limit", 
None) or getattr(scene_policy, "claim_amount_limit", None) - - @staticmethod - def _resolve_scene_standard_amount(limit_config: Any | None) -> Decimal | None: - if limit_config is None: - return None - warn_amount = getattr(limit_config, "warn_amount", None) - block_amount = getattr(limit_config, "block_amount", None) - amount = warn_amount if warn_amount is not None else block_amount - if amount is None: - return None - try: - return Decimal(amount).quantize(Decimal("0.01")) - except (InvalidOperation, ValueError): - return None - - @staticmethod - def _evaluate_review_scene_amount( - *, - amount: Decimal, - limit_config: Any, - reason_text: str, - ) -> tuple[str, Decimal] | None: - block_amount = getattr(limit_config, "block_amount", None) - warn_amount = getattr(limit_config, "warn_amount", None) - exception_keywords = list(getattr(limit_config, "exception_keywords", []) or []) - has_exception = UserAgentService._text_contains_any(reason_text, exception_keywords) - - if block_amount is not None and amount > Decimal(block_amount): - return ("high", Decimal(block_amount).quantize(Decimal("0.01"))) - if warn_amount is not None and amount > Decimal(warn_amount): - return ("high", Decimal(warn_amount).quantize(Decimal("0.01"))) - return None - - def _resolve_review_employee_grade(self, payload: UserAgentRequest, *, employee: Employee | None) -> str: - if employee is not None and employee.grade: - return str(employee.grade).strip() - review_form_values = self._resolve_review_form_values(payload) - for source in ( - review_form_values, - payload.context_json, - payload.tool_payload, - ): - for key in ("employee_grade", "grade", "user_grade", "position_grade"): - value = str(source.get(key) or "").strip() if isinstance(source, dict) else "" - if value: - return value - return "" - - def _build_review_reason_corpus(self, payload: UserAgentRequest) -> str: - review_form_values = self._resolve_review_form_values(payload) - parts = [ - str(payload.message or ""), - 
str(payload.context_json.get("user_input_text") or ""), - str(review_form_values.get("reason") or ""), - str(review_form_values.get("business_reason") or ""), - str(review_form_values.get("location") or ""), - str(review_form_values.get("business_location") or ""), - ] - return "\n".join(part.strip() for part in parts if part and part.strip()) - - def _resolve_declared_travel_city(self, payload: UserAgentRequest, policy: RuntimeTravelPolicy) -> str: - review_form_values = self._resolve_review_form_values(payload) - candidates = [ - str(review_form_values.get("business_location") or ""), - str(review_form_values.get("location") or ""), - self._resolve_location_value(payload), - str(payload.message or ""), - ] - for candidate in candidates: - city = self._extract_policy_city_from_text(candidate, policy) - if city: - return city - return "" - - @staticmethod - def _build_review_document_card_text(card: UserAgentReviewDocumentCard) -> str: - field_text = " ".join(f"{field.label}:{field.value}" for field in card.fields) - return " ".join( - [ - str(card.filename or ""), - str(card.document_type or ""), - str(card.scene_label or ""), - str(card.summary or ""), - field_text, - ] - ).strip() - - @staticmethod - def _is_review_hotel_card(card: UserAgentReviewDocumentCard) -> bool: - document_type = str(card.document_type or "").strip().lower() - suggested_type = str(card.suggested_expense_type or "").strip().lower() - scene_label = str(card.scene_label or "").strip() - return document_type == "hotel_invoice" or suggested_type == "hotel" or "住宿" in scene_label - - @staticmethod - def _extract_amount_decimal_from_card(card: UserAgentReviewDocumentCard) -> Decimal | None: - for field in card.fields: - if field.label != "金额": - continue - normalized = str(field.value or "").replace("元", "").replace("¥", "").replace("¥", "").replace(",", "").strip() - try: - amount = Decimal(normalized).quantize(Decimal("0.01")) - except (InvalidOperation, ValueError): - continue - if amount > 
Decimal("0.00"): - return amount - return None - - @staticmethod - def _extract_review_hotel_night_count(card: UserAgentReviewDocumentCard) -> int: - text = f"{card.summary or ''} {' '.join(f'{field.label}:{field.value}' for field in card.fields)}" - match = TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN.search(text) - if not match: - return 1 - try: - return max(1, int(match.group(1))) - except (TypeError, ValueError): - return 1 - - @staticmethod - def _extract_policy_city_from_text(text: str, policy: RuntimeTravelPolicy) -> str: - normalized = str(text or "").strip() - if not normalized: - return "" - city_names = set(policy.city_tiers.keys()) - city_names.update(getattr(policy, "hotel_city_limits", {}).keys()) - for city in sorted(city_names, key=lambda item: len(item), reverse=True): - if city in normalized: - return city - return "" - - @staticmethod - def _format_travel_city_tier(city_tier: str) -> str: - return { - "tier_1": "一线城市", - "tier_2": "重点城市", - "tier_3": "其他城市", - }.get(str(city_tier or "").strip(), "当前城市") - - @staticmethod - def _resolve_review_hotel_cap( - policy: RuntimeTravelPolicy, - *, - grade_band: str, - city: str, - city_tier: str, - ) -> Decimal: - normalized_city = str(city or "").strip() - if normalized_city and getattr(policy, "hotel_city_limits", None): - city_limits = policy.hotel_city_limits.get(normalized_city, {}) - city_cap = city_limits.get(grade_band) - if city_cap is not None: - return Decimal(city_cap).quantize(Decimal("0.01")) - return Decimal(policy.hotel_limits.get(grade_band, {}).get(city_tier, Decimal("0.00"))).quantize( - Decimal("0.01") - ) - - def _detect_review_transport_class( - self, - card: UserAgentReviewDocumentCard, - policy: RuntimeTravelPolicy, - ) -> tuple[str, str, int] | None: - document_type = str(card.document_type or "").strip().lower() - text = re.sub(r"\s+", "", self._build_review_document_card_text(card)) - if not text: - return None - - if document_type == "flight_itinerary" or any(keyword in text for keyword 
in ("机票", "航班", "登机牌")): - for config in policy.flight_classes: - label = str(config.keyword or "").strip() - if label and label in text: - return "flight", label, int(config.level) - - if document_type == "train_ticket" or any(keyword in text for keyword in ("火车", "高铁", "动车", "铁路")): - for config in policy.train_classes: - label = str(config.keyword or "").strip() - if label and label in text: - return "train", label, int(config.level) - return None - - @staticmethod - def _text_contains_any(text: str, keywords: list[str] | tuple[str, ...]) -> bool: - compact = re.sub(r"\s+", "", str(text or "")) - return bool(compact) and any(str(keyword or "").strip() and str(keyword).strip() in compact for keyword in keywords) - - @staticmethod - def _resolve_submission_blocked_reasons(payload: UserAgentRequest) -> list[str]: - raw_reasons = payload.tool_payload.get("submission_blocked_reasons") - submission_blocked = bool(payload.tool_payload.get("submission_blocked")) - if raw_reasons is None and submission_blocked: - raw_reasons = payload.tool_payload.get("missing_fields") - if raw_reasons is None and not submission_blocked: - return [] - - reasons: list[str] = [] - if isinstance(raw_reasons, list): - reasons.extend(str(item or "").strip() for item in raw_reasons) - elif isinstance(raw_reasons, str): - reasons.extend( - item.strip() - for item in re.split(r"[;;\n]+", raw_reasons) - if item.strip() - ) - - if not reasons and submission_blocked: - message = str(payload.tool_payload.get("message") or "").strip() - for prefix in ( - "提交前请先补全信息:", - "AI预审暂未通过,原因如下:", - "AI预审未通过,原因如下:", - "AI预审暂未通过:", - "AI预审未通过:", - ): - if message.startswith(prefix): - message = message[len(prefix):].strip() - break - if message: - reasons.extend( - item.strip() - for item in re.split(r"[;;\n]+", message) - if item.strip() and not item.strip().startswith("AI预审暂未通过") - ) - - return list(dict.fromkeys(reason for reason in reasons if reason)) - - def _build_review_confirmation_actions( - self, - 
payload: UserAgentRequest, - *, - can_proceed: bool, - claim_groups: list[UserAgentReviewClaimGroup], - draft_payload: UserAgentDraftPayload | None, - missing_slot_keys: set[str] | None = None, - ) -> list[UserAgentReviewAction]: - missing_slot_keys = set(missing_slot_keys or set()) - if self._is_review_association_choice_pending(payload): - claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip() - link_label = f"关联到草稿 {claim_no}" if claim_no else "关联到现有草稿" - return [ - UserAgentReviewAction( - label=link_label, - action_type="link_to_existing_draft", - description=( - f"把本次上传票据并入现有草稿 {claim_no}。" - if claim_no - else "把本次上传票据并入现有草稿。" - ), - emphasis="primary", - ), - UserAgentReviewAction( - label="单独建立报销单", - action_type="create_new_claim_from_documents", - description="基于当前上传的多张票据,新建一张独立的报销草稿。", - emphasis="secondary", - ), - ] - - review_action = str(payload.context_json.get("review_action") or "").strip() - if "expense_type" in missing_slot_keys and not review_action: - return [ - UserAgentReviewAction( - label="保存为草稿", - action_type="save_draft", - description="先暂存当前已识别信息,稍后仍可从个人报销继续补充或提交。", - emphasis="primary", - ), - ] - - primary_action = UserAgentReviewAction( - label="继续下一步" if can_proceed else "保存为草稿", - action_type="next_step" if can_proceed else "save_draft", - description=( - "当前识别信息已满足继续处理条件,确认后进入下一步。" - if can_proceed - else "暂存当前识别结果,后续可以继续补充或修改。" - ), - emphasis="primary", - ) - if len(claim_groups) > 1 and can_proceed: - primary_action.description = f"系统建议拆分为 {len(claim_groups)} 张报销单,确认后继续下一步。" - if draft_payload is not None and draft_payload.claim_no and not can_proceed: - primary_action.description = f"保存后会生成草稿 {draft_payload.claim_no},后续仍可继续补充。" - - actions = [] - if can_proceed: - actions.append( - UserAgentReviewAction( - label="保存为草稿", - action_type="save_draft", - description="先暂存当前已识别信息,稍后仍可从个人报销继续补充或提交。", - emphasis="secondary", - ) - ) - actions.append(primary_action) - return actions - - def 
_build_review_intent_summary( - self, - payload: UserAgentRequest, - *, - slot_cards: list[UserAgentReviewSlotCard], - claim_groups: list[UserAgentReviewClaimGroup], - ) -> str: - slots = {item.key: item for item in slot_cards} - expense_type = slots.get("expense_type") - amount = slots.get("amount") - time_range = slots.get("time_range") - location = slots.get("location") - customer = slots.get("customer_name") - - summary = "我先根据您当前提供的信息整理出一笔报销:" - if expense_type and expense_type.value: - summary = f"识别到您希望报销一笔“{expense_type.value}”费用:" - details: list[str] = [] - if customer and customer.value: - details.append(f"客户:{customer.value}") - if time_range and time_range.value: - details.append(f"时间:{time_range.value}") - if location and location.value: - details.append(f"地点:{location.value}") - if amount and amount.value: - details.append(f"金额:{amount.value}") - reason = slots.get("reason") - if reason and reason.value: - details.append(f"事由:{reason.value}") - if details: - return "\n\n".join([summary, "基础信息识别结果:", "\n".join(details)]) - return summary - - def _build_review_body_answer( - self, - payload: UserAgentRequest, - *, - review_payload: UserAgentReviewPayload | None, - draft_payload: UserAgentDraftPayload | None, - ) -> str | None: - if review_payload is None: - return None - if payload.ontology.scenario != "expense": - return None - if payload.ontology.intent not in {"draft", "operate"}: - return None - if payload.tool_payload.get("draft_limit_reached"): - return ( - str(payload.tool_payload.get("message") or "").strip() - or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。" - ) - - review_action = str(payload.context_json.get("review_action") or "").strip() - if payload.tool_payload.get("preview_only") and not review_action: - return review_payload.body_message or self._build_review_intent_summary( - payload, - slot_cards=review_payload.slot_cards, - claim_groups=review_payload.claim_groups, - ) - if payload.tool_payload.get("duplicate_attachment_blocked") or 
payload.tool_payload.get("duplicate_invoice_blocked"): - return ( - str(payload.tool_payload.get("message") or "").strip() - or "检测到本次上传票据与当前单据已有票据重复,请重新上传不同的票据后再归集。" - ) - if review_action == "save_draft": - if draft_payload is not None and draft_payload.claim_no: - return ( - f"已按您当前确认的信息保存为草稿 {draft_payload.claim_no}。" - "后续您可以继续补充缺失项,或修改识别结果后再继续提交。" - ) - return "已按您当前确认的信息保存为草稿。后续您可以继续补充缺失项,或修改识别结果后再继续提交。" - if review_action == "link_to_existing_draft": - document_count = self._resolve_review_document_count(payload) - followup_copy = self._build_review_action_followup_copy(review_payload) - if draft_payload is not None and draft_payload.claim_no: - return ( - f"已将本次上传的 {document_count} 张票据关联到草稿 {draft_payload.claim_no}。" - f"{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" - ) - return f"已将本次上传的票据关联到现有草稿。{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" - if review_action == "create_new_claim_from_documents": - document_count = self._resolve_review_document_count(payload) - followup_copy = self._build_review_action_followup_copy(review_payload) - if draft_payload is not None and draft_payload.claim_no: - return ( - f"已按当前上传的 {document_count} 张票据新建报销草稿 {draft_payload.claim_no}。" - f"{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" - ) - return f"已按当前上传票据新建报销草稿。{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" - if review_action == "next_step": - if draft_payload is not None and draft_payload.status == "submitted": - stage_text = draft_payload.approval_stage or "审批中" - return f"报销单 {draft_payload.claim_no or ''} 已提交,当前节点为 {stage_text}。".strip() - if payload.tool_payload.get("submission_blocked"): - reasons = self._resolve_submission_blocked_reasons(payload) - if reasons: - reason_lines = "\n".join( - f"{index}. 
{reason}" for index, reason in enumerate(reasons, start=1) - ) - return ( - "AI预审暂未通过,所以还没有提交到审批人。\n" - f"{reason_lines}\n" - "请先处理以上项目;处理完成后再点继续下一步。" - ) - return str(payload.tool_payload.get("message") or "").strip() or "当前报销单暂时还不能提交审批。" - return ( - f"{self._build_review_intent_summary(payload, slot_cards=review_payload.slot_cards, claim_groups=review_payload.claim_groups)}\n\n" - "当前关键信息已基本齐全,您确认无误后可以继续下一步。" - ) - return review_payload.body_message or None - - def _build_review_body_message( - self, - payload: UserAgentRequest, - *, - slot_cards: list[UserAgentReviewSlotCard], - risk_briefs: list[UserAgentReviewRiskBrief], - can_proceed: bool, - document_cards: list[UserAgentReviewDocumentCard], - travel_receipt_state: dict[str, Any] | None = None, - ) -> str: - if self._is_review_association_choice_pending(payload): - claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip() - document_count = len(document_cards) or self._resolve_review_document_count(payload) - if claim_no: - return ( - f"已识别出本次上传的 {document_count} 张票据。" - f"系统检测到你已有草稿 {claim_no},请选择关联到该草稿,或单独建立一张新的报销单。" - ) - return ( - f"已识别出本次上传的 {document_count} 张票据。" - "系统检测到你已有可用草稿,请先选择关联到现有草稿,或单独建立一张新的报销单。" - ) - - blocked_reasons = self._resolve_submission_blocked_reasons(payload) - if blocked_reasons: - reason_text = ";".join(dict.fromkeys(reason.strip("。;;") for reason in blocked_reasons if reason)) - return ( - f"AI预审未通过:{reason_text}。" - "请先根据风险提示补充原因、调整金额或更换附件,整改后再继续提交。" - ) - - travel_message = self._build_travel_receipt_guidance_message( - payload, - travel_receipt_state=travel_receipt_state or {}, - can_proceed=can_proceed, - ) - if travel_message: - return travel_message - - missing_labels = self._resolve_review_missing_slot_labels(slot_cards) - if travel_receipt_state: - missing_labels.extend( - str(item) - for item in travel_receipt_state.get("required_missing_labels", []) - if str(item).strip() - ) - missing_labels = list(dict.fromkeys(missing_labels)) - - 
expense_type_slot = next((item for item in slot_cards if item.key == "expense_type"), None) - if expense_type_slot is not None and not str(expense_type_slot.value or "").strip(): - return ( - f"{self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[])}\n\n" - "我已经先保留了当前识别出的时间、地点和事由,但还不能确定这张单据应该走哪类报销流程。" - "请先点击“选择报销类型”,在差旅费、交通费、住宿费等选项中选定;" - "选定后,后续上传的票据都会作为这张单据的补充继续核对,不会重新改判报销类型。" - ) - - review_payload = UserAgentReviewPayload( - intent_summary="", - body_message="", - scenario=payload.ontology.scenario, - intent=payload.ontology.intent, - can_proceed=can_proceed, - missing_slots=missing_labels, - risk_briefs=risk_briefs, - slot_cards=slot_cards, - document_cards=[], - claim_groups=[], - confirmation_actions=[], - edit_fields=[], - ) - return "\n\n".join( - item - for item in [ - self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[]), - self._build_review_standard_calculation_copy(payload, slot_cards), - self._build_review_guidance_copy(review_payload, mention_save_draft=not can_proceed), - ] - if item - ) - - def _build_review_standard_calculation_copy( - self, - payload: UserAgentRequest, - slot_cards: list[UserAgentReviewSlotCard], - ) -> str: - slots = {item.key: item for item in slot_cards} - expense_type = str(slots.get("expense_type").value if slots.get("expense_type") else "").strip() - if "差旅" in expense_type: - return self._build_review_travel_calculation_table(payload, slots) - if "交通" in expense_type: - return ( - "报销测算参考:交通费通常以实际票据金额为基础,结合出行地点、业务事由和票据合规性复核;" - "如果它属于差旅行程的一部分,后续也会并入差旅费测算。" - ) - if "住宿" in expense_type: - return ( - "报销测算参考:住宿费通常按“实际住宿金额”和“目的地住宿标准 × 住宿天数”取合规口径;" - "补齐酒店票据后再核对是否超标。" - ) - return ( - "报销测算参考:先以用户填写金额或票据识别金额为基础," - "再结合费用类型、发生地点、业务事由和规则中心限额进行复核。" - ) - - def _build_review_travel_calculation_table( - self, - payload: UserAgentRequest, - slots: dict[str, UserAgentReviewSlotCard], - ) -> str: - destination = self._resolve_slot_text(slots, "location") - days = 
self._resolve_review_travel_days(payload, slots) - ticket_amount = self._resolve_slot_money(slots, "amount") - employee = self._resolve_employee_profile(payload) - grade = self._resolve_review_employee_grade(payload, employee=employee) - - if not destination or not grade: - return "\n".join( - [ - "报销测算参考:", - "", - "| 项目 | 当前信息 | 测算说明 |", - "| --- | --- | --- |", - f"| 出差地点 | {destination or '待确认'} | 用于匹配城市住宿标准和补贴区域 |", - f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |", - f"| 职级 | {grade or '待确认'} | 补齐后才能匹配住宿标准和补贴档位 |", - f"| 交通票据 | {self._format_decimal_money(ticket_amount)} 元 | 上传票据后会按真实金额重新复核 |", - ] - ) - - current_user = CurrentUserContext( - username=str(payload.user_id or payload.context_json.get("name") or "anonymous").strip() or "anonymous", - name=str(payload.context_json.get("name") or payload.user_id or "anonymous").strip() or "anonymous", - role_codes=[ - str(item).strip() - for item in list(payload.context_json.get("role_codes") or []) - if str(item).strip() - ], - is_admin=bool(payload.context_json.get("is_admin")), - department_name=str(payload.context_json.get("department_name") or payload.context_json.get("department") or "").strip(), - ) - try: - calculation = TravelReimbursementCalculatorService(self.db).calculate( - TravelReimbursementCalculatorRequest(days=days, location=destination, grade=grade), - current_user, - ) - except Exception: - return "\n".join( - [ - "报销测算参考:", - "", - "| 项目 | 当前信息 | 测算说明 |", - "| --- | --- | --- |", - f"| 出差地点 | {destination} | 暂时未能匹配规则中心地点 |", - f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |", - f"| 职级 | {grade} | 暂时无法自动匹配差旅标准 |", - f"| 交通票据 | {self._format_decimal_money(ticket_amount)} 元 | 上传票据后会按真实金额重新复核 |", - ] - ) - - total_amount = ( - ticket_amount - + self._coerce_decimal_money(calculation.hotel_amount) - + self._coerce_decimal_money(calculation.allowance_amount) - ).quantize(Decimal("0.01")) - ticket_basis = "当前未上传交通票据,先按 0.00 元占位" if ticket_amount <= Decimal("0.00") else "已识别或填写的交通票据金额" - return "\n".join( - [ - 
"报销测算参考:", - "", - ( - f"职级 {calculation.grade},目的地 {destination},匹配城市 {calculation.matched_city};" - "补齐交通、酒店等票据后,我会按真实票据金额和规则中心标准重新复核。" - ), - "", - "| 项目 | 测算口径 | 金额 |", - "| --- | --- | ---: |", - f"| 交通票据 | {ticket_basis} | {self._format_decimal_money(ticket_amount)} 元 |", - f"| 住宿标准 | {self._format_decimal_money(calculation.hotel_rate)} 元/天 × {calculation.days} 天 | {self._format_decimal_money(calculation.hotel_amount)} 元 |", - f"| 出差补贴 | {self._format_decimal_money(calculation.total_allowance_rate)} 元/天 × {calculation.days} 天 | {self._format_decimal_money(calculation.allowance_amount)} 元 |", - f"| 参考合计 | 交通票据 + 住宿标准 + 出差补贴 | {self._format_decimal_money(total_amount)} 元 |", - ] - ) - - @staticmethod - def _resolve_slot_text(slots: dict[str, UserAgentReviewSlotCard], key: str) -> str: - item = slots.get(key) - return str(getattr(item, "value", "") or getattr(item, "raw_value", "") or "").strip() - - def _resolve_review_travel_days( - self, - payload: UserAgentRequest, - slots: dict[str, UserAgentReviewSlotCard], - ) -> int: - text = " ".join( - [ - str(payload.message or ""), - str(payload.context_json.get("user_input_text") or ""), - self._resolve_slot_text(slots, "reason"), - self._resolve_slot_text(slots, "time_range"), - ] - ) - explicit_match = re.search(r"(?= 2: - return max(1, (max(dates).date() - min(dates).date()).days) - return 1 - - def _resolve_slot_money( - self, - slots: dict[str, UserAgentReviewSlotCard], - key: str, - ) -> Decimal: - text = self._resolve_slot_text(slots, key).replace(",", "") - match = re.search(r"([0-9]+(?:\.[0-9]{1,2})?)", text) - if not match: - return Decimal("0.00") - return self._coerce_decimal_money(match.group(1)) - - @staticmethod - def _build_review_action_followup_copy(review_payload: UserAgentReviewPayload) -> str: - missing_slots = [str(item).strip() for item in review_payload.missing_slots if str(item).strip()] - receipt_briefs = [ - item - for item in review_payload.risk_briefs - if "差旅票据待补充" in str(item.title or 
"") - ] - if missing_slots: - return f"当前仍有 {'、'.join(missing_slots)},暂时只能保存为草稿,补齐后再继续下一步。" - if receipt_briefs: - return "当前必需票据已具备;如还有市内交通、打车、地铁或停车等乘车票据,可以继续上传,也可以继续下一步或保存草稿。" - if review_payload.can_proceed: - return "当前信息已较完整,您可以继续下一步,也可以先保存为草稿。" - return "" - - def _build_travel_receipt_guidance_message( - self, - payload: UserAgentRequest, - *, - travel_receipt_state: dict[str, Any], - can_proceed: bool, - ) -> str: - review_action = str(payload.context_json.get("review_action") or "").strip() - if review_action or not travel_receipt_state.get("has_long_distance_ticket"): - return "" - - employee = self._resolve_employee_profile(payload) - user_name = ( - str(employee.name).strip() - if employee is not None and employee.name - else str(payload.context_json.get("name") or payload.user_id or "同事").strip() - ) - destination = str(travel_receipt_state.get("destination") or "待确认").strip() - days = max(1, int(travel_receipt_state.get("days") or 1)) - ticket_type_label = str(travel_receipt_state.get("ticket_type_label") or "交通").strip() - ticket_amount = self._coerce_decimal_money(travel_receipt_state.get("ticket_amount")) - - required_labels = [ - str(item).strip() - for item in travel_receipt_state.get("required_missing_labels", []) - if str(item).strip() - ] - optional_labels = [ - str(item).strip() - for item in travel_receipt_state.get("optional_missing_labels", []) - if str(item).strip() - ] - - provide_items: list[str] = [] - if required_labels: - provide_items.append("1. 酒店住宿发票/住宿清单(必须,当前待上传)") - if optional_labels: - provide_items.append(f"{len(provide_items) + 1}. 市内交通/乘车票据(非必须,如打车、地铁、停车等)") - - sections = [ - f"您好,{user_name}。我先按票据信息做一次差旅预检。", - "\n".join( - [ - "已识别信息:", - f"1. 出差地点:{destination}", - f"2. 预计天数:{days} 天", - f"3. 票据类型:{ticket_type_label}票", - f"4. 
票据金额:{self._format_decimal_money(ticket_amount)} 元", - ] - ), - ] - - if provide_items: - sections.append("还需补充:\n" + "\n".join(provide_items)) - else: - sections.append("票据完整性:当前核心票据已较完整,无需继续上传票据。") - - if required_labels: - sections.append( - "处理建议:酒店票据仍缺失,暂时不能继续下一步。" - "您可以先保存为草稿,补齐后再提交。" - ) - elif can_proceed and optional_labels: - sections.append( - "处理建议:必需票据已具备。" - "如暂时没有乘车票据,也可以继续下一步,或先保存为草稿。" - ) - elif can_proceed: - sections.append( - "处理建议:当前信息已较完整,确认无误后可以继续下一步;" - "暂时不提交时,也可以先保存为草稿。" - ) - - estimate_copy = self._build_travel_receipt_estimate_copy( - payload, - travel_receipt_state=travel_receipt_state, - ) - if estimate_copy: - sections.append(estimate_copy) - return "\n\n".join(section for section in sections if section) - - def _build_travel_receipt_estimate_copy( - self, - payload: UserAgentRequest, - *, - travel_receipt_state: dict[str, Any], - ) -> str: - destination = str(travel_receipt_state.get("destination") or "").strip() - days = max(1, int(travel_receipt_state.get("days") or 1)) - ticket_type_label = str(travel_receipt_state.get("ticket_type_label") or "交通").strip() - ticket_amount = self._coerce_decimal_money(travel_receipt_state.get("ticket_amount")) - employee = self._resolve_employee_profile(payload) - grade = self._resolve_review_employee_grade(payload, employee=employee) - - if not destination or not grade: - return ( - "差旅费测算:\n" - f"1. 职级:{grade or '待确认'}\n" - f"2. 目的地:{destination or '出差地点待确认'}\n" - f"3. 已提交{ticket_type_label}:{self._format_decimal_money(ticket_amount)} 元\n" - "4. 
住宿和补贴金额:需补齐职级或地点后再核算。" - ) - - current_user = CurrentUserContext( - username=str(payload.user_id or payload.context_json.get("name") or "anonymous").strip() or "anonymous", - name=str(payload.context_json.get("name") or payload.user_id or "anonymous").strip() or "anonymous", - role_codes=[ - str(item).strip() - for item in list(payload.context_json.get("role_codes") or []) - if str(item).strip() - ], - is_admin=bool(payload.context_json.get("is_admin")), - department_name=str(payload.context_json.get("department_name") or payload.context_json.get("department") or "").strip(), - ) - try: - calculation = TravelReimbursementCalculatorService(self.db).calculate( - TravelReimbursementCalculatorRequest(days=days, location=destination, grade=grade), - current_user, - ) - except Exception: - return ( - "差旅费测算:\n" - f"1. 职级:{grade}\n" - f"2. 目的地:{destination}\n" - f"3. 已提交{ticket_type_label}:{self._format_decimal_money(ticket_amount)} 元\n" - "4. 住宿和补贴标准:暂时无法自动测算,请以规则中心最新差旅标准为准。" - ) - - total_amount = ( - ticket_amount - + self._coerce_decimal_money(calculation.hotel_amount) - + self._coerce_decimal_money(calculation.allowance_amount) - ).quantize(Decimal("0.01")) - return ( - "差旅费测算:\n" - f"1. 职级:{calculation.grade}\n" - f"2. 目的地:{calculation.matched_city or destination}\n" - f"3. 已提交{ticket_type_label}:{self._format_decimal_money(ticket_amount)} 元\n" - f"4. 住宿标准:{self._format_decimal_money(calculation.hotel_rate)} 元/天 × {calculation.days} 天\n" - f"5. 出差补贴:{self._format_decimal_money(calculation.total_allowance_rate)} 元/天 × {calculation.days} 天\n" - f"6. 
参考合计:{self._format_decimal_money(total_amount)} 元" - ) - - @staticmethod - def _coerce_decimal_money(value: Any) -> Decimal: - try: - return Decimal(str(value or "0")).quantize(Decimal("0.01")) - except (InvalidOperation, ValueError): - return Decimal("0.00") - - @staticmethod - def _format_decimal_money(value: Any) -> str: - return f"{UserAgentService._coerce_decimal_money(value):.2f}" - - @staticmethod - def _resolve_review_missing_slot_labels( - slot_cards: list[UserAgentReviewSlotCard], - ) -> list[str]: - return [item.label for item in slot_cards if item.status == "missing"] - - @staticmethod - def _build_review_guidance_copy( - review_payload: UserAgentReviewPayload, - *, - mention_save_draft: bool, - ) -> str: - reminder_count = len(review_payload.risk_briefs) - - if review_payload.can_proceed: - if reminder_count: - return ( - f"当前关键信息已基本齐全,但还有 {reminder_count} 条提醒。" - "请核查对话中的文字说明,确认无误后继续下一步。" - ) - return "当前关键信息已基本齐全,您确认无误后可以继续下一步。" - - return "" - - @staticmethod - def _can_proceed_review( - payload: UserAgentRequest, - *, - missing_slot_keys: list[str], - claim_groups: list[UserAgentReviewClaimGroup], - ) -> bool: - if payload.ontology.ambiguity: - return False - if missing_slot_keys: - return False - if not claim_groups: - return False - return True - - def _build_review_edit_fields( - self, - payload: UserAgentRequest, - *, - draft_payload: UserAgentDraftPayload | None, - slot_cards: list[UserAgentReviewSlotCard], - ) -> list[UserAgentReviewEditField]: - slot_map = {item.key: item for item in slot_cards} - employee = self._resolve_employee_profile(payload) - reporter_name = ( - slot_map.get("reporter_name").value - if slot_map.get("reporter_name") - else str(payload.context_json.get("name") or "").strip() - ) - manager_name = self._resolve_manager_name(employee) - reason = slot_map.get("reason").value if slot_map.get("reason") else "" - attachments = "、".join(self._resolve_attachment_names(payload)) - - fields = [ - UserAgentReviewEditField( - 
key="claim_no", - label="报销单据编号", - value=str(draft_payload.claim_no if draft_payload is not None and draft_payload.claim_no else "待生成"), - placeholder="保存草稿后自动生成", - required=False, - group="basic", - ), - UserAgentReviewEditField( - key="expense_type", - label="报销类型", - value=slot_map.get("expense_type").value if slot_map.get("expense_type") else "", - placeholder="例如:业务招待费 / 差旅费", - group="basic", - ), - UserAgentReviewEditField( - key="occurred_date", - label="业务发生时间", - value=slot_map.get("time_range").normalized_value if slot_map.get("time_range") and slot_map.get("time_range").normalized_value else slot_map.get("time_range").value if slot_map.get("time_range") else "", - placeholder="例如:2026-05-11", - group="basic", - ), - UserAgentReviewEditField( - key="reporter_name", - label="报销人", - value=reporter_name, - placeholder="请输入报销人姓名", - group="basic", - ), - UserAgentReviewEditField( - key="manager_name", - label="直属上司姓名", - value=manager_name, - placeholder="请输入直属上司姓名", - required=False, - group="basic", - ), - UserAgentReviewEditField( - key="customer_name", - label="客户名称", - value=slot_map.get("customer_name").value if slot_map.get("customer_name") else "", - placeholder="请输入客户名称", - group="business", - ), - UserAgentReviewEditField( - key="business_location", - label="业务地点", - value=slot_map.get("location").normalized_value if slot_map.get("location") and slot_map.get("location").normalized_value else slot_map.get("location").value if slot_map.get("location") else "", - placeholder="例如:北京 / 客户现场", - required=False, - group="business", - ), - UserAgentReviewEditField( - key="merchant_name", - label="酒店/商户", - value=slot_map.get("merchant_name").value if slot_map.get("merchant_name") else "", - placeholder="请输入酒店或商户名称", - required=False, - group="business", - ), - UserAgentReviewEditField( - key="amount", - label="金额", - value=slot_map.get("amount").normalized_value if slot_map.get("amount") and slot_map.get("amount").normalized_value else 
slot_map.get("amount").value if slot_map.get("amount") else "", - placeholder="例如:200.00元", - group="business", - ), - UserAgentReviewEditField( - key="participants", - label="参与人员", - value=slot_map.get("participants").value if slot_map.get("participants") else "", - placeholder="例如:客户 2 人,我方 1 人", - group="business", - ), - UserAgentReviewEditField( - key="reason", - label="事由", - value=reason, - placeholder="请输入报销事由", - field_type="textarea", - group="business", - ), - UserAgentReviewEditField( - key="attachment_names", - label="附件清单", - value=attachments, - placeholder="例如:发票.jpg、行程单.png", - required=False, - field_type="textarea", - group="attachments", - ), - ] - return fields - - def _resolve_employee_profile(self, payload: UserAgentRequest) -> Employee | None: - candidates = [ - str(payload.context_json.get("name") or "").strip(), - str(payload.user_id or "").strip(), - self._collect_entity_values(payload).get("employee_name", ""), - ] - normalized = [item for item in dict.fromkeys(candidates) if item] - if not normalized: - return None - - stmt = ( - select(Employee) - .options(selectinload(Employee.organization_unit), selectinload(Employee.manager)) - .where( - or_( - Employee.name.in_(normalized), - Employee.employee_no.in_(normalized), - Employee.email.in_(normalized), - ) - ) - .limit(1) - ) - return self.db.scalar(stmt) - - @staticmethod - def _resolve_manager_name(employee: Employee | None) -> str: - if employee is None: - return "" - if employee.manager is not None and employee.manager.name: - return employee.manager.name - if employee.organization_unit is not None and employee.organization_unit.manager_name: - return employee.organization_unit.manager_name - return "" - - @staticmethod - def _extract_message_reason(message: str) -> str: - for line in str(message or "").splitlines(): - cleaned = line.strip() - if not cleaned: - continue - if cleaned.startswith(("附件名称:", "OCR摘要:", "关联单号:")): - continue - return cleaned[:300] - return "" - - 
@staticmethod - def _looks_like_system_generated_reason_message(message: str) -> bool: - cleaned = str(message or "").strip() - if not cleaned: - return False - compact = re.sub(r"\s+", "", cleaned) - return compact.startswith(SYSTEM_GENERATED_REASON_PREFIXES) - - def _resolve_reason_source_text(self, payload: UserAgentRequest) -> str: - explicit_text = payload.context_json.get("user_input_text") - if isinstance(explicit_text, str): - return explicit_text.strip() - if self._looks_like_system_generated_reason_message(payload.message): - return "" - return str(payload.message or "").strip() - - @classmethod - def _resolve_reason_text(cls, message: str) -> str: - reason = cls._strip_leading_time_from_reason(cls._extract_message_reason(message)) - if not reason: - return "" - - compact = re.sub(r"\s+", "", reason) - if compact in GENERIC_EXPENSE_PROMPTS: - return "" - - instruction_prefixes = ( - "帮我生成", - "请帮我生成", - "生成", - "起草", - "创建", - "发起", - "准备", - "帮我报销", - "我要报销", - "我想报销", - ) - if compact.startswith(instruction_prefixes): - for separator in (",", ",", "。", ";", ";", ":", ":"): - if separator in reason: - trailing = reason.split(separator, 1)[1].strip() - if trailing: - return trailing[:300] - return "" - - return reason - - @staticmethod - def _strip_leading_time_from_reason(value: str) -> str: - reason = str(value or "").strip() - for pattern in LEADING_REASON_TIME_PATTERNS: - next_reason = pattern.sub("", reason).strip() - if next_reason != reason: - return next_reason - return reason - - @staticmethod - def _should_skip_model_answer( - payload: UserAgentRequest, - review_payload: UserAgentReviewPayload | None, - ) -> bool: - if payload.ontology.scenario == "expense" and payload.ontology.intent in {"query", "compare"}: - return True - if review_payload is None: - return False - return payload.ontology.scenario == "expense" and ( - payload.ontology.intent == "draft" - or int(payload.context_json.get("attachment_count") or 0) > 0 - ) - - def 
_build_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]: - knowledge_citations = self._build_knowledge_citations(payload) - if payload.ontology.scenario == "knowledge": - return knowledge_citations[:3] - - rule_citations = self._build_rule_asset_citations(payload) - if knowledge_citations: - return (knowledge_citations + rule_citations)[:3] - return rule_citations - - @staticmethod - def _build_knowledge_citations(payload: UserAgentRequest) -> list[UserAgentCitation]: - citations: list[UserAgentCitation] = [] - for item in list(payload.tool_payload.get("hits") or [])[:3]: - if not isinstance(item, dict): - continue - title = str(item.get("title") or item.get("document_name") or "").strip() - code = str(item.get("code") or item.get("candidate_id") or "").strip() - if not title or not code: - continue - citations.append( - UserAgentCitation( - source_type="knowledge", - code=code, - title=title, - version=str(item.get("version") or "").strip() or None, - updated_at=str(item.get("updated_at") or "").strip() or None, - excerpt=( - str(item.get("excerpt") or "").strip() - or str(item.get("content") or "").strip() - or None - ), - ) - ) - return citations - - def _build_rule_asset_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]: - domain = self._resolve_domain(payload.ontology.scenario) - items = self.asset_service.list_assets( - asset_type=AgentAssetType.RULE.value, - status=AgentAssetStatus.ACTIVE.value, - domain=domain, - ) - ranked = self._rank_rule_assets(items, payload) - citations: list[UserAgentCitation] = [] - for item in ranked[:2]: - detail = self.asset_service.get_asset(item.id) - if detail is None: - continue - excerpt = self._extract_excerpt(str(detail.current_version_content or "")) - citations.append( - UserAgentCitation( - source_type="rule", - code=detail.code, - title=detail.name, - version=detail.current_version, - updated_at=detail.updated_at.date().isoformat(), - excerpt=excerpt, - ) - ) - return citations 
- - @staticmethod - def _resolve_risk_flags(payload: UserAgentRequest) -> list[str]: - tool_flags = payload.tool_payload.get("risk_flags") - if isinstance(tool_flags, list) and tool_flags: - return [str(item) for item in tool_flags] - return [str(item) for item in payload.ontology.risk_flags] - - @staticmethod - def _resolve_subject(payload: UserAgentRequest) -> str: - named_entities = [ - item.value - for item in payload.ontology.entities - if item.type in {"employee", "customer", "vendor", "project"} - ] - if named_entities: - return f"{'、'.join(named_entities)} 相关数据" - return f"{SCENARIO_LABELS.get(payload.ontology.scenario, '当前')}场景数据" - - @staticmethod - def _is_generic_expense_prompt(payload: UserAgentRequest) -> bool: - if payload.ontology.scenario != "expense": - return False - normalized_message = re.sub(r"\s+", "", payload.message) - return normalized_message in GENERIC_EXPENSE_PROMPTS - - @staticmethod - def _is_implicit_expense_draft_request(payload: UserAgentRequest) -> bool: - if payload.ontology.scenario != "expense" or payload.ontology.intent != "draft": - return False - - compact_message = re.sub(r"\s+", "", payload.message) - if any(keyword in compact_message for keyword in EXPLICIT_DRAFT_KEYWORDS): - return False - - return True - - @staticmethod - def _resolve_attachment_names(payload: UserAgentRequest) -> list[str]: - names = payload.context_json.get("attachment_names") - if not isinstance(names, list): - return [] - return [str(name) for name in names if str(name).strip()] - - @staticmethod - def _resolve_attachment_count(payload: UserAgentRequest) -> int: - names = UserAgentService._resolve_attachment_names(payload) - if names: - return len(names) - try: - return max(0, int(payload.context_json.get("attachment_count") or 0)) - except (TypeError, ValueError): - return 0 - - @staticmethod - def _resolve_ocr_documents(payload: UserAgentRequest) -> list[dict[str, object]]: - documents = payload.context_json.get("ocr_documents") - if not 
isinstance(documents, list): - return [] - overrides = payload.context_json.get("review_document_form_values") - override_map: dict[tuple[int, str], dict[str, object]] = {} - if isinstance(overrides, list): - for item in overrides: - if not isinstance(item, dict): - continue - filename = str(item.get("filename") or "").strip() - index = int(item.get("index") or 0) - if not filename and index <= 0: - continue - override_map[(index, filename)] = item - normalized: list[dict[str, object]] = [] - for index, item in enumerate(documents[:8], start=1): - if not isinstance(item, dict): - continue - normalized_item = dict(item) - override = override_map.get((index, str(normalized_item.get("filename") or "").strip())) - if override is None: - override = override_map.get((index, "")) - if override is not None: - summary = str(override.get("summary") or "").strip() - scene_label = str(override.get("scene_label") or "").strip() - fields = override.get("fields") - if summary: - normalized_item["summary"] = summary - if scene_label: - normalized_item["scene_label"] = scene_label - if isinstance(fields, list): - normalized_item["document_fields"] = [ - { - "key": str(field.get("key") or field.get("label") or "").strip(), - "label": str(field.get("label") or "").strip(), - "value": str(field.get("value") or "").strip(), - } - for field in fields - if isinstance(field, dict) - and str(field.get("label") or "").strip() - and str(field.get("value") or "").strip() - ] - normalized.append(normalized_item) - return normalized - - @staticmethod - def _is_review_association_choice_pending(payload: UserAgentRequest) -> bool: - return bool(payload.tool_payload.get("pending_association_decision")) - - def _resolve_review_document_count(self, payload: UserAgentRequest) -> int: - return max( - len(self._resolve_ocr_documents(payload)), - self._resolve_attachment_count(payload), - ) - - @staticmethod - def _resolve_conversation_history(payload: UserAgentRequest) -> list[dict[str, object]]: - 
history = payload.context_json.get("conversation_history") - if not isinstance(history, list): - return [] - - normalized: list[dict[str, object]] = [] - for item in history[-8:]: - if not isinstance(item, dict): - continue - role = str(item.get("role") or "").strip() - content = str(item.get("content") or "").strip() - if not role or not content: - continue - normalized.append({"role": role, "content": content}) - return normalized - - @staticmethod - def _resolve_domain(scenario: str) -> str | None: - if scenario == "expense": - return "expense" - if scenario == "accounts_receivable": - return "ar" - if scenario == "accounts_payable": - return "ap" - return None - - @staticmethod - def _rank_rule_assets( - items: list[AgentAssetListItem], - payload: UserAgentRequest, - ) -> list[AgentAssetListItem]: - def score(item: AgentAssetListItem) -> tuple[int, str]: - tags = {str(value) for value in item.scenario_json or []} - weight = 0 - if payload.ontology.scenario in tags: - weight += 3 - if payload.ontology.intent in tags: - weight += 2 - for risk_flag in payload.ontology.risk_flags: - if risk_flag in tags: - weight += 4 - return weight, item.code - - ranked = sorted(items, key=score, reverse=True) - return [item for item in ranked if score(item)[0] > 0] - - @staticmethod - def _extract_excerpt(content: str) -> str: - lines = [line.strip() for line in str(content).splitlines() if line.strip()] - cleaned: list[str] = [] - for line in lines: - normalized = re.sub(r"^[#>\-\*\d\.\s`]+", "", line).strip() - if normalized: - cleaned.append(normalized) - if len(cleaned) >= 2: - break - return ";".join(cleaned[:2]) - - def _collect_entity_values(self, payload: UserAgentRequest) -> dict[str, str]: - values = { - "employee_name": "", - "customer": "", - "participants": "", - "amount": "", - "expense_type": "", - "expense_type_code": "", - } - participants: list[str] = [] - for item in payload.ontology.entities: - if item.type == "employee" and not values["employee_name"]: - 
values["employee_name"] = item.value - elif item.type == "customer" and not values["customer"]: - values["customer"] = item.value - elif item.type == "amount" and item.role != "threshold" and not values["amount"]: - normalized_amount = str(item.normalized_value or "").strip() - values["amount"] = f"{normalized_amount}元" if normalized_amount else item.value - elif item.type == "expense_type" and not values["expense_type_code"]: - values["expense_type_code"] = item.normalized_value - values["expense_type"] = EXPENSE_TYPE_LABELS.get( - item.normalized_value, - item.value, - ) - elif item.type in {"participant", "person"} and item.value.strip(): - participants.append(item.value.strip()) - if participants: - values["participants"] = "、".join(dict.fromkeys(participants)) - return values - - def _format_time_range(self, payload: UserAgentRequest) -> str: - time_range = payload.ontology.time_range - if time_range.start_date and time_range.end_date: - if time_range.start_date == time_range.end_date: - return time_range.start_date - normalized = f"{time_range.start_date} 至 {time_range.end_date}" - return normalized - if time_range.raw: - return time_range.raw - return "" - - def _resolve_location_value(self, payload: UserAgentRequest) -> str: - review_form_values = self._resolve_review_form_values(payload) - for key in ("business_location", "location"): - value = str(review_form_values.get(key) or "").strip() - if value: - return value - - if str(payload.context_json.get("entry_source") or "").strip() == "detail": - request_context = payload.context_json.get("request_context") - if isinstance(request_context, dict): - for key in ("city", "location"): - value = str(request_context.get(key) or "").strip() - if value: - return value - - labeled_match = re.search(r"(?:业务地点|发生地点|地点)[::]\s*(?P[^\n,。;]+)", payload.message) - if labeled_match: - return labeled_match.group("value").strip() - - city_match = re.search( - 
r"去(?P[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)", - payload.message, - ) - if city_match: - return city_match.group("city").strip() - if "客户现场" in payload.message.replace(" ", ""): - return "客户现场" - return "" - - @staticmethod - def _resolve_review_form_values(payload: UserAgentRequest) -> dict[str, str]: - values = payload.context_json.get("review_form_values") - if not isinstance(values, dict): - return {} - normalized: dict[str, str] = {} - for key, value in values.items(): - cleaned_key = str(key or "").strip() - if not cleaned_key: - continue - normalized[cleaned_key] = str(value or "").strip() - return normalized - - @staticmethod - def _build_slot_value( - *, - value: str = "", - raw_value: str = "", - normalized_value: str = "", - source: str = "system", - confidence: float = 0.0, - evidence: str = "", - ) -> dict[str, str | float]: - return { - "value": str(value or "").strip(), - "raw_value": str(raw_value or "").strip(), - "normalized_value": str(normalized_value or "").strip(), - "source": str(source or "system").strip() or "system", - "confidence": float(confidence), - "evidence": str(evidence or "").strip(), - } - - def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: - review_form_values = self._resolve_review_form_values(payload) - edited_value = str( - review_form_values.get("time_range") - or review_form_values.get("business_time") - or review_form_values.get("occurred_date") - or "" - ).strip() - if edited_value: - raw_value = str(review_form_values.get("time_range_raw") or edited_value).strip() - return self._build_slot_value( - value=edited_value, - raw_value=raw_value, - normalized_value=edited_value, - source="user_form", - confidence=1.0, - evidence="来源于用户修改后的结构化表单。", - ) - - time_range = payload.ontology.time_range - if time_range.start_date and time_range.end_date: - normalized_value = ( - time_range.start_date - if time_range.start_date == time_range.end_date - else f"{time_range.start_date} 
至 {time_range.end_date}" - ) - raw_value = str(time_range.raw or "").strip() - return self._build_slot_value( - value=normalized_value, - raw_value=raw_value, - normalized_value=normalized_value, - source="user_text", - confidence=0.92, - evidence="系统已根据当前日期将相对时间换算为标准日期。", - ) - - return self._build_slot_value() - - def _build_location_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: - review_form_values = self._resolve_review_form_values(payload) - for key in ("business_location", "location"): - value = str(review_form_values.get(key) or "").strip() - if value: - return self._build_slot_value( - value=value, - normalized_value=value, - source="user_form", - confidence=1.0, - evidence="来源于用户修改后的结构化表单。", - ) - - if str(payload.context_json.get("entry_source") or "").strip() == "detail": - request_context = payload.context_json.get("request_context") - if isinstance(request_context, dict): - for key in ("city", "location"): - value = str(request_context.get(key) or "").strip() - if value: - return self._build_slot_value( - value=value, - normalized_value=value, - source="detail_context", - confidence=0.68, - evidence="来源于当前关联单据,仅作为辅助上下文,需要用户再次核对。", - ) - - value = self._resolve_location_value(payload) - if value: - evidence = "用户在文本中明确描述了业务地点。" - if value == "客户现场": - evidence = "用户明确提到“客户现场”,但未提供具体城市或地址。" - return self._build_slot_value( - value=value, - normalized_value=value, - source="user_text", - confidence=0.82, - evidence=evidence, - ) - return self._build_slot_value() - - def _build_customer_slot( - self, - payload: UserAgentRequest, - *, - entity_map: dict[str, str], - ) -> dict[str, str | float]: - review_form_values = self._resolve_review_form_values(payload) - value = str(review_form_values.get("customer_name") or "").strip() - if value: - return self._build_slot_value( - value=value, - normalized_value=value, - source="user_form", - confidence=1.0, - evidence="来源于用户修改后的结构化表单。", - ) - - value = entity_map.get("customer", "") - if value: - 
return self._build_slot_value( - value=value, - normalized_value=value, - source="user_text", - confidence=0.88, - evidence="用户在原始描述中直接提到了客户对象。", - ) - return self._build_slot_value() - - def _build_participants_slot( - self, - payload: UserAgentRequest, - *, - entity_map: dict[str, str], - ) -> dict[str, str | float]: - review_form_values = self._resolve_review_form_values(payload) - value = str(review_form_values.get("participants") or "").strip() - if value: - return self._build_slot_value( - value=value, - normalized_value=value, - source="user_form", - confidence=1.0, - evidence="来源于用户修改后的结构化表单。", - ) - - value = entity_map.get("participants", "") - if value: - return self._build_slot_value( - value=value, - normalized_value=value, - source="user_text", - confidence=0.8, - evidence="用户在当前描述中补充了参与人员。", - ) - return self._build_slot_value() - - def _build_reason_slot( - self, - payload: UserAgentRequest, - *, - claim_groups: list[UserAgentReviewClaimGroup], - ) -> dict[str, str | float]: - review_form_values = self._resolve_review_form_values(payload) - edited_value = str(review_form_values.get("reason") or "").strip() - if edited_value: - return self._build_slot_value( - value=edited_value, - raw_value=edited_value, - normalized_value=edited_value, - source="user_form", - confidence=1.0, - evidence="来源于用户修改后的结构化表单。", - ) - - inferred_reason = self._infer_reason_from_claim_groups( - claim_groups=claim_groups, - ) - reason_value = self._resolve_reason_text(self._resolve_reason_source_text(payload)) - if inferred_reason: - return self._build_slot_value( - value=inferred_reason, - raw_value=reason_value or inferred_reason, - normalized_value=inferred_reason, - source="ocr", - confidence=0.82, - evidence=( - "系统已根据票据识别结果预置场景类型;原始描述仍保留为补充说明。" - if reason_value - else "系统已根据票据识别场景补全通用事由,若需更具体说明可继续修改。" - ), - ) - - if reason_value: - return self._build_slot_value( - value=reason_value, - raw_value=reason_value, - normalized_value=reason_value, - source="user_text", - 
confidence=0.76, - evidence="系统从用户原始描述中提取了本次费用事由,建议继续核对。", - ) - return self._build_slot_value() - - def _build_amount_slot( - self, - payload: UserAgentRequest, - *, - entity_map: dict[str, str], - ocr_documents: list[dict[str, object]], - ) -> dict[str, str | float]: - review_form_values = self._resolve_review_form_values(payload) - edited_amount = str(review_form_values.get("amount") or "").strip() - if edited_amount: - normalized = self._normalize_amount_text(edited_amount) - return self._build_slot_value( - value=normalized, - raw_value=edited_amount, - normalized_value=normalized, - source="user_form", - confidence=1.0, - evidence="来源于用户修改后的结构化表单。", - ) - - amount_value = entity_map.get("amount", "") - if amount_value: - normalized = self._normalize_amount_text(amount_value) - return self._build_slot_value( - value=normalized, - raw_value=amount_value, - normalized_value=normalized, - source="user_text", - confidence=0.92, - evidence="用户在原始描述中直接给出了金额。", - ) - - ocr_total_amount = self._sum_ocr_amounts(ocr_documents) - if ocr_total_amount > 0: - normalized = f"{ocr_total_amount:.2f}元" - return self._build_slot_value( - value=normalized, - normalized_value=normalized, - source="ocr", - confidence=0.76, - evidence="金额来自 OCR 汇总结果,仍建议用户核对票据原文。", - ) - return self._build_slot_value() - - def _build_expense_type_slot( - self, - payload: UserAgentRequest, - *, - entity_map: dict[str, str], - ocr_documents: list[dict[str, object]], - ) -> dict[str, str | float]: - review_form_values = self._resolve_review_form_values(payload) - edited_value = str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip() - if edited_value: - normalized_code, normalized_label = self._normalize_expense_type_input(edited_value) - return self._build_slot_value( - value=normalized_label, - raw_value=edited_value, - normalized_value=normalized_code, - source="user_form", - confidence=1.0, - evidence="来源于用户修改后的结构化表单。", - ) - - expense_type_code = 
entity_map.get("expense_type_code", "") - expense_type_value = EXPENSE_TYPE_LABELS.get(expense_type_code, entity_map.get("expense_type", "")) - if expense_type_value: - return self._build_slot_value( - value=expense_type_value, - raw_value=expense_type_value, - normalized_value=expense_type_code, - source="user_text", - confidence=0.9, - evidence="系统根据用户描述中的业务场景判断费用类型。", - ) - - inferred_label = self._infer_expense_type_from_documents(payload, ocr_documents) if ocr_documents else "" - if inferred_label: - normalized_code, normalized_label = self._normalize_expense_type_input(inferred_label) - return self._build_slot_value( - value=normalized_label, - raw_value=inferred_label, - normalized_value=normalized_code, - source="ocr", - confidence=0.74, - evidence="系统根据票据内容推断费用类型,仍建议用户确认。", - ) - return self._build_slot_value() - - def _build_merchant_slot( - self, - payload: UserAgentRequest, - *, - ocr_documents: list[dict[str, object]], - ) -> dict[str, str | float]: - review_form_values = self._resolve_review_form_values(payload) - edited_value = str(review_form_values.get("merchant_name") or "").strip() - if edited_value: - return self._build_slot_value( - value=edited_value, - normalized_value=edited_value, - source="user_form", - confidence=1.0, - evidence="来源于用户修改后的结构化表单。", - ) - - merchant_value = "" - for document in ocr_documents: - if not self._is_hotel_document_item(document): - continue - merchant_value = self._extract_document_merchant_name(document) - if merchant_value: - break - if merchant_value: - return self._build_slot_value( - value=merchant_value, - normalized_value=merchant_value, - source="ocr", - confidence=0.72, - evidence="商户名称来自 OCR 票据识别结果,仍建议用户核对。", - ) - return self._build_slot_value() - - def _build_attachment_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: - review_form_values = self._resolve_review_form_values(payload) - attachment_names = str(review_form_values.get("attachment_names") or "").strip() - if attachment_names: 
- return self._build_slot_value( - value=attachment_names, - normalized_value=attachment_names, - source="user_form", - confidence=1.0, - evidence="来源于用户修改后的结构化表单。", - ) - - count = self._resolve_attachment_count(payload) - if count > 0: - names = self._resolve_attachment_names(payload) - value = "、".join(names) if names else f"{count} 份附件" - return self._build_slot_value( - value=value, - raw_value=value, - normalized_value=str(count), - source="upload", - confidence=1.0, - evidence="系统已接收到用户上传的附件。", - ) - return self._build_slot_value() - - @staticmethod - def _normalize_amount_text(value: str) -> str: - cleaned = str(value or "").strip() - if not cleaned: - return "" - for alias, canonical in sorted(AMOUNT_UNIT_ALIASES.items(), key=lambda item: len(item[0]), reverse=True): - cleaned = cleaned.replace(alias, canonical) - match = AMOUNT_TEXT_PATTERN.search(cleaned) - if not match: - return cleaned - number = float(match.group(1)) - return f"{number:.2f}元" - - @staticmethod - def _normalize_expense_type_input(value: str) -> tuple[str, str]: - compact = str(value or "").replace(" ", "") - if "招待" in compact or ("客户" in compact and any(keyword in compact for keyword in ("吃饭", "用餐", "宴请", "请客"))): - return "entertainment", "业务招待费" - if any(keyword in compact for keyword in ("差旅", "出差", "机票", "行程")): - return "travel", "差旅费" - if any(keyword in compact for keyword in ("住宿", "酒店", "宾馆")): - return "hotel", "住宿费" - if any(keyword in compact for keyword in ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车")): - return "transport", "交通费" - if any(keyword in compact for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")): - return "meal", "餐费" - if "会务" in compact: - return "meeting", "会务费" - if any(keyword in compact for keyword in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")): - return "office", "办公费" - if any(keyword in compact for keyword in ("培训费", "培训", "讲师费", "课时费", "课程费")): - return "training", "培训费" - if any(keyword in compact 
for keyword in ("通讯费", "话费", "流量费", "宽带费")): - return "communication", "通讯费" - if any(keyword in compact for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")): - return "welfare", "福利费" - return "other", str(value or "").strip() or "其他费用" - - def _resolve_required_review_keys( - self, - payload: UserAgentRequest, - *, - primary_expense_type: str, - claim_groups: list[UserAgentReviewClaimGroup], - ) -> set[str]: - required = {"expense_type", "time_range", "amount", "reason", "attachments"} - scene_codes = { - str(item.group_code or "").strip() - for item in claim_groups - if str(item.group_code or "").strip() - } - if primary_expense_type: - scene_codes.add(primary_expense_type) - - for scene_code in scene_codes: - required.update(SCENE_REQUIRED_SLOT_KEYS.get(scene_code, set())) - - compact_message = re.sub(r"\s+", "", self._resolve_reason_source_text(payload) or payload.message) - if "entertainment" in scene_codes or ( - "客户" in compact_message and any(keyword in compact_message for keyword in ("招待", "吃饭", "用餐", "宴请", "请客")) - ): - required.update({"customer_name", "participants"}) - - return required - - @staticmethod - def _infer_reason_from_claim_groups( - *, - claim_groups: list[UserAgentReviewClaimGroup], - ) -> str: - if len(claim_groups) == 1: - document_indexes = list(claim_groups[0].document_indexes or []) - if not document_indexes: - return "" - - expense_type = str(claim_groups[0].expense_type or "").strip() - group_code = str(claim_groups[0].group_code or "").strip() - if expense_type: - return INFERRED_REASON_LABELS.get(expense_type, "") or str(claim_groups[0].scene_label or "").strip() - if group_code: - return INFERRED_REASON_LABELS.get(group_code, "") or str(claim_groups[0].scene_label or "").strip() - return "" - - @staticmethod - def _resolve_review_missing_slot_keys( - payload: UserAgentRequest, - *, - slot_cards: list[UserAgentReviewSlotCard], - ) -> list[str]: - required_keys = {item.key for item in slot_cards if item.required} - slot_map = 
{item.key: item for item in slot_cards} - missing_keys = { - item.key - for item in slot_cards - if item.required and (item.status == "missing" or not str(item.value).strip()) - } - for key in payload.ontology.missing_slots: - normalized_key = str(key or "").strip() - if ( - normalized_key - and normalized_key in required_keys - and ( - normalized_key not in slot_map - or slot_map[normalized_key].status == "missing" - or not str(slot_map[normalized_key].value).strip() - ) - ): - missing_keys.add(normalized_key) - - ordered_keys: list[str] = [] - for item in slot_cards: - if item.required and item.key in missing_keys and item.key not in ordered_keys: - ordered_keys.append(item.key) - return ordered_keys - - def _make_slot_card( - self, - *, - key: str, - value: str, - raw_value: str, - normalized_value: str, - source: str, - confidence: float, - evidence: str, - required: bool = True, - ) -> UserAgentReviewSlotCard: - is_missing = required and not str(value).strip() - source_key = source if source in SOURCE_LABELS else "system" - return UserAgentReviewSlotCard( - key=key, - label=SLOT_LABELS.get(key, key), - value=str(value or "").strip(), - raw_value=str(raw_value or "").strip(), - normalized_value=str(normalized_value or "").strip(), - source=source, - source_label=SOURCE_LABELS.get(source_key, "系统判断"), - confidence=confidence, - required=required, - confirmed=not is_missing and source in {"user_text", "user_form"}, - status="missing" if is_missing else "identified" if source in {"user_text", "user_form"} else "inferred", - hint=f"建议补充 {SLOT_LABELS.get(key, key)}。" - if is_missing and required - else ("该字段来自系统辅助上下文,建议你再核对一次。" if source in {"detail_context", "ocr"} else ""), - evidence=evidence, - ) - def _classify_document( self, item: dict[str, object], payload: UserAgentRequest, ) -> dict[str, str]: - provided_type = str(item.get("document_type") or "").strip().lower() - expense_type_code = self._collect_entity_values(payload).get("expense_type_code", "") - 
has_customer = bool(self._collect_entity_values(payload).get("customer")) - if provided_type: - if provided_type in {"flight_itinerary", "train_ticket"}: - return { - "document_type": provided_type, - "expense_type": "travel", - "group_code": "travel", - "scene_label": "差旅票据", - } - if provided_type == "hotel_invoice": - return { - "document_type": provided_type, - "expense_type": "hotel", - "group_code": "travel", - "scene_label": "住宿票据", - } - if provided_type in {"taxi_receipt", "parking_toll_receipt"}: - return { - "document_type": provided_type, - "expense_type": "transport", - "group_code": "travel", - "scene_label": "交通票据", - } - if provided_type == "meal_receipt": - group_code = "entertainment" if expense_type_code == "entertainment" or has_customer else "meal" - return { - "document_type": provided_type, - "expense_type": group_code, - "group_code": group_code, - "scene_label": "餐饮票据", - } - if provided_type == "office_invoice": - return { - "document_type": provided_type, - "expense_type": "office", - "group_code": "office", - "scene_label": "办公用品票据", - } - if provided_type == "meeting_invoice": - return { - "document_type": provided_type, - "expense_type": "meeting", - "group_code": "meeting", - "scene_label": "会务票据", - } - if provided_type == "training_invoice": - return { - "document_type": provided_type, - "expense_type": "training", - "group_code": "training", - "scene_label": "培训票据", - } - - text = " ".join( - [ - str(item.get("filename") or ""), - str(item.get("summary") or ""), - str(item.get("text") or ""), - ] - ).lower() - compact = text.replace(" ", "") - - if any(keyword in compact for keyword in ("机票", "航班", "火车", "高铁", "行程单")): - return { - "document_type": "travel_ticket", - "expense_type": "travel", - "group_code": "travel", - "scene_label": "差旅票据", - } - if any(keyword in compact for keyword in ("酒店", "住宿", "宾馆")): - return { - "document_type": "hotel_invoice", - "expense_type": "hotel", - "group_code": "travel", - "scene_label": "住宿票据", 
- } - if any(keyword in compact for keyword in ("打车", "出租车", "滴滴", "网约车", "乘车", "用车", "叫车", "车费", "车资", "的士", "过路费", "停车")): - return { - "document_type": "transport_receipt", - "expense_type": "transport", - "group_code": "travel", - "scene_label": "交通票据", - } - if any(keyword in compact for keyword in ("餐", "饭店", "酒楼", "酒家", "餐饮", "meal")): - group_code = "entertainment" if expense_type_code == "entertainment" or has_customer else "meal" - return { - "document_type": "meal_receipt", - "expense_type": group_code, - "group_code": group_code, - "scene_label": "餐饮票据", - } - if any(keyword in compact for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "键盘", "鼠标", "白板", "墨盒", "硒鼓")): - return { - "document_type": "other", - "expense_type": "office", - "group_code": "office", - "scene_label": "办公用品票据", - } - return { - "document_type": "other", - "expense_type": expense_type_code or "other", - "group_code": self._normalize_group_code(expense_type_code or "other"), - "scene_label": "其他票据", - } + entity_values = self._collect_entity_values(payload) + return self._document_service.classify_document( + item, + expense_type_code=entity_values.get("expense_type_code", ""), + has_customer=bool(entity_values.get("customer")), + ) @staticmethod def _normalize_group_code(expense_type_code: str) -> str: - if expense_type_code in {"travel", "hotel", "transport"}: - return "travel" - if expense_type_code in {"entertainment", "meal", "office", "training", "communication", "welfare"}: - return expense_type_code - return "other" + return UserAgentDocumentService.normalize_group_code(expense_type_code) def _extract_document_fields(self, item: dict[str, object]) -> dict[str, str]: - raw_fields = item.get("document_fields") - normalized_fields: dict[str, str] = {} - document_type = str(item.get("document_type") or "").strip().lower() - if isinstance(raw_fields, list): - for field in raw_fields: - if not isinstance(field, dict): - continue - key = str(field.get("key") or "").strip() - label 
= str(field.get("label") or "").strip() - value = str(field.get("value") or "").strip() - if not value: - continue - normalized_label = self._normalize_document_field_label(key=key, label=label) - display_label = normalized_label or label - display_label = self._resolve_document_time_display_label( - document_type=document_type, - key=key, - label=label, - normalized_label=display_label, - ) - normalized_value = self._normalize_document_field_value( - label=display_label, - value=value, - ) - if display_label == "商户/酒店" and not self._is_hotel_document_item(item): - continue - if display_label and normalized_value: - normalized_fields.setdefault(display_label, normalized_value) - - text = " ".join([str(item.get("summary") or ""), str(item.get("text") or "")]).strip() - amount_value = self._extract_amount_text_from_value(text) - if amount_value and "金额" not in normalized_fields: - normalized_fields["金额"] = amount_value - date_match = DATE_TEXT_PATTERN.search(text) - if date_match and "时间" not in normalized_fields: - time_label = self._resolve_document_time_display_label( - document_type=document_type, - key="date", - label="日期", - normalized_label="时间", - ) - normalized_fields[time_label] = date_match.group(1) - - merchant = self._extract_document_merchant_name_from_text(text) if self._is_hotel_document_item(item) else "" - if merchant and "商户/酒店" not in normalized_fields: - normalized_fields["商户/酒店"] = merchant - return normalized_fields + return self._document_service.extract_document_fields(item) @staticmethod def _resolve_document_time_display_label( @@ -5099,158 +211,52 @@ class UserAgentService: label: str, normalized_label: str, ) -> str: - if normalized_label != "时间": - return normalized_label - - label_by_type = { - "train_ticket": "列车出发时间", - "flight_itinerary": "起飞日期", - "taxi_receipt": "乘车时间", - "transport_receipt": "乘车时间", - "parking_toll_receipt": "通行日期", - } - normalized_type = str(document_type or "").strip().lower() - if normalized_type not in 
label_by_type: - return normalized_label - - compact_key = str(key or "").strip().lower().replace("_", "") - compact_label = str(label or "").replace(" ", "") - if compact_key in {"date", "time", "issuedat", "issuedate", "invoicedate"}: - return label_by_type[normalized_type] - if any(token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")): - return label_by_type[normalized_type] - return normalized_label - - @staticmethod - def _normalize_document_field_label(*, key: str, label: str) -> str: - compact_key = str(key or "").strip().lower().replace("_", "") - compact_label = str(label or "").replace(" ", "") - if compact_key in { - "amount", - "totalamount", - "paymentamount", - "paidamount", - "actualamount", - } or any( - token in compact_label - for token in ("金额", "价税合计", "合计", "总额", "总计", "票价", "支付金额", "实付金额", "实收金额") - ): - return "金额" - if compact_key in {"date", "time", "issuedat", "invoicedate"} or any( - token in compact_label for token in ("日期", "时间", "开票日期", "发生时间") - ): - return "时间" - if compact_key in {"merchant", "merchantname", "sellername", "vendorname"} or any( - token in compact_label for token in ("商户", "酒店", "销售方", "开票方", "收款方") - ): - return "商户/酒店" - return label - - def _normalize_document_field_value(self, *, label: str, value: str) -> str: - normalized_label = str(label or "").strip() - raw_value = str(value or "").strip() - if not normalized_label or not raw_value: - return "" - if normalized_label == "金额": - return self._extract_amount_text_from_value(raw_value) or raw_value - if normalized_label in {"时间", "出发日期", "列车出发时间", "起飞日期", "乘车时间", "通行日期"}: - match = DATE_TEXT_PATTERN.search(raw_value) - return match.group(1) if match else raw_value - return raw_value - - def _extract_amount_text_from_value(self, value: str) -> str: - raw_value = str(value or "").strip() - if not raw_value: - return "" - best_amount: Decimal | None = None - for pattern in (DOCUMENT_AMOUNT_PATTERN, DOCUMENT_CURRENCY_AMOUNT_PATTERN, AMOUNT_TEXT_PATTERN): - 
for match in pattern.finditer(raw_value): - try: - candidate = Decimal(str(match.group(1)).replace(",", ".")) - except (InvalidOperation, TypeError): - continue - if candidate <= Decimal("0.00"): - continue - if best_amount is None or candidate > best_amount: - best_amount = candidate - if best_amount is None: - return "" - return f"{best_amount.quantize(Decimal('0.01')):.2f}元" - - def _extract_document_merchant_name(self, item: dict[str, object]) -> str: - fields = self._extract_document_fields(item) - merchant = str(fields.get("商户/酒店") or "").strip() - if merchant: - return merchant - if not self._is_hotel_document_item(item): - return "" - text = " ".join([str(item.get("summary") or ""), str(item.get("text") or "")]).strip() - return self._extract_document_merchant_name_from_text(text) - - @staticmethod - def _is_hotel_document_item(item: dict[str, object]) -> bool: - document_type = str(item.get("document_type") or "").strip().lower() - scene_code = str(item.get("scene_code") or "").strip().lower() - scene_label = str(item.get("scene_label") or "").strip() - suggested_expense_type = str(item.get("suggested_expense_type") or "").strip().lower() - return ( - document_type == "hotel_invoice" - or scene_code == "hotel" - or suggested_expense_type == "hotel" - or "住宿" in scene_label - or "酒店" in scene_label + return UserAgentDocumentService.resolve_document_time_display_label( + document_type=document_type, + key=key, + label=label, + normalized_label=normalized_label, ) + @staticmethod + def _normalize_document_field_label(*, key: str, label: str) -> str: + return UserAgentDocumentService.normalize_document_field_label(key=key, label=label) + + def _normalize_document_field_value(self, *, label: str, value: str) -> str: + return self._document_service.normalize_document_field_value(label=label, value=value) + + def _extract_amount_text_from_value(self, value: str) -> str: + return self._document_service.extract_amount_text_from_value(value) + + def 
_extract_document_merchant_name(self, item: dict[str, object]) -> str: + return self._document_service.extract_document_merchant_name(item) + + @staticmethod + def _is_hotel_document_item(item: dict[str, object]) -> bool: + return UserAgentDocumentService.is_hotel_document_item(item) + @staticmethod def _extract_document_merchant_name_from_text(text: str) -> str: - for keyword in ("酒店", "宾馆", "饭店", "酒楼", "餐厅", "航空", "铁路", "滴滴"): - if keyword in text: - return keyword - return "" + return UserAgentDocumentService.extract_document_merchant_name_from_text(text) @staticmethod def _extract_amount_from_card(card: UserAgentReviewDocumentCard) -> float: - for item in card.fields: - if item.label != "金额": - continue - try: - normalized_value = str(item.value).replace("元", "").replace("¥", "").replace("¥", "").strip() - return float(normalized_value) - except ValueError: - return 0.0 - return 0.0 + return UserAgentDocumentService.extract_amount_from_card(card) def _resolve_amount_value(self, payload: UserAgentRequest) -> float: - for item in payload.ontology.entities: - if item.type == "amount" and item.role != "threshold": - try: - return float(item.normalized_value) - except ValueError: - return 0.0 - return 0.0 + return self._document_service.resolve_amount_value(payload) def _sum_ocr_amounts(self, ocr_documents: list[dict[str, object]]) -> float: - total = 0.0 - for item in ocr_documents: - fields = self._extract_document_fields(item) - amount_text = str(fields.get("金额") or "").replace("元", "").replace("¥", "").replace("¥", "").strip() - if not amount_text: - continue - try: - total += float(amount_text) - except ValueError: - continue - return total + return self._document_service.sum_ocr_amounts(ocr_documents) def _infer_expense_type_from_documents( self, payload: UserAgentRequest, ocr_documents: list[dict[str, object]], ) -> str: - labels: list[str] = [] - for item in ocr_documents: - classified = self._classify_document(item, payload) - label = 
GROUP_SCENE_LABELS.get(classified["group_code"], "") - if label and label not in labels: - labels.append(label) - return " + ".join(labels[:3]) + entity_values = self._collect_entity_values(payload) + return self._document_service.infer_expense_type_from_documents( + ocr_documents, + expense_type_code=entity_values.get("expense_type_code", ""), + has_customer=bool(entity_values.get("customer")), + ) diff --git a/server/src/app/services/user_agent_constants.py b/server/src/app/services/user_agent_constants.py new file mode 100644 index 0000000..bf8819a --- /dev/null +++ b/server/src/app/services/user_agent_constants.py @@ -0,0 +1,179 @@ +from __future__ import annotations + +import re + +SCENARIO_LABELS = { + "expense": "报销", + "accounts_receivable": "应收", + "accounts_payable": "应付", + "knowledge": "知识", + "unknown": "通用", +} + +RISK_REASON_MAP = { + "duplicate_expense": "检测到同员工、同金额或近似单据存在重复提交迹象。", + "location_mismatch": "申报出差地点与票据识别地点可能不一致,需要核对行程或补充说明。", + "amount_over_limit": "金额超过当前制度或预算阈值,需要补充例外说明。", + "invoice_anomaly": "票据或附件完整性不满足当前规则要求,需要补件或人工复核。", + "ar_overdue": "应收账款已出现逾期,存在回款延迟风险。", + "ap_overdue": "应付付款已出现逾期,可能影响供应商履约或合作关系。", +} + +GENERIC_EXPENSE_PROMPTS = { + "报销", + "我要报销", + "我想报销", + "帮我报销", + "我要申请报销", + "发起报销", + "提交报销", +} + +EXPLICIT_DRAFT_KEYWORDS = ("生成", "草稿", "起草", "创建", "发起", "准备") + +EXPENSE_TYPE_LABELS = { + "travel": "差旅费", + "hotel": "住宿费", + "transport": "交通费", + "meal": "餐费", + "meeting": "会务费", + "entertainment": "业务招待费", + "office": "办公费", + "training": "培训费", + "communication": "通讯费", + "welfare": "福利费", + "other": "其他费用", +} + +GROUP_SCENE_LABELS = { + "travel": "差旅费", + "entertainment": "业务招待费", + "meal": "伙食费", + "transport": "交通费", + "hotel": "住宿费", + "office": "办公费", + "training": "培训费", + "communication": "通讯费", + "welfare": "福利费", + "other": "其他费用", +} + +EXPENSE_SCENE_SELECTION_OPTIONS = ( + ("travel", "差旅费", "出差、长途交通、住宿、差旅补贴等场景。"), + ("transport", "交通费", "市内打车、停车、过路费等日常交通场景。"), + ("hotel", "住宿费", "单独住宿、酒店发票等场景。"), + 
("entertainment", "业务招待费", "客户接待、宴请、招待等场景。"), + ("office", "办公费", "办公用品、耗材、办公设备等采购场景。"), + ("other", "其他费用", "暂不属于以上分类的报销场景。"), +) + +KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS = 3 +KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS = 5 +KNOWLEDGE_MODEL_TIMEOUT_SECONDS = KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS + +EXPENSE_STATUS_LABELS = { + "draft": "草稿", + "submitted": "已提交", + "review": "审核中", + "approved": "已通过", + "rejected": "已驳回", + "paid": "已付款", +} + +EXPENSE_STATUS_GROUP_LABELS = { + "draft": "草稿", + "in_progress": "审批中", + "completed": "审批完成", + "other": "其他状态", +} + +SLOT_LABELS = { + "expense_type": "报销类型", + "customer_name": "客户名称", + "time_range": "发生时间", + "location": "地点", + "merchant_name": "酒店/商户", + "amount": "金额", + "reason": "事由说明", + "participants": "参与人员", + "attachments": "票据附件", +} + +DATE_TEXT_PATTERN = re.compile( + r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[::][0-5]\d)?)" +) +AMOUNT_TEXT_PATTERN = re.compile( + r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)" +) +TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)") +TRAVEL_ROUTE_PATTERN = re.compile(r"([\u4e00-\u9fa5]{2,12})\s*(?:至|→|->|-|—)\s*([\u4e00-\u9fa5]{2,12})") + +SOURCE_LABELS = { + "user_text": "用户描述", + "user_form": "用户修改", + "ocr": "票据识别", + "upload": "上传附件", + "detail_context": "关联单据", + "system_context": "系统上下文", + "inferred": "语义推断", + "system": "系统判断", +} + +DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS = ("历史报销画像", "用户画像", "制度注意事项", "制度注意") + +SCENE_REQUIRED_SLOT_KEYS = { + "hotel": {"merchant_name"}, + "meeting": {"location"}, + "entertainment": {"location", "customer_name", "participants"}, +} +INFERRED_REASON_LABELS = { + "travel": "出差行程", + "hotel": "住宿报销", + "transport": "交通出行", + "meal": "餐饮用餐", + "meeting": "会务活动", + "entertainment": "客户接待", + "office": "办公采购", + "training": "培训学习", + "communication": "通讯使用", + "welfare": "员工福利", + "other": "其他费用", +} +SYSTEM_GENERATED_REASON_PREFIXES = ( + "我上传了", + "请按当前已识别信息", + "请把当前上传的票据", + 
"请基于当前上传的多张票据", + "我已核对右侧识别结果", + "请同步修正逐票据识别结果", + "我已校正核对信息", + "查看报销草稿", + "请解释一下当前这笔报销的合规风险和待补充项", +) +LEADING_REASON_TIME_PATTERNS = ( + re.compile( + r"^\s*(?:识别事项(?:有)?[::]\s*)?" + r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*" + r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" + r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" + r"\s*[,,。;;、]?\s*" + ), + re.compile( + r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?" + r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?" + r"\s*[,,。;;、]\s*" + ), +) +AMOUNT_UNIT_ALIASES = { + "员": "元", + "圆": "元", + "园": "元", + "块": "元", + "块钱": "元", + "元整": "元", + "万员": "万元", + "万圆": "万元", + "万园": "万元", + "万块": "万元", + "万元整": "万元", +} diff --git a/server/src/app/services/user_agent_documents.py b/server/src/app/services/user_agent_documents.py new file mode 100644 index 0000000..d56e5c8 --- /dev/null +++ b/server/src/app/services/user_agent_documents.py @@ -0,0 +1,380 @@ +from __future__ import annotations + +import re +from decimal import Decimal, InvalidOperation +from typing import Mapping + +from app.schemas.user_agent import UserAgentRequest, UserAgentReviewDocumentCard + +DEFAULT_GROUP_SCENE_LABELS = { + "travel": "差旅费", + "entertainment": "业务招待费", + "meal": "伙食费", + "transport": "交通费", + "hotel": "住宿费", + "office": "办公费", + "training": "培训费", + "communication": "通讯费", + "welfare": "福利费", + "other": "其他费用", +} + +DOCUMENT_DATE_TEXT_PATTERN = re.compile( + r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[::][0-5]\d)?)" +) +DOCUMENT_AMOUNT_TEXT_PATTERN = re.compile( + r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)" +) +DOCUMENT_AMOUNT_PATTERN = re.compile( + r"(?:价税合计|合计金额|费用合计|订单(?:总)?金额|支付(?:金额)?|实付(?:金额)?|实收(?:金额)?|总(?:额|计|价)|票价|金额|车费|消费金额)" + r"[::\s¥¥人民币]*([0-9]+(?:[.,][0-9]{1,2})?)" +) +DOCUMENT_CURRENCY_AMOUNT_PATTERN = re.compile(r"[¥¥]\s*([0-9]+(?:[.,][0-9]{1,2})?)") + + +class UserAgentDocumentService: + """集中处理票据分类和 
OCR 字段抽取,避免主服务继续膨胀。""" + + def __init__(self, *, group_scene_labels: Mapping[str, str] | None = None) -> None: + self._group_scene_labels = dict(group_scene_labels or DEFAULT_GROUP_SCENE_LABELS) + + def classify_document( + self, + item: dict[str, object], + *, + expense_type_code: str = "", + has_customer: bool = False, + ) -> dict[str, str]: + provided_type = str(item.get("document_type") or "").strip().lower() + normalized_expense_type = str(expense_type_code or "").strip().lower() + if provided_type: + if provided_type in {"flight_itinerary", "train_ticket"}: + return { + "document_type": provided_type, + "expense_type": "travel", + "group_code": "travel", + "scene_label": "差旅票据", + } + if provided_type == "hotel_invoice": + return { + "document_type": provided_type, + "expense_type": "hotel", + "group_code": "travel", + "scene_label": "住宿票据", + } + if provided_type in {"taxi_receipt", "parking_toll_receipt"}: + return { + "document_type": provided_type, + "expense_type": "transport", + "group_code": "travel", + "scene_label": "交通票据", + } + if provided_type == "meal_receipt": + group_code = "entertainment" if normalized_expense_type == "entertainment" or has_customer else "meal" + return { + "document_type": provided_type, + "expense_type": group_code, + "group_code": group_code, + "scene_label": "餐饮票据", + } + if provided_type == "office_invoice": + return { + "document_type": provided_type, + "expense_type": "office", + "group_code": "office", + "scene_label": "办公用品票据", + } + if provided_type == "meeting_invoice": + return { + "document_type": provided_type, + "expense_type": "meeting", + "group_code": "meeting", + "scene_label": "会务票据", + } + if provided_type == "training_invoice": + return { + "document_type": provided_type, + "expense_type": "training", + "group_code": "training", + "scene_label": "培训票据", + } + + text = " ".join( + [ + str(item.get("filename") or ""), + str(item.get("summary") or ""), + str(item.get("text") or ""), + ] + ).lower() + compact 
= text.replace(" ", "") + + if any(keyword in compact for keyword in ("机票", "航班", "火车", "高铁", "行程单")): + return { + "document_type": "travel_ticket", + "expense_type": "travel", + "group_code": "travel", + "scene_label": "差旅票据", + } + if any(keyword in compact for keyword in ("酒店", "住宿", "宾馆")): + return { + "document_type": "hotel_invoice", + "expense_type": "hotel", + "group_code": "travel", + "scene_label": "住宿票据", + } + if any(keyword in compact for keyword in ("打车", "出租车", "滴滴", "网约车", "乘车", "用车", "叫车", "车费", "车资", "的士", "过路费", "停车")): + return { + "document_type": "transport_receipt", + "expense_type": "transport", + "group_code": "travel", + "scene_label": "交通票据", + } + if any(keyword in compact for keyword in ("餐", "饭店", "酒楼", "酒家", "餐饮", "meal")): + group_code = "entertainment" if normalized_expense_type == "entertainment" or has_customer else "meal" + return { + "document_type": "meal_receipt", + "expense_type": group_code, + "group_code": group_code, + "scene_label": "餐饮票据", + } + if any(keyword in compact for keyword in ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "键盘", "鼠标", "白板", "墨盒", "硒鼓")): + return { + "document_type": "other", + "expense_type": "office", + "group_code": "office", + "scene_label": "办公用品票据", + } + return { + "document_type": "other", + "expense_type": normalized_expense_type or "other", + "group_code": self.normalize_group_code(normalized_expense_type or "other"), + "scene_label": "其他票据", + } + + @staticmethod + def normalize_group_code(expense_type_code: str) -> str: + if expense_type_code in {"travel", "hotel", "transport"}: + return "travel" + if expense_type_code in {"entertainment", "meal", "office", "training", "communication", "welfare"}: + return expense_type_code + return "other" + + def extract_document_fields(self, item: dict[str, object]) -> dict[str, str]: + raw_fields = item.get("document_fields") + normalized_fields: dict[str, str] = {} + document_type = str(item.get("document_type") or "").strip().lower() + if 
isinstance(raw_fields, list): + for field in raw_fields: + if not isinstance(field, dict): + continue + key = str(field.get("key") or "").strip() + label = str(field.get("label") or "").strip() + value = str(field.get("value") or "").strip() + if not value: + continue + normalized_label = self.normalize_document_field_label(key=key, label=label) + display_label = normalized_label or label + display_label = self.resolve_document_time_display_label( + document_type=document_type, + key=key, + label=label, + normalized_label=display_label, + ) + normalized_value = self.normalize_document_field_value( + label=display_label, + value=value, + ) + if display_label == "商户/酒店" and not self.is_hotel_document_item(item): + continue + if display_label and normalized_value: + normalized_fields.setdefault(display_label, normalized_value) + + text = " ".join([str(item.get("summary") or ""), str(item.get("text") or "")]).strip() + amount_value = self.extract_amount_text_from_value(text) + if amount_value and "金额" not in normalized_fields: + normalized_fields["金额"] = amount_value + date_match = DOCUMENT_DATE_TEXT_PATTERN.search(text) + if date_match and "时间" not in normalized_fields: + time_label = self.resolve_document_time_display_label( + document_type=document_type, + key="date", + label="日期", + normalized_label="时间", + ) + normalized_fields[time_label] = date_match.group(1) + + merchant = self.extract_document_merchant_name_from_text(text) if self.is_hotel_document_item(item) else "" + if merchant and "商户/酒店" not in normalized_fields: + normalized_fields["商户/酒店"] = merchant + return normalized_fields + + @staticmethod + def resolve_document_time_display_label( + *, + document_type: str, + key: str, + label: str, + normalized_label: str, + ) -> str: + if normalized_label != "时间": + return normalized_label + + label_by_type = { + "train_ticket": "列车出发时间", + "flight_itinerary": "起飞日期", + "taxi_receipt": "乘车时间", + "transport_receipt": "乘车时间", + "parking_toll_receipt": "通行日期", + } 
+ normalized_type = str(document_type or "").strip().lower() + if normalized_type not in label_by_type: + return normalized_label + + compact_key = str(key or "").strip().lower().replace("_", "") + compact_label = str(label or "").replace(" ", "") + if compact_key in {"date", "time", "issuedat", "issuedate", "invoicedate"}: + return label_by_type[normalized_type] + if any(token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")): + return label_by_type[normalized_type] + return normalized_label + + @staticmethod + def normalize_document_field_label(*, key: str, label: str) -> str: + compact_key = str(key or "").strip().lower().replace("_", "") + compact_label = str(label or "").replace(" ", "") + if compact_key in { + "amount", + "totalamount", + "paymentamount", + "paidamount", + "actualamount", + } or any( + token in compact_label + for token in ("金额", "价税合计", "合计", "总额", "总计", "票价", "支付金额", "实付金额", "实收金额") + ): + return "金额" + if compact_key in {"date", "time", "issuedat", "invoicedate"} or any( + token in compact_label for token in ("日期", "时间", "开票日期", "发生时间") + ): + return "时间" + if compact_key in {"merchant", "merchantname", "sellername", "vendorname"} or any( + token in compact_label for token in ("商户", "酒店", "销售方", "开票方", "收款方") + ): + return "商户/酒店" + return label + + def normalize_document_field_value(self, *, label: str, value: str) -> str: + normalized_label = str(label or "").strip() + raw_value = str(value or "").strip() + if not normalized_label or not raw_value: + return "" + if normalized_label == "金额": + return self.extract_amount_text_from_value(raw_value) or raw_value + if normalized_label in {"时间", "出发日期", "列车出发时间", "起飞日期", "乘车时间", "通行日期"}: + match = DOCUMENT_DATE_TEXT_PATTERN.search(raw_value) + return match.group(1) if match else raw_value + return raw_value + + def extract_amount_text_from_value(self, value: str) -> str: + raw_value = str(value or "").strip() + if not raw_value: + return "" + best_amount: Decimal | None = None + for 
pattern in (DOCUMENT_AMOUNT_PATTERN, DOCUMENT_CURRENCY_AMOUNT_PATTERN, DOCUMENT_AMOUNT_TEXT_PATTERN): + for match in pattern.finditer(raw_value): + try: + candidate = Decimal(str(match.group(1)).replace(",", ".")) + except (InvalidOperation, TypeError): + continue + if candidate <= Decimal("0.00"): + continue + if best_amount is None or candidate > best_amount: + best_amount = candidate + if best_amount is None: + return "" + return f"{best_amount.quantize(Decimal('0.01')):.2f}元" + + def extract_document_merchant_name(self, item: dict[str, object]) -> str: + fields = self.extract_document_fields(item) + merchant = str(fields.get("商户/酒店") or "").strip() + if merchant: + return merchant + if not self.is_hotel_document_item(item): + return "" + text = " ".join([str(item.get("summary") or ""), str(item.get("text") or "")]).strip() + return self.extract_document_merchant_name_from_text(text) + + @staticmethod + def is_hotel_document_item(item: dict[str, object]) -> bool: + document_type = str(item.get("document_type") or "").strip().lower() + scene_code = str(item.get("scene_code") or "").strip().lower() + scene_label = str(item.get("scene_label") or "").strip() + suggested_expense_type = str(item.get("suggested_expense_type") or "").strip().lower() + return ( + document_type == "hotel_invoice" + or scene_code == "hotel" + or suggested_expense_type == "hotel" + or "住宿" in scene_label + or "酒店" in scene_label + ) + + @staticmethod + def extract_document_merchant_name_from_text(text: str) -> str: + for keyword in ("酒店", "宾馆", "饭店", "酒楼", "餐厅", "航空", "铁路", "滴滴"): + if keyword in text: + return keyword + return "" + + @staticmethod + def extract_amount_from_card(card: UserAgentReviewDocumentCard) -> float: + for item in card.fields: + if item.label != "金额": + continue + try: + normalized_value = str(item.value).replace("元", "").replace("¥", "").replace("¥", "").strip() + return float(normalized_value) + except ValueError: + return 0.0 + return 0.0 + + @staticmethod + def 
resolve_amount_value(payload: UserAgentRequest) -> float: + for item in payload.ontology.entities: + if item.type == "amount" and item.role != "threshold": + try: + return float(item.normalized_value) + except ValueError: + return 0.0 + return 0.0 + + def sum_ocr_amounts(self, ocr_documents: list[dict[str, object]]) -> float: + total = 0.0 + for item in ocr_documents: + fields = self.extract_document_fields(item) + amount_text = str(fields.get("金额") or "").replace("元", "").replace("¥", "").replace("¥", "").strip() + if not amount_text: + continue + try: + total += float(amount_text) + except ValueError: + continue + return total + + def infer_expense_type_from_documents( + self, + ocr_documents: list[dict[str, object]], + *, + expense_type_code: str = "", + has_customer: bool = False, + ) -> str: + labels: list[str] = [] + for item in ocr_documents: + classified = self.classify_document( + item, + expense_type_code=expense_type_code, + has_customer=has_customer, + ) + label = self._group_scene_labels.get(classified["group_code"], "") + if label and label not in labels: + labels.append(label) + return " + ".join(labels[:3]) diff --git a/server/src/app/services/user_agent_knowledge.py b/server/src/app/services/user_agent_knowledge.py new file mode 100644 index 0000000..a4006e0 --- /dev/null +++ b/server/src/app/services/user_agent_knowledge.py @@ -0,0 +1,627 @@ +from __future__ import annotations + +import re +from typing import Any + +from app.schemas.user_agent import UserAgentCitation, UserAgentRequest +from app.services.user_agent_knowledge_helpers import UserAgentKnowledgeHelpersMixin +from app.services.user_agent_knowledge_constants import ( + KNOWLEDGE_ARTICLE_PATTERN, + KNOWLEDGE_DIRECT_ANSWER_HINTS, + KNOWLEDGE_LIST_ITEM_PATTERN, + KNOWLEDGE_NUMBERED_ITEM_PATTERN, + KNOWLEDGE_QUERY_STOPWORDS, + KNOWLEDGE_SECTION_HEADING_PATTERN, + MAX_KNOWLEDGE_DIRECT_EVIDENCE, + MAX_KNOWLEDGE_MODEL_HITS, + MAX_KNOWLEDGE_QUERY_TERMS, +) + + +class 
UserAgentKnowledgeMixin(UserAgentKnowledgeHelpersMixin): + @staticmethod + def _build_model_tool_payload( + tool_payload: dict[str, Any], + *, + question: str | None = None, + ) -> dict[str, Any]: + normalized = dict(tool_payload or {}) + hits = [] + for item in UserAgentKnowledgeMixin._select_knowledge_model_hits( + tool_payload, + question=question, + ): + if not isinstance(item, dict): + continue + hits.append( + { + "title": str(item.get("title") or "").strip(), + "document_name": str(item.get("document_name") or "").strip(), + "excerpt": str(item.get("excerpt") or "").strip(), + "content": str(item.get("content") or "").strip()[:1200], + "tags": list(item.get("tags") or [])[:5], + "evidence": list(item.get("evidence") or [])[:3], + "code": str(item.get("code") or "").strip(), + } + ) + normalized["hits"] = hits + return normalized + + @staticmethod + def _build_knowledge_evidence_blocks( + tool_payload: dict[str, Any], + *, + question: str | None = None, + ) -> str: + blocks: list[str] = [] + for index, item in enumerate( + UserAgentKnowledgeMixin._select_knowledge_model_hits( + tool_payload, + question=question, + )[:3], + start=1, + ): + if not isinstance(item, dict): + continue + title = str(item.get("title") or item.get("document_name") or f"证据 {index}").strip() + code = str(item.get("code") or "").strip() + content = str(item.get("content") or "").strip() + if not content: + continue + blocks.append( + "\n".join( + [ + f"[证据 {index}] {title}" + (f" ({code})" if code else ""), + "```text", + content[:1200], + "```", + ] + ) + ) + return "\n\n".join(blocks) + + def _build_fast_knowledge_answer( + self, + payload: UserAgentRequest, + *, + citations: list[UserAgentCitation], + ) -> str | None: + if payload.ontology.scenario != "knowledge": + return None + if str(payload.tool_payload.get("result_type") or "").strip() != "knowledge_search": + return None + + evidence_items = self._build_knowledge_answer_evidence(payload) + if not evidence_items: + return None + 
+ question = self._resolve_knowledge_question(payload) + if not self._should_use_direct_knowledge_answer(question, evidence_items): + return None + + return self._render_knowledge_direct_answer( + payload, + citations=citations, + evidence_items=evidence_items, + ) + + + def _render_knowledge_direct_answer( + self, + payload: UserAgentRequest, + *, + citations: list[UserAgentCitation], + evidence_items: list[dict[str, Any]], + ) -> str | None: + if not evidence_items: + return None + + title = str( + (citations[0].title if citations else "") + or evidence_items[0].get("title") + or "相关制度" + ).strip() + user_name = str(payload.context_json.get("name") or "").strip() + question = self._resolve_knowledge_question(payload) + query_terms = self._extract_knowledge_query_terms(question) + ordered_evidence_items = self._prioritize_knowledge_evidence_items(question, evidence_items) + primary_item = ordered_evidence_items[0] + primary_heading = self._format_knowledge_heading_label( + str(primary_item.get("heading") or "").strip() + ) + primary_lines = self._collect_direct_knowledge_answer_lines(ordered_evidence_items) + + lines: list[str] = [] + if user_name: + lines.append(f"{user_name},您好。") + source_prefix = f"根据《{title}》" + if primary_heading: + source_prefix = f"{source_prefix}({primary_heading})" + + if str(primary_item.get("kind") or "") == "table": + lines.append(f"{source_prefix},当前能直接确认的是:") + lines.append(self._extract_relevant_table_preview(str(primary_item.get("content") or ""), query_terms)) + else: + if not primary_lines: + lines.append( + f"{source_prefix},当前能直接确认的是:" + f"{self._summarize_knowledge_evidence_content(primary_item, query_terms)}" + ) + elif len(primary_lines) == 1: + lines.append(f"{source_prefix},当前能直接确认的是:{primary_lines[0].strip()}") + else: + lines.append(f"{source_prefix},当前能直接确认的是:") + lines.extend(primary_lines) + + notes: list[str] = [] + location_note = self._build_missing_location_grounding_note(question, evidence_items) + if 
location_note: + notes.append(location_note) + if self._question_requires_explicit_condition(question) and not self._answer_evidence_has_numeric_or_condition(evidence_items): + notes.append("当前命中的证据更偏规则说明或流程约束,还没有直接给出可立即套用的数值或完整条件。") + + if notes: + lines.append("") + lines.append("说明:") + lines.extend(f"- {note}" for note in notes) + + return "\n".join(line for line in lines if line is not None).strip() + + + @staticmethod + def _resolve_knowledge_question(payload: UserAgentRequest) -> str: + return str(payload.context_json.get("user_input_text") or payload.message or "").strip() + + + @staticmethod + def _looks_like_structured_knowledge_query(question: str) -> bool: + normalized = str(question or "").strip() + if not normalized: + return False + return any(keyword in normalized for keyword in KNOWLEDGE_DIRECT_ANSWER_HINTS) + + + def _should_use_direct_knowledge_answer( + self, + question: str, + evidence_items: list[dict[str, Any]], + ) -> bool: + if not evidence_items: + return False + if self._looks_like_structured_knowledge_query(question): + return True + return str(evidence_items[0].get("kind") or "") in {"table", "kv", "list", "clause"} + + + def _build_knowledge_answer_evidence( + self, + payload: UserAgentRequest, + ) -> list[dict[str, Any]]: + question = self._resolve_knowledge_question(payload) + query_terms = self._extract_knowledge_query_terms(question) + candidates: list[dict[str, Any]] = [] + + for hit in self._select_knowledge_model_hits( + payload.tool_payload, + question=question, + ): + if not isinstance(hit, dict): + continue + candidates.extend(self._extract_knowledge_evidence_candidates(hit, query_terms)) + + deduped: list[dict[str, Any]] = [] + seen: set[tuple[str, str, str]] = set() + ranked_candidates = sorted( + candidates, + key=lambda value: ( + float(value.get("score") or 0), + -len(str(value.get("content") or "")), + ), + reverse=True, + ) + top_score = float(ranked_candidates[0].get("score") or 0) if ranked_candidates else 0.0 + + 
for item in ranked_candidates: + score = float(item.get("score") or 0) + if deduped and score < max(6.0, top_score - 14): + continue + key = ( + str(item.get("title") or "").strip(), + str(item.get("heading") or "").strip(), + self._clean_knowledge_segment_text(str(item.get("content") or ""))[:180], + ) + if key in seen: + continue + seen.add(key) + deduped.append(item) + if len(deduped) >= MAX_KNOWLEDGE_DIRECT_EVIDENCE: + break + return deduped + + + def _extract_knowledge_evidence_candidates( + self, + hit: dict[str, Any], + query_terms: list[str], + ) -> list[dict[str, Any]]: + title = str(hit.get("title") or hit.get("document_name") or "相关制度").strip() + content = str(hit.get("content") or "").strip() + if not content: + return [] + + raw_candidates = self._merge_knowledge_lead_in_segments( + self._split_knowledge_hit_into_segments(content) + ) + candidates: list[dict[str, Any]] = [] + for item in raw_candidates: + score = self._score_knowledge_evidence_candidate(item, query_terms) + if query_terms and score <= 0: + continue + normalized = dict(item) + normalized["title"] = title + normalized["score"] = score + candidates.append(normalized) + + if candidates: + return candidates + + fallback_text = str(hit.get("excerpt") or "").strip() or self._extract_excerpt(content) + if not fallback_text: + return [] + return [ + { + "title": title, + "heading": "", + "kind": "paragraph", + "content": fallback_text, + "score": 1, + } + ] + + + def _merge_knowledge_lead_in_segments( + self, + segments: list[dict[str, str]], + ) -> list[dict[str, str]]: + if not segments: + return [] + + merged: list[dict[str, str]] = [] + index = 0 + while index < len(segments): + current = dict(segments[index]) + if not self._is_knowledge_lead_in_segment(current): + merged.append(current) + index += 1 + continue + + base_heading = str(current.get("heading") or "").strip() + current_marker = self._extract_knowledge_marker_family(str(current.get("content") or "")) + follow_segments: 
def _split_knowledge_hit_into_segments(self, content: str) -> list[dict[str, str]]:
    """Split the text of one knowledge hit into typed, heading-tagged segments.

    The text is scanned line by line. Each returned dict has three keys:

    * ``heading`` - the markdown heading path plus the most recent section
      heading, joined with ``" > "`` (empty when no heading was seen yet);
    * ``kind`` - ``"table"``, ``"list"``, ``"clause"``, ``"kv"`` or
      ``"paragraph"``;
    * ``content`` - the stripped line, or the joined lines for tables
      (newline-joined) and paragraphs (space-joined).

    Args:
        content: Raw hit text; ``None``/empty yields an empty list.

    Returns:
        Segments in document order.
    """
    segments: list[dict[str, str]] = []
    markdown_headings: list[str] = []
    section_heading = ""
    paragraph_lines: list[str] = []
    table_lines: list[str] = []

    def current_heading() -> str:
        parts = [item for item in markdown_headings if item]
        if section_heading:
            parts.append(section_heading)
        return " > ".join(parts)

    def emit(kind: str, text: str) -> None:
        segments.append({"heading": current_heading(), "kind": kind, "content": text})

    def flush_paragraph() -> None:
        # Buffered lines are already stripped and non-empty.
        if paragraph_lines:
            emit("paragraph", " ".join(paragraph_lines))
            paragraph_lines.clear()

    def flush_table() -> None:
        if table_lines:
            emit("table", "\n".join(table_lines))
            table_lines.clear()

    normalized = str(content or "").replace("\r\n", "\n").replace("\r", "\n")
    for raw_line in normalized.splitlines():
        stripped = raw_line.strip()

        if not stripped:
            # A blank line terminates whatever run is being collected.
            flush_paragraph()
            flush_table()
            continue

        markdown_heading_match = re.match(r"^(#{1,6})\s+(.+)$", stripped)
        if markdown_heading_match:
            flush_paragraph()
            flush_table()
            level = len(markdown_heading_match.group(1))
            # Keep only the ancestors of the new heading, then push it.
            del markdown_headings[max(0, level - 1):]
            markdown_headings.append(markdown_heading_match.group(2).strip())
            section_heading = ""
            continue

        if KNOWLEDGE_SECTION_HEADING_PATTERN.match(stripped) and len(stripped) <= 90:
            flush_paragraph()
            flush_table()
            section_heading = stripped.lstrip("#").strip()
            continue

        if stripped.count("|") >= 2:
            flush_paragraph()
            table_lines.append(stripped)
            continue

        # Any non-table line ends the current table run.
        flush_table()

        if KNOWLEDGE_LIST_ITEM_PATTERN.match(stripped) or KNOWLEDGE_NUMBERED_ITEM_PATTERN.match(
            stripped
        ):
            kind = "list"
        elif KNOWLEDGE_ARTICLE_PATTERN.match(stripped):
            kind = "clause"
        elif ("：" in stripped or ":" in stripped) and len(stripped) <= 180:
            # Accept both the fullwidth and the ASCII colon; the previous
            # check tested the ASCII colon twice.
            kind = "kv"
        else:
            paragraph_lines.append(stripped)
            continue

        flush_paragraph()
        emit(kind, stripped)

    flush_paragraph()
    flush_table()
    return segments
return "当前命中的直接依据是一张与问题强相关的标准表,已摘出最相关的表头和行。" + return "当前命中的直接依据是一张与问题强相关的标准表。" + lines = self._split_clean_knowledge_lines(content, preserve_marker=True) + if len(lines) >= 2: + return self._clean_knowledge_segment_text(f"{lines[0]} {' '.join(lines[1:4])}") + return self._clean_knowledge_segment_text(content) + + + def _build_missing_location_grounding_note( + self, + question: str, + evidence_items: list[dict[str, Any]], + ) -> str: + location = self._extract_query_location(question) + if not location: + return "" + + haystack = "\n".join( + str(item.get("heading") or "") + "\n" + str(item.get("content") or "") + for item in evidence_items + ) + if location in haystack: + return "" + return ( + f"当前命中的制度依据没有直接写出“{location}”对应的地区档位或映射关系," + "因此不能直接把它套用到表格中的某一列。" + ) + + + def _build_knowledge_search_answer( + self, + payload: UserAgentRequest, + citations: list[UserAgentCitation], + ) -> str: + hits = [item for item in list(payload.tool_payload.get("hits") or []) if isinstance(item, dict)] + evidence_items = self._build_knowledge_answer_evidence(payload) + primary_citation = citations[0] if citations else None + title = str( + (primary_citation.title if primary_citation else "") + or (hits[0].get("title") if hits else "") + or "相关制度" + ).strip() + user_name = str(payload.context_json.get("name") or "").strip() + prefix = f"{user_name},您好。\n" if user_name else "" + if not hits: + return ( + f"{prefix}我已经从《{title}》中检索到与你这次问题相关的制度依据," + "但本次答案生成环节暂时没有成功返回。请稍后重试一次;如果仍然失败," + "建议先检查主对话模型的连通性。" + ) + + evidence_lines: list[str] = [] + for item in evidence_items[:3]: + heading = str(item.get("heading") or "").strip() + heading_text = f" > {heading}" if heading else "" + if str(item.get("kind") or "") == "table": + preview = self._extract_relevant_table_preview( + str(item.get("content") or ""), + self._extract_knowledge_query_terms(self._resolve_knowledge_question(payload)), + ) + evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:\n{preview}") + 
continue + rendered = self._render_knowledge_evidence_text(item) + if rendered: + if "\n" in rendered: + evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:\n{rendered}") + else: + evidence_lines.append(f"- 《{item.get('title') or title}》{heading_text}:{rendered}") + + if not evidence_lines: + for item in hits[:2]: + item_title = str(item.get("title") or item.get("document_name") or "相关制度").strip() + excerpt = ( + str(item.get("excerpt") or "").strip() + or self._extract_excerpt(str(item.get("content") or "")) + ) + if not excerpt: + continue + evidence_lines.append(f"- 《{item_title}》:{excerpt}") + + if not evidence_lines: + return ( + f"{prefix}我已经从《{title}》中检索到与你这次问题相关的制度依据," + "但本次答案生成环节暂时没有成功返回。请稍后重试一次;如果仍然失败," + "建议先检查主对话模型的连通性。" + ) + + return "\n".join( + [ + f"{prefix}我已经命中与你这次问题最相关的制度依据,但答案整理阶段本轮没有及时返回。", + "先给你当前最直接的依据:", + *evidence_lines, + "如果你希望我继续把这些依据整理成更完整的结论、步骤或对比说明,可以继续缩小问题范围后再问一次。", + ] + ).strip() + diff --git a/server/src/app/services/user_agent_knowledge_constants.py b/server/src/app/services/user_agent_knowledge_constants.py new file mode 100644 index 0000000..72c46ac --- /dev/null +++ b/server/src/app/services/user_agent_knowledge_constants.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import re + +KNOWLEDGE_DIRECT_ANSWER_HINTS = ( + "是什么", + "标准", + "限额", + "流程", + "条件", + "规则", + "怎么", + "如何", + "哪些", + "需要", + "是否", + "区别", + "范围", + "额度", + "金额", + "多少", + "多少钱", + "上限", +) +KNOWLEDGE_QUERY_STOPWORDS = { + "什么", + "多少", + "哪些", + "怎么", + "如何", + "请问", + "一下", + "关于", + "规定", + "标准", + "可以", + "是否", + "一个", + "哪些人", + "目前", + "当前", + "一下子", +} +MAX_KNOWLEDGE_QUERY_TERMS = 12 +MAX_KNOWLEDGE_DIRECT_EVIDENCE = 4 +MAX_KNOWLEDGE_MODEL_HITS = 5 +KNOWLEDGE_SECTION_HEADING_PATTERN = re.compile( + r"^(#\s*.+|##\s*.+|###\s*.+|第[一二三四五六七八九十百零0-9]+[章节条]\s*.*|[一二三四五六七八九十]+、.*|([一二三四五六七八九十]+).*|\([一二三四五六七八九十]+\).*)$" +) +KNOWLEDGE_LIST_ITEM_PATTERN = re.compile(r"^[-*•]\s+.+$") +KNOWLEDGE_NUMBERED_ITEM_PATTERN = 
class UserAgentKnowledgeHelpersMixin:
    """Helpers for ranking, cleaning and previewing knowledge-base hits.

    Most helpers are static and only depend on ``re`` plus the pattern and
    limit constants imported from ``user_agent_knowledge_constants``.
    """

    @staticmethod
    def _select_knowledge_model_hits(
        tool_payload: dict[str, Any],
        *,
        question: str | None = None,
    ) -> list[dict[str, Any]]:
        """Return at most ``MAX_KNOWLEDGE_MODEL_HITS`` hits, best match first.

        Non-dict entries are ignored. Without usable query terms the original
        retrieval order is kept.
        """
        helpers = UserAgentKnowledgeHelpersMixin
        # Rank a slightly larger window than is returned so a strong hit just
        # below the cut-off can still be promoted.
        window = max(MAX_KNOWLEDGE_MODEL_HITS + 1, 6)
        raw_hits = [
            item
            for item in list(tool_payload.get("hits") or [])
            if isinstance(item, dict)
        ][:window]
        if not raw_hits:
            return []

        query_terms = helpers._extract_knowledge_query_terms(question or "")
        if not query_terms:
            return raw_hits[:MAX_KNOWLEDGE_MODEL_HITS]

        ranked_hits = sorted(
            enumerate(raw_hits),
            key=lambda value: (
                helpers._score_knowledge_model_hit(
                    value[1],
                    query_terms=query_terms,
                    rank_index=value[0],
                ),
                # Ties keep the earlier retrieval rank first.
                -value[0],
            ),
            reverse=True,
        )
        return [item for _, item in ranked_hits[:MAX_KNOWLEDGE_MODEL_HITS]]

    @staticmethod
    def _score_knowledge_model_hit(
        item: dict[str, Any],
        *,
        query_terms: list[str],
        rank_index: int,
    ) -> int:
        """Score one hit from its retrieval rank, term matches and layout cues."""
        title = str(item.get("title") or item.get("document_name") or "").lower()
        excerpt = str(item.get("excerpt") or "").lower()
        content = str(item.get("content") or "").lower()
        haystack = "\n".join([title, excerpt, content[:1400]])

        matched_terms = [term for term in query_terms if term in haystack]
        score = max(1, 48 - rank_index * 4)
        score += len(matched_terms) * 10
        score += sum(1 for term in matched_terms if term in title) * 8

        leading_marker = UserAgentKnowledgeHelpersMixin._leading_knowledge_appendix_marker(content)
        if leading_marker == "# 章节导航":
            score -= 22
        elif leading_marker == "# 问答线索补充":
            score += 6 if matched_terms else -8
        elif leading_marker == "# 重点章节摘录":
            score += 4 if matched_terms else -4
        elif leading_marker == "# 结构化表格补充":
            score += 8 if matched_terms else -3

        if matched_terms and "|" in content:
            score += 8
        # Fullwidth or ASCII colon; the tuple previously held ":" twice.
        if matched_terms and any(marker in content for marker in ("：", ":")):
            score += 10
        if matched_terms and "\n" in content:
            score += 4
        if matched_terms and any(marker in content for marker in ("附表", "第", "条")):
            score += 4
        if matched_terms and any(marker in content for marker in ("第", "条", ":", "-", "•")):
            score += 4
        if re.search(r"没有.{0,8}(信息|规定|说明|依据)", content):
            score -= 12
        return score

    @staticmethod
    def _leading_knowledge_appendix_marker(content: str) -> str:
        """Return the appendix marker that occurs first within the leading 220 chars.

        When several markers are present, the one with the smallest offset
        wins. Previously the first marker in a fixed tuple order was returned
        even if another marker appeared earlier in the text.
        """
        normalized = str(content or "").lstrip()
        markers = ("# 章节导航", "# 重点章节摘录", "# 问答线索补充", "# 结构化表格补充")
        best_marker = ""
        best_index = 221
        for marker in markers:
            index = normalized.find(marker)
            if 0 <= index < best_index:
                best_marker, best_index = marker, index
        return best_marker

    def _prioritize_knowledge_evidence_items(
        self,
        question: str,
        evidence_items: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """Move the most concrete evidence item to the front.

        Only applies when the question asks for an explicit amount or
        condition. Structured kinds are preferred (table, kv, clause, list),
        then any item containing a digit.
        """
        if not evidence_items or not self._question_requires_explicit_condition(question):
            return evidence_items

        for preferred_kind in ("table", "kv", "clause", "list"):
            for index, item in enumerate(evidence_items):
                if str(item.get("kind") or "") != preferred_kind:
                    continue
                return [item, *evidence_items[:index], *evidence_items[index + 1 :]]

        for index, item in enumerate(evidence_items):
            if re.search(r"\d", str(item.get("content") or "")):
                return [item, *evidence_items[:index], *evidence_items[index + 1 :]]

        return evidence_items

    @staticmethod
    def _is_knowledge_lead_in_segment(item: dict[str, str]) -> bool:
        """Tell whether a segment introduces following items (ends with a colon)."""
        kind = str(item.get("kind") or "").strip()
        content = str(item.get("content") or "").strip()
        # Fullwidth or ASCII colon; the tuple previously held ":" twice.
        return kind in {"kv", "list", "clause"} and content.endswith(("：", ":"))

    @staticmethod
    def _extract_knowledge_marker_family(content: str) -> str:
        """Classify the leading enumeration marker of a line.

        Returns one of ``"article"``, ``"arabic"``, ``"paren"``,
        ``"circled"``, ``"bullet"`` or ``""`` when no marker is recognised.
        """
        normalized = str(content or "").strip()
        if not normalized:
            return ""
        if KNOWLEDGE_ARTICLE_PATTERN.match(normalized):
            return "article"
        if re.match(r"^\d+[.)、]\s*", normalized):
            return "arabic"
        if re.match(r"^[（(][一二三四五六七八九十百零0-9]+[）)]\s*", normalized):
            return "paren"
        if re.match(r"^[①②③④⑤⑥⑦⑧⑨⑩]\s*", normalized):
            return "circled"
        if KNOWLEDGE_LIST_ITEM_PATTERN.match(normalized):
            return "bullet"
        return ""

    @staticmethod
    def _format_knowledge_heading_label(heading: str) -> str:
        """Render a ``"a > b"`` heading path as ``"a / b"``, dropping empty parts."""
        parts = [item.strip() for item in str(heading or "").split(">") if item.strip()]
        return " / ".join(parts)

    def _score_knowledge_evidence_candidate(
        self,
        item: dict[str, str],
        query_terms: list[str],
    ) -> int:
        """Score one segment by term matches, segment kind, heading and length."""
        heading = str(item.get("heading") or "").lower()
        content = str(item.get("content") or "").lower()
        kind = str(item.get("kind") or "").strip()
        haystack = "\n".join([heading, content])

        matched_terms = [term for term in query_terms if term in haystack]
        score = len(matched_terms) * 10
        score += sum(1 for term in matched_terms if term in heading) * 6

        if kind == "table":
            score += 10
        elif kind in {"kv", "clause", "list"}:
            score += 8
        elif kind == "paragraph":
            score += 4

        if "问答线索补充" in heading or "重点章节摘录" in heading:
            score += 8
        if "结构化表格补充" in heading:
            score += 10
        if "章节导航" in heading or "目录" in heading:
            score -= 16
        # Long runs of dots are penalised.
        if re.search(r"[.。…]{6,}", content):
            score -= 12
        if any(
            hint in content
            for hint in ("应", "需", "不得", "可以", "标准", "条件", "材料", "审批", "流程", "包括")
        ):
            score += 3

        content_length = len(content)
        if content_length > 220:
            score -= min(8, (content_length - 220) // 40)
        return score

    @staticmethod
    def _extract_knowledge_query_terms(question: str) -> list[str]:
        """Extract lower-cased search terms from a question.

        ASCII tokens are taken whole. Chinese runs of up to four characters
        are taken whole, longer runs are expanded into 4-, 3- and 2-character
        n-grams. Stopwords and duplicates are skipped and the result is
        capped at ``MAX_KNOWLEDGE_QUERY_TERMS``.
        """
        normalized_question = str(question or "").strip().lower()
        if not normalized_question:
            return []

        terms: list[str] = []
        seen: set[str] = set()

        def remember(term: str) -> None:
            normalized = str(term or "").strip().lower()
            if (
                not normalized
                or normalized in seen
                or normalized in KNOWLEDGE_QUERY_STOPWORDS
            ):
                return
            seen.add(normalized)
            terms.append(normalized)

        for item in re.findall(r"[a-z0-9][a-z0-9_\-]{1,}", normalized_question):
            remember(item)

        for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_question):
            if len(block) <= 4:
                remember(block)
                continue
            for size in (4, 3, 2):
                for start in range(0, len(block) - size + 1):
                    remember(block[start : start + size])
                    if len(terms) >= MAX_KNOWLEDGE_QUERY_TERMS:
                        return terms

        return terms[:MAX_KNOWLEDGE_QUERY_TERMS]

    @staticmethod
    def _clean_knowledge_segment_text(content: str) -> str:
        """Strip leading list markers, collapse whitespace, cap at 180 chars."""
        normalized = str(content or "").strip()
        normalized = re.sub(r"^[-*•]\s*", "", normalized)
        normalized = re.sub(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*", "", normalized)
        normalized = re.sub(r"^[（(][一二三四五六七八九十百零0-9]+[）)]\s*", "", normalized)
        normalized = re.sub(r"\s+", " ", normalized)
        if len(normalized) <= 180:
            return normalized
        return f"{normalized[:177].rstrip()}..."

    @staticmethod
    def _normalize_knowledge_line(content: str, *, preserve_marker: bool) -> str:
        """Normalise one line; bullets always go, numbering only if not preserved."""
        normalized = str(content or "").strip()
        normalized = re.sub(r"^[-*•]\s*", "", normalized)
        if not preserve_marker:
            normalized = re.sub(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*", "", normalized)
            normalized = re.sub(r"^[（(][一二三四五六七八九十百零0-9]+[）)]\s*", "", normalized)
        normalized = re.sub(r"\s+", " ", normalized)
        return normalized

    def _split_clean_knowledge_lines(
        self,
        content: str,
        *,
        preserve_marker: bool,
    ) -> list[str]:
        """Split content into normalised lines, dropping lines that end up empty."""
        return [
            line
            for line in (
                self._normalize_knowledge_line(item, preserve_marker=preserve_marker)
                for item in str(content or "").splitlines()
            )
            if line
        ]

    @staticmethod
    def _extract_relevant_table_preview(content: str, query_terms: list[str]) -> str:
        """Return the header, optional divider and the most relevant table rows.

        Tables of three lines or fewer are returned whole. Otherwise up to
        three rows containing a query term are kept, falling back to the
        first two body rows. The second line is emitted as a divider only
        when it really is a markdown delimiter row such as ``| --- | --- |``.
        Previously any second line was emitted as a divider, which pinned an
        unrelated data row into the preview and could repeat the divider.
        """
        lines = [line.strip() for line in str(content or "").splitlines() if line.strip()]
        if len(lines) <= 3:
            return "\n".join(lines)

        header = lines[0]
        second = lines[1]
        has_divider = "|" in second and bool(
            re.fullmatch(r"\|?\s*:?-+:?\s*(?:\|\s*:?-+:?\s*)*\|?", second)
        )
        body = lines[2:] if has_divider else lines[1:]

        # Empty terms would match every row, so they are ignored.
        terms = [term.lower() for term in query_terms if term]
        matched_rows = [
            row
            for row in body
            if any(term in row.lower() for term in terms)
        ]
        selected_rows = matched_rows[:3] or body[:2]

        preview_lines = [header]
        if has_divider:
            preview_lines.append(second)
        preview_lines.extend(selected_rows)
        return "\n".join(preview_lines).strip()

    @staticmethod
    def _question_requires_explicit_condition(question: str) -> bool:
        """Tell whether the question asks for an amount, limit, standard or condition."""
        normalized = str(question or "").strip()
        return any(
            keyword in normalized
            for keyword in ("多少", "金额", "上限", "限额", "标准", "条件", "需要")
        )

    @staticmethod
    def _answer_evidence_has_numeric_or_condition(evidence_items: list[dict[str, Any]]) -> bool:
        """Tell whether any evidence item contains a digit or a condition keyword."""
        for item in evidence_items:
            content = str(item.get("content") or "")
            if re.search(r"\d", content):
                return True
            if any(
                keyword in content
                for keyword in ("应", "需", "不得", "可以", "条件", "材料", "审批", "流程", "标准", "适用")
            ):
                return True
        return False
diff --git a/server/src/app/services/user_agent_response.py b/server/src/app/services/user_agent_response.py new file mode 100644 index 0000000..7381fc0 --- /dev/null +++ b/server/src/app/services/user_agent_response.py @@ -0,0 +1,726 @@ +from __future__ import annotations + +import json +import re +from datetime import UTC, datetime, timedelta +from decimal import Decimal, InvalidOperation +from typing import Any + +from sqlalchemy import or_, select +from sqlalchemy.orm import selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetStatus, AgentAssetType +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim +from app.schemas.agent_asset import AgentAssetListItem +from app.schemas.reimbursement import TravelReimbursementCalculatorRequest +from app.schemas.user_agent import ( + UserAgentCitation, + UserAgentDraftPayload, + UserAgentExpenseQueryRecord, + UserAgentQueryPayload, + UserAgentQueryStatusGroup, + UserAgentReviewAction, + UserAgentReviewClaimGroup, + UserAgentReviewDocumentCard, + UserAgentReviewDocumentField, + UserAgentReviewEditField, + UserAgentReviewPayload, + UserAgentReviewRiskBrief, + UserAgentReviewSlotCard, + UserAgentRequest, + UserAgentSuggestedAction, +) +from app.services.agent_assets import AgentAssetService +from app.services.expense_claims import ExpenseClaimService +from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label +from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check +from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService +from app.services.user_agent_constants import * + + +class UserAgentResponseMixin: + + def _build_fallback_answer( + self, + payload: UserAgentRequest, + *, + citations: list[UserAgentCitation], + draft_payload: UserAgentDraftPayload | None, + ) -> str: + if 
str(payload.tool_payload.get("result_type") or "").strip() == "knowledge_search": + return self._build_explain_answer(payload, citations) + + if payload.ontology.intent in {"query", "compare"}: + return self._build_query_answer(payload) + + if payload.ontology.intent == "risk_check": + return self._build_risk_answer(payload, citations) + + if payload.ontology.intent == "draft": + tool_message = str(payload.tool_payload.get("message") or "").strip() + if payload.tool_payload.get("draft_limit_reached"): + return tool_message or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。" + if tool_message and ( + str(payload.tool_payload.get("claim_id") or "").strip() + or str(payload.tool_payload.get("claim_no") or "").strip() + ): + return tool_message + if payload.ontology.intent == "draft" and draft_payload is not None: + return ( + f"已生成 {draft_payload.title},当前仅返回待人工确认的草稿内容," + "仍需人工确认后再进入正式流程。" + ) + + return self._build_explain_answer(payload, citations) + + + def _build_guided_answer(self, payload: UserAgentRequest) -> str | None: + if not self._is_generic_expense_prompt(payload): + return self._build_implicit_expense_draft_guidance(payload) + + attachment_names = self._resolve_attachment_names(payload) + ocr_summary = str(payload.context_json.get("ocr_summary") or "").strip() + attachment_hint = "" + if ocr_summary: + attachment_hint = f" 我已读取附件 OCR 摘要:{ocr_summary}" + elif attachment_names: + attachment_hint = ( + f" 我已带入 {len(attachment_names)} 份附件名称,但目前还不能直接读取附件内容," + "仍需要你补充关键信息。" + ) + + return ( + "可以帮你发起报销。请补充费用类型、发生时间、金额、事由和相关对象," + "或者直接上传票据附件,我再继续帮你判断能否报、缺什么材料,并整理待核对信息。" + f"{attachment_hint}" + ) + + + def _build_implicit_expense_draft_guidance( + self, + payload: UserAgentRequest, + ) -> str | None: + if not self._is_implicit_expense_draft_request(payload): + return None + + amount_text = next( + (item.value for item in payload.ontology.entities if item.type == "amount"), + "", + ) + expense_type = next( + ( + EXPENSE_TYPE_LABELS.get(item.normalized_value, item.value) + 
for item in payload.ontology.entities + if item.type == "expense_type" + ), + "报销", + ) + time_text = payload.ontology.time_range.raw or "本次" + amount_hint = f",金额 {amount_text}" if amount_text else "" + + return ( + f"已识别到一笔{time_text}的{expense_type}支出{amount_hint}。" + "如果要继续整理报销核对信息,还需要补充客户单位、参与人员、费用明细和票据附件。" + "你也可以继续上传发票或图片,我会把这些信息带入后续对话。" + ) + + + def _generate_answer_with_model( + self, + payload: UserAgentRequest, + *, + citations: list[UserAgentCitation], + suggested_actions: list[UserAgentSuggestedAction], + risk_flags: list[str], + draft_payload: UserAgentDraftPayload | None, + fallback_answer: str, + ) -> str | None: + messages = self._build_model_messages( + payload, + citations=citations, + suggested_actions=suggested_actions, + risk_flags=risk_flags, + draft_payload=draft_payload, + fallback_answer=fallback_answer, + ) + answer = self._sanitize_model_answer( + self.runtime_chat_service.complete( + messages, + max_tokens=800 if payload.ontology.scenario == "knowledge" else 420, + temperature=0.2, + timeout_seconds=( + KNOWLEDGE_MODEL_TIMEOUT_SECONDS + if payload.ontology.scenario == "knowledge" + else None + ), + slot_timeouts=( + { + "main": KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS, + "backup": KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS, + } + if payload.ontology.scenario == "knowledge" + else None + ), + max_attempts=1 if payload.ontology.scenario == "knowledge" else None, + ) + ) + return self._reject_unsupported_location_inference(payload, answer) + + + def _sanitize_model_answer(self, answer: str | None) -> str | None: + if not answer: + return None + + cleaned = re.sub(r".*?", "", answer, flags=re.DOTALL | re.IGNORECASE) + cleaned = cleaned.strip() + leaked_reasoning_markers = ( + "用户问的是", + "让我分析一下", + "实体识别", + "从对话历史来看", + "从tool_payload来看", + "现在问题是", + "我需要:", + "关键是我", + ) + if any(marker in cleaned[:500] for marker in leaked_reasoning_markers): + return None + return cleaned or None + + + @staticmethod + def _extract_query_location(message: str) 
-> str: + match = re.search(r"(?:去|到|前往)([\u4e00-\u9fff]{2,8})(?:出差|开会|培训)", str(message or "")) + return match.group(1) if match else "" + + + def _reject_unsupported_location_inference( + self, + payload: UserAgentRequest, + answer: str | None, + ) -> str | None: + del payload + return answer + + + def _build_model_messages( + self, + payload: UserAgentRequest, + *, + citations: list[UserAgentCitation], + suggested_actions: list[UserAgentSuggestedAction], + risk_flags: list[str], + draft_payload: UserAgentDraftPayload | None, + fallback_answer: str, + ) -> list[dict[str, str]]: + knowledge_question = ( + self._resolve_knowledge_question(payload) + if payload.ontology.scenario == "knowledge" + else "" + ) + facts = { + "run_id": payload.run_id, + "user_message": payload.message, + "ontology": payload.ontology.model_dump(mode="json"), + "context": { + "entry_source": payload.context_json.get("entry_source"), + "user_name": payload.context_json.get("name"), + "user_role": payload.context_json.get("role"), + "user_department": payload.context_json.get("department_name") + or payload.context_json.get("department"), + "user_position": payload.context_json.get("position"), + "user_grade": payload.context_json.get("grade"), + "employee_no": payload.context_json.get("employee_no"), + "manager_name": payload.context_json.get("manager_name"), + "employee_location": payload.context_json.get("employee_location"), + "cost_center": payload.context_json.get("cost_center"), + "finance_owner_name": payload.context_json.get("finance_owner_name"), + "employee_risk_profile": payload.context_json.get("employee_risk_profile", {}), + "user_role_codes": payload.context_json.get("role_codes", []), + "is_admin": bool(payload.context_json.get("is_admin")), + "request_context": payload.context_json.get("request_context"), + "attachment_count": payload.context_json.get("attachment_count"), + "attachment_names": self._resolve_attachment_names(payload), + "ocr_summary": 
payload.context_json.get("ocr_summary", ""), + "ocr_documents": payload.context_json.get("ocr_documents", []), + "conversation_id": payload.context_json.get("conversation_id"), + "conversation_scenario": payload.context_json.get("conversation_scenario"), + "conversation_intent": payload.context_json.get("conversation_intent"), + "draft_claim_id": payload.context_json.get("draft_claim_id"), + "conversation_history": self._resolve_conversation_history(payload), + }, + "tool_payload": self._build_model_tool_payload( + payload.tool_payload, + question=knowledge_question, + ), + "citations": [item.model_dump(mode="json") for item in citations], + "suggested_actions": [item.model_dump(mode="json") for item in suggested_actions], + "risk_flags": risk_flags, + "draft_payload": draft_payload.model_dump(mode="json") if draft_payload is not None else None, + "selected_capability_codes": payload.selected_capability_codes, + "requires_confirmation": payload.requires_confirmation, + "fallback_answer": fallback_answer, + } + if payload.ontology.scenario == "knowledge": + facts["knowledge_evidence_blocks"] = self._build_knowledge_evidence_blocks( + payload.tool_payload, + question=knowledge_question, + ) + facts["knowledge_answer_evidence"] = [ + { + "title": str(item.get("title") or "").strip(), + "heading": str(item.get("heading") or "").strip(), + "kind": str(item.get("kind") or "").strip(), + "content": str(item.get("content") or "").strip(), + } + for item in self._build_knowledge_answer_evidence(payload) + ] + + if payload.ontology.scenario == "knowledge": + answer_style_instruction = ( + "你是财务制度知识问答助手。只能依据 facts.tool_payload.hits、facts.knowledge_answer_evidence、citations 与 conversation_history 回答," + "不要扩展成通用助手。优先直接回答,不要复述思考过程,不要输出 JSON、代码块或 。" + "回答风格要像一位真正熟悉制度的财务伙伴:先直接回应用户的核心问题,再用一张简洁表格或短段落说明依据," + "最后补充最重要的注意事项。不要写成“已检索到内容”的系统回执,也不要把命中片段连缀成答案。" + "必须优先回答用户当前这句话本身,不能把制度标题、制度全文或完整标准表当成主答案。" + "如果用户问的是某次具体行程“一共能报多少”,就先给“当前已能确认的金额”,再用一张很短的表说明项目、" + 
"适用标准、计算式和结果;如果总额还缺少住宿晚数、实际票据或其他必要条件,就明确写出“暂不能确认的部分”。" + "只有用户明确在问“标准有哪些”或“制度全文怎么规定”时,才展开完整标准表。" + "如果命中的知识已经足够支持计算、比较或归纳,就直接给出结论;金额、标准、天数、补贴等问题要把计算过程写清楚。" + "适合时请使用 Markdown 二级标题、短段落和表格,让回答更清晰;表格必须保证每一行列数一致,不要出现空白残列。" + "只能陈述 hits 中明确出现的事实,不能用常识、外部知识或主观推断补齐缺失条件。" + "回答前先在全部 hits 中寻找与问题最直接相关的章节、表格或条目,不能只依赖排在最前面的片段。" + "如果 facts.knowledge_answer_evidence 中已经给出更短的高相关证据,优先基于这些证据组织答案,再回看原始 hits 补上下文。" + "如果某个表格在检索片段中已经被摊平成连续文本,只有在行、列和数值对应关系能够从片段本身明确确认时才能据此计算;" + "如果列对应关系不清楚,必须说明表格结构在当前片段中不够清晰,不能把第一列或相邻数字想当然套给用户。" + "如果 hits 中出现“结构化表格补充”,它表示知识归纳阶段已经把原文表格重新整理过," + "优先使用这类结构化表格来理解行列关系,再回看原文确认上下文。" + "facts.knowledge_evidence_blocks 中保留了原始换行和定宽排版;遇到表格时,优先按这些证据块阅读," + "必须按表头从左到右逐列对应数值,不能把第一列的数值直接套给后面的列名。" + "如果完成计算或归纳仍缺少某个关键映射关系、适用条件或数值依据,必须明确说明当前知识库还缺哪一项信息,再给出已能确认的部分。" + "如果用户问题里没有明确给出某个套用条件,而 hits 或 evidence 里也没有明确出现,就不能自己补一个默认值。" + "当问题涉及追问时,必须结合 conversation_history 延续上一轮上下文,而不是重新泛化成制度全文摘录。" + "不要大段粘贴原始命中文本;只提炼与问题直接相关的规则、条件、金额和注意事项。" + "如果依据仍然不足,明确指出缺少哪一项信息,再给出当前能确认的部分。" + ) + else: + answer_style_instruction = "用 2 到 4 段完成回答,优先给结论,再补充最关键的依据与下一步建议。" + + personalization_instruction = ( + "如果 context.user_name 存在,并且当前问题与员工本人适用标准、报销额度、审批权限、职级待遇有关," + "开头应自然称呼一次用户,例如“曹笑竹,您好”。" + "如果需要根据员工身份判断标准,优先参考 context.user_grade 与 context.user_position。" + "如果问题与用户身份无关,就不要生硬加入姓名、职级或岗位。" + ) + + system_prompt = ( + "你是 X-Financial 的专业财务 AI 助手。" + "回答必须准确、自然、可执行,不要泄露中间推理。" + "当知识问题有命中依据时,先给结论,再给结构化说明。" + "不要把制度全文原样搬出来,不要把检索片段当作最终答案直接粘贴。" + "如果使用表格,确保列名简洁、数值明确。" + f"{personalization_instruction}" + f"{answer_style_instruction}" + ) + user_prompt = ( + "请严格依据下面的 facts 生成最终答复:\n" + f"{json.dumps(facts, ensure_ascii=False, indent=2)}" + ) + return [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ] + + + def _build_query_answer(self, payload: UserAgentRequest) -> str: + scenario = payload.ontology.scenario + data = payload.tool_payload + subject = self._resolve_subject(payload) + + if scenario == "expense": + query_payload = 
self._build_query_payload(payload) + scope_label = str(data.get("scope_label") or subject).strip() or subject + if query_payload is None: + return f"当前没有查到{scope_label}。你可以补充时间范围、单号或状态继续筛选。" + + window_prefix = ( + f"{query_payload.window_start_date} 至 {query_payload.window_end_date}" + if query_payload.recent_window_applied + and query_payload.window_start_date + and query_payload.window_end_date + else ( + f"近 {query_payload.window_days} 日内" + if query_payload.recent_window_applied and query_payload.window_days + else "当前条件下" + ) + ) + if query_payload.record_count <= 0: + if query_payload.older_record_count > 0 and query_payload.window_days: + return ( + f"{window_prefix}没有查到{query_payload.scope_label}。" + f"另有 {query_payload.older_record_count} 笔超过 {query_payload.window_days} 日的单据," + "请前往个人报销中心查看。" + ) + return f"{window_prefix}没有查到{query_payload.scope_label}。你可以补充时间范围、单号或状态继续筛选。" + + group_lines = [ + f"{item.label} {item.count} 笔" + for item in query_payload.status_groups + if item.count > 0 + ] + answer_parts = [ + f"我先为你列出{window_prefix}的{query_payload.scope_label}," + f"共 {query_payload.record_count} 笔,金额合计 {query_payload.total_amount:.2f} 元。" + ] + if group_lines: + answer_parts.append(f"其中包括:{'、'.join(group_lines)}。") + + hint_parts: list[str] = [] + if query_payload.has_more_in_window and query_payload.preview_count < query_payload.record_count: + hint_parts.append( + f"下方先展示最近 {query_payload.preview_count} 笔,你可以直接点击单据查看详情。" + ) + elif query_payload.records: + hint_parts.append("下方已列出本次命中的真实单据,可直接点击查看详情。") + + if query_payload.older_record_count > 0 and query_payload.window_days: + hint_parts.append( + f"另有 {query_payload.older_record_count} 笔超过 {query_payload.window_days} 日的单据," + "请前往个人报销中心查看。" + ) + + return " ".join(answer_parts + hint_parts).strip() + + if scenario == "accounts_receivable": + record_count = int(data.get("record_count") or 0) + outstanding_amount = float(data.get("outstanding_amount") or 0) + return ( + f"{subject}共命中 
{record_count} 条应收,未回款金额 {outstanding_amount:.2f} 元。" + "建议结合账龄和客户分布继续排查逾期风险。" + ) + + if scenario == "accounts_payable": + record_count = int(data.get("record_count") or 0) + outstanding_amount = float(data.get("outstanding_amount") or 0) + return ( + f"{subject}共命中 {record_count} 条应付,待付金额 {outstanding_amount:.2f} 元。" + "如需推进动作,建议先生成付款建议草稿并发起人工确认。" + ) + + return "已完成当前查询,但暂时没有更多结构化结果可展示。" + + + def _build_query_payload( + self, + payload: UserAgentRequest, + ) -> UserAgentQueryPayload | None: + if payload.ontology.scenario != "expense" or payload.ontology.intent not in {"query", "compare"}: + return None + + result_type = str(payload.tool_payload.get("result_type") or "").strip() + if result_type and result_type != "expense_claim_list": + return None + + records: list[UserAgentExpenseQueryRecord] = [] + for item in payload.tool_payload.get("records") or []: + if not isinstance(item, dict): + continue + amount = float(item.get("amount") or 0) + records.append( + UserAgentExpenseQueryRecord( + claim_id=str(item.get("claim_id") or "").strip(), + claim_no=str(item.get("claim_no") or "").strip() or "未编号", + employee_name=str(item.get("employee_name") or "").strip(), + expense_type=str(item.get("expense_type") or "").strip(), + expense_type_label=str(item.get("expense_type_label") or "").strip() + or EXPENSE_TYPE_LABELS.get(str(item.get("expense_type") or "").strip(), "报销"), + amount=round(amount, 2), + status=str(item.get("status") or "").strip(), + status_label=str(item.get("status_label") or "").strip() + or EXPENSE_STATUS_LABELS.get(str(item.get("status") or "").strip(), "处理中"), + status_group=str(item.get("status_group") or "").strip() or "other", + status_group_label=str(item.get("status_group_label") or "").strip() + or EXPENSE_STATUS_GROUP_LABELS.get(str(item.get("status_group") or "").strip(), "其他状态"), + approval_stage=str(item.get("approval_stage") or "").strip() or None, + document_date=str(item.get("document_date") or "").strip(), + 
occurred_at=str(item.get("occurred_at") or "").strip(), + reason=str(item.get("reason") or "").strip(), + location=str(item.get("location") or "").strip(), + ) + ) + + status_groups: list[UserAgentQueryStatusGroup] = [] + for item in payload.tool_payload.get("status_groups") or []: + if not isinstance(item, dict): + continue + status_groups.append( + UserAgentQueryStatusGroup( + key=str(item.get("key") or "").strip() or "other", + label=str(item.get("label") or "").strip() or "其他状态", + count=max(0, int(item.get("count") or 0)), + ) + ) + + return UserAgentQueryPayload( + result_type="expense_claim_list", + scope_label=str(payload.tool_payload.get("scope_label") or self._resolve_subject(payload)).strip() or "报销单", + recent_window_applied=bool(payload.tool_payload.get("recent_window_applied")), + window_days=( + int(payload.tool_payload["window_days"]) + if payload.tool_payload.get("window_days") not in {None, ""} + else None + ), + window_start_date=( + str(payload.tool_payload.get("window_start_date") or "").strip() or None + ), + window_end_date=( + str(payload.tool_payload.get("window_end_date") or "").strip() or None + ), + record_count=max(0, int(payload.tool_payload.get("record_count") or 0)), + preview_count=max(0, int(payload.tool_payload.get("preview_count") or len(records))), + older_record_count=max(0, int(payload.tool_payload.get("older_record_count") or 0)), + has_more_in_window=bool(payload.tool_payload.get("has_more_in_window") or payload.tool_payload.get("has_more")), + total_amount=round(float(payload.tool_payload.get("total_amount") or 0), 2), + status_groups=status_groups, + records=records, + ) + + + def _build_explain_answer( + self, + payload: UserAgentRequest, + citations: list[UserAgentCitation], + ) -> str: + if str(payload.tool_payload.get("result_type") or "").strip() == "knowledge_search": + if citations: + return self._build_knowledge_search_answer(payload, citations) + + tool_message = str(payload.tool_payload.get("message") or 
"").strip() + if tool_message: + return tool_message + + if citations: + titles = "、".join(item.title for item in citations[:2]) + summary = citations[0].excerpt or "请结合制度全文进一步确认。" + return f"已检索到相关依据:{titles}。核心说明:{summary}" + + return ( + f"当前还没有与“{SCENARIO_LABELS.get(payload.ontology.scenario, '当前问题')}”" + "强匹配的已上线规则引用,建议先人工复核或补充更具体的单据上下文。" + ) + + + def _build_risk_answer( + self, + payload: UserAgentRequest, + citations: list[UserAgentCitation], + ) -> str: + risk_flags = self._resolve_risk_flags(payload) + platform_messages = self._evaluate_platform_risk_messages(payload) + if not risk_flags and not platform_messages: + return "当前未识别到明确风险标签,建议继续查看原始明细或补充更多上下文。" + + reasons = [ + f"{flag}:{RISK_REASON_MAP.get(flag, f'{flag} 需要人工进一步确认。')}" + for flag in risk_flags + ] + if platform_messages: + reasons.extend(platform_messages) + citation_text = ( + f" 参考规则:{'、'.join(item.title for item in citations[:2])}。" + if citations + else "" + ) + signal_count = len(risk_flags) + (1 if platform_messages else 0) + return ( + f"本次识别到 {signal_count} 类风险信号。" + f"触发原因:{';'.join(reasons)}。" + "建议先复核明细、附件和审批链,再决定是否继续处理。" + f"{citation_text}" + ) + + + def _evaluate_platform_risk_messages(self, payload: UserAgentRequest) -> list[str]: + claim_id = str(payload.tool_payload.get("claim_id") or "").strip() + if not claim_id: + return [] + + claim = self.db.scalar( + select(ExpenseClaim) + .where(ExpenseClaim.id == claim_id) + .options(selectinload(ExpenseClaim.items)) + ) + if claim is None: + return [] + + rule_codes = resolve_rule_codes_for_risk_check( + payload.ontology, + query_text=payload.message, + ) + review = ExpenseClaimService(self.db).evaluate_platform_risk_rules( + claim, + rule_codes=rule_codes, + ) + messages: list[str] = [] + for flag in review.get("flags") or []: + if not isinstance(flag, dict): + continue + message = str(flag.get("message") or "").strip() + if message and message not in messages: + messages.append(message) + return messages + + + def 
_build_draft_payload(self, payload: UserAgentRequest) -> UserAgentDraftPayload: + scenario_label = SCENARIO_LABELS.get(payload.ontology.scenario, "业务") + subject = self._resolve_subject(payload) + claim_id = str(payload.tool_payload.get("claim_id") or "").strip() or None + claim_no = str(payload.tool_payload.get("claim_no") or "").strip() or None + claim_status = str(payload.tool_payload.get("status") or "").strip() or None + approval_stage = str(payload.tool_payload.get("approval_stage") or "").strip() or None + if claim_id and (claim_no is None or claim_status is None or approval_stage is None): + claim = self.db.get(ExpenseClaim, claim_id) + if claim is not None: + claim_no = claim_no or str(claim.claim_no or "").strip() or None + claim_status = claim_status or str(claim.status or "").strip() or None + approval_stage = approval_stage or str(claim.approval_stage or "").strip() or None + is_submitted = claim_status == "submitted" + title = f"{scenario_label}处理意见草稿" + if claim_no: + title = f"{scenario_label}{'报销单' if is_submitted else '草稿'} {claim_no}" + if is_submitted: + body = ( + f"主题:{subject}\n" + f"结论:报销单已提交,当前节点为 {approval_stage or '审批中'}。\n" + "建议:后续可在个人报销列表中跟踪审批进度,必要时再补充说明或附件。\n" + f"原始问题:{payload.message}" + ) + else: + body = ( + f"主题:{subject}\n" + "结论:已根据当前语义解析结果生成草稿,尚未自动执行。\n" + "建议:请先核对明细、规则命中和所需附件,再由人工确认是否提交正式流程。\n" + f"原始问题:{payload.message}" + ) + return UserAgentDraftPayload( + draft_type=payload.ontology.scenario, + title=title, + body=body, + confirmation_required=not is_submitted, + claim_id=claim_id, + claim_no=claim_no, + status=claim_status, + approval_stage=approval_stage, + ) + + + @staticmethod + def _should_build_draft_payload(payload: UserAgentRequest) -> bool: + if payload.ontology.scenario == "expense" and payload.tool_payload.get("preview_only"): + return any( + str(payload.tool_payload.get(key) or "").strip() + for key in ("claim_id", "claim_no") + ) + if payload.ontology.intent == "draft": + return True + if 
payload.ontology.scenario != "expense": + return False + return any( + str(payload.tool_payload.get(key) or "").strip() + for key in ("claim_id", "claim_no", "status") + ) + + + def _build_suggested_actions( + self, + payload: UserAgentRequest, + ) -> list[UserAgentSuggestedAction]: + if payload.ontology.scenario == "knowledge": + return [] + + if self._should_prompt_expense_scene_selection(payload): + return [ + UserAgentSuggestedAction( + label=label, + action_type="select_expense_type", + description=description, + payload={ + "expense_type": code, + "expense_type_label": label, + "original_message": payload.message, + }, + ) + for code, label, description in EXPENSE_SCENE_SELECTION_OPTIONS + ] + + if self._is_generic_expense_prompt(payload): + return [ + UserAgentSuggestedAction( + label="上传票据", + action_type="ask_clarification", + description="上传发票、行程单或付款截图,继续识别报销内容。", + ), + UserAgentSuggestedAction( + label="补充报销信息", + action_type="ask_clarification", + description="补充费用类型、金额、时间和事由后继续处理。", + ), + ] + + if payload.ontology.intent in {"query", "compare"}: + return [ + UserAgentSuggestedAction( + label="查看明细", + action_type="open_detail", + description="继续查看命中记录和过滤条件。", + ), + UserAgentSuggestedAction( + label="生成处理意见", + action_type="create_draft", + description="把当前查询结果整理成可确认草稿。", + ), + ] + + if payload.ontology.intent == "risk_check": + return [ + UserAgentSuggestedAction( + label="人工复核风险", + action_type="manual_review", + description="优先检查明细、附件和规则命中原因。", + ), + UserAgentSuggestedAction( + label="生成整改建议", + action_type="create_draft", + description="把风险说明整理成处理意见草稿。", + ), + ] + + if payload.ontology.intent == "draft": + return [ + UserAgentSuggestedAction( + label="复制草稿", + action_type="copy_draft", + description="复制当前草稿后交由人工确认。", + ), + UserAgentSuggestedAction( + label="补充上下文", + action_type="ask_clarification", + description="补充单据编号、客户或供应商信息以完善草稿。", + ), + ] + + return [ + UserAgentSuggestedAction( + label="查看规则全文", + action_type="open_rule", + 
description="继续查看引用规则或知识内容。", + ), + UserAgentSuggestedAction( + label="补充问题上下文", + action_type="ask_clarification", + description="补充业务对象、时间或单据范围,提升回答准确度。", + ), + ] + diff --git a/server/src/app/services/user_agent_review_core.py b/server/src/app/services/user_agent_review_core.py new file mode 100644 index 0000000..ee71b66 --- /dev/null +++ b/server/src/app/services/user_agent_review_core.py @@ -0,0 +1,528 @@ +from __future__ import annotations + +import json +import re +from datetime import UTC, datetime, timedelta +from decimal import Decimal, InvalidOperation +from typing import Any + +from sqlalchemy import or_, select +from sqlalchemy.orm import selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetStatus, AgentAssetType +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim +from app.schemas.agent_asset import AgentAssetListItem +from app.schemas.reimbursement import TravelReimbursementCalculatorRequest +from app.schemas.user_agent import ( + UserAgentCitation, + UserAgentDraftPayload, + UserAgentExpenseQueryRecord, + UserAgentQueryPayload, + UserAgentQueryStatusGroup, + UserAgentReviewAction, + UserAgentReviewClaimGroup, + UserAgentReviewDocumentCard, + UserAgentReviewDocumentField, + UserAgentReviewEditField, + UserAgentReviewPayload, + UserAgentReviewRiskBrief, + UserAgentReviewSlotCard, + UserAgentRequest, + UserAgentSuggestedAction, +) +from app.services.agent_assets import AgentAssetService +from app.services.expense_claims import ExpenseClaimService +from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label +from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check +from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService +from app.services.user_agent_constants import * + + +class UserAgentReviewCoreMixin: + + def 
_should_prompt_expense_scene_selection(self, payload: UserAgentRequest) -> bool: + if payload.ontology.scenario != "expense": + return False + if payload.ontology.intent not in {"draft", "operate"}: + return False + if str(payload.context_json.get("review_action") or "").strip(): + return False + review_form_values = self._resolve_review_form_values(payload) + if str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip(): + return False + if self._resolve_attachment_count(payload) > 0 or self._resolve_ocr_documents(payload): + return False + return not any( + item.type == "expense_type" and str(item.normalized_value or item.value or "").strip() + for item in payload.ontology.entities + ) + + + @staticmethod + def _build_expense_scene_selection_answer(payload: UserAgentRequest) -> str: + has_time = bool(payload.ontology.time_range.start_date or payload.ontology.time_range.raw) + context_hint = "我先识别到这是一次报销申请" + if has_time: + context_hint += ",并看到了业务发生时间" + return ( + f"{context_hint}。但你还没有明确这笔单据属于哪类报销。" + "请先在下面选择报销场景,我会按你选择的场景再继续识别时间、地点、事由、金额和所需票据," + "避免系统先入为主把项目支持、部署等描述误判成差旅。" + ) + + + def _build_review_payload( + self, + payload: UserAgentRequest, + *, + citations: list[UserAgentCitation], + draft_payload: UserAgentDraftPayload | None, + ) -> UserAgentReviewPayload | None: + attachment_count = self._resolve_attachment_count(payload) + ocr_documents = self._resolve_ocr_documents(payload) + if payload.ontology.scenario != "expense": + return None + if payload.ontology.intent not in {"draft", "operate"} and attachment_count <= 0 and not ocr_documents: + return None + + document_cards = self._build_review_document_cards(payload, ocr_documents=ocr_documents) + claim_groups = self._build_review_claim_groups( + payload, + document_cards=document_cards, + ) + slot_cards = self._build_review_slot_cards( + payload, + ocr_documents=ocr_documents, + claim_groups=claim_groups, + ) + travel_receipt_state = 
self._build_travel_receipt_state( + payload, + document_cards=document_cards, + claim_groups=claim_groups, + ) + missing_slot_keys = self._resolve_review_missing_slot_keys( + payload, + slot_cards=slot_cards, + ) + submission_blocked = bool(payload.tool_payload.get("submission_blocked")) + risk_briefs = self._build_review_risk_briefs( + payload, + citations=citations, + document_cards=document_cards, + claim_groups=claim_groups, + ) + risk_briefs.extend(self._build_travel_receipt_briefs(travel_receipt_state)) + association_choice_pending = self._is_review_association_choice_pending(payload) + can_proceed = ( + False + if association_choice_pending or submission_blocked or travel_receipt_state.get("blocks_next_step") + else self._can_proceed_review( + payload, + missing_slot_keys=missing_slot_keys, + claim_groups=claim_groups, + ) + ) + confirmation_actions = self._build_review_confirmation_actions( + payload, + can_proceed=can_proceed, + claim_groups=claim_groups, + draft_payload=draft_payload, + missing_slot_keys=missing_slot_keys, + ) + edit_fields = self._build_review_edit_fields( + payload, + draft_payload=draft_payload, + slot_cards=slot_cards, + ) + intent_summary = self._build_review_intent_summary( + payload, + slot_cards=slot_cards, + claim_groups=claim_groups, + ) + body_message = self._build_review_body_message( + payload, + slot_cards=slot_cards, + risk_briefs=risk_briefs, + can_proceed=can_proceed, + document_cards=document_cards, + travel_receipt_state=travel_receipt_state, + ) + missing_slot_labels = [SLOT_LABELS.get(key, key) for key in missing_slot_keys] + missing_slot_labels.extend( + str(item) + for item in travel_receipt_state.get("required_missing_labels", []) + if str(item).strip() + ) + missing_slot_labels = list(dict.fromkeys(missing_slot_labels)) + + return UserAgentReviewPayload( + intent_summary=intent_summary, + body_message=body_message, + scenario=payload.ontology.scenario, + intent=payload.ontology.intent, + can_proceed=can_proceed, + 
missing_slots=missing_slot_labels, + risk_briefs=risk_briefs, + slot_cards=slot_cards, + document_cards=document_cards, + claim_groups=claim_groups, + confirmation_actions=confirmation_actions, + edit_fields=edit_fields, + ) + + + def _build_review_slot_cards( + self, + payload: UserAgentRequest, + *, + ocr_documents: list[dict[str, object]], + claim_groups: list[UserAgentReviewClaimGroup], + ) -> list[UserAgentReviewSlotCard]: + entity_map = self._collect_entity_values(payload) + time_slot = self._build_time_slot(payload) + location_slot = self._build_location_slot(payload) + customer_slot = self._build_customer_slot(payload, entity_map=entity_map) + participants_slot = self._build_participants_slot(payload, entity_map=entity_map) + amount_slot = self._build_amount_slot(payload, entity_map=entity_map, ocr_documents=ocr_documents) + expense_type_slot = self._build_expense_type_slot( + payload, + entity_map=entity_map, + ocr_documents=ocr_documents, + ) + merchant_slot = self._build_merchant_slot(payload, ocr_documents=ocr_documents) + reason_slot = self._build_reason_slot( + payload, + claim_groups=claim_groups, + ) + attachment_slot = self._build_attachment_slot(payload) + required_keys = self._resolve_required_review_keys( + payload, + primary_expense_type=str(expense_type_slot["normalized_value"] or ""), + claim_groups=claim_groups, + ) + + cards = [ + self._make_slot_card( + key="expense_type", + value=expense_type_slot["value"], + raw_value=expense_type_slot["raw_value"], + normalized_value=expense_type_slot["normalized_value"], + source=expense_type_slot["source"], + confidence=expense_type_slot["confidence"], + evidence=expense_type_slot["evidence"], + required="expense_type" in required_keys, + ), + self._make_slot_card( + key="customer_name", + value=customer_slot["value"], + raw_value=customer_slot["raw_value"], + normalized_value=customer_slot["normalized_value"], + source=customer_slot["source"], + confidence=customer_slot["confidence"], + 
evidence=customer_slot["evidence"], + required="customer_name" in required_keys, + ), + self._make_slot_card( + key="time_range", + value=time_slot["value"], + raw_value=time_slot["raw_value"], + normalized_value=time_slot["normalized_value"], + source=time_slot["source"], + confidence=time_slot["confidence"], + evidence=time_slot["evidence"], + required="time_range" in required_keys, + ), + self._make_slot_card( + key="location", + value=location_slot["value"], + raw_value=location_slot["raw_value"], + normalized_value=location_slot["normalized_value"], + source=location_slot["source"], + confidence=location_slot["confidence"], + evidence=location_slot["evidence"], + required="location" in required_keys, + ), + self._make_slot_card( + key="merchant_name", + value=merchant_slot["value"], + raw_value=merchant_slot["raw_value"], + normalized_value=merchant_slot["normalized_value"], + source=merchant_slot["source"], + confidence=merchant_slot["confidence"], + evidence=merchant_slot["evidence"], + required="merchant_name" in required_keys, + ), + self._make_slot_card( + key="amount", + value=amount_slot["value"], + raw_value=amount_slot["raw_value"], + normalized_value=amount_slot["normalized_value"], + source=amount_slot["source"], + confidence=amount_slot["confidence"], + evidence=amount_slot["evidence"], + required="amount" in required_keys, + ), + self._make_slot_card( + key="reason", + value=reason_slot["value"], + raw_value=reason_slot["raw_value"], + normalized_value=reason_slot["normalized_value"], + source=reason_slot["source"], + confidence=reason_slot["confidence"], + evidence=reason_slot["evidence"], + required="reason" in required_keys, + ), + self._make_slot_card( + key="participants", + value=participants_slot["value"], + raw_value=participants_slot["raw_value"], + normalized_value=participants_slot["normalized_value"], + source=participants_slot["source"], + confidence=participants_slot["confidence"], + evidence=participants_slot["evidence"], + 
required="participants" in required_keys, + ), + self._make_slot_card( + key="attachments", + value=attachment_slot["value"], + raw_value=attachment_slot["raw_value"], + normalized_value=attachment_slot["normalized_value"], + source=attachment_slot["source"], + confidence=attachment_slot["confidence"], + evidence=attachment_slot["evidence"], + required="attachments" in required_keys, + ), + ] + return cards + + + def _build_review_document_cards( + self, + payload: UserAgentRequest, + *, + ocr_documents: list[dict[str, object]], + ) -> list[UserAgentReviewDocumentCard]: + cards: list[UserAgentReviewDocumentCard] = [] + for index, item in enumerate(ocr_documents, start=1): + classified = self._classify_document(item, payload) + fields = self._extract_document_fields(item) + cards.append( + UserAgentReviewDocumentCard( + index=index, + filename=str(item.get("filename") or f"document-{index}"), + document_type=classified["document_type"], + suggested_expense_type=classified["expense_type"], + scene_label=GROUP_SCENE_LABELS.get( + classified["group_code"], + classified["scene_label"], + ), + summary=str(item.get("summary") or item.get("text") or "").strip(), + avg_score=float(item.get("avg_score") or 0.0), + preview_kind=str(item.get("preview_kind") or "").strip(), + preview_data_url=str(item.get("preview_data_url") or "").strip(), + warnings=[str(warning) for warning in item.get("warnings", []) if str(warning).strip()], + fields=[ + UserAgentReviewDocumentField( + label=label, + value=value, + source="ocr", + ) + for label, value in fields.items() + if str(value).strip() + ], + ) + ) + return cards + + + def _build_review_claim_groups( + self, + payload: UserAgentRequest, + *, + document_cards: list[UserAgentReviewDocumentCard], + ) -> list[UserAgentReviewClaimGroup]: + groups: dict[str, dict[str, object]] = {} + for card in document_cards: + group_code = self._normalize_group_code(card.suggested_expense_type) + bucket = groups.setdefault( + group_code, + { + 
"document_indexes": [], + "amount_total": 0.0, + "expense_type": str(card.suggested_expense_type or group_code).strip() or group_code, + "scene_label": GROUP_SCENE_LABELS.get( + str(card.suggested_expense_type or group_code).strip() or group_code, + GROUP_SCENE_LABELS.get(group_code, "其他费用"), + ), + "reasons": [], + }, + ) + bucket["document_indexes"].append(card.index) + bucket["amount_total"] = float(bucket["amount_total"]) + self._extract_amount_from_card(card) + bucket["reasons"].append(f"{card.filename} 识别为 {card.scene_label}") + current_expense_type = str(bucket["expense_type"] or "").strip() + current_card_type = str(card.suggested_expense_type or "").strip() + if current_expense_type and current_card_type and current_expense_type != current_card_type: + bucket["expense_type"] = group_code + bucket["scene_label"] = GROUP_SCENE_LABELS.get(group_code, "其他费用") + + if not groups: + expense_type_code = self._collect_entity_values(payload).get("expense_type_code", "other") + group_code = self._normalize_group_code(expense_type_code) + groups[group_code] = { + "document_indexes": [], + "amount_total": self._resolve_amount_value(payload), + "expense_type": expense_type_code or "other", + "scene_label": GROUP_SCENE_LABELS.get(group_code, "其他费用"), + "reasons": ["当前主要依据用户文本和页面上下文进行分单建议。"], + } + + claim_groups: list[UserAgentReviewClaimGroup] = [] + for index, (group_code, bucket) in enumerate(groups.items(), start=1): + title = f"建议报销单 {index}:{bucket['scene_label']}" + rationale = ( + ";".join(dict.fromkeys(str(item) for item in bucket["reasons"])) + if bucket["reasons"] + else "当前仅有单一场景,无需拆单。" + ) + claim_groups.append( + UserAgentReviewClaimGroup( + group_code=group_code, + title=title, + expense_type=str(bucket["expense_type"]), + scene_label=str(bucket["scene_label"]), + document_indexes=list(bucket["document_indexes"]), + amount_total=round(float(bucket["amount_total"]), 2), + rationale=rationale, + ) + ) + return claim_groups + + + def 
_build_review_risk_briefs( + self, + payload: UserAgentRequest, + *, + citations: list[UserAgentCitation], + document_cards: list[UserAgentReviewDocumentCard], + claim_groups: list[UserAgentReviewClaimGroup], + ) -> list[UserAgentReviewRiskBrief]: + briefs: list[UserAgentReviewRiskBrief] = [] + for reason in self._resolve_submission_blocked_reasons(payload): + briefs.append( + UserAgentReviewRiskBrief( + title="提交风险提示", + level=self._resolve_submission_blocked_risk_level(reason), + content=reason, + detail=( + "该项属于提交审批前的阻断条件。系统会先要求补齐基础字段、附件或业务说明," + "否则审批人无法判断成本归属、业务真实性或票据有效性。" + ), + suggestion="按提示补齐对应信息;如果业务场景本身合理,请补充说明或佐证附件后再提交。", + ) + ) + + briefs.extend( + self._build_travel_policy_precheck_briefs( + payload, + document_cards=document_cards, + claim_groups=claim_groups, + ) + ) + + employee = self._resolve_employee_profile(payload) + employee_name = ( + str(employee.name).strip() + if employee is not None and employee.name + else self._collect_entity_values(payload).get("employee_name") + or str(payload.context_json.get("name") or "").strip() + ) + current_amount = self._resolve_amount_value(payload) or sum( + self._extract_amount_from_card(card) for card in document_cards + ) + if employee_name and current_amount > 0: + since = datetime.now(UTC) - timedelta(days=90) + claim_identity_conditions = [ExpenseClaim.employee_name == employee_name] + if employee is not None: + employee_identifiers = { + str(employee.name or "").strip(), + str(employee.email or "").strip(), + str(employee.employee_no or "").strip(), + } + employee_identifiers.discard("") + claim_identity_conditions = [ + ExpenseClaim.employee_id == employee.id, + ExpenseClaim.employee_name.in_(list(employee_identifiers)), + ] + stmt = select(ExpenseClaim).where(or_(*claim_identity_conditions), ExpenseClaim.occurred_at >= since) + recent_claims = list(self.db.scalars(stmt).all()) + if recent_claims: + duplicate_count = sum( + 1 + for item in recent_claims + if abs(float(item.amount) - 
current_amount) < 0.01 + ) + if duplicate_count: + briefs.append( + UserAgentReviewRiskBrief( + title="金额重复预警", + level="warning", + content=( + f"近 90 天发现 {duplicate_count} 笔金额相同的报销记录," + "提交前建议核对是否为重复报销或拆分不当。" + ), + detail=( + "系统将当前金额与近 90 天历史报销金额进行比对。金额完全一致不一定违规," + "但在交通、餐饮、办公采购等场景中可能提示重复票据或拆分报销。" + ), + suggestion="核对历史单据与当前票据是否对应同一业务;如不是重复,请在事由中说明差异。", + ) + ) + + warning_count = sum(len(item.warnings) for item in document_cards) + if warning_count: + briefs.append( + UserAgentReviewRiskBrief( + title="票据识别提醒", + level="warning", + content=f"当前共有 {warning_count} 条票据识别提示,建议逐张确认 OCR 识别字段。", + detail="票据 OCR 识别存在字段缺失、置信度偏低或类型判断不稳定时,会生成该提醒。", + suggestion="打开票据明细逐张核对日期、金额、商户和票据类型,必要时更正后再提交。", + ) + ) + + if len(claim_groups) > 1: + briefs.append( + UserAgentReviewRiskBrief( + title="建议拆单", + level="warning", + content=f"系统检测到 {len(claim_groups)} 类费用场景,建议拆成多张报销单后再提交。", + detail="同一批附件中包含多类费用场景时,混在一张报销单里会影响规则匹配、附件核验和审批归口。", + suggestion="按费用场景拆成多张报销单,分别确认金额、事由和附件归属。", + ) + ) + + return self._filter_deprecated_review_risk_briefs(briefs) + + + @staticmethod + def _resolve_submission_blocked_risk_level(reason: str) -> str: + normalized = re.sub(r"\s+", "", str(reason or "")) + amount_keywords = ("金额", "超标", "费用", "价款", "票面金额", "单价", "合计") + return "high" if any(keyword in normalized for keyword in amount_keywords) else "warning" + + + @staticmethod + def _filter_deprecated_review_risk_briefs( + briefs: list[UserAgentReviewRiskBrief], + ) -> list[UserAgentReviewRiskBrief]: + filtered: list[UserAgentReviewRiskBrief] = [] + for brief in briefs: + title = str(brief.title or "").strip() + if any(keyword in title for keyword in DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS): + continue + filtered.append(brief) + return filtered + diff --git a/server/src/app/services/user_agent_review_messages.py b/server/src/app/services/user_agent_review_messages.py new file mode 100644 index 0000000..0f5bb80 --- /dev/null +++ b/server/src/app/services/user_agent_review_messages.py @@ -0,0 
+1,673 @@ +from __future__ import annotations + +import json +import re +from datetime import UTC, datetime, timedelta +from decimal import Decimal, InvalidOperation +from typing import Any + +from sqlalchemy import or_, select +from sqlalchemy.orm import selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetStatus, AgentAssetType +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim +from app.schemas.agent_asset import AgentAssetListItem +from app.schemas.reimbursement import TravelReimbursementCalculatorRequest +from app.schemas.user_agent import ( + UserAgentCitation, + UserAgentDraftPayload, + UserAgentExpenseQueryRecord, + UserAgentQueryPayload, + UserAgentQueryStatusGroup, + UserAgentReviewAction, + UserAgentReviewClaimGroup, + UserAgentReviewDocumentCard, + UserAgentReviewDocumentField, + UserAgentReviewEditField, + UserAgentReviewPayload, + UserAgentReviewRiskBrief, + UserAgentReviewSlotCard, + UserAgentRequest, + UserAgentSuggestedAction, +) +from app.services.agent_assets import AgentAssetService +from app.services.expense_claims import ExpenseClaimService +from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label +from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check +from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService +from app.services.user_agent_constants import * + + +class UserAgentReviewMessageMixin: + + def _build_review_confirmation_actions( + self, + payload: UserAgentRequest, + *, + can_proceed: bool, + claim_groups: list[UserAgentReviewClaimGroup], + draft_payload: UserAgentDraftPayload | None, + missing_slot_keys: set[str] | None = None, + ) -> list[UserAgentReviewAction]: + missing_slot_keys = set(missing_slot_keys or set()) + if self._is_review_association_choice_pending(payload): + claim_no = 
str(payload.tool_payload.get("association_candidate_claim_no") or "").strip() + link_label = f"关联到草稿 {claim_no}" if claim_no else "关联到现有草稿" + return [ + UserAgentReviewAction( + label=link_label, + action_type="link_to_existing_draft", + description=( + f"把本次上传票据并入现有草稿 {claim_no}。" + if claim_no + else "把本次上传票据并入现有草稿。" + ), + emphasis="primary", + ), + UserAgentReviewAction( + label="单独建立报销单", + action_type="create_new_claim_from_documents", + description="基于当前上传的多张票据,新建一张独立的报销草稿。", + emphasis="secondary", + ), + ] + + review_action = str(payload.context_json.get("review_action") or "").strip() + if "expense_type" in missing_slot_keys and not review_action: + return [ + UserAgentReviewAction( + label="保存为草稿", + action_type="save_draft", + description="先暂存当前已识别信息,稍后仍可从个人报销继续补充或提交。", + emphasis="primary", + ), + ] + + primary_action = UserAgentReviewAction( + label="继续下一步" if can_proceed else "保存为草稿", + action_type="next_step" if can_proceed else "save_draft", + description=( + "当前识别信息已满足继续处理条件,确认后进入下一步。" + if can_proceed + else "暂存当前识别结果,后续可以继续补充或修改。" + ), + emphasis="primary", + ) + if len(claim_groups) > 1 and can_proceed: + primary_action.description = f"系统建议拆分为 {len(claim_groups)} 张报销单,确认后继续下一步。" + if draft_payload is not None and draft_payload.claim_no and not can_proceed: + primary_action.description = f"保存后会生成草稿 {draft_payload.claim_no},后续仍可继续补充。" + + actions = [] + if can_proceed: + actions.append( + UserAgentReviewAction( + label="保存为草稿", + action_type="save_draft", + description="先暂存当前已识别信息,稍后仍可从个人报销继续补充或提交。", + emphasis="secondary", + ) + ) + actions.append(primary_action) + return actions + + + def _build_review_intent_summary( + self, + payload: UserAgentRequest, + *, + slot_cards: list[UserAgentReviewSlotCard], + claim_groups: list[UserAgentReviewClaimGroup], + ) -> str: + slots = {item.key: item for item in slot_cards} + expense_type = slots.get("expense_type") + amount = slots.get("amount") + time_range = slots.get("time_range") + location = 
slots.get("location") + customer = slots.get("customer_name") + + summary = "我先根据您当前提供的信息整理出一笔报销:" + if expense_type and expense_type.value: + summary = f"识别到您希望报销一笔“{expense_type.value}”费用:" + details: list[str] = [] + if customer and customer.value: + details.append(f"客户:{customer.value}") + if time_range and time_range.value: + details.append(f"时间:{time_range.value}") + if location and location.value: + details.append(f"地点:{location.value}") + if amount and amount.value: + details.append(f"金额:{amount.value}") + reason = slots.get("reason") + if reason and reason.value: + details.append(f"事由:{reason.value}") + if details: + return "\n\n".join([summary, "基础信息识别结果:", "\n".join(details)]) + return summary + + + def _build_review_body_answer( + self, + payload: UserAgentRequest, + *, + review_payload: UserAgentReviewPayload | None, + draft_payload: UserAgentDraftPayload | None, + ) -> str | None: + if review_payload is None: + return None + if payload.ontology.scenario != "expense": + return None + if payload.ontology.intent not in {"draft", "operate"}: + return None + if payload.tool_payload.get("draft_limit_reached"): + return ( + str(payload.tool_payload.get("message") or "").strip() + or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。" + ) + + review_action = str(payload.context_json.get("review_action") or "").strip() + if payload.tool_payload.get("preview_only") and not review_action: + return review_payload.body_message or self._build_review_intent_summary( + payload, + slot_cards=review_payload.slot_cards, + claim_groups=review_payload.claim_groups, + ) + if payload.tool_payload.get("duplicate_attachment_blocked") or payload.tool_payload.get("duplicate_invoice_blocked"): + return ( + str(payload.tool_payload.get("message") or "").strip() + or "检测到本次上传票据与当前单据已有票据重复,请重新上传不同的票据后再归集。" + ) + if review_action == "save_draft": + if draft_payload is not None and draft_payload.claim_no: + return ( + f"已按您当前确认的信息保存为草稿 {draft_payload.claim_no}。" + "后续您可以继续补充缺失项,或修改识别结果后再继续提交。" + ) + 
return "已按您当前确认的信息保存为草稿。后续您可以继续补充缺失项,或修改识别结果后再继续提交。" + if review_action == "link_to_existing_draft": + document_count = self._resolve_review_document_count(payload) + followup_copy = self._build_review_action_followup_copy(review_payload) + if draft_payload is not None and draft_payload.claim_no: + return ( + f"已将本次上传的 {document_count} 张票据关联到草稿 {draft_payload.claim_no}。" + f"{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" + ) + return f"已将本次上传的票据关联到现有草稿。{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" + if review_action == "create_new_claim_from_documents": + document_count = self._resolve_review_document_count(payload) + followup_copy = self._build_review_action_followup_copy(review_payload) + if draft_payload is not None and draft_payload.claim_no: + return ( + f"已按当前上传的 {document_count} 张票据新建报销草稿 {draft_payload.claim_no}。" + f"{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" + ) + return f"已按当前上传票据新建报销草稿。{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}" + if review_action == "next_step": + if draft_payload is not None and draft_payload.status == "submitted": + stage_text = draft_payload.approval_stage or "审批中" + return f"报销单 {draft_payload.claim_no or ''} 已提交,当前节点为 {stage_text}。".strip() + if payload.tool_payload.get("submission_blocked"): + reasons = self._resolve_submission_blocked_reasons(payload) + if reasons: + reason_lines = "\n".join( + f"{index}. 
{reason}" for index, reason in enumerate(reasons, start=1) + ) + return ( + "AI预审暂未通过,所以还没有提交到审批人。\n" + f"{reason_lines}\n" + "请先处理以上项目;处理完成后再点继续下一步。" + ) + return str(payload.tool_payload.get("message") or "").strip() or "当前报销单暂时还不能提交审批。" + return ( + f"{self._build_review_intent_summary(payload, slot_cards=review_payload.slot_cards, claim_groups=review_payload.claim_groups)}\n\n" + "当前关键信息已基本齐全,您确认无误后可以继续下一步。" + ) + return review_payload.body_message or None + + + def _build_review_body_message( + self, + payload: UserAgentRequest, + *, + slot_cards: list[UserAgentReviewSlotCard], + risk_briefs: list[UserAgentReviewRiskBrief], + can_proceed: bool, + document_cards: list[UserAgentReviewDocumentCard], + travel_receipt_state: dict[str, Any] | None = None, + ) -> str: + if self._is_review_association_choice_pending(payload): + claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip() + document_count = len(document_cards) or self._resolve_review_document_count(payload) + if claim_no: + return ( + f"已识别出本次上传的 {document_count} 张票据。" + f"系统检测到你已有草稿 {claim_no},请选择关联到该草稿,或单独建立一张新的报销单。" + ) + return ( + f"已识别出本次上传的 {document_count} 张票据。" + "系统检测到你已有可用草稿,请先选择关联到现有草稿,或单独建立一张新的报销单。" + ) + + blocked_reasons = self._resolve_submission_blocked_reasons(payload) + if blocked_reasons: + reason_text = ";".join(dict.fromkeys(reason.strip("。;;") for reason in blocked_reasons if reason)) + return ( + f"AI预审未通过:{reason_text}。" + "请先根据风险提示补充原因、调整金额或更换附件,整改后再继续提交。" + ) + + travel_message = self._build_travel_receipt_guidance_message( + payload, + travel_receipt_state=travel_receipt_state or {}, + can_proceed=can_proceed, + ) + if travel_message: + return travel_message + + missing_labels = self._resolve_review_missing_slot_labels(slot_cards) + if travel_receipt_state: + missing_labels.extend( + str(item) + for item in travel_receipt_state.get("required_missing_labels", []) + if str(item).strip() + ) + missing_labels = list(dict.fromkeys(missing_labels)) + + 
expense_type_slot = next((item for item in slot_cards if item.key == "expense_type"), None) + if expense_type_slot is not None and not str(expense_type_slot.value or "").strip(): + return ( + f"{self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[])}\n\n" + "我已经先保留了当前识别出的时间、地点和事由,但还不能确定这张单据应该走哪类报销流程。" + "请先点击“选择报销类型”,在差旅费、交通费、住宿费等选项中选定;" + "选定后,后续上传的票据都会作为这张单据的补充继续核对,不会重新改判报销类型。" + ) + + review_payload = UserAgentReviewPayload( + intent_summary="", + body_message="", + scenario=payload.ontology.scenario, + intent=payload.ontology.intent, + can_proceed=can_proceed, + missing_slots=missing_labels, + risk_briefs=risk_briefs, + slot_cards=slot_cards, + document_cards=[], + claim_groups=[], + confirmation_actions=[], + edit_fields=[], + ) + return "\n\n".join( + item + for item in [ + self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[]), + self._build_review_standard_calculation_copy(payload, slot_cards), + self._build_review_guidance_copy(review_payload, mention_save_draft=not can_proceed), + ] + if item + ) + + + def _build_review_standard_calculation_copy( + self, + payload: UserAgentRequest, + slot_cards: list[UserAgentReviewSlotCard], + ) -> str: + slots = {item.key: item for item in slot_cards} + expense_type = str(slots.get("expense_type").value if slots.get("expense_type") else "").strip() + if "差旅" in expense_type: + return self._build_review_travel_calculation_table(payload, slots) + if "交通" in expense_type: + return ( + "报销测算参考:交通费通常以实际票据金额为基础,结合出行地点、业务事由和票据合规性复核;" + "如果它属于差旅行程的一部分,后续也会并入差旅费测算。" + ) + if "住宿" in expense_type: + return ( + "报销测算参考:住宿费通常按“实际住宿金额”和“目的地住宿标准 × 住宿天数”取合规口径;" + "补齐酒店票据后再核对是否超标。" + ) + return ( + "报销测算参考:先以用户填写金额或票据识别金额为基础," + "再结合费用类型、发生地点、业务事由和规则中心限额进行复核。" + ) + + + def _build_review_travel_calculation_table( + self, + payload: UserAgentRequest, + slots: dict[str, UserAgentReviewSlotCard], + ) -> str: + destination = self._resolve_slot_text(slots, "location") + days = 
self._resolve_review_travel_days(payload, slots) + ticket_amount = self._resolve_slot_money(slots, "amount") + employee = self._resolve_employee_profile(payload) + grade = self._resolve_review_employee_grade(payload, employee=employee) + + if not destination or not grade: + return "\n".join( + [ + "报销测算参考:", + "", + "| 项目 | 当前信息 | 测算说明 |", + "| --- | --- | --- |", + f"| 出差地点 | {destination or '待确认'} | 用于匹配城市住宿标准和补贴区域 |", + f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |", + f"| 职级 | {grade or '待确认'} | 补齐后才能匹配住宿标准和补贴档位 |", + f"| 交通票据 | {self._format_decimal_money(ticket_amount)} 元 | 上传票据后会按真实金额重新复核 |", + ] + ) + + current_user = CurrentUserContext( + username=str(payload.user_id or payload.context_json.get("name") or "anonymous").strip() or "anonymous", + name=str(payload.context_json.get("name") or payload.user_id or "anonymous").strip() or "anonymous", + role_codes=[ + str(item).strip() + for item in list(payload.context_json.get("role_codes") or []) + if str(item).strip() + ], + is_admin=bool(payload.context_json.get("is_admin")), + department_name=str(payload.context_json.get("department_name") or payload.context_json.get("department") or "").strip(), + ) + try: + calculation = TravelReimbursementCalculatorService(self.db).calculate( + TravelReimbursementCalculatorRequest(days=days, location=destination, grade=grade), + current_user, + ) + except Exception: + return "\n".join( + [ + "报销测算参考:", + "", + "| 项目 | 当前信息 | 测算说明 |", + "| --- | --- | --- |", + f"| 出差地点 | {destination} | 暂时未能匹配规则中心地点 |", + f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |", + f"| 职级 | {grade} | 暂时无法自动匹配差旅标准 |", + f"| 交通票据 | {self._format_decimal_money(ticket_amount)} 元 | 上传票据后会按真实金额重新复核 |", + ] + ) + + total_amount = ( + ticket_amount + + self._coerce_decimal_money(calculation.hotel_amount) + + self._coerce_decimal_money(calculation.allowance_amount) + ).quantize(Decimal("0.01")) + ticket_basis = "当前未上传交通票据,先按 0.00 元占位" if ticket_amount <= Decimal("0.00") else "已识别或填写的交通票据金额" + return "\n".join( + [ + 
"报销测算参考:", + "", + ( + f"职级 {calculation.grade},目的地 {destination},匹配城市 {calculation.matched_city};" + "补齐交通、酒店等票据后,我会按真实票据金额和规则中心标准重新复核。" + ), + "", + "| 项目 | 测算口径 | 金额 |", + "| --- | --- | ---: |", + f"| 交通票据 | {ticket_basis} | {self._format_decimal_money(ticket_amount)} 元 |", + f"| 住宿标准 | {self._format_decimal_money(calculation.hotel_rate)} 元/天 × {calculation.days} 天 | {self._format_decimal_money(calculation.hotel_amount)} 元 |", + f"| 出差补贴 | {self._format_decimal_money(calculation.total_allowance_rate)} 元/天 × {calculation.days} 天 | {self._format_decimal_money(calculation.allowance_amount)} 元 |", + f"| 参考合计 | 交通票据 + 住宿标准 + 出差补贴 | {self._format_decimal_money(total_amount)} 元 |", + ] + ) + + + @staticmethod + def _resolve_slot_text(slots: dict[str, UserAgentReviewSlotCard], key: str) -> str: + item = slots.get(key) + return str(getattr(item, "value", "") or getattr(item, "raw_value", "") or "").strip() + + + def _resolve_review_travel_days( + self, + payload: UserAgentRequest, + slots: dict[str, UserAgentReviewSlotCard], + ) -> int: + text = " ".join( + [ + str(payload.message or ""), + str(payload.context_json.get("user_input_text") or ""), + self._resolve_slot_text(slots, "reason"), + self._resolve_slot_text(slots, "time_range"), + ] + ) + explicit_match = re.search(r"(?= 2: + return max(1, (max(dates).date() - min(dates).date()).days) + return 1 + + + def _resolve_slot_money( + self, + slots: dict[str, UserAgentReviewSlotCard], + key: str, + ) -> Decimal: + text = self._resolve_slot_text(slots, key).replace(",", "") + match = re.search(r"([0-9]+(?:\.[0-9]{1,2})?)", text) + if not match: + return Decimal("0.00") + return self._coerce_decimal_money(match.group(1)) + + + @staticmethod + def _build_review_action_followup_copy(review_payload: UserAgentReviewPayload) -> str: + missing_slots = [str(item).strip() for item in review_payload.missing_slots if str(item).strip()] + receipt_briefs = [ + item + for item in review_payload.risk_briefs + if "差旅票据待补充" in 
str(item.title or "") + ] + if missing_slots: + return f"当前仍有 {'、'.join(missing_slots)},暂时只能保存为草稿,补齐后再继续下一步。" + if receipt_briefs: + return "当前必需票据已具备;如还有市内交通、打车、地铁或停车等乘车票据,可以继续上传,也可以继续下一步或保存草稿。" + if review_payload.can_proceed: + return "当前信息已较完整,您可以继续下一步,也可以先保存为草稿。" + return "" + + + def _build_travel_receipt_guidance_message( + self, + payload: UserAgentRequest, + *, + travel_receipt_state: dict[str, Any], + can_proceed: bool, + ) -> str: + review_action = str(payload.context_json.get("review_action") or "").strip() + if review_action or not travel_receipt_state.get("has_long_distance_ticket"): + return "" + + employee = self._resolve_employee_profile(payload) + user_name = ( + str(employee.name).strip() + if employee is not None and employee.name + else str(payload.context_json.get("name") or payload.user_id or "同事").strip() + ) + destination = str(travel_receipt_state.get("destination") or "待确认").strip() + days = max(1, int(travel_receipt_state.get("days") or 1)) + ticket_type_label = str(travel_receipt_state.get("ticket_type_label") or "交通").strip() + ticket_amount = self._coerce_decimal_money(travel_receipt_state.get("ticket_amount")) + + required_labels = [ + str(item).strip() + for item in travel_receipt_state.get("required_missing_labels", []) + if str(item).strip() + ] + optional_labels = [ + str(item).strip() + for item in travel_receipt_state.get("optional_missing_labels", []) + if str(item).strip() + ] + + provide_items: list[str] = [] + if required_labels: + provide_items.append("1. 酒店住宿发票/住宿清单(必须,当前待上传)") + if optional_labels: + provide_items.append(f"{len(provide_items) + 1}. 市内交通/乘车票据(非必须,如打车、地铁、停车等)") + + sections = [ + f"您好,{user_name}。我先按票据信息做一次差旅预检。", + "\n".join( + [ + "已识别信息:", + f"1. 出差地点:{destination}", + f"2. 预计天数:{days} 天", + f"3. 票据类型:{ticket_type_label}票", + f"4. 
票据金额:{self._format_decimal_money(ticket_amount)} 元", + ] + ), + ] + + if provide_items: + sections.append("还需补充:\n" + "\n".join(provide_items)) + else: + sections.append("票据完整性:当前核心票据已较完整,无需继续上传票据。") + + if required_labels: + sections.append( + "处理建议:酒店票据仍缺失,暂时不能继续下一步。" + "您可以先保存为草稿,补齐后再提交。" + ) + elif can_proceed and optional_labels: + sections.append( + "处理建议:必需票据已具备。" + "如暂时没有乘车票据,也可以继续下一步,或先保存为草稿。" + ) + elif can_proceed: + sections.append( + "处理建议:当前信息已较完整,确认无误后可以继续下一步;" + "暂时不提交时,也可以先保存为草稿。" + ) + + estimate_copy = self._build_travel_receipt_estimate_copy( + payload, + travel_receipt_state=travel_receipt_state, + ) + if estimate_copy: + sections.append(estimate_copy) + return "\n\n".join(section for section in sections if section) + + + def _build_travel_receipt_estimate_copy( + self, + payload: UserAgentRequest, + *, + travel_receipt_state: dict[str, Any], + ) -> str: + destination = str(travel_receipt_state.get("destination") or "").strip() + days = max(1, int(travel_receipt_state.get("days") or 1)) + ticket_type_label = str(travel_receipt_state.get("ticket_type_label") or "交通").strip() + ticket_amount = self._coerce_decimal_money(travel_receipt_state.get("ticket_amount")) + employee = self._resolve_employee_profile(payload) + grade = self._resolve_review_employee_grade(payload, employee=employee) + + if not destination or not grade: + return ( + "差旅费测算:\n" + f"1. 职级:{grade or '待确认'}\n" + f"2. 目的地:{destination or '出差地点待确认'}\n" + f"3. 已提交{ticket_type_label}:{self._format_decimal_money(ticket_amount)} 元\n" + "4. 
住宿和补贴金额:需补齐职级或地点后再核算。" + ) + + current_user = CurrentUserContext( + username=str(payload.user_id or payload.context_json.get("name") or "anonymous").strip() or "anonymous", + name=str(payload.context_json.get("name") or payload.user_id or "anonymous").strip() or "anonymous", + role_codes=[ + str(item).strip() + for item in list(payload.context_json.get("role_codes") or []) + if str(item).strip() + ], + is_admin=bool(payload.context_json.get("is_admin")), + department_name=str(payload.context_json.get("department_name") or payload.context_json.get("department") or "").strip(), + ) + try: + calculation = TravelReimbursementCalculatorService(self.db).calculate( + TravelReimbursementCalculatorRequest(days=days, location=destination, grade=grade), + current_user, + ) + except Exception: + return ( + "差旅费测算:\n" + f"1. 职级:{grade}\n" + f"2. 目的地:{destination}\n" + f"3. 已提交{ticket_type_label}:{self._format_decimal_money(ticket_amount)} 元\n" + "4. 住宿和补贴标准:暂时无法自动测算,请以规则中心最新差旅标准为准。" + ) + + total_amount = ( + ticket_amount + + self._coerce_decimal_money(calculation.hotel_amount) + + self._coerce_decimal_money(calculation.allowance_amount) + ).quantize(Decimal("0.01")) + return ( + "差旅费测算:\n" + f"1. 职级:{calculation.grade}\n" + f"2. 目的地:{calculation.matched_city or destination}\n" + f"3. 已提交{ticket_type_label}:{self._format_decimal_money(ticket_amount)} 元\n" + f"4. 住宿标准:{self._format_decimal_money(calculation.hotel_rate)} 元/天 × {calculation.days} 天\n" + f"5. 出差补贴:{self._format_decimal_money(calculation.total_allowance_rate)} 元/天 × {calculation.days} 天\n" + f"6. 
参考合计:{self._format_decimal_money(total_amount)} 元" + ) + + + @staticmethod + def _coerce_decimal_money(value: Any) -> Decimal: + try: + return Decimal(str(value or "0")).quantize(Decimal("0.01")) + except (InvalidOperation, ValueError): + return Decimal("0.00") + + + @staticmethod + def _format_decimal_money(value: Any) -> str: + return f"{UserAgentReviewMessageMixin._coerce_decimal_money(value):.2f}" + + + @staticmethod + def _resolve_review_missing_slot_labels( + slot_cards: list[UserAgentReviewSlotCard], + ) -> list[str]: + return [item.label for item in slot_cards if item.status == "missing"] + + + @staticmethod + def _build_review_guidance_copy( + review_payload: UserAgentReviewPayload, + *, + mention_save_draft: bool, + ) -> str: + reminder_count = len(review_payload.risk_briefs) + + if review_payload.can_proceed: + if reminder_count: + return ( + f"当前关键信息已基本齐全,但还有 {reminder_count} 条提醒。" + "请核查对话中的文字说明,确认无误后继续下一步。" + ) + return "当前关键信息已基本齐全,您确认无误后可以继续下一步。" + + return "" + + + @staticmethod + def _can_proceed_review( + payload: UserAgentRequest, + *, + missing_slot_keys: list[str], + claim_groups: list[UserAgentReviewClaimGroup], + ) -> bool: + if payload.ontology.ambiguity: + return False + if missing_slot_keys: + return False + if not claim_groups: + return False + return True + diff --git a/server/src/app/services/user_agent_review_profile.py b/server/src/app/services/user_agent_review_profile.py new file mode 100644 index 0000000..05bec32 --- /dev/null +++ b/server/src/app/services/user_agent_review_profile.py @@ -0,0 +1,465 @@ +from __future__ import annotations + +import json +import re +from datetime import UTC, datetime, timedelta +from decimal import Decimal, InvalidOperation +from typing import Any + +from sqlalchemy import or_, select +from sqlalchemy.orm import selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetStatus, AgentAssetType +from app.models.employee import Employee +from 
app.models.financial_record import ExpenseClaim +from app.schemas.agent_asset import AgentAssetListItem +from app.schemas.reimbursement import TravelReimbursementCalculatorRequest +from app.schemas.user_agent import ( + UserAgentCitation, + UserAgentDraftPayload, + UserAgentExpenseQueryRecord, + UserAgentQueryPayload, + UserAgentQueryStatusGroup, + UserAgentReviewAction, + UserAgentReviewClaimGroup, + UserAgentReviewDocumentCard, + UserAgentReviewDocumentField, + UserAgentReviewEditField, + UserAgentReviewPayload, + UserAgentReviewRiskBrief, + UserAgentReviewSlotCard, + UserAgentRequest, + UserAgentSuggestedAction, +) +from app.services.agent_assets import AgentAssetService +from app.services.expense_claims import ExpenseClaimService +from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label +from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check +from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService +from app.services.user_agent_constants import * + + +class UserAgentReviewProfileMixin: + + def _build_review_edit_fields( + self, + payload: UserAgentRequest, + *, + draft_payload: UserAgentDraftPayload | None, + slot_cards: list[UserAgentReviewSlotCard], + ) -> list[UserAgentReviewEditField]: + slot_map = {item.key: item for item in slot_cards} + employee = self._resolve_employee_profile(payload) + reporter_name = ( + slot_map.get("reporter_name").value + if slot_map.get("reporter_name") + else str(payload.context_json.get("name") or "").strip() + ) + manager_name = self._resolve_manager_name(employee) + reason = slot_map.get("reason").value if slot_map.get("reason") else "" + attachments = "、".join(self._resolve_attachment_names(payload)) + + fields = [ + UserAgentReviewEditField( + key="claim_no", + label="报销单据编号", + value=str(draft_payload.claim_no if draft_payload is not None and draft_payload.claim_no else "待生成"), + 
placeholder="保存草稿后自动生成", + required=False, + group="basic", + ), + UserAgentReviewEditField( + key="expense_type", + label="报销类型", + value=slot_map.get("expense_type").value if slot_map.get("expense_type") else "", + placeholder="例如:业务招待费 / 差旅费", + group="basic", + ), + UserAgentReviewEditField( + key="occurred_date", + label="业务发生时间", + value=slot_map.get("time_range").normalized_value if slot_map.get("time_range") and slot_map.get("time_range").normalized_value else slot_map.get("time_range").value if slot_map.get("time_range") else "", + placeholder="例如:2026-05-11", + group="basic", + ), + UserAgentReviewEditField( + key="reporter_name", + label="报销人", + value=reporter_name, + placeholder="请输入报销人姓名", + group="basic", + ), + UserAgentReviewEditField( + key="manager_name", + label="直属上司姓名", + value=manager_name, + placeholder="请输入直属上司姓名", + required=False, + group="basic", + ), + UserAgentReviewEditField( + key="customer_name", + label="客户名称", + value=slot_map.get("customer_name").value if slot_map.get("customer_name") else "", + placeholder="请输入客户名称", + group="business", + ), + UserAgentReviewEditField( + key="business_location", + label="业务地点", + value=slot_map.get("location").normalized_value if slot_map.get("location") and slot_map.get("location").normalized_value else slot_map.get("location").value if slot_map.get("location") else "", + placeholder="例如:北京 / 客户现场", + required=False, + group="business", + ), + UserAgentReviewEditField( + key="merchant_name", + label="酒店/商户", + value=slot_map.get("merchant_name").value if slot_map.get("merchant_name") else "", + placeholder="请输入酒店或商户名称", + required=False, + group="business", + ), + UserAgentReviewEditField( + key="amount", + label="金额", + value=slot_map.get("amount").normalized_value if slot_map.get("amount") and slot_map.get("amount").normalized_value else slot_map.get("amount").value if slot_map.get("amount") else "", + placeholder="例如:200.00元", + group="business", + ), + UserAgentReviewEditField( + 
key="participants", + label="参与人员", + value=slot_map.get("participants").value if slot_map.get("participants") else "", + placeholder="例如:客户 2 人,我方 1 人", + group="business", + ), + UserAgentReviewEditField( + key="reason", + label="事由", + value=reason, + placeholder="请输入报销事由", + field_type="textarea", + group="business", + ), + UserAgentReviewEditField( + key="attachment_names", + label="附件清单", + value=attachments, + placeholder="例如:发票.jpg、行程单.png", + required=False, + field_type="textarea", + group="attachments", + ), + ] + return fields + + + def _resolve_employee_profile(self, payload: UserAgentRequest) -> Employee | None: + candidates = [ + str(payload.context_json.get("name") or "").strip(), + str(payload.user_id or "").strip(), + self._collect_entity_values(payload).get("employee_name", ""), + ] + normalized = [item for item in dict.fromkeys(candidates) if item] + if not normalized: + return None + + stmt = ( + select(Employee) + .options(selectinload(Employee.organization_unit), selectinload(Employee.manager)) + .where( + or_( + Employee.name.in_(normalized), + Employee.employee_no.in_(normalized), + Employee.email.in_(normalized), + ) + ) + .limit(1) + ) + return self.db.scalar(stmt) + + + @staticmethod + def _resolve_manager_name(employee: Employee | None) -> str: + if employee is None: + return "" + if employee.manager is not None and employee.manager.name: + return employee.manager.name + if employee.organization_unit is not None and employee.organization_unit.manager_name: + return employee.organization_unit.manager_name + return "" + + + @staticmethod + def _extract_message_reason(message: str) -> str: + for line in str(message or "").splitlines(): + cleaned = line.strip() + if not cleaned: + continue + if cleaned.startswith(("附件名称:", "OCR摘要:", "关联单号:")): + continue + return cleaned[:300] + return "" + + + @staticmethod + def _looks_like_system_generated_reason_message(message: str) -> bool: + cleaned = str(message or "").strip() + if not cleaned: + 
return False + compact = re.sub(r"\s+", "", cleaned) + return compact.startswith(SYSTEM_GENERATED_REASON_PREFIXES) + + + def _resolve_reason_source_text(self, payload: UserAgentRequest) -> str: + explicit_text = payload.context_json.get("user_input_text") + if isinstance(explicit_text, str): + return explicit_text.strip() + if self._looks_like_system_generated_reason_message(payload.message): + return "" + return str(payload.message or "").strip() + + + @classmethod + def _resolve_reason_text(cls, message: str) -> str: + reason = cls._strip_leading_time_from_reason(cls._extract_message_reason(message)) + if not reason: + return "" + + compact = re.sub(r"\s+", "", reason) + if compact in GENERIC_EXPENSE_PROMPTS: + return "" + + instruction_prefixes = ( + "帮我生成", + "请帮我生成", + "生成", + "起草", + "创建", + "发起", + "准备", + "帮我报销", + "我要报销", + "我想报销", + ) + if compact.startswith(instruction_prefixes): + for separator in (",", ",", "。", ";", ";", ":", ":"): + if separator in reason: + trailing = reason.split(separator, 1)[1].strip() + if trailing: + return trailing[:300] + return "" + + return reason + + + @staticmethod + def _strip_leading_time_from_reason(value: str) -> str: + reason = str(value or "").strip() + for pattern in LEADING_REASON_TIME_PATTERNS: + next_reason = pattern.sub("", reason).strip() + if next_reason != reason: + return next_reason + return reason + + + @staticmethod + def _should_skip_model_answer( + payload: UserAgentRequest, + review_payload: UserAgentReviewPayload | None, + ) -> bool: + if payload.ontology.scenario == "expense" and payload.ontology.intent in {"query", "compare"}: + return True + if review_payload is None: + return False + return payload.ontology.scenario == "expense" and ( + payload.ontology.intent == "draft" + or int(payload.context_json.get("attachment_count") or 0) > 0 + ) + + + def _build_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]: + knowledge_citations = self._build_knowledge_citations(payload) + if 
payload.ontology.scenario == "knowledge": + return knowledge_citations[:3] + + rule_citations = self._build_rule_asset_citations(payload) + if knowledge_citations: + return (knowledge_citations + rule_citations)[:3] + return rule_citations + + + @staticmethod + def _build_knowledge_citations(payload: UserAgentRequest) -> list[UserAgentCitation]: + citations: list[UserAgentCitation] = [] + for item in list(payload.tool_payload.get("hits") or [])[:3]: + if not isinstance(item, dict): + continue + title = str(item.get("title") or item.get("document_name") or "").strip() + code = str(item.get("code") or item.get("candidate_id") or "").strip() + if not title or not code: + continue + citations.append( + UserAgentCitation( + source_type="knowledge", + code=code, + title=title, + version=str(item.get("version") or "").strip() or None, + updated_at=str(item.get("updated_at") or "").strip() or None, + excerpt=( + str(item.get("excerpt") or "").strip() + or str(item.get("content") or "").strip() + or None + ), + ) + ) + return citations + + + def _build_rule_asset_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]: + domain = self._resolve_domain(payload.ontology.scenario) + items = self.asset_service.list_assets( + asset_type=AgentAssetType.RULE.value, + status=AgentAssetStatus.ACTIVE.value, + domain=domain, + ) + ranked = self._rank_rule_assets(items, payload) + citations: list[UserAgentCitation] = [] + for item in ranked[:2]: + detail = self.asset_service.get_asset(item.id) + if detail is None: + continue + excerpt = self._extract_excerpt(str(detail.current_version_content or "")) + citations.append( + UserAgentCitation( + source_type="rule", + code=detail.code, + title=detail.name, + version=detail.current_version, + updated_at=detail.updated_at.date().isoformat(), + excerpt=excerpt, + ) + ) + return citations + + + @staticmethod + def _resolve_risk_flags(payload: UserAgentRequest) -> list[str]: + tool_flags = payload.tool_payload.get("risk_flags") + 
if isinstance(tool_flags, list) and tool_flags: + return [str(item) for item in tool_flags] + return [str(item) for item in payload.ontology.risk_flags] + + + @staticmethod + def _resolve_subject(payload: UserAgentRequest) -> str: + named_entities = [ + item.value + for item in payload.ontology.entities + if item.type in {"employee", "customer", "vendor", "project"} + ] + if named_entities: + return f"{'、'.join(named_entities)} 相关数据" + return f"{SCENARIO_LABELS.get(payload.ontology.scenario, '当前')}场景数据" + + + @staticmethod + def _is_generic_expense_prompt(payload: UserAgentRequest) -> bool: + if payload.ontology.scenario != "expense": + return False + normalized_message = re.sub(r"\s+", "", payload.message) + return normalized_message in GENERIC_EXPENSE_PROMPTS + + + @staticmethod + def _is_implicit_expense_draft_request(payload: UserAgentRequest) -> bool: + if payload.ontology.scenario != "expense" or payload.ontology.intent != "draft": + return False + + compact_message = re.sub(r"\s+", "", payload.message) + if any(keyword in compact_message for keyword in EXPLICIT_DRAFT_KEYWORDS): + return False + + return True + + + @staticmethod + def _resolve_attachment_names(payload: UserAgentRequest) -> list[str]: + names = payload.context_json.get("attachment_names") + if not isinstance(names, list): + return [] + return [str(name) for name in names if str(name).strip()] + + + @staticmethod + def _resolve_attachment_count(payload: UserAgentRequest) -> int: + names = UserAgentReviewProfileMixin._resolve_attachment_names(payload) + if names: + return len(names) + try: + return max(0, int(payload.context_json.get("attachment_count") or 0)) + except (TypeError, ValueError): + return 0 + + + @staticmethod + def _resolve_ocr_documents(payload: UserAgentRequest) -> list[dict[str, object]]: + documents = payload.context_json.get("ocr_documents") + if not isinstance(documents, list): + return [] + overrides = payload.context_json.get("review_document_form_values") + override_map: 
dict[tuple[int, str], dict[str, object]] = {} + if isinstance(overrides, list): + for item in overrides: + if not isinstance(item, dict): + continue + filename = str(item.get("filename") or "").strip() + index = int(item.get("index") or 0) + if not filename and index <= 0: + continue + override_map[(index, filename)] = item + normalized: list[dict[str, object]] = [] + for index, item in enumerate(documents[:8], start=1): + if not isinstance(item, dict): + continue + normalized_item = dict(item) + override = override_map.get((index, str(normalized_item.get("filename") or "").strip())) + if override is None: + override = override_map.get((index, "")) + if override is not None: + summary = str(override.get("summary") or "").strip() + scene_label = str(override.get("scene_label") or "").strip() + fields = override.get("fields") + if summary: + normalized_item["summary"] = summary + if scene_label: + normalized_item["scene_label"] = scene_label + if isinstance(fields, list): + normalized_item["document_fields"] = [ + { + "key": str(field.get("key") or field.get("label") or "").strip(), + "label": str(field.get("label") or "").strip(), + "value": str(field.get("value") or "").strip(), + } + for field in fields + if isinstance(field, dict) + and str(field.get("label") or "").strip() + and str(field.get("value") or "").strip() + ] + normalized.append(normalized_item) + return normalized + + + @staticmethod + def _is_review_association_choice_pending(payload: UserAgentRequest) -> bool: + return bool(payload.tool_payload.get("pending_association_decision")) + + + def _resolve_review_document_count(self, payload: UserAgentRequest) -> int: + return max( + len(self._resolve_ocr_documents(payload)), + self._resolve_attachment_count(payload), + ) + diff --git a/server/src/app/services/user_agent_review_slots.py b/server/src/app/services/user_agent_review_slots.py new file mode 100644 index 0000000..04a7f6f --- /dev/null +++ b/server/src/app/services/user_agent_review_slots.py @@ 
-0,0 +1,706 @@ +from __future__ import annotations + +import json +import re +from datetime import UTC, datetime, timedelta +from decimal import Decimal, InvalidOperation +from typing import Any + +from sqlalchemy import or_, select +from sqlalchemy.orm import selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetStatus, AgentAssetType +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim +from app.schemas.agent_asset import AgentAssetListItem +from app.schemas.reimbursement import TravelReimbursementCalculatorRequest +from app.schemas.user_agent import ( + UserAgentCitation, + UserAgentDraftPayload, + UserAgentExpenseQueryRecord, + UserAgentQueryPayload, + UserAgentQueryStatusGroup, + UserAgentReviewAction, + UserAgentReviewClaimGroup, + UserAgentReviewDocumentCard, + UserAgentReviewDocumentField, + UserAgentReviewEditField, + UserAgentReviewPayload, + UserAgentReviewRiskBrief, + UserAgentReviewSlotCard, + UserAgentRequest, + UserAgentSuggestedAction, +) +from app.services.agent_assets import AgentAssetService +from app.services.expense_claims import ExpenseClaimService +from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label +from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check +from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService +from app.services.user_agent_constants import * + + +class UserAgentReviewSlotMixin: + + @staticmethod + def _resolve_conversation_history(payload: UserAgentRequest) -> list[dict[str, object]]: + history = payload.context_json.get("conversation_history") + if not isinstance(history, list): + return [] + + normalized: list[dict[str, object]] = [] + for item in history[-8:]: + if not isinstance(item, dict): + continue + role = str(item.get("role") or "").strip() + content = str(item.get("content") or "").strip() + 
if not role or not content: + continue + normalized.append({"role": role, "content": content}) + return normalized + + + @staticmethod + def _resolve_domain(scenario: str) -> str | None: + if scenario == "expense": + return "expense" + if scenario == "accounts_receivable": + return "ar" + if scenario == "accounts_payable": + return "ap" + return None + + + @staticmethod + def _rank_rule_assets( + items: list[AgentAssetListItem], + payload: UserAgentRequest, + ) -> list[AgentAssetListItem]: + def score(item: AgentAssetListItem) -> tuple[int, str]: + tags = {str(value) for value in item.scenario_json or []} + weight = 0 + if payload.ontology.scenario in tags: + weight += 3 + if payload.ontology.intent in tags: + weight += 2 + for risk_flag in payload.ontology.risk_flags: + if risk_flag in tags: + weight += 4 + return weight, item.code + + ranked = sorted(items, key=score, reverse=True) + return [item for item in ranked if score(item)[0] > 0] + + + @staticmethod + def _extract_excerpt(content: str) -> str: + lines = [line.strip() for line in str(content).splitlines() if line.strip()] + cleaned: list[str] = [] + for line in lines: + normalized = re.sub(r"^[#>\-\*\d\.\s`]+", "", line).strip() + if normalized: + cleaned.append(normalized) + if len(cleaned) >= 2: + break + return ";".join(cleaned[:2]) + + + def _collect_entity_values(self, payload: UserAgentRequest) -> dict[str, str]: + values = { + "employee_name": "", + "customer": "", + "participants": "", + "amount": "", + "expense_type": "", + "expense_type_code": "", + } + participants: list[str] = [] + for item in payload.ontology.entities: + if item.type == "employee" and not values["employee_name"]: + values["employee_name"] = item.value + elif item.type == "customer" and not values["customer"]: + values["customer"] = item.value + elif item.type == "amount" and item.role != "threshold" and not values["amount"]: + normalized_amount = str(item.normalized_value or "").strip() + values["amount"] = 
f"{normalized_amount}元" if normalized_amount else item.value + elif item.type == "expense_type" and not values["expense_type_code"]: + values["expense_type_code"] = item.normalized_value + values["expense_type"] = EXPENSE_TYPE_LABELS.get( + item.normalized_value, + item.value, + ) + elif item.type in {"participant", "person"} and item.value.strip(): + participants.append(item.value.strip()) + if participants: + values["participants"] = "、".join(dict.fromkeys(participants)) + return values + + + def _format_time_range(self, payload: UserAgentRequest) -> str: + time_range = payload.ontology.time_range + if time_range.start_date and time_range.end_date: + if time_range.start_date == time_range.end_date: + return time_range.start_date + normalized = f"{time_range.start_date} 至 {time_range.end_date}" + return normalized + if time_range.raw: + return time_range.raw + return "" + + + def _resolve_location_value(self, payload: UserAgentRequest) -> str: + review_form_values = self._resolve_review_form_values(payload) + for key in ("business_location", "location"): + value = str(review_form_values.get(key) or "").strip() + if value: + return value + + if str(payload.context_json.get("entry_source") or "").strip() == "detail": + request_context = payload.context_json.get("request_context") + if isinstance(request_context, dict): + for key in ("city", "location"): + value = str(request_context.get(key) or "").strip() + if value: + return value + + labeled_match = re.search(r"(?:业务地点|发生地点|地点)[::]\s*(?P[^\n,。;]+)", payload.message) + if labeled_match: + return labeled_match.group("value").strip() + + city_match = re.search( + r"去(?P[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)", + payload.message, + ) + if city_match: + return city_match.group("city").strip() + if "客户现场" in payload.message.replace(" ", ""): + return "客户现场" + return "" + + + @staticmethod + def _resolve_review_form_values(payload: UserAgentRequest) -> dict[str, str]: + values = 
payload.context_json.get("review_form_values") + if not isinstance(values, dict): + return {} + normalized: dict[str, str] = {} + for key, value in values.items(): + cleaned_key = str(key or "").strip() + if not cleaned_key: + continue + normalized[cleaned_key] = str(value or "").strip() + return normalized + + + @staticmethod + def _build_slot_value( + *, + value: str = "", + raw_value: str = "", + normalized_value: str = "", + source: str = "system", + confidence: float = 0.0, + evidence: str = "", + ) -> dict[str, str | float]: + return { + "value": str(value or "").strip(), + "raw_value": str(raw_value or "").strip(), + "normalized_value": str(normalized_value or "").strip(), + "source": str(source or "system").strip() or "system", + "confidence": float(confidence), + "evidence": str(evidence or "").strip(), + } + + + def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + edited_value = str( + review_form_values.get("time_range") + or review_form_values.get("business_time") + or review_form_values.get("occurred_date") + or "" + ).strip() + if edited_value: + raw_value = str(review_form_values.get("time_range_raw") or edited_value).strip() + return self._build_slot_value( + value=edited_value, + raw_value=raw_value, + normalized_value=edited_value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + time_range = payload.ontology.time_range + if time_range.start_date and time_range.end_date: + normalized_value = ( + time_range.start_date + if time_range.start_date == time_range.end_date + else f"{time_range.start_date} 至 {time_range.end_date}" + ) + raw_value = str(time_range.raw or "").strip() + return self._build_slot_value( + value=normalized_value, + raw_value=raw_value, + normalized_value=normalized_value, + source="user_text", + confidence=0.92, + evidence="系统已根据当前日期将相对时间换算为标准日期。", + ) + + return self._build_slot_value() + + + def 
_build_location_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + for key in ("business_location", "location"): + value = str(review_form_values.get(key) or "").strip() + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + if str(payload.context_json.get("entry_source") or "").strip() == "detail": + request_context = payload.context_json.get("request_context") + if isinstance(request_context, dict): + for key in ("city", "location"): + value = str(request_context.get(key) or "").strip() + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="detail_context", + confidence=0.68, + evidence="来源于当前关联单据,仅作为辅助上下文,需要用户再次核对。", + ) + + value = self._resolve_location_value(payload) + if value: + evidence = "用户在文本中明确描述了业务地点。" + if value == "客户现场": + evidence = "用户明确提到“客户现场”,但未提供具体城市或地址。" + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_text", + confidence=0.82, + evidence=evidence, + ) + return self._build_slot_value() + + + def _build_customer_slot( + self, + payload: UserAgentRequest, + *, + entity_map: dict[str, str], + ) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + value = str(review_form_values.get("customer_name") or "").strip() + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + value = entity_map.get("customer", "") + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_text", + confidence=0.88, + evidence="用户在原始描述中直接提到了客户对象。", + ) + return self._build_slot_value() + + + def _build_participants_slot( + self, + payload: UserAgentRequest, + *, + entity_map: dict[str, str], + ) -> dict[str, str | 
float]: + review_form_values = self._resolve_review_form_values(payload) + value = str(review_form_values.get("participants") or "").strip() + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + value = entity_map.get("participants", "") + if value: + return self._build_slot_value( + value=value, + normalized_value=value, + source="user_text", + confidence=0.8, + evidence="用户在当前描述中补充了参与人员。", + ) + return self._build_slot_value() + + + def _build_reason_slot( + self, + payload: UserAgentRequest, + *, + claim_groups: list[UserAgentReviewClaimGroup], + ) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + edited_value = str(review_form_values.get("reason") or "").strip() + if edited_value: + return self._build_slot_value( + value=edited_value, + raw_value=edited_value, + normalized_value=edited_value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + inferred_reason = self._infer_reason_from_claim_groups( + claim_groups=claim_groups, + ) + reason_value = self._resolve_reason_text(self._resolve_reason_source_text(payload)) + if inferred_reason: + return self._build_slot_value( + value=inferred_reason, + raw_value=reason_value or inferred_reason, + normalized_value=inferred_reason, + source="ocr", + confidence=0.82, + evidence=( + "系统已根据票据识别结果预置场景类型;原始描述仍保留为补充说明。" + if reason_value + else "系统已根据票据识别场景补全通用事由,若需更具体说明可继续修改。" + ), + ) + + if reason_value: + return self._build_slot_value( + value=reason_value, + raw_value=reason_value, + normalized_value=reason_value, + source="user_text", + confidence=0.76, + evidence="系统从用户原始描述中提取了本次费用事由,建议继续核对。", + ) + return self._build_slot_value() + + + def _build_amount_slot( + self, + payload: UserAgentRequest, + *, + entity_map: dict[str, str], + ocr_documents: list[dict[str, object]], + ) -> dict[str, str | float]: + review_form_values = 
self._resolve_review_form_values(payload) + edited_amount = str(review_form_values.get("amount") or "").strip() + if edited_amount: + normalized = self._normalize_amount_text(edited_amount) + return self._build_slot_value( + value=normalized, + raw_value=edited_amount, + normalized_value=normalized, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + amount_value = entity_map.get("amount", "") + if amount_value: + normalized = self._normalize_amount_text(amount_value) + return self._build_slot_value( + value=normalized, + raw_value=amount_value, + normalized_value=normalized, + source="user_text", + confidence=0.92, + evidence="用户在原始描述中直接给出了金额。", + ) + + ocr_total_amount = self._sum_ocr_amounts(ocr_documents) + if ocr_total_amount > 0: + normalized = f"{ocr_total_amount:.2f}元" + return self._build_slot_value( + value=normalized, + normalized_value=normalized, + source="ocr", + confidence=0.76, + evidence="金额来自 OCR 汇总结果,仍建议用户核对票据原文。", + ) + return self._build_slot_value() + + + def _build_expense_type_slot( + self, + payload: UserAgentRequest, + *, + entity_map: dict[str, str], + ocr_documents: list[dict[str, object]], + ) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + edited_value = str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip() + if edited_value: + normalized_code, normalized_label = self._normalize_expense_type_input(edited_value) + return self._build_slot_value( + value=normalized_label, + raw_value=edited_value, + normalized_value=normalized_code, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + expense_type_code = entity_map.get("expense_type_code", "") + expense_type_value = EXPENSE_TYPE_LABELS.get(expense_type_code, entity_map.get("expense_type", "")) + if expense_type_value: + return self._build_slot_value( + value=expense_type_value, + raw_value=expense_type_value, + 
normalized_value=expense_type_code, + source="user_text", + confidence=0.9, + evidence="系统根据用户描述中的业务场景判断费用类型。", + ) + + inferred_label = self._infer_expense_type_from_documents(payload, ocr_documents) if ocr_documents else "" + if inferred_label: + normalized_code, normalized_label = self._normalize_expense_type_input(inferred_label) + return self._build_slot_value( + value=normalized_label, + raw_value=inferred_label, + normalized_value=normalized_code, + source="ocr", + confidence=0.74, + evidence="系统根据票据内容推断费用类型,仍建议用户确认。", + ) + return self._build_slot_value() + + + def _build_merchant_slot( + self, + payload: UserAgentRequest, + *, + ocr_documents: list[dict[str, object]], + ) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + edited_value = str(review_form_values.get("merchant_name") or "").strip() + if edited_value: + return self._build_slot_value( + value=edited_value, + normalized_value=edited_value, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + merchant_value = "" + for document in ocr_documents: + if not self._is_hotel_document_item(document): + continue + merchant_value = self._extract_document_merchant_name(document) + if merchant_value: + break + if merchant_value: + return self._build_slot_value( + value=merchant_value, + normalized_value=merchant_value, + source="ocr", + confidence=0.72, + evidence="商户名称来自 OCR 票据识别结果,仍建议用户核对。", + ) + return self._build_slot_value() + + + def _build_attachment_slot(self, payload: UserAgentRequest) -> dict[str, str | float]: + review_form_values = self._resolve_review_form_values(payload) + attachment_names = str(review_form_values.get("attachment_names") or "").strip() + if attachment_names: + return self._build_slot_value( + value=attachment_names, + normalized_value=attachment_names, + source="user_form", + confidence=1.0, + evidence="来源于用户修改后的结构化表单。", + ) + + count = self._resolve_attachment_count(payload) + if count > 0: + names = 
self._resolve_attachment_names(payload) + value = "、".join(names) if names else f"{count} 份附件" + return self._build_slot_value( + value=value, + raw_value=value, + normalized_value=str(count), + source="upload", + confidence=1.0, + evidence="系统已接收到用户上传的附件。", + ) + return self._build_slot_value() + + + @staticmethod + def _normalize_amount_text(value: str) -> str: + cleaned = str(value or "").strip() + if not cleaned: + return "" + for alias, canonical in sorted(AMOUNT_UNIT_ALIASES.items(), key=lambda item: len(item[0]), reverse=True): + cleaned = cleaned.replace(alias, canonical) + match = AMOUNT_TEXT_PATTERN.search(cleaned) + if not match: + return cleaned + number = float(match.group(1)) + return f"{number:.2f}元" + + + @staticmethod + def _normalize_expense_type_input(value: str) -> tuple[str, str]: + compact = str(value or "").replace(" ", "") + if "招待" in compact or ("客户" in compact and any(keyword in compact for keyword in ("吃饭", "用餐", "宴请", "请客"))): + return "entertainment", "业务招待费" + if any(keyword in compact for keyword in ("差旅", "出差", "机票", "行程")): + return "travel", "差旅费" + if any(keyword in compact for keyword in ("住宿", "酒店", "宾馆")): + return "hotel", "住宿费" + if any(keyword in compact for keyword in ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车")): + return "transport", "交通费" + if any(keyword in compact for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")): + return "meal", "餐费" + if "会务" in compact: + return "meeting", "会务费" + if any(keyword in compact for keyword in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")): + return "office", "办公费" + if any(keyword in compact for keyword in ("培训费", "培训", "讲师费", "课时费", "课程费")): + return "training", "培训费" + if any(keyword in compact for keyword in ("通讯费", "话费", "流量费", "宽带费")): + return "communication", "通讯费" + if any(keyword in compact for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")): + return "welfare", "福利费" + return "other", str(value or "").strip() or "其他费用" + + + 
def _resolve_required_review_keys( + self, + payload: UserAgentRequest, + *, + primary_expense_type: str, + claim_groups: list[UserAgentReviewClaimGroup], + ) -> set[str]: + required = {"expense_type", "time_range", "amount", "reason", "attachments"} + scene_codes = { + str(item.group_code or "").strip() + for item in claim_groups + if str(item.group_code or "").strip() + } + if primary_expense_type: + scene_codes.add(primary_expense_type) + + for scene_code in scene_codes: + required.update(SCENE_REQUIRED_SLOT_KEYS.get(scene_code, set())) + + compact_message = re.sub(r"\s+", "", self._resolve_reason_source_text(payload) or payload.message) + if "entertainment" in scene_codes or ( + "客户" in compact_message and any(keyword in compact_message for keyword in ("招待", "吃饭", "用餐", "宴请", "请客")) + ): + required.update({"customer_name", "participants"}) + + return required + + + @staticmethod + def _infer_reason_from_claim_groups( + *, + claim_groups: list[UserAgentReviewClaimGroup], + ) -> str: + if len(claim_groups) == 1: + document_indexes = list(claim_groups[0].document_indexes or []) + if not document_indexes: + return "" + + expense_type = str(claim_groups[0].expense_type or "").strip() + group_code = str(claim_groups[0].group_code or "").strip() + if expense_type: + return INFERRED_REASON_LABELS.get(expense_type, "") or str(claim_groups[0].scene_label or "").strip() + if group_code: + return INFERRED_REASON_LABELS.get(group_code, "") or str(claim_groups[0].scene_label or "").strip() + return "" + + + @staticmethod + def _resolve_review_missing_slot_keys( + payload: UserAgentRequest, + *, + slot_cards: list[UserAgentReviewSlotCard], + ) -> list[str]: + required_keys = {item.key for item in slot_cards if item.required} + slot_map = {item.key: item for item in slot_cards} + missing_keys = { + item.key + for item in slot_cards + if item.required and (item.status == "missing" or not str(item.value).strip()) + } + for key in payload.ontology.missing_slots: + normalized_key 
= str(key or "").strip() + if ( + normalized_key + and normalized_key in required_keys + and ( + normalized_key not in slot_map + or slot_map[normalized_key].status == "missing" + or not str(slot_map[normalized_key].value).strip() + ) + ): + missing_keys.add(normalized_key) + + ordered_keys: list[str] = [] + for item in slot_cards: + if item.required and item.key in missing_keys and item.key not in ordered_keys: + ordered_keys.append(item.key) + return ordered_keys + + + def _make_slot_card( + self, + *, + key: str, + value: str, + raw_value: str, + normalized_value: str, + source: str, + confidence: float, + evidence: str, + required: bool = True, + ) -> UserAgentReviewSlotCard: + is_missing = required and not str(value).strip() + source_key = source if source in SOURCE_LABELS else "system" + return UserAgentReviewSlotCard( + key=key, + label=SLOT_LABELS.get(key, key), + value=str(value or "").strip(), + raw_value=str(raw_value or "").strip(), + normalized_value=str(normalized_value or "").strip(), + source=source, + source_label=SOURCE_LABELS.get(source_key, "系统判断"), + confidence=confidence, + required=required, + confirmed=not is_missing and source in {"user_text", "user_form"}, + status="missing" if is_missing else "identified" if source in {"user_text", "user_form"} else "inferred", + hint=f"建议补充 {SLOT_LABELS.get(key, key)}。" + if is_missing and required + else ("该字段来自系统辅助上下文,建议你再核对一次。" if source in {"detail_context", "ocr"} else ""), + evidence=evidence, + ) + diff --git a/server/src/app/services/user_agent_review_travel_policy.py b/server/src/app/services/user_agent_review_travel_policy.py new file mode 100644 index 0000000..7b7f353 --- /dev/null +++ b/server/src/app/services/user_agent_review_travel_policy.py @@ -0,0 +1,360 @@ +from __future__ import annotations + +import json +import re +from datetime import UTC, datetime, timedelta +from decimal import Decimal, InvalidOperation +from typing import Any + +from sqlalchemy import or_, select +from 
sqlalchemy.orm import selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetStatus, AgentAssetType +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim +from app.schemas.agent_asset import AgentAssetListItem +from app.schemas.reimbursement import TravelReimbursementCalculatorRequest +from app.schemas.user_agent import ( + UserAgentCitation, + UserAgentDraftPayload, + UserAgentExpenseQueryRecord, + UserAgentQueryPayload, + UserAgentQueryStatusGroup, + UserAgentReviewAction, + UserAgentReviewClaimGroup, + UserAgentReviewDocumentCard, + UserAgentReviewDocumentField, + UserAgentReviewEditField, + UserAgentReviewPayload, + UserAgentReviewRiskBrief, + UserAgentReviewSlotCard, + UserAgentRequest, + UserAgentSuggestedAction, +) +from app.services.agent_assets import AgentAssetService +from app.services.expense_claims import ExpenseClaimService +from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label +from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check +from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService +from app.services.user_agent_constants import * + + +class UserAgentReviewTravelPolicyMixin: + + def _build_travel_policy_precheck_briefs( + self, + payload: UserAgentRequest, + *, + document_cards: list[UserAgentReviewDocumentCard], + claim_groups: list[UserAgentReviewClaimGroup], + ) -> list[UserAgentReviewRiskBrief]: + if not document_cards or not self._is_travel_review_context(payload, document_cards, claim_groups): + return [] + + rule_catalog = ExpenseRuleRuntimeService(self.db).load_catalog() + policy = rule_catalog.travel_policy + if policy is None: + return [] + + employee = self._resolve_employee_profile(payload) + grade = self._resolve_review_employee_grade(payload, employee=employee) + grade_band = 
ExpenseClaimService._resolve_travel_policy_band(grade) + band_label = policy.band_labels.get(grade_band or "", grade or "当前职级") + declared_city = self._resolve_declared_travel_city(payload, policy) + reason_corpus = self._build_review_reason_corpus(payload) + has_exception_note = self._text_contains_any(reason_corpus, policy.standard_exception_keywords) + standard_rule_name = str(getattr(policy, "standard_rule_name", "") or policy.rule_name) + standard_rule_version = str(getattr(policy, "standard_rule_version", "") or policy.rule_version) + + briefs: list[UserAgentReviewRiskBrief] = [] + amount_measurement_lines: list[str] = [] + seen_keys: set[str] = set() + + def append_once(key: str, brief: UserAgentReviewRiskBrief) -> None: + if key in seen_keys: + return + seen_keys.add(key) + briefs.append(brief) + + for card in document_cards: + document_type = str(card.document_type or "").strip().lower() + suggested_type = str(card.suggested_expense_type or "").strip().lower() + card_text = self._build_review_document_card_text(card) + document_type_label = resolve_document_type_label(document_type) + amount = self._extract_amount_decimal_from_card(card) + + if self._is_review_hotel_card(card): + hotel_city = self._extract_policy_city_from_text(card_text, policy) or declared_city + city_tier = policy.city_tiers.get(hotel_city, "tier_3") + city_tier_label = self._format_travel_city_tier(city_tier) + + if amount is None: + amount_measurement_lines.append( + f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法完成住宿差标测算。" + ) + append_once( + f"hotel-amount-missing-{card.index}", + UserAgentReviewRiskBrief( + title="住宿金额待补充", + level="warning", + content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算的住宿金额。", + detail=( + f"依据《{standard_rule_name}》({standard_rule_version}),住宿票据需要按员工职级、城市级别和每晚金额进行差标核算。" + "当前票据缺少金额,系统无法判断是否超出差旅标准。" + ), + suggestion="请在票据识别结果中补充或更正住宿金额,再继续核对报销单。", + ), + ) + continue + + if grade_band is None: + amount_measurement_lines.append( + 
f"{card.filename}:识别住宿金额 {amount:.2f} 元,但缺少员工职级,无法匹配住宿标准。" + ) + append_once( + f"hotel-grade-missing-{card.index}", + UserAgentReviewRiskBrief( + title="职级信息待确认", + level="warning", + content=f"{card.filename} 已识别住宿金额 {amount:.2f} 元,但当前员工职级缺失,无法匹配住宿标准。", + detail=( + f"依据《{standard_rule_name}》({standard_rule_version}),住宿标准按职级档位和城市级别配置。" + "当前未能识别员工职级,因此无法完成创建前差标核算。" + ), + suggestion="请确认员工档案或页面上下文中的职级信息,再重新进行差旅规则预检。", + ), + ) + continue + + cap = self._resolve_review_hotel_cap( + policy, + grade_band=grade_band, + city=hotel_city, + city_tier=city_tier, + ) + if cap <= Decimal("0.00"): + continue + night_count = self._extract_review_hotel_night_count(card) + nightly_amount = (amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01")) + amount_measurement_lines.append( + f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元," + f"按 {night_count} 晚折算 {nightly_amount:.2f} 元/晚;" + f"适用标准为 {band_label}{city_tier_label} {cap:.2f} 元/晚," + f"{'超出标准' if nightly_amount > cap else '测算通过'}。" + ) + if nightly_amount <= cap: + continue + + basis = ( + f"依据《{standard_rule_name}》({standard_rule_version}),{band_label} 在{city_tier_label}" + f"住宿标准为 {cap:.2f} 元/晚;{card.filename} 识别为{document_type_label}," + f"金额 {amount:.2f} 元,按 {night_count} 晚折算约 {nightly_amount:.2f} 元/晚。" + ) + append_once( + f"hotel-over-limit-{card.index}", + UserAgentReviewRiskBrief( + title="住宿超标待说明" if not has_exception_note else "住宿超标提醒", + level="high", + content=( + f"{card.filename} 住宿金额约 {nightly_amount:.2f} 元/晚," + f"超过 {band_label} {city_tier_label}标准 {cap:.2f} 元/晚。" + ), + detail=( + basis + + ( + "当前未识别到超标说明,创建单据前需要先补充原因。" + if not has_exception_note + else "当前已识别到例外说明,后续仍需审批人重点复核。" + ) + ), + suggestion="补充超标说明、协议酒店满房/会议高峰等原因,或调整住宿金额后再继续。", + ), + ) + continue + + if document_type == "meal_receipt": + allowance = self._resolve_review_travel_allowance_standard( + policy, + declared_city=declared_city, + card_text=card_text, + ) + if allowance is not None: + region_label, 
standard_amount = allowance + if amount is None: + amount_measurement_lines.append( + f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法按{region_label}伙食补助标准测算。" + ) + append_once( + f"travel-meal-amount-missing-{card.index}", + UserAgentReviewRiskBrief( + title="差旅餐饮金额待补充", + level="high", + content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算金额。", + detail=( + f"依据《{standard_rule_name}》({standard_rule_version}),差旅餐饮票据优先按出差补助标准中的伙食补助进行测算。" + f"当前匹配区域为{region_label},但票据缺少金额,系统无法判断是否超出补助标准。" + ), + suggestion="请在票据识别结果中补充或更正餐饮金额,再继续创建报销单。", + ), + ) + continue + + amount_measurement_lines.append( + f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;" + f"适用《{standard_rule_name}》{region_label}伙食补助标准 {standard_amount:.2f} 元/天," + f"{'超出标准' if amount > standard_amount else '测算通过'}。" + ) + if amount > standard_amount: + append_once( + f"travel-meal-allowance-over-limit-{card.index}", + UserAgentReviewRiskBrief( + title="差旅餐饮金额超出伙食补助标准", + level="high", + content=( + f"{card.filename} 识别金额 {amount:.2f} 元," + f"超过{region_label}伙食补助标准 {standard_amount:.2f} 元/天。" + ), + detail=( + f"依据《{standard_rule_name}》({standard_rule_version})的出差补助标准," + f"{region_label}伙食补助为 {standard_amount:.2f} 元/天;" + f"当前票据类型识别为{document_type_label},识别金额 {amount:.2f} 元。" + "首轮上传阶段按单张票据先行测算,后续可结合出差天数和实际餐补口径复核。" + ), + suggestion="如该票据属于差旅餐补,请调整金额或补充超标/拆分说明;如属于业务招待或普通餐费,请改为对应费用类型后再提交。", + ), + ) + continue + + scene_code = self._resolve_review_amount_scene_code(card, payload) + scene_policy = rule_catalog.get_scene_policy(scene_code) + scene_limit = self._resolve_review_scene_amount_limit(scene_policy) + if scene_policy is not None and scene_limit is not None: + metric_label = str(getattr(scene_limit, "metric_label", "") or scene_policy.label or "金额").strip() + standard_amount = self._resolve_scene_standard_amount(scene_limit) + if amount is None: + amount_measurement_lines.append( + f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法按{metric_label}测算。" + ) + 
append_once( + f"{scene_code}-amount-missing-{card.index}", + UserAgentReviewRiskBrief( + title=f"{scene_policy.label}金额待补充", + level="warning", + content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算金额。", + detail=( + f"依据《{scene_policy.rule_name}》({scene_policy.rule_version})," + f"{scene_policy.label}需要按{metric_label}进行金额审核。当前票据缺少金额,系统无法判断是否合规。" + ), + suggestion="请在票据识别结果中补充或更正金额,再继续核对报销单。", + ), + ) + continue + + if standard_amount is not None: + amount_measurement_lines.append( + f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;" + f"适用《{scene_policy.rule_name}》{metric_label}标准 {standard_amount:.2f} 元," + f"{'超出标准' if amount > standard_amount else '测算通过'}。" + ) + + amount_risk = self._evaluate_review_scene_amount( + amount=amount, + limit_config=scene_limit, + reason_text=reason_corpus, + ) + if amount_risk is not None: + severity, threshold = amount_risk + append_once( + f"{scene_code}-amount-over-limit-{card.index}", + UserAgentReviewRiskBrief( + title=f"{scene_policy.label}金额超标待说明", + level="high" if severity == "high" else "warning", + content=( + f"{card.filename} 识别金额 {amount:.2f} 元," + f"超过{metric_label}标准 {threshold:.2f} 元。" + ), + detail=( + f"依据《{scene_policy.rule_name}》({scene_policy.rule_version})," + f"{scene_policy.label}按{metric_label}审核,当前票据类型识别为{document_type_label}," + f"识别金额 {amount:.2f} 元,标准阈值 {threshold:.2f} 元。" + ), + suggestion="请补充超标原因或拆分到更准确的费用类型;如属于例外场景,请在事由中写明业务背景。", + ), + ) + continue + + transport_class = self._detect_review_transport_class(card, policy) + if transport_class and grade_band is not None: + transport_kind, class_label, class_level = transport_class + allowed_level = policy.transport_limits.get(grade_band, {}).get(transport_kind) + if allowed_level is not None and class_level > allowed_level: + append_once( + f"transport-class-over-limit-{card.index}-{class_label}", + UserAgentReviewRiskBrief( + title="交通舱位超标待说明" if not has_exception_note else "交通舱位超标提醒", + level="warning", + 
content=f"{card.filename} 识别为 {class_label},{band_label} 当前默认不可报销该舱位/席别。", + detail=( + f"依据《{standard_rule_name}》({standard_rule_version}),{band_label} 的交通席别标准" + f"未覆盖 {class_label};票据类型识别为{document_type_label}。" + + ( + "当前未识别到例外说明,创建单据前需要补充原因。" + if not has_exception_note + else "当前已识别到例外说明,后续仍需审批人重点复核。" + ) + ), + suggestion="补充无直达、临时改签、行程变更等例外说明,或更换为符合标准的票据。", + ), + ) + continue + + if document_type == "meal_receipt" and self._is_travel_review_context(payload, document_cards, claim_groups): + if amount is not None: + amount_measurement_lines.append( + f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;需确认按餐补、餐费或业务招待口径归口。" + ) + append_once( + f"travel-meal-card-{card.index}", + UserAgentReviewRiskBrief( + title="差旅餐饮票据待归口", + level="warning", + content=f"{card.filename} 已识别为餐饮票据,当前差旅报销单需要确认是否允许并入差旅费用。", + detail=( + f"依据《{standard_rule_name}》({standard_rule_version})的差旅票据预检口径,系统优先核算交通、住宿等差旅核心票据。" + "餐饮票据可能需要按餐费或业务招待场景拆分,并补充同行人员或客户信息。" + ), + suggestion="如属于差旅餐补,请补充制度允许口径;如属于招待或普通餐费,建议拆成对应费用类型单据。", + ), + ) + continue + + if suggested_type in {"travel", "hotel", "transport"} and document_type in {"other", "travel_ticket"}: + append_once( + f"travel-type-uncertain-{card.index}", + UserAgentReviewRiskBrief( + title="差旅票据类型待确认", + level="warning", + content=f"{card.filename} 归入差旅场景,但票据类型仍需确认。", + detail=( + f"依据《{standard_rule_name}》({standard_rule_version}),差旅预检需要先明确票据是机票、火车票、住宿票据、打车票等," + "再匹配对应的金额或舱位规则。当前类型识别不够稳定。" + ), + suggestion="请在附件识别结果中更正票据类型,或重新上传更清晰的附件后再继续。", + ), + ) + + if amount_measurement_lines: + briefs.insert( + 0, + UserAgentReviewRiskBrief( + title="附件金额测算结果", + level="info", + content="系统已根据首轮上传附件识别金额,并匹配当前可执行的报销标准进行测算。", + detail=";".join(dict.fromkeys(amount_measurement_lines)), + suggestion="如测算结果超标,请补充超标说明、调整金额或更正票据类型后再继续。", + ), + ) + + return briefs + diff --git a/server/src/app/services/user_agent_review_travel_receipts.py b/server/src/app/services/user_agent_review_travel_receipts.py new file mode 100644 index 
0000000..219f13f --- /dev/null +++ b/server/src/app/services/user_agent_review_travel_receipts.py @@ -0,0 +1,625 @@ +from __future__ import annotations + +import json +import re +from datetime import UTC, datetime, timedelta +from decimal import Decimal, InvalidOperation +from typing import Any + +from sqlalchemy import or_, select +from sqlalchemy.orm import selectinload + +from app.api.deps import CurrentUserContext +from app.core.agent_enums import AgentAssetStatus, AgentAssetType +from app.models.employee import Employee +from app.models.financial_record import ExpenseClaim +from app.schemas.agent_asset import AgentAssetListItem +from app.schemas.reimbursement import TravelReimbursementCalculatorRequest +from app.schemas.user_agent import ( + UserAgentCitation, + UserAgentDraftPayload, + UserAgentExpenseQueryRecord, + UserAgentQueryPayload, + UserAgentQueryStatusGroup, + UserAgentReviewAction, + UserAgentReviewClaimGroup, + UserAgentReviewDocumentCard, + UserAgentReviewDocumentField, + UserAgentReviewEditField, + UserAgentReviewPayload, + UserAgentReviewRiskBrief, + UserAgentReviewSlotCard, + UserAgentRequest, + UserAgentSuggestedAction, +) +from app.services.agent_assets import AgentAssetService +from app.services.expense_claims import ExpenseClaimService +from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label +from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check +from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService +from app.services.user_agent_constants import * + + +class UserAgentReviewTravelReceiptMixin: + + def _is_travel_review_context( + self, + payload: UserAgentRequest, + document_cards: list[UserAgentReviewDocumentCard], + claim_groups: list[UserAgentReviewClaimGroup], + ) -> bool: + entity_expense_type = self._collect_entity_values(payload).get("expense_type_code", "") + review_form_values = 
self._resolve_review_form_values(payload) + form_expense_type = str(review_form_values.get("expense_type") or "").strip() + message_context = " ".join( + [ + str(payload.message or ""), + str(payload.context_json.get("user_input_text") or ""), + str(payload.context_json.get("expense_type") or ""), + form_expense_type, + ] + ) + if entity_expense_type in {"travel", "hotel", "transport"}: + return True + if any(group.group_code == "travel" or group.expense_type in {"travel", "hotel", "transport"} for group in claim_groups): + return True + if any(card.suggested_expense_type in {"travel", "hotel", "transport"} for card in document_cards): + return True + return any(keyword in message_context for keyword in ("差旅", "出差", "机票", "火车", "高铁", "酒店", "住宿")) + + + def _build_travel_receipt_state( + self, + payload: UserAgentRequest, + *, + document_cards: list[UserAgentReviewDocumentCard], + claim_groups: list[UserAgentReviewClaimGroup], + ) -> dict[str, Any]: + empty_state: dict[str, Any] = { + "is_travel_context": False, + "has_long_distance_ticket": False, + "ticket_type_label": "", + "ticket_amount": Decimal("0.00"), + "destination": "", + "days": 1, + "has_hotel_invoice": False, + "has_local_transport": False, + "required_missing_labels": [], + "optional_missing_labels": [], + "blocks_next_step": False, + } + if not document_cards or not self._is_travel_review_context(payload, document_cards, claim_groups): + return empty_state + + long_distance_cards = [card for card in document_cards if self._is_long_distance_travel_card(card)] + if not long_distance_cards: + return { + **empty_state, + "is_travel_context": True, + } + + has_hotel_invoice = any(self._is_review_hotel_card(card) for card in document_cards) + has_local_transport = any(self._is_local_transport_receipt_card(card) for card in document_cards) + required_missing_labels = [] if has_hotel_invoice else ["酒店的报销票据待上传(必须)"] + optional_missing_labels = [] if has_local_transport else ["市内交通/乘车票据可继续上传(非必须)"] + 
ticket_amount = sum( + (self._extract_amount_decimal_from_card(card) or Decimal("0.00")) + for card in long_distance_cards + ).quantize(Decimal("0.01")) + + return { + **empty_state, + "is_travel_context": True, + "has_long_distance_ticket": True, + "ticket_type_label": self._resolve_travel_ticket_type_label(long_distance_cards), + "ticket_amount": ticket_amount, + "destination": self._resolve_travel_receipt_destination(payload, long_distance_cards), + "days": self._resolve_travel_receipt_days(payload, long_distance_cards), + "has_hotel_invoice": has_hotel_invoice, + "has_local_transport": has_local_transport, + "required_missing_labels": required_missing_labels, + "optional_missing_labels": optional_missing_labels, + "blocks_next_step": bool(required_missing_labels), + } + + + @staticmethod + def _is_long_distance_travel_card(card: UserAgentReviewDocumentCard) -> bool: + document_type = str(card.document_type or "").strip().lower() + return document_type in {"train_ticket", "flight_itinerary"} + + + @staticmethod + def _is_local_transport_receipt_card(card: UserAgentReviewDocumentCard) -> bool: + document_type = str(card.document_type or "").strip().lower() + suggested_type = str(card.suggested_expense_type or "").strip().lower() + return document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"} or ( + suggested_type == "transport" and document_type not in {"train_ticket", "flight_itinerary"} + ) + + + @staticmethod + def _resolve_travel_ticket_type_label(cards: list[UserAgentReviewDocumentCard]) -> str: + labels: list[str] = [] + for card in cards: + document_type = str(card.document_type or "").strip().lower() + if document_type == "train_ticket" and "火车" not in labels: + labels.append("火车") + if document_type == "flight_itinerary" and "飞机" not in labels: + labels.append("飞机") + return "/".join(labels) if labels else "交通" + + + def _resolve_travel_receipt_destination( + self, + payload: UserAgentRequest, + long_distance_cards: 
list[UserAgentReviewDocumentCard], + ) -> str: + for card in long_distance_cards: + for field in card.fields: + if str(field.label or "").strip() not in {"行程", "路线"}: + continue + destination = self._extract_travel_destination_from_route(field.value) + if destination: + return self._normalize_travel_destination(destination) + + card_text = self._build_review_document_card_text(card) + route_match = TRAVEL_ROUTE_PATTERN.search(card_text) + if route_match: + return self._normalize_travel_destination(route_match.group(2)) + + location = self._resolve_location_value(payload) + if location: + return self._normalize_travel_destination(location) + return "" + + + @staticmethod + def _extract_travel_destination_from_route(value: str) -> str: + route_text = str(value or "").strip() + if not route_text: + return "" + route_match = TRAVEL_ROUTE_PATTERN.search(route_text) + if route_match: + return route_match.group(2).strip() + parts = [ + item.strip() + for item in re.split(r"\s*(?:至|到|→|->|-|—|~|~)\s*", route_text) + if item.strip() + ] + return parts[-1] if len(parts) >= 2 else "" + + + def _normalize_travel_destination(self, value: str) -> str: + candidate = re.sub( + r"(?:火车站|高铁站|动车站|车站|站|机场|航站楼)$", + "", + str(value or "").strip(), + ) + if not candidate: + return "" + try: + policy = ExpenseRuleRuntimeService(self.db).load_catalog().travel_policy + except Exception: + policy = None + if policy is not None: + policy_city = self._extract_policy_city_from_text(candidate, policy) + if policy_city: + return policy_city + return candidate + + + def _resolve_travel_receipt_days( + self, + payload: UserAgentRequest, + long_distance_cards: list[UserAgentReviewDocumentCard], + ) -> int: + dates: list[datetime] = [] + for card in long_distance_cards: + card_text = self._build_review_document_card_text(card) + dates.extend(self._extract_dates_from_text(card_text)) + + if dates: + return max(1, (max(dates).date() - min(dates).date()).days + 1) + + start_date = 
self._parse_date_text(payload.ontology.time_range.start_date or "") + end_date = self._parse_date_text(payload.ontology.time_range.end_date or "") + if start_date and end_date: + return max(1, (end_date.date() - start_date.date()).days + 1) + return 1 + + + @staticmethod + def _extract_dates_from_text(text: str) -> list[datetime]: + dates: list[datetime] = [] + for match in DATE_TEXT_PATTERN.finditer(str(text or "")): + parsed = UserAgentReviewTravelReceiptMixin._parse_date_text(match.group(1)) + if parsed is not None: + dates.append(parsed) + return dates + + + @staticmethod + def _parse_date_text(value: str) -> datetime | None: + raw_value = str(value or "").strip() + if not raw_value: + return None + normalized = ( + raw_value.replace("年", "-") + .replace("月", "-") + .replace("/", "-") + .replace("日", "") + .strip() + ) + parts = [part for part in normalized.split("-") if part] + if len(parts) != 3: + return None + try: + year, month, day = (int(part) for part in parts) + return datetime(year, month, day) + except ValueError: + return None + + + def _build_travel_receipt_briefs( + self, + travel_receipt_state: dict[str, Any], + ) -> list[UserAgentReviewRiskBrief]: + if not travel_receipt_state.get("has_long_distance_ticket"): + return [] + + required_labels = [ + str(item).strip() + for item in travel_receipt_state.get("required_missing_labels", []) + if str(item).strip() + ] + optional_labels = [ + str(item).strip() + for item in travel_receipt_state.get("optional_missing_labels", []) + if str(item).strip() + ] + if not required_labels and not optional_labels: + return [] + + content_parts = [*required_labels, *optional_labels] + required_text = ";".join(required_labels) + optional_text = ";".join(optional_labels) + return [ + UserAgentReviewRiskBrief( + title="差旅票据待补充", + level="warning" if required_labels else "info", + content=";".join(content_parts), + detail=( + "系统已识别到长途交通票据,会按差旅报销口径核对住宿、交通等票据完整性。" + + (f"当前必须补充:{required_text}。" if required_text else "") 
+ + (f"当前还可以补充:{optional_text}。" if optional_text else "") + ), + suggestion=( + "请先补充酒店住宿发票或住宿清单;在补齐前只能保存为草稿。" + if required_labels + else "如还有市内交通、打车、地铁或停车等乘车票据,可以继续上传;没有也可以进入下一步或保存草稿。" + ), + ) + ] + + + def _resolve_review_travel_allowance_standard( + self, + policy: RuntimeTravelPolicy, + *, + declared_city: str, + card_text: str, + ) -> tuple[str, Decimal] | None: + meal_limits = getattr(policy, "allowance_limits", {}).get("meal", {}) + if not meal_limits: + return None + + region_label = self._resolve_review_travel_allowance_region( + " ".join([declared_city or "", card_text or ""]) + ) + amount = meal_limits.get(region_label) + if amount is None and region_label != "其他地区": + amount = meal_limits.get("其他地区") + region_label = "其他地区" + if amount is None: + return None + return region_label, Decimal(amount).quantize(Decimal("0.01")) + + + @staticmethod + def _resolve_review_travel_allowance_region(text: str) -> str: + normalized = re.sub(r"\s+", "", str(text or "")) + if not normalized: + return "其他地区" + if any(keyword in normalized for keyword in ("境外", "国外", "海外")): + return "国外" + if any(keyword in normalized for keyword in ("香港", "澳门", "台湾", "港澳台")): + return "港澳台" + if "乌鲁木齐" in normalized: + return "新疆-乌鲁木齐" + if "新疆" in normalized: + return "新疆-其他" + if any(keyword in normalized for keyword in ("西藏", "拉萨")): + return "西藏" + if any(keyword in normalized for keyword in ("北京", "上海", "天津", "重庆", "深圳", "珠海", "汕头", "厦门")): + return "直辖市/特区" + return "其他地区" + + + def _resolve_review_amount_scene_code( + self, + card: UserAgentReviewDocumentCard, + payload: UserAgentRequest, + ) -> str: + document_type = str(card.document_type or "").strip().lower() + suggested_type = str(card.suggested_expense_type or "").strip().lower() + if document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"}: + return "transport" + if document_type == "meal_receipt": + entity_values = self._collect_entity_values(payload) + if suggested_type == "entertainment" or 
entity_values.get("expense_type_code") == "entertainment": + return "entertainment" + return "meal" + if document_type == "hotel_invoice" or suggested_type == "hotel": + return "hotel" + if suggested_type in { + "travel", + "transport", + "meal", + "entertainment", + "office", + "meeting", + "training", + "communication", + "welfare", + "other", + }: + return suggested_type + return self._collect_entity_values(payload).get("expense_type_code") or "other" + + + @staticmethod + def _resolve_review_scene_amount_limit(scene_policy: Any | None) -> Any | None: + if scene_policy is None: + return None + return getattr(scene_policy, "item_amount_limit", None) or getattr(scene_policy, "claim_amount_limit", None) + + + @staticmethod + def _resolve_scene_standard_amount(limit_config: Any | None) -> Decimal | None: + if limit_config is None: + return None + warn_amount = getattr(limit_config, "warn_amount", None) + block_amount = getattr(limit_config, "block_amount", None) + amount = warn_amount if warn_amount is not None else block_amount + if amount is None: + return None + try: + return Decimal(amount).quantize(Decimal("0.01")) + except (InvalidOperation, ValueError): + return None + + + @staticmethod + def _evaluate_review_scene_amount( + *, + amount: Decimal, + limit_config: Any, + reason_text: str, + ) -> tuple[str, Decimal] | None: + block_amount = getattr(limit_config, "block_amount", None) + warn_amount = getattr(limit_config, "warn_amount", None) + exception_keywords = list(getattr(limit_config, "exception_keywords", []) or []) + has_exception = UserAgentReviewTravelReceiptMixin._text_contains_any(reason_text, exception_keywords) + + if block_amount is not None and amount > Decimal(block_amount): + return ("high", Decimal(block_amount).quantize(Decimal("0.01"))) + if warn_amount is not None and amount > Decimal(warn_amount): + return ("high", Decimal(warn_amount).quantize(Decimal("0.01"))) + return None + + + def _resolve_review_employee_grade(self, payload: 
UserAgentRequest, *, employee: Employee | None) -> str: + if employee is not None and employee.grade: + return str(employee.grade).strip() + review_form_values = self._resolve_review_form_values(payload) + for source in ( + review_form_values, + payload.context_json, + payload.tool_payload, + ): + for key in ("employee_grade", "grade", "user_grade", "position_grade"): + value = str(source.get(key) or "").strip() if isinstance(source, dict) else "" + if value: + return value + return "" + + + def _build_review_reason_corpus(self, payload: UserAgentRequest) -> str: + review_form_values = self._resolve_review_form_values(payload) + parts = [ + str(payload.message or ""), + str(payload.context_json.get("user_input_text") or ""), + str(review_form_values.get("reason") or ""), + str(review_form_values.get("business_reason") or ""), + str(review_form_values.get("location") or ""), + str(review_form_values.get("business_location") or ""), + ] + return "\n".join(part.strip() for part in parts if part and part.strip()) + + + def _resolve_declared_travel_city(self, payload: UserAgentRequest, policy: RuntimeTravelPolicy) -> str: + review_form_values = self._resolve_review_form_values(payload) + candidates = [ + str(review_form_values.get("business_location") or ""), + str(review_form_values.get("location") or ""), + self._resolve_location_value(payload), + str(payload.message or ""), + ] + for candidate in candidates: + city = self._extract_policy_city_from_text(candidate, policy) + if city: + return city + return "" + + + @staticmethod + def _build_review_document_card_text(card: UserAgentReviewDocumentCard) -> str: + field_text = " ".join(f"{field.label}:{field.value}" for field in card.fields) + return " ".join( + [ + str(card.filename or ""), + str(card.document_type or ""), + str(card.scene_label or ""), + str(card.summary or ""), + field_text, + ] + ).strip() + + + @staticmethod + def _is_review_hotel_card(card: UserAgentReviewDocumentCard) -> bool: + document_type = 
str(card.document_type or "").strip().lower() + suggested_type = str(card.suggested_expense_type or "").strip().lower() + scene_label = str(card.scene_label or "").strip() + return document_type == "hotel_invoice" or suggested_type == "hotel" or "住宿" in scene_label + + + @staticmethod + def _extract_amount_decimal_from_card(card: UserAgentReviewDocumentCard) -> Decimal | None: + for field in card.fields: + if field.label != "金额": + continue + normalized = str(field.value or "").replace("元", "").replace("¥", "").replace("¥", "").replace(",", "").strip() + try: + amount = Decimal(normalized).quantize(Decimal("0.01")) + except (InvalidOperation, ValueError): + continue + if amount > Decimal("0.00"): + return amount + return None + + + @staticmethod + def _extract_review_hotel_night_count(card: UserAgentReviewDocumentCard) -> int: + text = f"{card.summary or ''} {' '.join(f'{field.label}:{field.value}' for field in card.fields)}" + match = TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN.search(text) + if not match: + return 1 + try: + return max(1, int(match.group(1))) + except (TypeError, ValueError): + return 1 + + + @staticmethod + def _extract_policy_city_from_text(text: str, policy: RuntimeTravelPolicy) -> str: + normalized = str(text or "").strip() + if not normalized: + return "" + city_names = set(policy.city_tiers.keys()) + city_names.update(getattr(policy, "hotel_city_limits", {}).keys()) + for city in sorted(city_names, key=lambda item: len(item), reverse=True): + if city in normalized: + return city + return "" + + + @staticmethod + def _format_travel_city_tier(city_tier: str) -> str: + return { + "tier_1": "一线城市", + "tier_2": "重点城市", + "tier_3": "其他城市", + }.get(str(city_tier or "").strip(), "当前城市") + + + @staticmethod + def _resolve_review_hotel_cap( + policy: RuntimeTravelPolicy, + *, + grade_band: str, + city: str, + city_tier: str, + ) -> Decimal: + normalized_city = str(city or "").strip() + if normalized_city and getattr(policy, "hotel_city_limits", None): + 
city_limits = policy.hotel_city_limits.get(normalized_city, {}) + city_cap = city_limits.get(grade_band) + if city_cap is not None: + return Decimal(city_cap).quantize(Decimal("0.01")) + return Decimal(policy.hotel_limits.get(grade_band, {}).get(city_tier, Decimal("0.00"))).quantize( + Decimal("0.01") + ) + + + def _detect_review_transport_class( + self, + card: UserAgentReviewDocumentCard, + policy: RuntimeTravelPolicy, + ) -> tuple[str, str, int] | None: + document_type = str(card.document_type or "").strip().lower() + text = re.sub(r"\s+", "", self._build_review_document_card_text(card)) + if not text: + return None + + if document_type == "flight_itinerary" or any(keyword in text for keyword in ("机票", "航班", "登机牌")): + for config in policy.flight_classes: + label = str(config.keyword or "").strip() + if label and label in text: + return "flight", label, int(config.level) + + if document_type == "train_ticket" or any(keyword in text for keyword in ("火车", "高铁", "动车", "铁路")): + for config in policy.train_classes: + label = str(config.keyword or "").strip() + if label and label in text: + return "train", label, int(config.level) + return None + + + @staticmethod + def _text_contains_any(text: str, keywords: list[str] | tuple[str, ...]) -> bool: + compact = re.sub(r"\s+", "", str(text or "")) + return bool(compact) and any(str(keyword or "").strip() and str(keyword).strip() in compact for keyword in keywords) + + + @staticmethod + def _resolve_submission_blocked_reasons(payload: UserAgentRequest) -> list[str]: + raw_reasons = payload.tool_payload.get("submission_blocked_reasons") + submission_blocked = bool(payload.tool_payload.get("submission_blocked")) + if raw_reasons is None and submission_blocked: + raw_reasons = payload.tool_payload.get("missing_fields") + if raw_reasons is None and not submission_blocked: + return [] + + reasons: list[str] = [] + if isinstance(raw_reasons, list): + reasons.extend(str(item or "").strip() for item in raw_reasons) + elif 
isinstance(raw_reasons, str): + reasons.extend( + item.strip() + for item in re.split(r"[;;\n]+", raw_reasons) + if item.strip() + ) + + if not reasons and submission_blocked: + message = str(payload.tool_payload.get("message") or "").strip() + for prefix in ( + "提交前请先补全信息:", + "AI预审暂未通过,原因如下:", + "AI预审未通过,原因如下:", + "AI预审暂未通过:", + "AI预审未通过:", + ): + if message.startswith(prefix): + message = message[len(prefix):].strip() + break + if message: + reasons.extend( + item.strip() + for item in re.split(r"[;;\n]+", message) + if item.strip() and not item.strip().startswith("AI预审暂未通过") + ) + + return list(dict.fromkeys(reason for reason in reasons if reason)) + diff --git a/server/tests/test_agent_foundation_endpoints.py b/server/tests/test_agent_foundation_endpoints.py index 79e9e3b..4834cb6 100644 --- a/server/tests/test_agent_foundation_endpoints.py +++ b/server/tests/test_agent_foundation_endpoints.py @@ -80,7 +80,11 @@ def test_activate_pending_rule_endpoint_is_blocked() -> None: response = client.post( f"/api/v1/agent-assets/{pending_rule.id}/activate", - headers={"x-actor": "pytest"}, + headers={ + "x-actor": "pytest", + "x-auth-username": "pytest", + "x-auth-role-codes": "manager", + }, ) assert response.status_code == 400 diff --git a/server/tests/test_expense_claim_service.py b/server/tests/test_expense_claim_service.py index 2bda007..dd32022 100644 --- a/server/tests/test_expense_claim_service.py +++ b/server/tests/test_expense_claim_service.py @@ -17,6 +17,7 @@ from app.schemas.ontology import OntologyParseRequest from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead from app.schemas.reimbursement import ExpenseClaimItemCreate, ExpenseClaimItemUpdate, ExpenseClaimUpdate from app.services.agent_conversations import AgentConversationService +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage from app.services.expense_claims import ExpenseClaimService from app.services.ontology import 
SemanticOntologyService from app.services.ocr import OcrService @@ -1200,7 +1201,7 @@ def test_update_claim_item_reanalyzes_existing_attachment(monkeypatch, tmp_path) ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: claim = build_claim(expense_type="office", location="深圳南山") @@ -1296,7 +1297,7 @@ def test_upload_train_ticket_attachment_backfills_item_amount(monkeypatch, tmp_p ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: claim = build_claim(expense_type="travel", location="北京") @@ -1390,7 +1391,7 @@ def test_upload_hotel_attachment_audits_date_like_amount(monkeypatch, tmp_path) ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: claim = build_claim(expense_type="hotel", location="北京") @@ -1469,7 +1470,7 @@ def test_upload_hotel_attachment_flags_amount_over_travel_policy(monkeypatch, tm ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: employee = Employee( @@ -1568,10 +1569,14 @@ def test_attachment_risk_flag_message_uses_specific_points(monkeypatch, tmp_path file_path = tmp_path / "invoice.png" file_path.write_bytes(b"fake") service = ExpenseClaimService(db) - 
monkeypatch.setattr(service, "_resolve_attachment_path", lambda storage_key: file_path) monkeypatch.setattr( - service, - "_read_attachment_meta", + ExpenseClaimAttachmentStorage, + "resolve_path", + lambda self, storage_key: file_path, + ) + monkeypatch.setattr( + service._attachment_storage, + "read_meta", lambda path: { "analysis": { "severity": "medium", @@ -1635,7 +1640,7 @@ def test_upload_ride_receipt_backfills_item_reason_from_addresses(monkeypatch, t ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: claim = build_claim(expense_type="transport", location="深圳") @@ -1696,7 +1701,7 @@ def test_delete_claim_item_removes_row_and_attachment_files(monkeypatch, tmp_pat ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: claim = build_claim(expense_type="office", location="深圳南山") @@ -1743,7 +1748,7 @@ def test_delete_claim_removes_all_claim_attachment_files(monkeypatch, tmp_path) role_codes=[], is_admin=False, ) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: claim = build_claim(expense_type="office", location="深圳南山") @@ -1785,7 +1790,7 @@ def test_attachment_preview_resolves_legacy_filename_in_claim_item_directory(mon is_admin=False, ) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: claim = 
build_claim(expense_type="transport", location="上海") @@ -1964,7 +1969,7 @@ def test_submit_claim_routes_high_risk_attachment_to_approval_with_review_flag( ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: manager = Employee( @@ -2077,7 +2082,7 @@ def test_submit_claim_routes_travel_route_mismatch_to_approval_with_review_flag( ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: manager = Employee( @@ -2228,7 +2233,7 @@ def test_submit_claim_routes_hotel_amount_over_travel_policy_to_approval_with_re ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) with build_session() as db: manager = Employee( diff --git a/server/tests/test_orchestrator_review_flow.py b/server/tests/test_orchestrator_review_flow.py index 53e1c88..5aefd3e 100644 --- a/server/tests/test_orchestrator_review_flow.py +++ b/server/tests/test_orchestrator_review_flow.py @@ -3,6 +3,7 @@ from __future__ import annotations from datetime import UTC, date, datetime from decimal import Decimal +import pytest from sqlalchemy import create_engine from sqlalchemy.orm import Session, sessionmaker from sqlalchemy.pool import StaticPool @@ -25,6 +26,14 @@ def build_session_factory() -> sessionmaker[Session]: return sessionmaker(bind=engine, autoflush=False, autocommit=False) +@pytest.fixture(autouse=True) +def skip_agent_foundation_bootstrap(monkeypatch: 
pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "app.services.agent_foundation.AgentFoundationService.ensure_foundation_ready", + lambda *_args, **_kwargs: None, + ) + + def test_review_next_step_run_submits_existing_claim_and_returns_draft_payload( monkeypatch, ) -> None: diff --git a/server/tests/test_reimbursement_endpoints.py b/server/tests/test_reimbursement_endpoints.py index af5620e..3679630 100644 --- a/server/tests/test_reimbursement_endpoints.py +++ b/server/tests/test_reimbursement_endpoints.py @@ -17,6 +17,7 @@ from app.models.employee import Employee from app.models.financial_record import ExpenseClaim, ExpenseClaimItem from app.models.role import Role from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead +from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage from app.services.expense_claims import ExpenseClaimService from app.services.ocr import OcrService @@ -134,7 +135,7 @@ def test_claim_item_attachment_upload_preview_and_delete(monkeypatch, tmp_path) ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) client, session_factory = build_client() with session_factory() as db: @@ -227,7 +228,7 @@ def test_claim_item_attachment_upload_flags_purpose_and_amount_mismatch(monkeypa ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) client, session_factory = build_client() with session_factory() as db: @@ -273,7 +274,7 @@ def test_claim_item_attachment_upload_flags_non_invoice_image_as_high_risk(monke ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - 
monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) client, session_factory = build_client() with session_factory() as db: @@ -395,7 +396,7 @@ def test_claim_item_pdf_attachment_preview_returns_generated_image(monkeypatch, ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) client, session_factory = build_client() with session_factory() as db: @@ -447,7 +448,7 @@ def test_claim_item_delete_removes_item_and_attachment(monkeypatch, tmp_path) -> ) monkeypatch.setattr(OcrService, "recognize_files", fake_recognize) - monkeypatch.setattr(ExpenseClaimService, "_get_attachment_storage_root", lambda self: tmp_path) + monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path) client, session_factory = build_client() with session_factory() as db: diff --git a/server/tests/test_user_agent_service.py b/server/tests/test_user_agent_service.py index e8a3399..17b6e5c 100644 --- a/server/tests/test_user_agent_service.py +++ b/server/tests/test_user_agent_service.py @@ -16,6 +16,7 @@ from app.schemas.user_agent import UserAgentCitation, UserAgentRequest, UserAgen from app.services.agent_assets import AgentAssetService from app.services.ontology import SemanticOntologyService from app.services.user_agent import UserAgentService +from app.services.user_agent_documents import UserAgentDocumentService def build_session_factory() -> sessionmaker[Session]: @@ -1096,6 +1097,42 @@ def test_user_agent_returns_submitted_draft_payload_for_review_next_step() -> No assert "当前节点为 直属领导审批" in response.answer +def test_user_agent_document_service_normalizes_ocr_fields_and_scene() -> None: + document_service = UserAgentDocumentService() + + fields = 
document_service.extract_document_fields( + { + "filename": "北京南站火车票.png", + "document_type": "train_ticket", + "scene_code": "travel", + "summary": "电子发票 2026-03-04 广州南至北京南 二等座 票价 ¥560.00 中国铁路", + "text": "电子发票 2026-03-04 广州南至北京南 二等座 票价 ¥560.00 中国铁路", + "document_fields": [ + {"key": "amount", "label": "票价", "value": "¥560.00"}, + {"key": "date", "label": "业务发生时间", "value": "2026-03-04"}, + {"key": "merchant_name", "label": "商户", "value": "中国铁路"}, + ], + } + ) + classified = document_service.classify_document( + {"filename": "客户餐饮发票.jpg", "summary": "餐饮发票 客户招待 金额 320 元"}, + expense_type_code="entertainment", + has_customer=True, + ) + + assert fields["金额"] == "560.00元" + assert fields["列车出发时间"] == "2026-03-04" + assert "商户/酒店" not in fields + assert document_service.extract_amount_text_from_value("滴滴出行 支付金额 1 元,实付 13.4 元,订单号 12345678") == "13.40元" + assert classified["document_type"] == "meal_receipt" + assert classified["expense_type"] == "entertainment" + assert document_service.infer_expense_type_from_documents( + [{"filename": "客户餐饮发票.jpg", "summary": "餐饮发票 客户招待 金额 320 元"}], + expense_type_code="entertainment", + has_customer=True, + ) == "业务招待费" + + def test_user_agent_builds_review_payload_for_multi_document_expense_flow() -> None: session_factory = build_session_factory() with session_factory() as db: