3 Commits

Author SHA1 Message Date
caoxiaozhu
222ba0bfdc refactor(server): split oversized backend services 2026-05-22 10:42:31 +08:00
caoxiaozhu
2e57702638 docs: add agent code size standards 2026-05-22 10:42:19 +08:00
caoxiaozhu
5fe3b201d9 feat: 重构报销单服务并完善前端提交与审核交互
重构 expense_claims 服务模块结构并优化差旅票据审核逻辑,
增强用户代理服务的票据类型识别,前端报销创建页面拆分为
附件模型和会话模型模块,重构提交编排器和草稿关联确认流
程,更新知识库索引,补充单元测试。
2026-05-22 08:58:59 +08:00
123 changed files with 37096 additions and 28491 deletions

39
AGENTS.md Normal file
View File

@@ -0,0 +1,39 @@
# X-Financial Agent 协作规范
## 语言规范
- 所有分析、解释、计划、提交说明和最终回复默认使用简体中文。
- 技术结论要直击重点,必要时给出可验证的文件、命令或测试结果。
## 通用代码拆分规范
无论写前端、后端还是算法代码,都必须主动避免“所有方法堆在一个类里 / 一个组件里 / 一个模块里”的写法。遇到类、组件或核心模块持续变大时,优先按职责拆分,而不是继续追加方法和状态。
### 行数与复杂度目标
- 单个类、核心组件、核心算法模块硬上限为 800 行。
- 普通文件建议控制在 300-600 行。
- 复杂业务文件可以接近 800 行,但必须有清晰职责边界。
- 文件或类超过 800 行必须视为重构预警,不应继续直接追加功能。
- 单个类不应长期承载几十个无关方法,更不应演化成上百个方法的万能类。
### 拆分原则
- 对外 API 尽量保持稳定,先把内部实现拆到小模块。
- 按职责拆分编排、状态管理、持久化、权限、文件存储、OCR/票据分析、规则审核、响应构建、序列化、UI 交互、算法策略、数据转换。
- 新增能力时先判断归属模块;没有合适归属时新增小模块,不要默认塞回主类、主组件或主 Service。
- 拆分必须小步进行,每次提取一个明确职责,并配套运行相关测试。
### X-Financial 重点关注对象
- `ExpenseClaimService`:优先拆分申请单、明细项、附件、票据分析、草稿、规则审核、权限、序列化。
- `UserAgentService`:优先拆分知识库问答、报销预审 payload、Markdown 回复、差旅政策、表单槽位、票据分类、建议动作。
- `OrchestratorService`:优先拆分 agent 路由、工具调用、报销查询、响应构建。
- 前端大型 Vue 页面:优先拆分 composable、view model、样式分片、业务工具函数和子组件。
- 算法/规则模块:优先拆分输入解析、规则匹配、评分策略、结果解释和异常处理。
## 验证规范
- 后端改动优先在 Docker 容器 `x-financial-main` 中运行验证。
- 单元测试设置合理超时,避免长时间卡死。
- 每次重构后至少运行对应服务的定向测试;涉及公共协议时补充端到端或接口测试。

1
nul
View File

@@ -1 +0,0 @@
/usr/bin/bash: line 1: rg: command not found

View File

@@ -0,0 +1,98 @@
from __future__ import annotations
from app.models.agent_asset import AgentAsset
from app.schemas.agent_asset import AgentAssetRuleJsonRead, AgentAssetRuleJsonWrite
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY, RULE_LIBRARY_NAMES
class AgentAssetJsonRuleMixin:
def _resolve_json_risk_rule_document(self, asset: AgentAsset) -> tuple[str, str]:
    """Return ``(rule_library, file_name)`` for a JSON risk-rule asset.

    The values are read from ``asset.config_json``. The checks run in a fixed
    order, so the first failing one decides the error message.

    Raises:
        ValueError: if ``detail_mode`` is not ``"json_risk"``, the rule library
            is not one of ``RULE_LIBRARY_NAMES``, ``rule_document`` is not a
            dict, or it carries no ``file_name``.
    """
    settings = dict(asset.config_json or {})

    mode = str(settings.get("detail_mode") or "").strip().lower()
    if mode != "json_risk":
        raise ValueError("当前资产不是 JSON 风险规则。")

    # A missing library falls back to the risk-rule library.
    library_name = str(settings.get("rule_library") or RISK_RULES_LIBRARY).strip()
    if library_name not in RULE_LIBRARY_NAMES:
        raise ValueError("规则库目录不合法。")

    document = settings.get("rule_document")
    if not isinstance(document, dict):
        raise ValueError("规则资产缺少 rule_document 配置。")

    json_file_name = str(document.get("file_name") or "").strip()
    if json_file_name:
        return library_name, json_file_name
    raise ValueError("规则资产缺少 JSON 文件名。")
def read_rule_json(self, asset_id: str) -> AgentAssetRuleJsonRead:
    """Load the JSON rule document of an asset and wrap it in a read schema.

    Code, name and description fall back to the asset's own attributes when
    the document does not provide them.

    Raises:
        LookupError: if the asset does not exist.
        ValueError: if the asset is not a valid JSON risk rule.
    """
    asset = self.repository.get(asset_id)
    if asset is None:
        raise LookupError("资产不存在。")

    library_name, json_file_name = self._resolve_json_risk_rule_document(asset)
    document = self.rule_library_manager.read_rule_library_json(
        library=library_name,
        file_name=json_file_name,
    )

    raw_inputs = document.get("inputs")
    raw_outcomes = document.get("outcomes")
    signal = str(document.get("ontology_signal") or "")

    return AgentAssetRuleJsonRead(
        file_name=json_file_name,
        rule_code=str(document.get("rule_code") or asset.code or ""),
        name=str(document.get("name") or asset.name or ""),
        description=str(document.get("description") or asset.description or "").strip(),
        evaluator=str(document.get("evaluator") or ""),
        # An empty signal is reported as None rather than "".
        ontology_signal=signal or None,
        # Non-dict sections are replaced by empty dicts.
        inputs=raw_inputs if isinstance(raw_inputs, dict) else {},
        outcomes=raw_outcomes if isinstance(raw_outcomes, dict) else {},
        payload=document,
    )
def write_rule_json(
    self,
    asset_id: str,
    *,
    body: AgentAssetRuleJsonWrite,
    actor: str,
    request_id: str | None = None,
) -> AgentAssetRuleJsonRead:
    """Persist a JSON rule document and mirror key fields onto the asset.

    The document's ``rule_code`` must be empty or equal to the asset code; an
    empty one is filled in from the asset. After the rule library manager has
    written the file, non-empty ``description``, ``name`` and ``risk_category``
    values from the saved document are copied to the asset, an audit entry is
    logged and the database session is committed.

    Raises:
        LookupError: if the asset does not exist.
        ValueError: if the asset is not a JSON risk rule or the rule code
            conflicts with the asset code.
    """
    asset = self.repository.get(asset_id)
    if asset is None:
        raise LookupError("资产不存在。")
    library_name, json_file_name = self._resolve_json_risk_rule_document(asset)

    document = dict(body.payload or {})
    expected_code = str(asset.code or "").strip()
    if expected_code:
        declared_code = str(document.get("rule_code") or "").strip()
        if not declared_code:
            document["rule_code"] = expected_code
        elif declared_code != expected_code:
            raise ValueError("规则 JSON 的 rule_code 必须与资产编码一致。")

    saved = self.rule_library_manager.write_rule_library_json(
        library=library_name,
        file_name=json_file_name,
        payload=document,
    )

    new_description = str(saved.get("description") or "").strip()
    if new_description:
        asset.description = new_description

    new_name = str(saved.get("name") or "").strip()
    if new_name:
        asset.name = new_name

    category = str(saved.get("risk_category") or "").strip()
    if category:
        # Reassign a fresh dict instead of mutating the stored one in place.
        updated_config = dict(asset.config_json or {})
        updated_config["risk_category"] = category
        asset.config_json = updated_config
        asset.scenario_json = [category]

    self.audit_service.log_action(
        actor=actor,
        action="update_agent_asset_rule_json",
        resource_type=asset.asset_type,
        resource_id=asset.id,
        before_json={"file_name": json_file_name},
        after_json={"file_name": json_file_name, "rule_code": saved.get("rule_code")},
        request_id=request_id,
    )
    self.db.commit()
    return self.read_rule_json(asset_id)

View File

@@ -0,0 +1,450 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from urllib.parse import quote
from urllib.request import Request, urlopen
import jwt
from app.api.deps import CurrentUserContext
from app.core.config import get_settings
from app.schemas.agent_asset import AgentAssetOnlyOfficeConfigRead
from app.services.agent_asset_spreadsheet import (
COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
FINANCE_RULES_LIBRARY,
SPREADSHEET_MIME_TYPE,
AgentAssetSpreadsheetManager,
RuleSpreadsheetMeta,
)
from app.services.settings import resolve_onlyoffice_settings
# Identifier of the preview rule asset. The ONLYOFFICE methods in this module
# compare incoming asset ids against it and take a dedicated preview code path
# instead of loading an asset from the repository.
PREVIEW_RULE_ASSET_ID = "preview-rule-expense-company-travel-expense"
# The only preview version that is opened as editable; it is also the default
# when no version is requested.
PREVIEW_RULE_CURRENT_VERSION = "v1.2.0"
# Workbook file name for every preview version that may be requested; any other
# version is rejected with LookupError.
# NOTE(review): the v1.1.0 / v1.0.0 names look like UTF-8 text that was
# mis-decoded as GBK (mojibake). Confirm against the files actually stored on
# disk before correcting them, since these strings become part of storage keys.
PREVIEW_RULE_VERSION_FILENAMES = {
PREVIEW_RULE_CURRENT_VERSION: COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
"v1.1.0": "鍏徃宸梾璐规姤閿€瑙勫垯-v1.1.0.xlsx",
"v1.0.0": "鍏徃宸梾璐规姤閿€瑙勫垯-v1.0.0.xlsx",
}
# Normalized subset of an ONLYOFFICE callback body, as built by
# `_parse_onlyoffice_callback` from the raw request payload.
@dataclass(slots=True)
class OnlyOfficeCallbackPayload:
# Integer value of the payload's "status" field; 0 when it is missing.
# The callback handlers only act on statuses 2 and 6.
status: int
# Stripped value of the payload's "url" field; "" when it is missing.
download_url: str
# Stripped, non-blank entries of the payload's "users" field.
users: list[str]
class AgentAssetOnlyOfficeMixin:
@staticmethod
def _resolve_onlyoffice_settings():
    """Return the ONLYOFFICE settings via ``app.services.agent_assets``.

    The module is imported at call time and the resolver is looked up on it
    for every call.
    NOTE(review): presumably this avoids a circular import and lets the
    resolver be patched on ``agent_assets`` — confirm.
    """
    from app.services import agent_assets as assets_module

    resolver = assets_module.resolve_onlyoffice_settings
    return resolver()
def build_rule_spreadsheet_onlyoffice_config(
    self,
    asset_id: str,
    current_user: CurrentUserContext,
    *,
    version: str | None = None,
) -> AgentAssetOnlyOfficeConfigRead:
    """Build the ONLYOFFICE editor configuration for a rule spreadsheet.

    The preview asset is editable only at ``PREVIEW_RULE_CURRENT_VERSION``.
    Any other asset always opens its current workbook (``version`` is not
    consulted) and is editable when the user passes
    ``_can_edit_current_spreadsheet``.
    """
    self._ensure_ready()

    if asset_id != PREVIEW_RULE_ASSET_ID:
        asset = self._require_spreadsheet_rule(asset_id)
        _, current_metadata = self._resolve_current_spreadsheet_meta(asset)
        return self._build_onlyoffice_spreadsheet_config(
            asset_id=asset.id,
            current_user=current_user,
            metadata=current_metadata,
            editable=self._can_edit_current_spreadsheet(current_user),
        )

    preview_version, preview_metadata = self._ensure_preview_rule_spreadsheet(version=version)
    preview_editable = preview_version == PREVIEW_RULE_CURRENT_VERSION
    return self._build_onlyoffice_spreadsheet_config(
        asset_id=asset_id,
        current_user=current_user,
        metadata=preview_metadata,
        editable=preview_editable,
    )
def get_rule_spreadsheet_content(
    self,
    asset_id: str,
    *,
    version: str | None = None,
) -> tuple[Path, str, str]:
    """Return ``(file_path, mime_type, file_name)`` of a rule workbook.

    An empty version or ``"current"`` selects the current workbook; any other
    value selects that version's snapshot. The preview asset resolves its own
    versions.

    Raises:
        FileNotFoundError: if the resolved path does not exist.
    """
    self._ensure_ready()

    if asset_id == PREVIEW_RULE_ASSET_ID:
        _, metadata = self._ensure_preview_rule_spreadsheet(version=version)
    else:
        asset = self._require_spreadsheet_rule(asset_id)
        wanted_version = str(version or "").strip()
        if wanted_version in ("", "current"):
            _, metadata = self._resolve_current_spreadsheet_meta(asset)
        else:
            _, metadata = self._resolve_spreadsheet_version_meta(asset, version=wanted_version)

    workbook_path = self.spreadsheet_manager.resolve_storage_path(metadata.storage_key)
    if workbook_path.exists():
        return workbook_path, metadata.mime_type, metadata.file_name
    raise FileNotFoundError(metadata.file_name)
def validate_rule_spreadsheet_access_token(
    self,
    asset_id: str,
    access_token: str,
) -> None:
    """Verify the HS256 file-access token issued for an asset's spreadsheet.

    The token must be signed with the configured ONLYOFFICE JWT secret, carry
    the ``agent-asset-spreadsheet`` scope and name exactly ``asset_id``.

    Raises:
        ValueError: if the secret is not configured, the signature is invalid,
            or the claims do not match.
    """
    onlyoffice_settings = self._resolve_onlyoffice_settings()
    signing_secret = onlyoffice_settings.jwt_secret
    if not signing_secret:
        # Without a secret no token can have been issued (config building
        # refuses to run), so anything presented here must be rejected rather
        # than verified against an empty key.
        raise ValueError("ONLYOFFICE 文件访问令牌无效。")

    try:
        payload = jwt.decode(
            access_token,
            signing_secret,
            algorithms=["HS256"],
        )
    except jwt.PyJWTError as exc:
        raise ValueError("ONLYOFFICE 文件访问令牌无效。") from exc

    # NOTE(review): issued tokens carry no `exp` claim, so they never expire.
    scope_matches = payload.get("scope") == "agent-asset-spreadsheet"
    asset_matches = payload.get("asset_id") == asset_id
    if not (scope_matches and asset_matches):
        raise ValueError("ONLYOFFICE 文件访问令牌无效。")
def upload_rule_spreadsheet(
    self,
    asset_id: str,
    *,
    filename: str,
    content: bytes,
    actor: str,
    request_id: str | None = None,
    change_note: str | None = None,
    source: str = "upload",
) -> AgentAssetRead:
    """Replace the current rule workbook of an asset and audit the change.

    ``filename`` is only validated (non-empty, ``.xlsx``); the workbook is
    stored under the asset's current file name. The new content is diffed
    against the current workbook before it is stored, and the diff summary is
    written to the audit log. A non-empty ``change_note`` is recorded in the
    audit entry as well.

    Raises:
        ValueError: if the file name is empty, not ``.xlsx``, or the content
            is empty.
    """
    self._ensure_ready()
    asset = self._require_spreadsheet_rule(asset_id)

    normalized_name = Path(str(filename or "").strip()).name.strip()
    if not normalized_name:
        raise ValueError("规则表文件名不能为空。")
    if Path(normalized_name).suffix.lower() != ".xlsx":
        raise ValueError("当前仅支持上传 .xlsx 格式的规则表。")
    if not content:
        raise ValueError("规则表文件内容不能为空。")

    _, current_metadata = self._resolve_current_spreadsheet_meta(asset)
    file_name = current_metadata.file_name or self._resolve_default_spreadsheet_file_name(asset)

    # Diff first: storing overwrites the workbook the diff is based on.
    sheet_changes, cell_changes = self._collect_workbook_changes_from_content(
        current_metadata,
        content,
    )
    changed_sheet_count = self._count_changed_sheets(sheet_changes, cell_changes)
    changed_cell_count = len(cell_changes)

    metadata = self._store_current_rule_spreadsheet(
        asset,
        file_name=file_name,
        content=content,
        actor=actor,
        source=source,
    )
    summary = self._build_spreadsheet_change_summary(
        sheet_changes,
        cell_changes,
    )

    after_json = {
        "summary": summary,
        "changed_sheet_count": changed_sheet_count,
        "changed_cell_count": changed_cell_count,
        "sheet_changes": [item.model_dump() for item in sheet_changes],
        # Only the first 500 cell changes are kept in the audit entry.
        "cell_changes": [item.model_dump() for item in cell_changes[:500]],
        "storage_key": metadata.storage_key,
    }
    # The note used to be accepted and silently discarded; keep it with the
    # audit entry. The key is omitted when no note was given, so existing
    # payloads keep their shape.
    note = str(change_note or "").strip()
    if note:
        after_json["change_note"] = note

    self.audit_service.log_action(
        actor=actor,
        action="edit_rule_spreadsheet",
        resource_type=asset.asset_type,
        resource_id=asset.id,
        before_json={"storage_key": current_metadata.storage_key},
        after_json=after_json,
        request_id=request_id,
    )
    return self.get_asset(asset.id)  # type: ignore[return-value]
def import_rule_spreadsheet_content(
    self,
    asset_id: str,
    *,
    filename: str,
    content: bytes,
    actor: str,
    request_id: str | None = None,
) -> AgentAssetRead:
    """Import an uploaded ``.xlsx`` as the asset's current rule workbook.

    The uploaded bytes are passed through
    ``spreadsheet_manager.rebuild_from_uploaded_content`` and the result is
    stored via ``upload_rule_spreadsheet`` under the current file name, with
    source ``"content-import"``.

    Raises:
        ValueError: if the file name is empty or not ``.xlsx``.
    """
    self._ensure_ready()
    asset = self._require_spreadsheet_rule(asset_id)

    normalized_name = Path(str(filename or "").strip()).name.strip()
    if not normalized_name:
        raise ValueError("待导入表格文件名不能为空。")
    extension = Path(normalized_name).suffix.lower()
    if extension != ".xlsx":
        raise ValueError("当前仅支持导入 .xlsx 格式的规则表。")

    _, current_metadata = self._resolve_current_spreadsheet_meta(asset)
    rebuilt_workbook = self.spreadsheet_manager.rebuild_from_uploaded_content(content)
    import_note = f"导入 Excel 表格内容:{normalized_name}"

    return self.upload_rule_spreadsheet(
        asset.id,
        filename=current_metadata.file_name,
        content=rebuilt_workbook,
        actor=actor,
        request_id=request_id,
        change_note=import_note,
        source="content-import",
    )
def handle_rule_spreadsheet_onlyoffice_callback(
    self,
    asset_id: str,
    *,
    version: str | None = None,
    payload: dict[str, Any],
    actor_name: str | None = None,
) -> None:
    """Handle an ONLYOFFICE save callback for a rule spreadsheet.

    Only callbacks with status 2 or 6 (document ready for saving / force
    save, per the ONLYOFFICE callback API) and a download URL are processed.
    The edited workbook is downloaded and, unless its checksum equals the
    current one, stored through ``upload_rule_spreadsheet``. Callbacks for the
    preview asset are delegated to the preview handler.

    Raises:
        ValueError: if the download URL is not an http(s) URL.
    """
    # Local import: this module only imports `quote` from urllib.parse.
    from urllib.parse import urlsplit

    self._ensure_ready()
    if asset_id == PREVIEW_RULE_ASSET_ID:
        self._handle_preview_rule_spreadsheet_onlyoffice_callback(
            version=version,
            payload=payload,
        )
        return

    asset = self._require_spreadsheet_rule(asset_id)
    callback = self._parse_onlyoffice_callback(payload)
    if callback.status not in {2, 6} or not callback.download_url:
        return

    # The URL comes from the request body. urlopen also understands file://
    # and other schemes, so anything but http(s) is refused before fetching.
    if urlsplit(callback.download_url).scheme.lower() not in {"http", "https"}:
        raise ValueError("ONLYOFFICE 回调下载地址不合法。")

    _, current_metadata = self._resolve_current_spreadsheet_meta(asset)
    request = Request(
        callback.download_url,
        headers={"User-Agent": "x-financial-onlyoffice-agent-asset"},
    )
    with urlopen(request, timeout=30) as response:  # noqa: S310
        content = response.read()

    # Skip the write (and the audit entry) when nothing changed.
    if current_metadata.checksum and current_metadata.checksum == self._hash_bytes(content):
        return

    # Prefer the explicit actor, then the first callback user, then a label.
    resolved_actor_name = str(actor_name or "").strip() or (
        callback.users[0] if callback.users else "ONLYOFFICE"
    )
    self.upload_rule_spreadsheet(
        asset.id,
        filename=current_metadata.file_name,
        content=content,
        actor=resolved_actor_name,
        source="onlyoffice",
    )
@staticmethod
def _can_edit_current_spreadsheet(current_user: CurrentUserContext) -> bool:
    """Tell whether the user may edit the current rule workbook.

    Admins may edit, as may users holding the ``manager`` or ``finance`` role
    code (codes are compared after stripping whitespace).
    """
    editor_roles = {"manager", "finance"}
    held_roles = {str(code).strip() for code in current_user.role_codes}
    return current_user.is_admin or not editor_roles.isdisjoint(held_roles)
@staticmethod
def _build_onlyoffice_document_key(
    asset_id: str,
    metadata: RuleSpreadsheetMeta,
) -> str:
    """Build the ONLYOFFICE ``document.key`` for one workbook revision.

    The key combines the asset id with the first non-empty value of checksum,
    update timestamp and file name, so it changes when the content does.
    ONLYOFFICE only accepts ``0-9 a-z A-Z - . _ =`` and at most 128
    characters. ASCII keys are sanitized character by character; keys that
    contain non-ASCII text or exceed the limit are replaced by their SHA-256
    hex digest, which stays unique and deterministic.
    """
    import hashlib
    import string

    max_key_length = 128
    allowed = frozenset(string.ascii_letters + string.digits + "-_.=")

    fingerprint = metadata.checksum or metadata.updated_at or metadata.file_name
    raw_key = f"{asset_id}-{fingerprint}"

    # str.isalnum() is true for CJK and other Unicode letters, which the
    # document server does not accept, hence the explicit ASCII allow-list.
    sanitized = "".join(ch if ch in allowed else "_" for ch in raw_key)
    if raw_key.isascii() and len(sanitized) <= max_key_length:
        return sanitized
    return hashlib.sha256(raw_key.encode("utf-8")).hexdigest()
def _build_onlyoffice_access_token(self, asset_id: str) -> str:
    """Issue the HS256 token that authorizes downloading an asset's workbook.

    The claims are the fixed ``agent-asset-spreadsheet`` scope and the asset
    id; no expiry claim is set.
    """
    signing_secret = self._resolve_onlyoffice_settings().jwt_secret
    claims = dict(scope="agent-asset-spreadsheet", asset_id=asset_id)
    return jwt.encode(claims, signing_secret, algorithm="HS256")
@staticmethod
def _parse_onlyoffice_callback(payload: dict[str, Any]) -> OnlyOfficeCallbackPayload:
    """Extract status, download URL and user names from a callback body.

    Missing fields become ``0``, ``""`` and ``[]`` respectively; blank user
    entries are dropped.
    """
    status_code = int(payload.get("status") or 0)
    url = str(payload.get("url") or "").strip()

    user_names: list[str] = []
    for entry in payload.get("users") or []:
        cleaned = str(entry).strip()
        if cleaned:
            user_names.append(cleaned)

    return OnlyOfficeCallbackPayload(
        status=status_code,
        download_url=url,
        users=user_names,
    )
def _build_onlyoffice_spreadsheet_config(
    self,
    *,
    asset_id: str,
    current_user: CurrentUserContext,
    metadata: RuleSpreadsheetMeta,
    editable: bool,
) -> AgentAssetOnlyOfficeConfigRead:
    """Assemble and sign the ONLYOFFICE editor configuration for a workbook.

    The document URL points at this backend's content endpoint and carries a
    file-access token; the callback URL carries the current user's name. The
    whole config is signed with the ONLYOFFICE JWT secret and the signature is
    stored under ``token``.

    Raises:
        ValueError: if ONLYOFFICE is disabled, its URLs are incomplete, or no
            JWT secret is configured.
    """
    office = self._resolve_onlyoffice_settings()
    app_settings = get_settings()

    if not office.enabled:
        raise ValueError("ONLYOFFICE 预览未启用。")
    if not (office.public_url and office.backend_url):
        raise ValueError("ONLYOFFICE 地址配置不完整。")
    if not office.jwt_secret:
        raise ValueError("ONLYOFFICE JWT 密钥未配置。")

    backend_base_url = office.backend_url.rstrip("/")
    server_url = office.public_url.rstrip("/")
    file_token = self._build_onlyoffice_access_token(asset_id)

    endpoint_root = (
        f"{backend_base_url}{app_settings.api_v1_prefix}"
        f"/agent-assets/{asset_id}/spreadsheet/onlyoffice"
    )
    document_url = f"{endpoint_root}/content?access_token={file_token}"
    callback_url = f"{endpoint_root}/callback?actor_name={quote(current_user.name)}"

    # Key order is kept stable because the dict is serialized into the JWT.
    document_section = {
        "fileType": Path(metadata.file_name).suffix.lstrip(".").lower() or "xlsx",
        "key": self._build_onlyoffice_document_key(asset_id, metadata),
        "title": metadata.file_name,
        "url": document_url,
        "permissions": {
            "download": True,
            "edit": editable,
            "print": True,
            "copy": True,
        },
    }
    editor_section = {
        "mode": "edit" if editable else "view",
        "lang": "zh-CN",
        "callbackUrl": callback_url,
        "user": {
            "id": current_user.username,
            "name": current_user.name,
        },
        "customization": {
            "compactHeader": True,
            "compactToolbar": False,
            "toolbarNoTabs": False,
            "autosave": False,
            "forcesave": editable,
        },
    }
    config: dict[str, Any] = {
        "documentType": "cell",
        "document": document_section,
        "editorConfig": editor_section,
        "width": "100%",
        "height": "100%",
    }
    # The signature covers the config as it is before "token" is added.
    signature = jwt.encode(config, office.jwt_secret, algorithm="HS256")
    config["token"] = signature
    return AgentAssetOnlyOfficeConfigRead(documentServerUrl=server_url, config=config)
def _ensure_preview_rule_spreadsheet(
    self,
    *,
    version: str | None = None,
) -> tuple[str, RuleSpreadsheetMeta]:
    """Return ``(version, metadata)`` of a preview workbook, creating it if needed.

    When the snapshot file already exists its metadata is derived from the
    file on disk; otherwise a snapshot is stored from the company travel rule
    template.

    Raises:
        LookupError: if the version is not in ``PREVIEW_RULE_VERSION_FILENAMES``.
    """
    chosen_version = str(version or PREVIEW_RULE_CURRENT_VERSION).strip()
    if chosen_version not in PREVIEW_RULE_VERSION_FILENAMES:
        raise LookupError(f"版本 {chosen_version} 不存在")

    workbook_name = PREVIEW_RULE_VERSION_FILENAMES[chosen_version]
    snapshot_key = Path("rules").joinpath(
        FINANCE_RULES_LIBRARY,
        ".versions",
        PREVIEW_RULE_ASSET_ID,
        chosen_version,
        workbook_name,
    ).as_posix()

    try:
        snapshot_path = self.spreadsheet_manager.resolve_storage_path(snapshot_key)
    except FileNotFoundError:
        snapshot_path = None

    if snapshot_path is None or not snapshot_path.exists():
        created = self.spreadsheet_manager.store_rule_library_spreadsheet_snapshot(
            library=FINANCE_RULES_LIBRARY,
            asset_id=PREVIEW_RULE_ASSET_ID,
            version=chosen_version,
            file_name=workbook_name,
            content=AgentAssetSpreadsheetManager.build_company_travel_rule_template(),
            actor_name="ONLYOFFICE 预览",
            source="preview",
        )
        return chosen_version, created

    data = snapshot_path.read_bytes()
    modified_at = datetime.fromtimestamp(snapshot_path.stat().st_mtime, UTC).isoformat()
    existing = RuleSpreadsheetMeta(
        file_name=workbook_name,
        storage_key=snapshot_key,
        mime_type=SPREADSHEET_MIME_TYPE,
        size_bytes=snapshot_path.stat().st_size,
        checksum=self._hash_bytes(data),
        updated_at=modified_at,
        updated_by="ONLYOFFICE 预览",
        source="preview",
    )
    return chosen_version, existing
def _handle_preview_rule_spreadsheet_onlyoffice_callback(
    self,
    *,
    version: str | None,
    payload: dict[str, Any],
) -> None:
    """Handle an ONLYOFFICE save callback for the preview workbook.

    Only callbacks with status 2 or 6 and a download URL are processed. The
    edited workbook is downloaded and, unless its checksum equals the stored
    one, written back as the snapshot of the resolved preview version.
    ``version`` may be ``None``, in which case the current preview version is
    used.

    Raises:
        ValueError: if the download URL is not an http(s) URL.
        LookupError: if the preview version does not exist.
    """
    # Local import: this module only imports `quote` from urllib.parse.
    from urllib.parse import urlsplit

    callback = self._parse_onlyoffice_callback(payload)
    if callback.status not in {2, 6} or not callback.download_url:
        return

    # The URL comes from the request body and the downloaded bytes are stored
    # as-is, so a file:// URL would copy a local file into a snapshot. Refuse
    # everything but http(s) before fetching.
    if urlsplit(callback.download_url).scheme.lower() not in {"http", "https"}:
        raise ValueError("ONLYOFFICE 回调下载地址不合法。")

    resolved_version, metadata = self._ensure_preview_rule_spreadsheet(version=version)
    request = Request(
        callback.download_url,
        headers={"User-Agent": "x-financial-onlyoffice-agent-asset-preview"},
    )
    with urlopen(request, timeout=30) as response:  # noqa: S310
        content = response.read()

    # Nothing to store when the content is unchanged.
    if metadata.checksum and metadata.checksum == self._hash_bytes(content):
        return

    actor_name = callback.users[0] if callback.users else "ONLYOFFICE"
    self.spreadsheet_manager.store_rule_library_spreadsheet_snapshot(
        library=FINANCE_RULES_LIBRARY,
        asset_id=PREVIEW_RULE_ASSET_ID,
        version=resolved_version,
        file_name=metadata.file_name,
        content=content,
        actor_name=actor_name,
        source="onlyoffice-preview",
    )
@staticmethod
def _read_current_rule_document_meta(asset: AgentAsset) -> RuleSpreadsheetMeta | None:
    """Build workbook metadata from ``asset.config_json["rule_document"]``.

    Returns ``None`` when that entry is missing or not a dict. Blank fields
    fall back to: the xlsx MIME type, ``"system"`` as updater and
    ``"upload"`` as source; size defaults to 0.
    """
    document = (asset.config_json or {}).get("rule_document")
    if not isinstance(document, dict):
        return None

    def _text(key: str, fallback: str = "") -> str:
        # Stripped string value, or the fallback when it ends up empty.
        return str(document.get(key) or fallback).strip() or fallback

    return RuleSpreadsheetMeta(
        file_name=_text("file_name"),
        storage_key=_text("storage_key"),
        mime_type=_text(
            "mime_type",
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        ),
        size_bytes=int(document.get("size_bytes") or 0),
        checksum=_text("checksum"),
        updated_at=_text("updated_at"),
        updated_by=_text("updated_by", "system"),
        source=_text("source", "upload"),
    )

View File

@@ -0,0 +1,298 @@
from __future__ import annotations
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from app.core.agent_enums import AgentAssetType
from app.models.agent_asset import AgentAsset
from app.schemas.agent_asset import (
AgentAssetSpreadsheetDiffCellRead,
AgentAssetSpreadsheetDiffSheetRead,
)
from app.services.agent_asset_spreadsheet import (
COMPANY_COMMUNICATION_EXPENSE_RULE_CODE,
COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME,
COMPANY_TRAVEL_EXPENSE_RULE_CODE,
COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
FINANCE_RULES_LIBRARY,
RULE_LIBRARY_NAMES,
SPREADSHEET_MIME_TYPE,
AgentAssetSpreadsheetManager,
RuleSpreadsheetMeta,
)
class AgentAssetSpreadsheetHelperMixin:
def _require_spreadsheet_rule(self, asset_id: str) -> AgentAsset:
    """Load an asset and ensure it is a rule backed by an Excel workbook.

    Raises:
        LookupError: if the asset does not exist.
        ValueError: if it is not a rule asset or its ``detail_mode`` is not
            ``"spreadsheet"``.
    """
    found = self.repository.get(asset_id)
    if found is None:
        raise LookupError("Asset not found")

    if found.asset_type != AgentAssetType.RULE.value:
        raise ValueError("仅规则资产支持 Excel 规则表。")

    raw_mode = (found.config_json or {}).get("detail_mode")
    if str(raw_mode or "").strip().lower() != "spreadsheet":
        raise ValueError("当前规则未配置 Excel 规则表。")
    return found
def _resolve_spreadsheet_version_meta(
    self,
    asset: AgentAsset,
    *,
    version: str | None = None,
) -> tuple[str, RuleSpreadsheetMeta]:
    """Return ``(version, metadata)`` for a stored workbook version.

    Without an explicit version the asset's working version is used.

    Raises:
        ValueError: if no version can be determined.
        LookupError: if the version record does not exist.
        FileNotFoundError: if no snapshot metadata is available.
    """
    target_version = str(version or self._resolve_working_version(asset) or "").strip()
    if not target_version:
        raise ValueError("当前规则尚未配置表格版本。")

    record = self.repository.get_version(asset.id, target_version)
    if record is None:
        raise LookupError(f"版本 {target_version} 不存在")

    # The snapshot stored with the version record is the immutable source of
    # truth. The workbook under `/rules` is only the current editable copy, so
    # later writes must not leak back into an existing version.
    snapshot_meta = self.spreadsheet_manager.parse_version_markdown(str(record.content or ""))
    if snapshot_meta is None and self._resolve_working_version(asset) == target_version:
        # The working version may not have a snapshot yet; use the current
        # document metadata recorded on the asset instead.
        snapshot_meta = self._read_current_rule_document_meta(asset)

    if snapshot_meta is None:
        raise FileNotFoundError("规则表版本快照不存在。")
    return target_version, snapshot_meta
def _resolve_current_spreadsheet_meta(
    self,
    asset: AgentAsset,
) -> tuple[str, RuleSpreadsheetMeta]:
    """Return ``("current", metadata)`` for the asset's editable workbook.

    This is not a pure read. If the workbook is missing under
    ``rules/<library>/<file_name>`` it is created from the previously recorded
    storage key when that file exists, otherwise from a blank workbook. The
    asset is saved again whenever its ``rule_document`` config differs from
    the resolved metadata.
    """
    config = dict(asset.config_json or {})
    recorded = self._read_current_rule_document_meta(asset)

    def _recorded(attribute: str, default: str) -> str:
        # Recorded value when present and non-empty, else the default.
        return (getattr(recorded, attribute) if recorded is not None else "") or default

    if recorded is not None and recorded.file_name:
        file_name = recorded.file_name
    else:
        file_name = self._resolve_default_spreadsheet_file_name(asset)

    library = self._resolve_spreadsheet_rule_library(asset)
    storage_key = (Path("rules") / library / file_name).as_posix()
    workbook_path = self.spreadsheet_manager.resolve_storage_path(storage_key)

    if workbook_path.exists():
        # Describe the file as it is on disk; only the descriptive fields are
        # carried over from the recorded metadata.
        data = workbook_path.read_bytes()
        meta = RuleSpreadsheetMeta(
            file_name=file_name,
            storage_key=storage_key,
            mime_type=_recorded("mime_type", SPREADSHEET_MIME_TYPE),
            size_bytes=workbook_path.stat().st_size,
            checksum=self._hash_bytes(data),
            updated_at=datetime.fromtimestamp(workbook_path.stat().st_mtime, UTC).isoformat(),
            updated_by=_recorded("updated_by", "system"),
            source=_recorded("source", "current-rule"),
        )
    else:
        data: bytes | None = None
        if recorded is not None and recorded.storage_key:
            # Try the location recorded earlier before starting from scratch.
            try:
                legacy_path = self.spreadsheet_manager.resolve_storage_path(
                    recorded.storage_key
                )
            except FileNotFoundError:
                legacy_path = None
            if legacy_path is not None and legacy_path.exists():
                data = legacy_path.read_bytes()
        if data is None:
            data = AgentAssetSpreadsheetManager.build_blank_rule_workbook(
                Path(file_name).stem or "规则表"
            )
        meta = self.spreadsheet_manager.store_rule_library_spreadsheet(
            library=library,
            file_name=file_name,
            content=data,
            actor_name=_recorded("updated_by", "system"),
            source="current-rule",
        )

    expected_document = {
        **self.spreadsheet_manager.build_rule_document_config(
            meta,
            asset_version="current",
        ),
        "storage_key": meta.storage_key,
    }
    if config.get("rule_document") == expected_document:
        return "current", meta

    # Bring the asset's config in line with what is actually stored.
    config["detail_mode"] = "spreadsheet"
    config["tag"] = str(config.get("tag") or "财务规则").strip() or "财务规则"
    config["rule_library"] = library
    config["rule_document"] = expected_document
    asset.config_json = config
    self.repository.save_asset(asset)
    return "current", meta
def _store_current_rule_spreadsheet(
    self,
    asset: AgentAsset,
    *,
    file_name: str,
    content: bytes,
    actor: str,
    source: str,
) -> RuleSpreadsheetMeta:
    """Write the workbook into the asset's rule library and update its config.

    The asset's ``config_json`` is rewritten to spreadsheet mode with the new
    ``rule_document`` entry and the asset is saved.
    """
    library = self._resolve_spreadsheet_rule_library(asset)
    stored = self.spreadsheet_manager.store_rule_library_spreadsheet(
        library=library,
        file_name=file_name,
        content=content,
        actor_name=actor,
        source=source,
    )

    document_entry = {
        **self.spreadsheet_manager.build_rule_document_config(
            stored,
            asset_version="current",
        ),
        "storage_key": stored.storage_key,
    }

    updated_config = dict(asset.config_json or {})
    updated_config["detail_mode"] = "spreadsheet"
    # Keep an existing tag; default to the finance-rule tag when blank.
    updated_config["tag"] = str(updated_config.get("tag") or "财务规则").strip() or "财务规则"
    updated_config["rule_library"] = library
    updated_config["rule_document"] = document_entry

    asset.config_json = updated_config
    self.repository.save_asset(asset)
    return stored
@staticmethod
def _resolve_spreadsheet_rule_library(asset: AgentAsset) -> str:
    """Return the asset's rule library, defaulting to the finance library.

    A missing or unknown ``rule_library`` value yields
    ``FINANCE_RULES_LIBRARY`` instead of raising.
    """
    settings = dict(asset.config_json or {})
    candidate = str(settings.get("rule_library") or FINANCE_RULES_LIBRARY).strip()
    return candidate if candidate in RULE_LIBRARY_NAMES else FINANCE_RULES_LIBRARY
@staticmethod
def _resolve_default_spreadsheet_file_name(asset: AgentAsset) -> str:
    """Return the default workbook file name for an asset.

    The two built-in company rules map to their fixed file names. Any other
    asset uses the final path component of its name with ``.xlsx`` appended
    when missing. A blank name (including whitespace-only) falls back to
    ``规则表`` so the result always has a stem.
    """
    if asset.code == COMPANY_TRAVEL_EXPENSE_RULE_CODE:
        return COMPANY_TRAVEL_EXPENSE_RULE_FILENAME
    if asset.code == COMPANY_COMMUNICATION_EXPENSE_RULE_CODE:
        return COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME

    # Strip before applying the default: "   " used to survive the `or` and
    # end up as the stem-less file name ".xlsx".
    fallback = Path(str(asset.name or "").strip()).name or "规则表"
    return fallback if fallback.lower().endswith(".xlsx") else f"{fallback}.xlsx"
def _load_spreadsheet_for_compare(self, metadata: RuleSpreadsheetMeta):
    """Load the stored workbook described by ``metadata`` with openpyxl.

    The file is read into memory first and opened with ``read_only=False``
    and ``data_only=False``.

    Raises:
        FileNotFoundError: if the stored file does not exist.
    """
    from io import BytesIO
    from openpyxl import load_workbook

    workbook_path = self.spreadsheet_manager.resolve_storage_path(metadata.storage_key)
    if not workbook_path.exists():
        raise FileNotFoundError(metadata.file_name)

    buffer = BytesIO(workbook_path.read_bytes())
    return load_workbook(buffer, read_only=False, data_only=False)
def _collect_workbook_changes_from_content(
    self,
    base_metadata: RuleSpreadsheetMeta,
    target_content: bytes,
) -> tuple[list[AgentAssetSpreadsheetDiffSheetRead], list[AgentAssetSpreadsheetDiffCellRead]]:
    """Diff the stored base workbook against in-memory workbook bytes.

    Returns the ``(sheet_changes, cell_changes)`` pair produced by
    ``_collect_workbook_changes``.
    """
    from io import BytesIO
    from openpyxl import load_workbook

    stored_workbook = self._load_spreadsheet_for_compare(base_metadata)
    incoming_workbook = load_workbook(
        BytesIO(target_content),
        read_only=False,
        data_only=False,
    )
    return self._collect_workbook_changes(stored_workbook, incoming_workbook)
def _collect_workbook_changes(
    self, base_workbook, target_workbook
) -> tuple[list[AgentAssetSpreadsheetDiffSheetRead], list[AgentAssetSpreadsheetDiffCellRead]]:
    """Compare two workbooks sheet by sheet and cell by cell.

    Returns ``(sheet_changes, cell_changes)``. Sheet changes list added
    sheets, then removed sheets, then sheets that contain changed cells (each
    group sorted by name). Cells are compared by value over the larger of the
    two used ranges.
    """
    old_names = set(base_workbook.sheetnames)
    new_names = set(target_workbook.sheetnames)

    sheet_changes: list[AgentAssetSpreadsheetDiffSheetRead] = [
        AgentAssetSpreadsheetDiffSheetRead(sheet_name=name, change_type="added")
        for name in sorted(new_names - old_names)
    ]
    sheet_changes += [
        AgentAssetSpreadsheetDiffSheetRead(sheet_name=name, change_type="removed")
        for name in sorted(old_names - new_names)
    ]

    def _classify(before, after) -> str:
        # An empty "before" means the value appeared; an empty "after" means
        # it was cleared; anything else is a modification.
        if before in (None, ""):
            return "added"
        if after in (None, ""):
            return "removed"
        return "modified"

    cell_changes: list[AgentAssetSpreadsheetDiffCellRead] = []
    for name in sorted(old_names & new_names):
        old_sheet = base_workbook[name]
        new_sheet = target_workbook[name]
        last_row = max(old_sheet.max_row, new_sheet.max_row)
        last_column = max(old_sheet.max_column, new_sheet.max_column)

        for row in range(1, last_row + 1):
            for column in range(1, last_column + 1):
                old_value = old_sheet.cell(row=row, column=column).value
                new_cell = new_sheet.cell(row=row, column=column)
                new_value = new_cell.value
                if old_value == new_value:
                    continue
                cell_changes.append(
                    AgentAssetSpreadsheetDiffCellRead(
                        sheet_name=name,
                        cell=new_cell.coordinate,
                        change_type=_classify(old_value, new_value),
                        before_value=old_value,
                        after_value=new_value,
                    )
                )

    touched_sheets = sorted({change.sheet_name for change in cell_changes})
    sheet_changes.extend(
        AgentAssetSpreadsheetDiffSheetRead(sheet_name=name, change_type="modified")
        for name in touched_sheets
    )
    return sheet_changes, cell_changes
@staticmethod
def _count_changed_sheets(
    sheet_changes: list[AgentAssetSpreadsheetDiffSheetRead],
    cell_changes: list[AgentAssetSpreadsheetDiffCellRead],
) -> int:
    """Count the distinct sheet names mentioned by either change list."""
    touched: set[str] = set()
    for change in (*sheet_changes, *cell_changes):
        touched.add(change.sheet_name)
    return len(touched)
@staticmethod
def _build_spreadsheet_change_summary(
    sheet_changes: list[AgentAssetSpreadsheetDiffSheetRead],
    cell_changes: list[AgentAssetSpreadsheetDiffCellRead],
) -> str:
    """Build the one-line, human-readable summary stored with an edit.

    The summary names up to three affected sheets (sorted, separated by
    "、", followed by " 等" when more exist) and either the number of changed
    cells or a note that only the sheet structure changed.
    """
    sheet_names = sorted(
        {item.sheet_name for item in sheet_changes}
        | {item.sheet_name for item in cell_changes}
    )
    if not sheet_names:
        return "文件内容已保存,未发现单元格级差异。"

    # Names used to be concatenated with no separator, the "more than three"
    # case was a no-op, and the opening parenthesis was never closed.
    preview = "、".join(sheet_names[:3])
    if len(sheet_names) > 3:
        preview = f"{preview} 等"
    sheet_text = f"涉及 {len(sheet_names)} 个工作表({preview})"

    if cell_changes:
        return f"{sheet_text},共 {len(cell_changes)} 处单元格改动。"
    return f"{sheet_text},工作表结构发生变化。"

View File

@@ -0,0 +1,132 @@
from __future__ import annotations
from app.core.agent_enums import AgentReviewStatus
from app.schemas.agent_asset import (
AgentAssetSpreadsheetChangeRecordRead,
AgentAssetSpreadsheetDiffCellRead,
AgentAssetSpreadsheetDiffSheetRead,
AgentAssetVersionTimelineItemRead,
)
class AgentAssetTimelineMixin:
def list_version_timeline(self, asset_id: str) -> list[AgentAssetVersionTimelineItemRead]:
    """Return the asset's version history as events sorted by time.

    Events come from three sources: version records (created / restored),
    review records (submitted / approved / rejected) and
    ``activate_agent_asset`` audit entries (published).

    Raises:
        LookupError: if the asset does not exist.
    """
    self._ensure_ready()
    asset = self.repository.get(asset_id)
    if asset is None:
        raise LookupError("Asset not found")

    review_event_types = {
        AgentReviewStatus.PENDING.value: "submitted",
        AgentReviewStatus.APPROVED.value: "approved",
        AgentReviewStatus.REJECTED.value: "rejected",
    }
    review_titles = {
        "submitted": "提交审核",
        "approved": "审核通过",
        "rejected": "审核驳回",
    }
    timeline: list[AgentAssetVersionTimelineItemRead] = []

    # 1) Version records. A change note in the restore format marks the
    #    version as restored from an earlier one.
    for record in self.repository.list_versions(asset_id):
        restored_from = self._extract_restore_source_version(record.change_note)
        timeline.append(
            AgentAssetVersionTimelineItemRead(
                event_type="restored" if restored_from else "created",
                version=record.version,
                actor=record.created_by,
                event_time=record.created_at,
                title="恢复生成工作稿" if restored_from else "创建工作版本",
                description=record.change_note or "生成新版本",
                note=record.change_note,
                source_version=restored_from,
            )
        )

    # 2) Review records; unknown statuses become a generic "reviewed" event.
    for review in self.repository.list_reviews(asset_id):
        kind = review_event_types.get(review.review_status, "reviewed")
        timeline.append(
            AgentAssetVersionTimelineItemRead(
                event_type=kind,
                version=review.version,
                actor=review.reviewer,
                event_time=review.reviewed_at or review.created_at,
                title=review_titles.get(kind, "审核处理"),
                description=review.review_note or "",
                note=review.review_note,
            )
        )

    # 3) Activation audit entries that name a version.
    activation_logs = self.audit_service.repository.list(
        resource_type=asset.asset_type,
        resource_id=asset.id,
        limit=200,
    )
    for entry in activation_logs:
        if entry.action != "activate_agent_asset":
            continue
        details = entry.after_json or {}
        published = str(
            details.get("published_version")
            or details.get("current_version")
            or ""
        ).strip()
        if published:
            timeline.append(
                AgentAssetVersionTimelineItemRead(
                    event_type="published",
                    version=published,
                    actor=entry.actor,
                    event_time=entry.created_at,
                    title="正式上线",
                    description="该版本已切换为线上正式版本。",
                )
            )

    timeline.sort(key=lambda item: item.event_time)
    return timeline
def list_spreadsheet_change_records(
    self,
    asset_id: str,
    *,
    limit: int = 30,
) -> list[AgentAssetSpreadsheetChangeRecordRead]:
    """Return recent workbook edits of a spreadsheet rule from the audit log.

    ``limit`` is clamped to the range 1..30. Each record is rebuilt from the
    ``after_json`` of an ``edit_rule_spreadsheet`` audit entry.
    """
    self._ensure_ready()
    asset = self._require_spreadsheet_rule(asset_id)

    bounded_limit = max(1, min(limit, 30))
    entries = self.audit_service.repository.list(
        resource_type=asset.asset_type,
        resource_id=asset.id,
        action="edit_rule_spreadsheet",
        limit=bounded_limit,
    )

    records: list[AgentAssetSpreadsheetChangeRecordRead] = []
    for entry in entries:
        details = entry.after_json or {}
        records.append(
            AgentAssetSpreadsheetChangeRecordRead(
                id=entry.id,
                actor=entry.actor,
                changed_at=entry.created_at,
                summary=str(details.get("summary") or "表格内容已保存。"),
                sheet_changes=[
                    AgentAssetSpreadsheetDiffSheetRead.model_validate(raw)
                    for raw in (details.get("sheet_changes") or [])
                ],
                cell_changes=[
                    AgentAssetSpreadsheetDiffCellRead.model_validate(raw)
                    for raw in (details.get("cell_changes") or [])
                ],
                changed_sheet_count=int(details.get("changed_sheet_count") or 0),
                changed_cell_count=int(details.get("changed_cell_count") or 0),
            )
        )
    return records
@staticmethod
def _extract_restore_source_version(change_note: str | None) -> str | None:
    """Extract the source version from a restore-style change note.

    A matching note starts with "基于历史版本 " and contains
    " 恢复生成工作稿"; the text between the two is the version. Returns
    ``None`` for any other note or when that text is blank.
    """
    prefix = "基于历史版本 "
    suffix = " 恢复生成工作稿"

    note = str(change_note or "").strip()
    if not (note.startswith(prefix) and suffix in note):
        return None

    remainder = note[len(prefix):]
    source_version, _, _ = remainder.partition(suffix)
    return source_version.strip() or None

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,322 @@
from __future__ import annotations
import hashlib
import json
from datetime import UTC, date, datetime
from decimal import Decimal
from pathlib import Path
from sqlalchemy import inspect, select, text
from app.core.agent_enums import (
AgentAssetContentType,
AgentAssetDomain,
AgentAssetStatus,
AgentAssetType,
AgentName,
AgentPermissionLevel,
AgentReviewStatus,
AgentRunSource,
AgentRunStatus,
AgentToolType,
)
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.models.audit_log import AuditLog
from app.models.financial_record import (
AccountsPayableRecord,
AccountsReceivableRecord,
ExpenseClaim,
ExpenseClaimItem,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import (
AgentAssetSpreadsheetManager,
COMPANY_COMMUNICATION_EXPENSE_RULE_CODE,
COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME,
COMPANY_TRAVEL_EXPENSE_RULE_CODE,
COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
FINANCE_RULES_LIBRARY,
RISK_RULES_LIBRARY,
)
from app.services.expense_rule_runtime import (
build_scene_submission_standard_markdown,
build_travel_risk_control_standard_markdown,
)
from app.services.agent_foundation_constants import (
ATTACHMENT_RULE_ASSET_CODE,
ATTACHMENT_RULE_RUNTIME_CONFIG,
COMPANY_COMMUNICATION_RULE_SCENARIO_JSON,
COMPANY_COMMUNICATION_RULE_VERSION,
COMPANY_TRAVEL_RULE_SCENARIO_JSON,
COMPANY_TRAVEL_RULE_VERSION,
DEMO_EXPENSE_CLAIM_SIGNATURES,
DEMO_PAYABLE_SIGNATURES,
DEMO_RECEIVABLE_SIGNATURES,
LEGACY_RULE_CODES,
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
)
from app.core.logging import get_logger
logger = get_logger("app.services.agent_foundation")
class AgentFoundationAssetHelperMixin:
    """Persistence helpers shared by the agent-foundation seeding code.

    Every method works through ``self.db``, which the composing class is
    expected to provide (used here as a SQLAlchemy session).
    """

    def _create_seed_asset(
        self,
        *,
        asset_type: str,
        code: str,
        name: str,
        description: str,
        domain: str,
        scenario_json: list[str],
        owner: str,
        reviewer: str,
        status: str,
        current_version: str,
        config_json: dict[str, object],
    ) -> AgentAsset:
        """Create one asset row, flush it, and return the new instance.

        ``working_version`` always starts at ``current_version``;
        ``published_version`` is only filled in when the asset is created
        with the active status.
        """
        is_active = status == AgentAssetStatus.ACTIVE.value
        published_version = current_version if is_active else None
        seeded = AgentAsset(
            asset_type=asset_type,
            code=code,
            name=name,
            description=description,
            domain=domain,
            scenario_json=scenario_json,
            owner=owner,
            reviewer=reviewer,
            status=status,
            current_version=current_version,
            published_version=published_version,
            working_version=current_version,
            config_json=config_json,
        )
        self.db.add(seeded)
        # Flush right away so the row is sent to the database before the
        # instance is handed back to the caller.
        self.db.flush()
        return seeded

    def _ensure_asset_version(
        self,
        asset: AgentAsset,
        *,
        version: str,
        content: str,
        content_type: str,
        change_note: str,
        created_by: str,
    ) -> None:
        """Add a version snapshot unless (asset, version) already exists."""
        lookup = select(AgentAssetVersion).where(
            AgentAssetVersion.asset_id == asset.id,
            AgentAssetVersion.version == version,
        )
        if self.db.scalar(lookup) is None:
            self.db.add(
                AgentAssetVersion(
                    asset_id=asset.id,
                    version=version,
                    content=content,
                    content_type=content_type,
                    change_note=change_note,
                    created_by=created_by,
                )
            )

    def _ensure_asset_review(
        self,
        asset: AgentAsset,
        *,
        version: str,
        reviewer: str,
        review_status: str,
        review_note: str,
        reviewed_at: datetime | None,
    ) -> None:
        """Add a review record unless one with the same status already exists.

        Existing records are matched on asset, version and review status;
        the reviewer and note are not part of the lookup.
        """
        lookup = select(AgentAssetReview).where(
            AgentAssetReview.asset_id == asset.id,
            AgentAssetReview.version == version,
            AgentAssetReview.review_status == review_status,
        )
        if self.db.scalar(lookup) is None:
            self.db.add(
                AgentAssetReview(
                    asset_id=asset.id,
                    version=version,
                    reviewer=reviewer,
                    review_status=review_status,
                    review_note=review_note,
                    reviewed_at=reviewed_at,
                )
            )

    def _remove_legacy_rule_assets(self) -> None:
        """Delete assets whose code is a legacy rule code, then matching audit logs.

        Audit logs are matched by ``resource_id`` against the same code list.
        """
        legacy_asset_query = select(AgentAsset).where(AgentAsset.code.in_(LEGACY_RULE_CODES))
        legacy_log_query = select(AuditLog).where(AuditLog.resource_id.in_(LEGACY_RULE_CODES))
        # Assets are handled first; the audit-log query only runs after the
        # asset deletions have been registered on the session.
        for query in (legacy_asset_query, legacy_log_query):
            for row in list(self.db.scalars(query).all()):
                self.db.delete(row)

    def _ensure_agent_asset_schema(self) -> None:
        """Add the version-tracking columns to ``agent_assets`` and backfill them.

        Does nothing when the table does not exist. The backfill UPDATE runs
        on every call; a commit is issued here only when at least one column
        had to be added.
        """
        inspector = inspect(self.db.get_bind())
        if "agent_assets" not in inspector.get_table_names():
            return

        present_columns = {info["name"] for info in inspector.get_columns("agent_assets")}
        column_migrations = (
            (
                "published_version",
                "ALTER TABLE agent_assets ADD COLUMN published_version VARCHAR(30)",
            ),
            (
                "working_version",
                "ALTER TABLE agent_assets ADD COLUMN working_version VARCHAR(30)",
            ),
        )
        pending_ddl: list[str] = [
            ddl for column, ddl in column_migrations if column not in present_columns
        ]
        for ddl in pending_ddl:
            self.db.execute(text(ddl))

        # Backfill: working_version falls back to current_version, and
        # published_version is derived from current_version for active rows
        # that do not have one yet.
        backfill_sql = (
            "UPDATE agent_assets "
            "SET working_version = COALESCE(working_version, current_version), "
            "published_version = CASE "
            "WHEN published_version IS NOT NULL THEN published_version "
            "WHEN status = 'active' THEN current_version "
            "ELSE published_version END"
        )
        self.db.execute(text(backfill_sql))

        if pending_ddl:
            self.db.commit()

View File

@@ -0,0 +1,599 @@
from __future__ import annotations
import hashlib
import json
from datetime import UTC, date, datetime
from decimal import Decimal
from pathlib import Path
from sqlalchemy import inspect, select, text
from app.core.agent_enums import (
AgentAssetContentType,
AgentAssetDomain,
AgentAssetStatus,
AgentAssetType,
AgentName,
AgentPermissionLevel,
AgentReviewStatus,
AgentRunSource,
AgentRunStatus,
AgentToolType,
)
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.models.audit_log import AuditLog
from app.models.financial_record import (
AccountsPayableRecord,
AccountsReceivableRecord,
ExpenseClaim,
ExpenseClaimItem,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import (
AgentAssetSpreadsheetManager,
COMPANY_COMMUNICATION_EXPENSE_RULE_CODE,
COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME,
COMPANY_TRAVEL_EXPENSE_RULE_CODE,
COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
FINANCE_RULES_LIBRARY,
RISK_RULES_LIBRARY,
)
from app.services.expense_rule_runtime import (
build_scene_submission_standard_markdown,
build_travel_risk_control_standard_markdown,
)
from app.services.agent_foundation_constants import (
ATTACHMENT_RULE_ASSET_CODE,
ATTACHMENT_RULE_RUNTIME_CONFIG,
COMPANY_COMMUNICATION_RULE_SCENARIO_JSON,
COMPANY_COMMUNICATION_RULE_VERSION,
COMPANY_TRAVEL_RULE_SCENARIO_JSON,
COMPANY_TRAVEL_RULE_VERSION,
DEMO_EXPENSE_CLAIM_SIGNATURES,
DEMO_PAYABLE_SIGNATURES,
DEMO_RECEIVABLE_SIGNATURES,
LEGACY_RULE_CODES,
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
)
from app.core.logging import get_logger
logger = get_logger("app.services.agent_foundation")
class AgentFoundationAssetSeedMixin:
    """Seeds the built-in agent assets (rules, skills, MCP services, tasks).

    Relies on ``self.db`` and on several helper methods that are not defined
    in this class (``_top_up_agent_assets``, ``_build_platform_risk_seed_assets``,
    the spreadsheet-seed helpers and the markdown/JSON content builders); they
    are expected to be supplied by the composing class.
    """

    def _seed_agent_assets(self) -> None:
        """Create the initial asset catalogue, or top up an existing one.

        When any asset code already exists, the work is delegated to
        ``_top_up_agent_assets`` and nothing else happens here. Otherwise the
        assets are added and flushed, the two spreadsheet rules get their
        spreadsheet seed, and version snapshots and review records are added.
        This method itself never calls ``commit``.
        """
        existing_codes = set(self.db.scalars(select(AgentAsset.code)).all())
        if existing_codes:
            self._top_up_agent_assets(existing_codes)
            return

        # ---- Rule assets -------------------------------------------------
        attachment_rule = AgentAsset(
            asset_type=AgentAssetType.RULE.value,
            code=ATTACHMENT_RULE_ASSET_CODE,
            name="报销附件与单据完整性规则",
            description="统一定义报销提交时的附件数量、票据类型和补件处理口径,作为上线前待审核规则。",
            domain=AgentAssetDomain.EXPENSE.value,
            scenario_json=["expense", "risk_check", "attachment_policy", "invoice_anomaly"],
            owner="财务制度管理组",
            reviewer="高嘉禾",
            status=AgentAssetStatus.REVIEW.value,
            current_version="v1.0.0",
            # Still under review, so nothing is published yet.
            published_version=None,
            working_version="v1.0.0",
            config_json={
                "severity": "high",
                "enabled": False,
                "runtime_kind": "policy_rule_draft",
                "rule_template_key": "attachment_requirement_v1",
                "rule_template_label": "附件要求模板",
                "runtime_rule": ATTACHMENT_RULE_RUNTIME_CONFIG,
            },
        )
        scene_submission_rule = AgentAsset(
            asset_type=AgentAssetType.RULE.value,
            code="rule.expense.scene_submission_standard",
            name="报销场景提交与附件标准",
            description="统一定义各报销场景的必填字段、附件类型要求和金额阈值。",
            domain=AgentAssetDomain.EXPENSE.value,
            scenario_json=["expense", "risk_check", "scene_policy", "attachment_policy"],
            owner="费用运营组",
            reviewer="顾承宇",
            status=AgentAssetStatus.ACTIVE.value,
            current_version="v1.0.0",
            published_version="v1.0.0",
            working_version="v1.0.0",
            config_json={
                "severity": "high",
                "enabled": True,
                "runtime_kind": "scene_matrix",
                "rule_template_label": "系统内置场景矩阵规则",
            },
        )
        travel_policy_rule = AgentAsset(
            asset_type=AgentAssetType.RULE.value,
            code="rule.expense.travel_risk_control_standard",
            name="差旅报销风险管控制度",
            description="统一定义差旅报销的行程闭环、酒店地点一致性、职级差标和风险处置口径。",
            domain=AgentAssetDomain.EXPENSE.value,
            scenario_json=["expense", "risk_check", "travel_policy", "travel_standard"],
            owner="风控与审计部",
            reviewer="顾承宇",
            status=AgentAssetStatus.ACTIVE.value,
            current_version="v1.1.0",
            published_version="v1.1.0",
            working_version="v1.1.0",
            config_json={
                "severity": "high",
                "enabled": True,
                "block_on_high_risk": True,
                "warning_on_medium_risk": True,
                "source_doc": "document/development/risks/travel-risk-control-standard.md",
                "runtime_kind": "travel_policy",
                "rule_template_key": "travel_standard_v1",
                "rule_template_label": "差旅标准模板",
            },
        )
        company_travel_rule = AgentAsset(
            asset_type=AgentAssetType.RULE.value,
            code=COMPANY_TRAVEL_EXPENSE_RULE_CODE,
            name="公司差旅费报销规则",
            description="通过 Excel 明细表维护差旅费报销标准、票据要求和审批口径。",
            domain=AgentAssetDomain.EXPENSE.value,
            scenario_json=list(COMPANY_TRAVEL_RULE_SCENARIO_JSON),
            owner="财务制度管理组",
            reviewer="顾承宇",
            status=AgentAssetStatus.ACTIVE.value,
            current_version=COMPANY_TRAVEL_RULE_VERSION,
            published_version=COMPANY_TRAVEL_RULE_VERSION,
            working_version=COMPANY_TRAVEL_RULE_VERSION,
            config_json={
                "severity": "medium",
                "enabled": True,
                "tag": "财务规则",
                "detail_mode": "spreadsheet",
                "rule_library": FINANCE_RULES_LIBRARY,
                "scenario_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0],
                "ai_review_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0],
                "rule_template_label": "差旅报销 Excel 模板",
            },
        )
        platform_risk_assets = self._build_platform_risk_seed_assets()
        company_communication_rule = AgentAsset(
            asset_type=AgentAssetType.RULE.value,
            code=COMPANY_COMMUNICATION_EXPENSE_RULE_CODE,
            name="公司通信费报销规则",
            description="通过 Excel 明细表维护员工通信费报销标准、专项补充口径和审批要求。",
            domain=AgentAssetDomain.EXPENSE.value,
            scenario_json=list(COMPANY_COMMUNICATION_RULE_SCENARIO_JSON),
            owner="财务制度管理组",
            reviewer="顾承宇",
            status=AgentAssetStatus.ACTIVE.value,
            current_version=COMPANY_COMMUNICATION_RULE_VERSION,
            published_version=COMPANY_COMMUNICATION_RULE_VERSION,
            working_version=COMPANY_COMMUNICATION_RULE_VERSION,
            config_json={
                "severity": "medium",
                "enabled": True,
                "tag": "财务规则",
                "detail_mode": "spreadsheet",
                "rule_library": FINANCE_RULES_LIBRARY,
                "scenario_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0],
                "ai_review_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0],
                "rule_template_label": "通信费报销 Excel 模板",
            },
        )

        # ---- Skill assets ------------------------------------------------
        skill_expense_asset = AgentAsset(
            asset_type=AgentAssetType.SKILL.value,
            code="skill.expense.summary_lookup",
            name="报销汇总查询技能",
            description="根据时间、员工和部门汇总报销金额与单据数量。",
            domain=AgentAssetDomain.EXPENSE.value,
            scenario_json=["expense", "query", "summary"],
            owner="平台研发组",
            reviewer="陈硕",
            status=AgentAssetStatus.ACTIVE.value,
            current_version="v1.0.0",
            published_version="v1.0.0",
            working_version="v1.0.0",
            config_json={"input_schema": ["time_range", "employee", "department"]},
        )
        skill_ar_asset = AgentAsset(
            asset_type=AgentAssetType.SKILL.value,
            code="skill.ar.aging_summary",
            name="应收账龄汇总技能",
            description="按客户、账龄和逾期状态汇总应收风险分布。",
            domain=AgentAssetDomain.AR.value,
            scenario_json=["accounts_receivable", "query", "aging_summary"],
            owner="平台研发组",
            reviewer="陈硕",
            status=AgentAssetStatus.ACTIVE.value,
            current_version="v1.0.0",
            published_version="v1.0.0",
            working_version="v1.0.0",
            config_json={"input_schema": ["customer", "aging_bucket", "status"]},
        )

        # ---- MCP (mock service) assets -----------------------------------
        invoice_mcp_asset = AgentAsset(
            asset_type=AgentAssetType.MCP.value,
            code="mcp.invoice.verify_mock",
            name="发票验真 Mock 服务",
            description="模拟发票验真、发票状态查询和异常降级说明。",
            domain=AgentAssetDomain.SYSTEM.value,
            scenario_json=["expense", "invoice_validation"],
            owner="平台研发组",
            reviewer="周悦宁",
            status=AgentAssetStatus.ACTIVE.value,
            current_version="v1.0.0",
            published_version="v1.0.0",
            working_version="v1.0.0",
            config_json={"endpoint": "mock://invoice/verify", "timeout_ms": 1200},
        )
        ledger_mcp_asset = AgentAsset(
            asset_type=AgentAssetType.MCP.value,
            code="mcp.ledger.snapshot_mock",
            name="总账快照 Mock 服务",
            description="模拟返回应收、应付和费用汇总快照,供 Agent 查询和巡检。",
            domain=AgentAssetDomain.SYSTEM.value,
            scenario_json=["expense", "accounts_receivable", "accounts_payable"],
            owner="平台研发组",
            reviewer="周悦宁",
            status=AgentAssetStatus.ACTIVE.value,
            current_version="v1.0.0",
            published_version="v1.0.0",
            working_version="v1.0.0",
            config_json={"endpoint": "mock://ledger/snapshot", "timeout_ms": 1500},
        )

        # ---- Scheduled task assets ---------------------------------------
        task_asset = AgentAsset(
            asset_type=AgentAssetType.TASK.value,
            code="task.hermes.daily_risk_scan",
            name="Hermes 每日风险巡检",
            description="每天早上巡检重复报销、金额超标、逾期应收和异常付款。",
            domain=AgentAssetDomain.SYSTEM.value,
            scenario_json=["schedule", "risk_check"],
            owner="风控与审计部",
            reviewer="顾承宇",
            status=AgentAssetStatus.ACTIVE.value,
            current_version="v1.0.0",
            published_version="v1.0.0",
            working_version="v1.0.0",
            config_json={"cron": "0 9 * * *", "agent": AgentName.HERMES.value},
        )
        ar_summary_task = AgentAsset(
            asset_type=AgentAssetType.TASK.value,
            code="task.hermes.weekly_ar_summary",
            name="Hermes 每周应收账龄汇总",
            description="每周汇总逾期应收、账龄分布和客户风险变化。",
            domain=AgentAssetDomain.SYSTEM.value,
            scenario_json=["schedule", "accounts_receivable", "summary"],
            owner="风控与审计部",
            reviewer="顾承宇",
            status=AgentAssetStatus.ACTIVE.value,
            current_version="v1.0.0",
            published_version="v1.0.0",
            working_version="v1.0.0",
            config_json={"cron": "0 10 * * 1", "agent": AgentName.HERMES.value},
        )
        rule_digest_task = AgentAsset(
            asset_type=AgentAssetType.TASK.value,
            code="task.hermes.rule_review_digest",
            name="Hermes 规则待审摘要",
            description="每天汇总待审规则、待补样例和被拒规则修订建议。",
            domain=AgentAssetDomain.SYSTEM.value,
            scenario_json=["schedule", "rule_center", "review_digest"],
            owner="风控与审计部",
            reviewer="顾承宇",
            status=AgentAssetStatus.ACTIVE.value,
            current_version="v1.0.0",
            published_version="v1.0.0",
            working_version="v1.0.0",
            config_json={"cron": "0 18 * * *", "agent": AgentName.HERMES.value},
        )
        knowledge_index_task = AgentAsset(
            asset_type=AgentAssetType.TASK.value,
            code="task.hermes.knowledge_index_sync",
            # NOTE(review): name/description previously held '?' placeholders
            # left by a lossy encoding round-trip. The wording below is a
            # reconstruction based on this task's own version payload
            # (folder, changed_only, index_engine) — confirm with the owner.
            name="Hermes 制度知识索引同步",
            description="每天将报销制度目录下有变更的文档增量同步到 LightRAG 知识索引。",
            domain=AgentAssetDomain.SYSTEM.value,
            scenario_json=["schedule", "knowledge", "rule_center"],
            owner="财务制度管理组",
            reviewer="顾承宇",
            status=AgentAssetStatus.ACTIVE.value,
            current_version="v1.0.0",
            published_version="v1.0.0",
            working_version="v1.0.0",
            config_json={"cron": "0 0 * * *", "agent": AgentName.HERMES.value},
        )

        self.db.add_all(
            [
                attachment_rule,
                scene_submission_rule,
                travel_policy_rule,
                *platform_risk_assets,
                company_travel_rule,
                company_communication_rule,
                skill_expense_asset,
                skill_ar_asset,
                invoice_mcp_asset,
                ledger_mcp_asset,
                task_asset,
                ar_summary_task,
                rule_digest_task,
                knowledge_index_task,
            ]
        )
        # Flush before the spreadsheet seeding below, which receives the
        # asset instances added above.
        self.db.flush()

        company_travel_rule_meta = self._ensure_company_travel_rule_spreadsheet_seed(
            company_travel_rule,
            version=COMPANY_TRAVEL_RULE_VERSION,
            actor_name="系统初始化",
        )
        company_communication_rule_meta = self._ensure_company_communication_rule_spreadsheet_seed(
            company_communication_rule,
            version=COMPANY_COMMUNICATION_RULE_VERSION,
            actor_name="系统初始化",
        )

        # ---- Version snapshots -------------------------------------------
        self.db.add_all(
            [
                AgentAssetVersion(
                    asset=attachment_rule,
                    version="v0.9.0",
                    content=self._attachment_submission_requirement_markdown(
                        version_note="首版附件完整性规则草稿,覆盖基础票据与补件口径。",
                        include_review_note=True,
                    ),
                    content_type=AgentAssetContentType.MARKDOWN.value,
                    change_note="首版草稿。",
                    created_by="高嘉禾",
                ),
                AgentAssetVersion(
                    asset=attachment_rule,
                    version="v1.0.0",
                    content=self._attachment_submission_requirement_markdown(
                        version_note="补充票据缺失、收据替代和差旅等效凭证口径,待审核。",
                        include_review_note=True,
                    ),
                    content_type=AgentAssetContentType.MARKDOWN.value,
                    change_note="补充票据替代与差旅等效凭证口径,待审核。",
                    created_by="高嘉禾",
                ),
                AgentAssetVersion(
                    asset=scene_submission_rule,
                    version="v1.0.0",
                    content=self._scene_submission_standard_markdown(),
                    content_type=AgentAssetContentType.MARKDOWN.value,
                    change_note="首版报销场景提交标准,覆盖附件类型、必填字段和金额阈值。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=travel_policy_rule,
                    version="v1.0.0",
                    content=self._travel_risk_control_standard_markdown(version="v1.0.0"),
                    content_type=AgentAssetContentType.MARKDOWN.value,
                    change_note="首版差旅制度执行规则,覆盖行程闭环与基础差标校验。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=travel_policy_rule,
                    version="v1.1.0",
                    content=self._travel_risk_control_standard_markdown(version="v1.1.0"),
                    content_type=AgentAssetContentType.MARKDOWN.value,
                    change_note="补充可执行规则块,供审核引擎直接消费差旅制度标准。",
                    created_by="系统初始化",
                ),
                *[
                    AgentAssetVersion(
                        asset=asset,
                        version="v1.0.0",
                        content=self._platform_risk_rule_markdown(asset),
                        content_type=AgentAssetContentType.MARKDOWN.value,
                        change_note=f"平台通用风险规则:{asset.name}",
                        created_by="系统初始化",
                    )
                    for asset in platform_risk_assets
                ],
                AgentAssetVersion(
                    asset=company_travel_rule,
                    version=COMPANY_TRAVEL_RULE_VERSION,
                    content=AgentAssetSpreadsheetManager.build_version_markdown(
                        rule_name=company_travel_rule.name,
                        version=COMPANY_TRAVEL_RULE_VERSION,
                        metadata=company_travel_rule_meta,
                    ),
                    content_type=AgentAssetContentType.MARKDOWN.value,
                    change_note="初始化差旅费报销 Excel 规则表。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=company_communication_rule,
                    version=COMPANY_COMMUNICATION_RULE_VERSION,
                    content=AgentAssetSpreadsheetManager.build_version_markdown(
                        rule_name=company_communication_rule.name,
                        version=COMPANY_COMMUNICATION_RULE_VERSION,
                        metadata=company_communication_rule_meta,
                    ),
                    content_type=AgentAssetContentType.MARKDOWN.value,
                    change_note="初始化通信费报销 Excel 规则表。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=skill_expense_asset,
                    version="v1.0.0",
                    content=self._json_content(
                        {
                            "inputs": ["time_range", "employee", "department"],
                            "outputs": ["total_amount", "claim_count"],
                            "dependencies": ["database.expense_claims"],
                        }
                    ),
                    content_type=AgentAssetContentType.JSON.value,
                    change_note="初始化技能快照。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=skill_ar_asset,
                    version="v1.0.0",
                    content=self._json_content(
                        {
                            "inputs": ["customer", "aging_bucket", "status"],
                            "outputs": ["receivable_total", "overdue_total", "customer_count"],
                            "dependencies": ["database.accounts_receivable"],
                        }
                    ),
                    content_type=AgentAssetContentType.JSON.value,
                    change_note="初始化应收账龄技能快照。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=invoice_mcp_asset,
                    version="v1.0.0",
                    content=self._json_content(
                        {
                            "service_type": "mock",
                            "auth_mode": "none",
                            "degrade_strategy": "return_stub_with_warning",
                        }
                    ),
                    content_type=AgentAssetContentType.JSON.value,
                    change_note="初始化 MCP 快照。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=ledger_mcp_asset,
                    version="v1.0.0",
                    content=self._json_content(
                        {
                            "service_type": "mock",
                            "auth_mode": "service_account",
                            "degrade_strategy": "return_cached_snapshot_with_warning",
                        }
                    ),
                    content_type=AgentAssetContentType.JSON.value,
                    change_note="初始化总账快照 MCP。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=task_asset,
                    version="v1.0.0",
                    content=self._json_content(
                        {
                            "task_type": "daily_risk_scan",
                            "schedule": "0 9 * * *",
                            "target_agent": AgentName.HERMES.value,
                        }
                    ),
                    content_type=AgentAssetContentType.JSON.value,
                    change_note="初始化任务快照。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=ar_summary_task,
                    version="v1.0.0",
                    content=self._json_content(
                        {
                            "task_type": "weekly_ar_summary",
                            "schedule": "0 10 * * 1",
                            "target_agent": AgentName.HERMES.value,
                        }
                    ),
                    content_type=AgentAssetContentType.JSON.value,
                    change_note="初始化应收账龄汇总任务。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=rule_digest_task,
                    version="v1.0.0",
                    content=self._json_content(
                        {
                            "task_type": "rule_review_digest",
                            "schedule": "0 18 * * *",
                            "target_agent": AgentName.HERMES.value,
                        }
                    ),
                    content_type=AgentAssetContentType.JSON.value,
                    change_note="初始化规则待审摘要任务。",
                    created_by="系统初始化",
                ),
                AgentAssetVersion(
                    asset=knowledge_index_task,
                    version="v1.0.0",
                    content=self._json_content(
                        {
                            "task_type": "knowledge_index_sync",
                            "schedule": "0 0 * * *",
                            "target_agent": AgentName.HERMES.value,
                            "folder": "报销制度",
                            "changed_only": True,
                            "index_engine": "lightrag",
                        }
                    ),
                    content_type=AgentAssetContentType.JSON.value,
                    change_note="初始化制度知识与规则草稿形成任务。",
                    created_by="系统初始化",
                ),
            ]
        )

        # ---- Review records ----------------------------------------------
        # Only the five rule assets below receive a seeded review record.
        self.db.add_all(
            [
                AgentAssetReview(
                    asset=attachment_rule,
                    version="v1.0.0",
                    reviewer="高嘉禾",
                    review_status=AgentReviewStatus.PENDING.value,
                    review_note="等待制度管理员确认收据替代与补件时限口径。",
                    reviewed_at=None,
                ),
                AgentAssetReview(
                    asset=scene_submission_rule,
                    version="v1.0.0",
                    reviewer="顾承宇",
                    review_status=AgentReviewStatus.APPROVED.value,
                    review_note="可作为报销场景统一审核标准正式执行。",
                    reviewed_at=datetime.now(UTC),
                ),
                AgentAssetReview(
                    asset=travel_policy_rule,
                    version="v1.1.0",
                    reviewer="顾承宇",
                    review_status=AgentReviewStatus.APPROVED.value,
                    review_note="制度口径已确认,并已补充可执行配置供审核引擎读取。",
                    reviewed_at=datetime.now(UTC),
                ),
                AgentAssetReview(
                    asset=company_travel_rule,
                    version=COMPANY_TRAVEL_RULE_VERSION,
                    reviewer="顾承宇",
                    review_status=AgentReviewStatus.APPROVED.value,
                    review_note="首版 Excel 规则表已确认,可作为财务规则使用。",
                    reviewed_at=datetime.now(UTC),
                ),
                AgentAssetReview(
                    asset=company_communication_rule,
                    version=COMPANY_COMMUNICATION_RULE_VERSION,
                    reviewer="顾承宇",
                    review_status=AgentReviewStatus.APPROVED.value,
                    review_note="首版 Excel 规则表已确认,可作为财务规则使用。",
                    reviewed_at=datetime.now(UTC),
                ),
            ]
        )

View File

@@ -0,0 +1,667 @@
from __future__ import annotations
import hashlib
import json
from datetime import UTC, date, datetime
from decimal import Decimal
from pathlib import Path
from sqlalchemy import inspect, select, text
from app.core.agent_enums import (
AgentAssetContentType,
AgentAssetDomain,
AgentAssetStatus,
AgentAssetType,
AgentName,
AgentPermissionLevel,
AgentReviewStatus,
AgentRunSource,
AgentRunStatus,
AgentToolType,
)
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.models.audit_log import AuditLog
from app.models.financial_record import (
AccountsPayableRecord,
AccountsReceivableRecord,
ExpenseClaim,
ExpenseClaimItem,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import (
AgentAssetSpreadsheetManager,
COMPANY_COMMUNICATION_EXPENSE_RULE_CODE,
COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME,
COMPANY_TRAVEL_EXPENSE_RULE_CODE,
COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
FINANCE_RULES_LIBRARY,
RISK_RULES_LIBRARY,
)
from app.services.expense_rule_runtime import (
build_scene_submission_standard_markdown,
build_travel_risk_control_standard_markdown,
)
from app.services.agent_foundation_constants import (
ATTACHMENT_RULE_ASSET_CODE,
ATTACHMENT_RULE_RUNTIME_CONFIG,
COMPANY_COMMUNICATION_RULE_SCENARIO_JSON,
COMPANY_COMMUNICATION_RULE_VERSION,
COMPANY_TRAVEL_RULE_SCENARIO_JSON,
COMPANY_TRAVEL_RULE_VERSION,
DEMO_EXPENSE_CLAIM_SIGNATURES,
DEMO_PAYABLE_SIGNATURES,
DEMO_RECEIVABLE_SIGNATURES,
LEGACY_RULE_CODES,
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
)
from app.core.logging import get_logger
logger = get_logger("app.services.agent_foundation")
class AgentFoundationAssetTopUpMixin:
def _top_up_agent_assets(self, existing_codes: set[str]) -> None:
self._remove_legacy_rule_assets()
existing_codes = set(self.db.scalars(select(AgentAsset.code)).all())
attachment_rule = self.db.scalar(
select(AgentAsset).where(AgentAsset.code == ATTACHMENT_RULE_ASSET_CODE)
)
scene_submission_rule = self.db.scalar(
select(AgentAsset).where(AgentAsset.code == "rule.expense.scene_submission_standard")
)
travel_policy_rule = self.db.scalar(
select(AgentAsset).where(AgentAsset.code == "rule.expense.travel_risk_control_standard")
)
company_travel_rule = self.db.scalar(
select(AgentAsset).where(AgentAsset.code == COMPANY_TRAVEL_EXPENSE_RULE_CODE)
)
company_communication_rule = self.db.scalar(
select(AgentAsset).where(AgentAsset.code == COMPANY_COMMUNICATION_EXPENSE_RULE_CODE)
)
if ATTACHMENT_RULE_ASSET_CODE not in existing_codes:
attachment_rule = self._create_seed_asset(
asset_type=AgentAssetType.RULE.value,
code=ATTACHMENT_RULE_ASSET_CODE,
name="报销附件与单据完整性规则",
description="统一定义报销提交时的附件数量、票据类型和补件处理口径,作为上线前待审核规则。",
domain=AgentAssetDomain.EXPENSE.value,
scenario_json=["expense", "risk_check", "attachment_policy", "invoice_anomaly"],
owner="财务制度管理组",
reviewer="高嘉禾",
status=AgentAssetStatus.REVIEW.value,
current_version="v1.0.0",
config_json={
"severity": "high",
"enabled": False,
"runtime_kind": "policy_rule_draft",
"rule_template_key": "attachment_requirement_v1",
"rule_template_label": "附件要求模板",
"runtime_rule": ATTACHMENT_RULE_RUNTIME_CONFIG,
},
)
if attachment_rule is not None:
if not str(attachment_rule.current_version or "").strip():
attachment_rule.current_version = "v1.0.0"
if not str(attachment_rule.working_version or "").strip():
attachment_rule.working_version = attachment_rule.current_version
attachment_rule.status = attachment_rule.status or AgentAssetStatus.REVIEW.value
attachment_rule.description = (
"统一定义报销提交时的附件数量、票据类型和补件处理口径,作为上线前待审核规则。"
)
attachment_rule.config_json = {
"severity": "high",
"enabled": False,
"runtime_kind": "policy_rule_draft",
"rule_template_key": "attachment_requirement_v1",
"rule_template_label": "附件要求模板",
"runtime_rule": ATTACHMENT_RULE_RUNTIME_CONFIG,
}
self._ensure_asset_version(
attachment_rule,
version="v0.9.0",
content=self._attachment_submission_requirement_markdown(
version_note="首版附件完整性规则草稿,覆盖基础票据与补件口径。",
include_review_note=True,
),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="首版草稿。",
created_by="高嘉禾",
)
self._ensure_asset_version(
attachment_rule,
version="v1.0.0",
content=self._attachment_submission_requirement_markdown(
version_note="补充票据缺失、收据替代和差旅等效凭证口径,待审核。",
include_review_note=True,
),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="补充票据替代与差旅等效凭证口径,待审核。",
created_by="高嘉禾",
)
self._ensure_asset_review(
attachment_rule,
version="v1.0.0",
reviewer="高嘉禾",
review_status=AgentReviewStatus.PENDING.value,
review_note="等待制度管理员确认收据替代与补件时限口径。",
reviewed_at=None,
)
if "rule.expense.scene_submission_standard" not in existing_codes:
scene_submission_rule = self._create_seed_asset(
asset_type=AgentAssetType.RULE.value,
code="rule.expense.scene_submission_standard",
name="报销场景提交与附件标准",
description="统一定义各报销场景的必填字段、附件类型要求和金额阈值。",
domain=AgentAssetDomain.EXPENSE.value,
scenario_json=["expense", "risk_check", "scene_policy", "attachment_policy"],
owner="费用运营组",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
config_json={
"severity": "high",
"enabled": True,
"runtime_kind": "scene_matrix",
"rule_template_label": "系统内置场景矩阵规则",
},
)
if scene_submission_rule is not None:
if not str(scene_submission_rule.current_version or "").strip():
scene_submission_rule.current_version = "v1.0.0"
if not str(scene_submission_rule.working_version or "").strip():
scene_submission_rule.working_version = scene_submission_rule.current_version
if not str(scene_submission_rule.published_version or "").strip():
scene_submission_rule.published_version = scene_submission_rule.current_version
scene_submission_rule.status = (
scene_submission_rule.status or AgentAssetStatus.ACTIVE.value
)
scene_submission_rule.description = (
"统一定义各报销场景的必填字段、附件类型要求和金额阈值。"
)
scene_submission_rule.config_json = {
"severity": "high",
"enabled": True,
"runtime_kind": "scene_matrix",
"rule_template_label": "系统内置场景矩阵规则",
}
self._ensure_asset_version(
scene_submission_rule,
version="v1.0.0",
content=self._scene_submission_standard_markdown(),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="首版报销场景提交标准,覆盖附件类型、必填字段和金额阈值。",
created_by="系统初始化",
)
self._ensure_asset_review(
scene_submission_rule,
version="v1.0.0",
reviewer="顾承宇",
review_status=AgentReviewStatus.APPROVED.value,
review_note="可作为报销场景统一审核标准正式执行。",
reviewed_at=datetime.now(UTC),
)
if "rule.expense.travel_risk_control_standard" not in existing_codes:
travel_policy_rule = self._create_seed_asset(
asset_type=AgentAssetType.RULE.value,
code="rule.expense.travel_risk_control_standard",
name="差旅报销风险管控制度",
description="统一定义差旅报销的行程闭环、酒店地点一致性、职级差标和风险处置口径。",
domain=AgentAssetDomain.EXPENSE.value,
scenario_json=["expense", "risk_check", "travel_policy", "travel_standard"],
owner="风控与审计部",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.1.0",
config_json={
"severity": "high",
"enabled": True,
"block_on_high_risk": True,
"warning_on_medium_risk": True,
"source_doc": "document/development/risks/travel-risk-control-standard.md",
"runtime_kind": "travel_policy",
"rule_template_key": "travel_standard_v1",
"rule_template_label": "差旅标准模板",
},
)
if travel_policy_rule is not None:
if not str(travel_policy_rule.current_version or "").strip():
travel_policy_rule.current_version = "v1.1.0"
if not str(travel_policy_rule.working_version or "").strip():
travel_policy_rule.working_version = travel_policy_rule.current_version
if not str(travel_policy_rule.published_version or "").strip():
travel_policy_rule.published_version = travel_policy_rule.current_version
travel_policy_rule.status = travel_policy_rule.status or AgentAssetStatus.ACTIVE.value
travel_policy_rule.config_json = {
"severity": "high",
"enabled": True,
"block_on_high_risk": True,
"warning_on_medium_risk": True,
"source_doc": "document/development/risks/travel-risk-control-standard.md",
"runtime_kind": "travel_policy",
"rule_template_key": "travel_standard_v1",
"rule_template_label": "差旅标准模板",
}
self._ensure_asset_version(
travel_policy_rule,
version="v1.0.0",
content=self._travel_risk_control_standard_markdown(version="v1.0.0"),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="首版差旅制度执行规则,覆盖行程闭环与基础差标校验。",
created_by="系统初始化",
)
self._ensure_asset_version(
travel_policy_rule,
version="v1.1.0",
content=self._travel_risk_control_standard_markdown(version="v1.1.0"),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="补充可执行规则块,供审核引擎直接消费差旅制度标准。",
created_by="系统初始化",
)
self._ensure_asset_review(
travel_policy_rule,
version="v1.1.0",
reviewer="顾承宇",
review_status=AgentReviewStatus.APPROVED.value,
review_note="制度口径已确认,并已补充可执行配置供审核引擎读取。",
reviewed_at=datetime.now(UTC),
)
self.sync_platform_risk_rules_from_library()
if COMPANY_TRAVEL_EXPENSE_RULE_CODE not in existing_codes:
company_travel_rule = self._create_seed_asset(
asset_type=AgentAssetType.RULE.value,
code=COMPANY_TRAVEL_EXPENSE_RULE_CODE,
name="公司差旅费报销规则",
description="通过 Excel 明细表维护差旅费报销标准、票据要求和审批口径。",
domain=AgentAssetDomain.EXPENSE.value,
scenario_json=list(COMPANY_TRAVEL_RULE_SCENARIO_JSON),
owner="财务制度管理组",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version=COMPANY_TRAVEL_RULE_VERSION,
config_json={
"severity": "medium",
"enabled": True,
"tag": "财务规则",
"detail_mode": "spreadsheet",
"scenario_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0],
"ai_review_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0],
"rule_template_label": "差旅报销 Excel 模板",
},
)
if COMPANY_COMMUNICATION_EXPENSE_RULE_CODE not in existing_codes:
company_communication_rule = self._create_seed_asset(
asset_type=AgentAssetType.RULE.value,
code=COMPANY_COMMUNICATION_EXPENSE_RULE_CODE,
name="公司通信费报销规则",
description="通过 Excel 明细表维护员工通信费报销标准、专项补充口径和审批要求。",
domain=AgentAssetDomain.EXPENSE.value,
scenario_json=list(COMPANY_COMMUNICATION_RULE_SCENARIO_JSON),
owner="财务制度管理组",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version=COMPANY_COMMUNICATION_RULE_VERSION,
config_json={
"severity": "medium",
"enabled": True,
"tag": "财务规则",
"detail_mode": "spreadsheet",
"scenario_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0],
"ai_review_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0],
"rule_template_label": "通信费报销 Excel 模板",
},
)
if company_travel_rule is not None:
company_travel_rule.scenario_json = list(COMPANY_TRAVEL_RULE_SCENARIO_JSON)
if not str(company_travel_rule.current_version or "").strip():
company_travel_rule.current_version = COMPANY_TRAVEL_RULE_VERSION
if not str(company_travel_rule.working_version or "").strip():
company_travel_rule.working_version = company_travel_rule.current_version
if not str(company_travel_rule.published_version or "").strip():
company_travel_rule.published_version = company_travel_rule.current_version
if not str(company_travel_rule.status or "").strip():
company_travel_rule.status = AgentAssetStatus.ACTIVE.value
company_travel_rule.description = (
"通过 Excel 明细表维护差旅费报销标准、票据要求和审批口径。"
)
company_travel_rule.config_json = {
**(company_travel_rule.config_json or {}),
"severity": "medium",
"enabled": True,
"tag": "财务规则",
"detail_mode": "spreadsheet",
"rule_library": FINANCE_RULES_LIBRARY,
"scenario_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0],
"ai_review_category": COMPANY_TRAVEL_RULE_SCENARIO_JSON[0],
"rule_template_label": "差旅报销 Excel 模板",
}
company_travel_rule_meta = self._ensure_company_travel_rule_spreadsheet_seed(
company_travel_rule,
version=str(company_travel_rule.current_version or COMPANY_TRAVEL_RULE_VERSION),
actor_name="系统初始化",
)
self._ensure_asset_version(
company_travel_rule,
version=str(company_travel_rule.current_version or COMPANY_TRAVEL_RULE_VERSION),
content=AgentAssetSpreadsheetManager.build_version_markdown(
rule_name=company_travel_rule.name,
version=str(company_travel_rule.current_version or COMPANY_TRAVEL_RULE_VERSION),
metadata=company_travel_rule_meta,
),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="初始化差旅费报销 Excel 规则表。",
created_by="系统初始化",
)
if (
str(company_travel_rule.current_version or "").strip()
== COMPANY_TRAVEL_RULE_VERSION
):
self._ensure_asset_review(
company_travel_rule,
version=COMPANY_TRAVEL_RULE_VERSION,
reviewer="顾承宇",
review_status=AgentReviewStatus.APPROVED.value,
review_note="首版 Excel 规则表已确认,可作为财务规则使用。",
reviewed_at=datetime.now(UTC),
)
if company_communication_rule is not None:
company_communication_rule.scenario_json = list(
COMPANY_COMMUNICATION_RULE_SCENARIO_JSON
)
if not str(company_communication_rule.current_version or "").strip():
company_communication_rule.current_version = COMPANY_COMMUNICATION_RULE_VERSION
if not str(company_communication_rule.working_version or "").strip():
company_communication_rule.working_version = (
company_communication_rule.current_version
)
if not str(company_communication_rule.published_version or "").strip():
company_communication_rule.published_version = (
company_communication_rule.current_version
)
if not str(company_communication_rule.status or "").strip():
company_communication_rule.status = AgentAssetStatus.ACTIVE.value
company_communication_rule.description = (
"通过 Excel 明细表维护员工通信费报销标准、专项补充口径和审批要求。"
)
company_communication_rule.config_json = {
**(company_communication_rule.config_json or {}),
"severity": "medium",
"enabled": True,
"tag": "财务规则",
"detail_mode": "spreadsheet",
"rule_library": FINANCE_RULES_LIBRARY,
"scenario_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0],
"ai_review_category": COMPANY_COMMUNICATION_RULE_SCENARIO_JSON[0],
"rule_template_label": "通信费报销 Excel 模板",
}
company_communication_rule_meta = (
self._ensure_company_communication_rule_spreadsheet_seed(
company_communication_rule,
version=str(
company_communication_rule.current_version
or COMPANY_COMMUNICATION_RULE_VERSION
),
actor_name="系统初始化",
)
)
self._ensure_asset_version(
company_communication_rule,
version=str(
company_communication_rule.current_version or COMPANY_COMMUNICATION_RULE_VERSION
),
content=AgentAssetSpreadsheetManager.build_version_markdown(
rule_name=company_communication_rule.name,
version=str(
company_communication_rule.current_version
or COMPANY_COMMUNICATION_RULE_VERSION
),
metadata=company_communication_rule_meta,
),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note="初始化通信费报销 Excel 规则表。",
created_by="系统初始化",
)
if (
str(company_communication_rule.current_version or "").strip()
== COMPANY_COMMUNICATION_RULE_VERSION
):
self._ensure_asset_review(
company_communication_rule,
version=COMPANY_COMMUNICATION_RULE_VERSION,
reviewer="顾承宇",
review_status=AgentReviewStatus.APPROVED.value,
review_note="首版 Excel 规则表已确认,可作为财务规则使用。",
reviewed_at=datetime.now(UTC),
)
if "skill.ar.aging_summary" not in existing_codes:
asset = self._create_seed_asset(
asset_type=AgentAssetType.SKILL.value,
code="skill.ar.aging_summary",
name="应收账龄汇总技能",
description="按客户、账龄和逾期状态汇总应收风险分布。",
domain=AgentAssetDomain.AR.value,
scenario_json=["accounts_receivable", "query", "aging_summary"],
owner="平台研发组",
reviewer="陈硕",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
config_json={"input_schema": ["customer", "aging_bucket", "status"]},
)
self._ensure_asset_version(
asset,
version="v1.0.0",
content=self._json_content(
{
"inputs": ["customer", "aging_bucket", "status"],
"outputs": ["receivable_total", "overdue_total", "customer_count"],
"dependencies": ["database.accounts_receivable"],
}
),
content_type=AgentAssetContentType.JSON.value,
change_note="初始化应收账龄技能快照。",
created_by="系统初始化",
)
if "mcp.ledger.snapshot_mock" not in existing_codes:
asset = self._create_seed_asset(
asset_type=AgentAssetType.MCP.value,
code="mcp.ledger.snapshot_mock",
name="总账快照 Mock 服务",
description="模拟返回应收、应付和费用汇总快照,供 Agent 查询和巡检。",
domain=AgentAssetDomain.SYSTEM.value,
scenario_json=["expense", "accounts_receivable", "accounts_payable"],
owner="平台研发组",
reviewer="周悦宁",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
config_json={"endpoint": "mock://ledger/snapshot", "timeout_ms": 1500},
)
self._ensure_asset_version(
asset,
version="v1.0.0",
content=self._json_content(
{
"service_type": "mock",
"auth_mode": "service_account",
"degrade_strategy": "return_cached_snapshot_with_warning",
}
),
content_type=AgentAssetContentType.JSON.value,
change_note="初始化总账快照 MCP。",
created_by="系统初始化",
)
if "task.hermes.weekly_ar_summary" not in existing_codes:
asset = self._create_seed_asset(
asset_type=AgentAssetType.TASK.value,
code="task.hermes.weekly_ar_summary",
name="Hermes 每周应收账龄汇总",
description="每周汇总逾期应收、账龄分布和客户风险变化。",
domain=AgentAssetDomain.SYSTEM.value,
scenario_json=["schedule", "accounts_receivable", "summary"],
owner="风控与审计部",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
config_json={"cron": "0 10 * * 1", "agent": AgentName.HERMES.value},
)
self._ensure_asset_version(
asset,
version="v1.0.0",
content=self._json_content(
{
"task_type": "weekly_ar_summary",
"schedule": "0 10 * * 1",
"target_agent": AgentName.HERMES.value,
}
),
content_type=AgentAssetContentType.JSON.value,
change_note="初始化应收账龄汇总任务。",
created_by="系统初始化",
)
if "task.hermes.rule_review_digest" not in existing_codes:
asset = self._create_seed_asset(
asset_type=AgentAssetType.TASK.value,
code="task.hermes.rule_review_digest",
name="Hermes 规则待审摘要",
description="每天汇总待审规则、待补样例和被拒规则修订建议。",
domain=AgentAssetDomain.SYSTEM.value,
scenario_json=["schedule", "rule_center", "review_digest"],
owner="风控与审计部",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
config_json={"cron": "0 18 * * *", "agent": AgentName.HERMES.value},
)
self._ensure_asset_version(
asset,
version="v1.0.0",
content=self._json_content(
{
"task_type": "rule_review_digest",
"schedule": "0 18 * * *",
"target_agent": AgentName.HERMES.value,
}
),
content_type=AgentAssetContentType.JSON.value,
change_note="初始化规则待审摘要任务。",
created_by="系统初始化",
)
if "task.hermes.knowledge_index_sync" not in existing_codes:
asset = self._create_seed_asset(
asset_type=AgentAssetType.TASK.value,
code="task.hermes.knowledge_index_sync",
name="Hermes ??????",
description="?????????? LightRAG ???????",
domain=AgentAssetDomain.SYSTEM.value,
scenario_json=["schedule", "knowledge", "rule_center"],
owner="财务制度管理组",
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
config_json={"cron": "0 0 * * *", "agent": AgentName.HERMES.value},
)
self._ensure_asset_version(
asset,
version="v1.0.0",
content=self._json_content(
{
"task_type": "knowledge_index_sync",
"schedule": "0 0 * * *",
"target_agent": AgentName.HERMES.value,
"folder": "报销制度",
"changed_only": True,
}
),
content_type=AgentAssetContentType.JSON.value,
change_note="初始化制度知识与规则草稿形成任务。",
created_by="系统初始化",
)

View File

@@ -0,0 +1,207 @@
from __future__ import annotations
# Asset code and rule-library file name of the platform risk rule that checks
# the declared destination against the receipt location.
PLATFORM_DESTINATION_LOCATION_RULE_CODE = "risk.travel.destination_receipt_location"
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME = "risk.travel.destination_receipt_location.json"
# Signatures of the seeded demo expense claims, laid out as
# (claim_no, employee_name, reason, amount formatted with two decimals, status).
# The purge helper deletes a claim only when all five fields match one entry.
DEMO_EXPENSE_CLAIM_SIGNATURES = {
(
"EXP-202605-001",
"张三",
"华南客户拜访差旅报销",
"3280.00",
"submitted",
),
(
"EXP-202605-002",
"李四",
"客户路演餐费",
"860.00",
"approved",
),
(
"EXP-202605-003",
"王五",
"市场活动会务差旅",
"3280.00",
"review",
),
}
# Signatures of the seeded demo receivables, laid out as
# (receivable_no, customer_name, amount_outstanding with two decimals, status).
DEMO_RECEIVABLE_SIGNATURES = {
("AR-202605-001", "客户A", "50000.00", "partial"),
("AR-202605-002", "客户B", "78000.00", "overdue"),
}
# Signatures of the seeded demo payables, laid out as
# (payable_no, vendor_name, amount_outstanding with two decimals, status).
DEMO_PAYABLE_SIGNATURES = {
("AP-202605-001", "供应商A", "33000.00", "scheduled"),
("AP-202605-002", "供应商B", "96000.00", "overdue"),
}
# Asset codes of older rules.
# NOTE(review): the consumer of this tuple is not visible in this module;
# presumably these assets are retired or migrated at startup - confirm at the call site.
LEGACY_RULE_CODES = (
"rule.expense.duplicate_expense_check",
"rule.expense.travel_receipt_requirements",
"rule.ap.payment_dual_review",
)
# Asset code of the attachment / document completeness rule.
ATTACHMENT_RULE_ASSET_CODE = "rule.expense.attachment_submission_requirements"
# Version string used when a company spreadsheet rule has no current version yet.
COMPANY_TRAVEL_RULE_VERSION = "v1.0.0"
COMPANY_COMMUNICATION_RULE_VERSION = "v1.0.0"
# Single-element scenario tuples. The seeding code copies them into
# scenario_json and reuses element 0 as scenario_category / ai_review_category.
COMPANY_TRAVEL_RULE_SCENARIO_JSON = ("差旅",)
COMPANY_COMMUNICATION_RULE_SCENARIO_JSON = ("费用科目",)
# Machine-readable configuration of the attachment completeness rule.
# The Markdown builder JSON-serialises this dict into the fenced
# "expense-rule" block appended to the rule document.
ATTACHMENT_RULE_RUNTIME_CONFIG = {
"kind": "policy_rule_draft",
"version": 1,
"template_key": "attachment_requirement_v1",
"rule_name": "报销附件与单据完整性规则",
"scenario": "attachment_policy",
"source_document_name": "报销制度 / 单据与附件要求",
"review_required": True,
# Expense types and scene codes the rule is targeted at.
"target": {
"expense_types": [
"travel",
"hotel",
"transport",
"meal",
"office",
"meeting",
"training",
"communication",
"welfare",
"other",
],
"scene_codes": ["expense", "attachment_policy", "invoice_anomaly"],
},
# Per-document-type requirements; only the VAT invoice entry is marked required.
"attachment_requirements": {
"min_attachment_count": 1,
"items": [
{
"document_type": "vat_invoice",
"required": True,
"min_count": 1,
"description": "金额类报销原则上必须提供合法票据。",
},
{
"document_type": "receipt",
"required": False,
"min_count": 1,
"description": "特殊场景无发票时需补充收据与情况说明。",
},
{
"document_type": "flight_itinerary",
"required": False,
"min_count": 1,
"description": "差旅交通报销需提供行程单或等效凭证。",
},
{
"document_type": "hotel_invoice",
"required": False,
"min_count": 1,
"description": "住宿报销需提供酒店票据或等效住宿凭证。",
},
],
"manual_fill_required": False,
},
# Action value configured for missing attachments, and the result payload
# (risk code, action, user-facing message) attached to the rule.
"missing_attachment_action": "block",
"output": {
"risk_code": "invoice_anomaly",
"action": "block",
"message": "附件或单据不完整,需补件后再提交。",
},
}

View File

@@ -0,0 +1,726 @@
from __future__ import annotations
import hashlib
import json
from datetime import UTC, date, datetime
from decimal import Decimal
from pathlib import Path
from sqlalchemy import inspect, select, text
from app.core.agent_enums import (
AgentAssetContentType,
AgentAssetDomain,
AgentAssetStatus,
AgentAssetType,
AgentName,
AgentPermissionLevel,
AgentReviewStatus,
AgentRunSource,
AgentRunStatus,
AgentToolType,
)
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.models.audit_log import AuditLog
from app.models.financial_record import (
AccountsPayableRecord,
AccountsReceivableRecord,
ExpenseClaim,
ExpenseClaimItem,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import (
AgentAssetSpreadsheetManager,
COMPANY_COMMUNICATION_EXPENSE_RULE_CODE,
COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME,
COMPANY_TRAVEL_EXPENSE_RULE_CODE,
COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
FINANCE_RULES_LIBRARY,
RISK_RULES_LIBRARY,
)
from app.services.expense_rule_runtime import (
build_scene_submission_standard_markdown,
build_travel_risk_control_standard_markdown,
)
from app.services.agent_foundation_constants import (
ATTACHMENT_RULE_ASSET_CODE,
ATTACHMENT_RULE_RUNTIME_CONFIG,
COMPANY_COMMUNICATION_RULE_SCENARIO_JSON,
COMPANY_COMMUNICATION_RULE_VERSION,
COMPANY_TRAVEL_RULE_SCENARIO_JSON,
COMPANY_TRAVEL_RULE_VERSION,
DEMO_EXPENSE_CLAIM_SIGNATURES,
DEMO_PAYABLE_SIGNATURES,
DEMO_RECEIVABLE_SIGNATURES,
LEGACY_RULE_CODES,
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
)
from app.core.logging import get_logger
logger = get_logger("app.services.agent_foundation")
class AgentFoundationFinancialSeedMixin:
    """Seed and purge helpers for demo financial records, agent runs and audit logs.

    Every method works through ``self.db``, which the host class must provide.
    """

    def _seed_financial_records(self) -> None:
        """Add demo expense claims, receivables and payables to the session.

        Nothing is added when at least one ``ExpenseClaim`` row already exists.
        The records are only added to the session; no flush or commit happens here.
        """
        first_claim_id = self.db.scalar(select(ExpenseClaim.id).limit(1))
        if first_claim_id is not None:
            return

        travel_claim = ExpenseClaim(
            claim_no="EXP-202605-001",
            employee_name="张三",
            department_name="财务共享中心",
            project_code="PRJ-EXP-01",
            expense_type="travel",
            reason="华南客户拜访差旅报销",
            location="深圳",
            amount=Decimal("3280.00"),
            currency="CNY",
            invoice_count=3,
            occurred_at=datetime(2026, 5, 6, 9, 0, tzinfo=UTC),
            submitted_at=datetime(2026, 5, 7, 10, 20, tzinfo=UTC),
            status="submitted",
            approval_stage="finance_review",
            risk_flags_json=["amount_over_limit"],
        )
        # The two line items add up to the claim amount (1880.00 + 1400.00).
        hotel_item = ExpenseClaimItem(
            item_date=date(2026, 5, 5),
            item_type="hotel",
            item_reason="客户拜访住宿",
            item_location="深圳",
            item_amount=Decimal("1880.00"),
            invoice_id="INV-HOTEL-001",
        )
        transport_item = ExpenseClaimItem(
            item_date=date(2026, 5, 6),
            item_type="transport",
            item_reason="往返交通",
            item_location="深圳",
            item_amount=Decimal("1400.00"),
            invoice_id="INV-TRANS-009",
        )
        travel_claim.items = [hotel_item, transport_item]

        meal_claim = ExpenseClaim(
            claim_no="EXP-202605-002",
            employee_name="李四",
            department_name="华东销售部",
            project_code="PRJ-SALES-02",
            expense_type="meal",
            reason="客户路演餐费",
            location="上海",
            amount=Decimal("860.00"),
            currency="CNY",
            invoice_count=1,
            occurred_at=datetime(2026, 5, 8, 12, 0, tzinfo=UTC),
            submitted_at=datetime(2026, 5, 8, 18, 30, tzinfo=UTC),
            status="approved",
            approval_stage="completed",
            risk_flags_json=[],
        )
        event_claim = ExpenseClaim(
            claim_no="EXP-202605-003",
            employee_name="王五",
            department_name="市场品牌部",
            project_code="PRJ-MKT-08",
            expense_type="travel",
            reason="市场活动会务差旅",
            location="北京",
            amount=Decimal("3280.00"),
            currency="CNY",
            invoice_count=2,
            occurred_at=datetime(2026, 5, 6, 11, 30, tzinfo=UTC),
            submitted_at=datetime(2026, 5, 8, 9, 10, tzinfo=UTC),
            status="review",
            approval_stage="risk_check",
            risk_flags_json=["duplicate_expense"],
        )
        expense_claims = [travel_claim, meal_claim, event_claim]

        receivables = [
            AccountsReceivableRecord(
                receivable_no="AR-202605-001",
                customer_id="CUS-A",
                customer_name="客户A",
                contract_no="CTR-AR-1001",
                invoice_no="INV-AR-9001",
                amount_receivable=Decimal("120000.00"),
                amount_received=Decimal("70000.00"),
                amount_outstanding=Decimal("50000.00"),
                currency="CNY",
                posting_date=date(2026, 4, 1),
                due_date=date(2026, 4, 30),
                aging_days=11,
                status="partial",
                risk_flags_json=[],
            ),
            AccountsReceivableRecord(
                receivable_no="AR-202605-002",
                customer_id="CUS-B",
                customer_name="客户B",
                contract_no="CTR-AR-1002",
                invoice_no="INV-AR-9002",
                amount_receivable=Decimal("88000.00"),
                amount_received=Decimal("10000.00"),
                amount_outstanding=Decimal("78000.00"),
                currency="CNY",
                posting_date=date(2026, 3, 15),
                due_date=date(2026, 4, 15),
                aging_days=26,
                status="overdue",
                risk_flags_json=["ar_overdue"],
            ),
        ]

        payables = [
            AccountsPayableRecord(
                payable_no="AP-202605-001",
                vendor_id="VEN-A",
                vendor_name="供应商A",
                invoice_no="INV-AP-5001",
                amount_payable=Decimal("43000.00"),
                amount_paid=Decimal("10000.00"),
                amount_outstanding=Decimal("33000.00"),
                currency="CNY",
                posting_date=date(2026, 4, 20),
                due_date=date(2026, 5, 12),
                aging_days=0,
                status="scheduled",
                risk_flags_json=[],
            ),
            AccountsPayableRecord(
                payable_no="AP-202605-002",
                vendor_id="VEN-B",
                vendor_name="供应商B",
                invoice_no="INV-AP-5002",
                amount_payable=Decimal("96000.00"),
                amount_paid=Decimal("0.00"),
                amount_outstanding=Decimal("96000.00"),
                currency="CNY",
                posting_date=date(2026, 4, 10),
                due_date=date(2026, 5, 5),
                aging_days=6,
                status="overdue",
                risk_flags_json=["ap_overdue"],
            ),
        ]

        # Same insertion order as before: claims, then receivables, then payables.
        self.db.add_all(expense_claims + receivables + payables)

    def _purge_demo_financial_records(self) -> None:
        """Delete financial rows whose signature matches a known demo record.

        A row is deleted only when every field of its signature tuple equals an
        entry of the corresponding ``DEMO_*_SIGNATURES`` set.
        """

        def cleaned(value) -> str:
            # Missing values count as the empty string; surrounding whitespace is ignored.
            return str(value or "").strip()

        def money(value) -> str:
            # Amounts are compared as two-decimal strings; a missing amount counts as 0.
            return f"{Decimal(value or 0):.2f}"

        # NOTE(review): each query below loads the whole table before filtering in Python.
        for claim in self.db.scalars(select(ExpenseClaim)).all():
            claim_key = (
                cleaned(claim.claim_no),
                cleaned(claim.employee_name),
                cleaned(claim.reason),
                money(claim.amount),
                cleaned(claim.status),
            )
            if claim_key in DEMO_EXPENSE_CLAIM_SIGNATURES:
                self.db.delete(claim)

        for receivable in self.db.scalars(select(AccountsReceivableRecord)).all():
            receivable_key = (
                cleaned(receivable.receivable_no),
                cleaned(receivable.customer_name),
                money(receivable.amount_outstanding),
                cleaned(receivable.status),
            )
            if receivable_key in DEMO_RECEIVABLE_SIGNATURES:
                self.db.delete(receivable)

        for payable in self.db.scalars(select(AccountsPayableRecord)).all():
            payable_key = (
                cleaned(payable.payable_no),
                cleaned(payable.vendor_name),
                money(payable.amount_outstanding),
                cleaned(payable.status),
            )
            if payable_key in DEMO_PAYABLE_SIGNATURES:
                self.db.delete(payable)

    def _seed_runs_and_logs(self) -> None:
        """Add demo agent runs, tool calls, semantic parse logs and audit logs.

        The whole method is a no-op when any ``AgentRun`` row exists. Audit logs
        are added only when the audit log table is empty.
        """
        first_run_id = self.db.scalar(select(AgentRun.id).limit(1))
        if first_run_id is not None:
            return

        # The scheduled Hermes run is linked to the daily risk scan task when that asset exists.
        task_asset = self.db.scalar(
            select(AgentAsset).where(AgentAsset.code == "task.hermes.daily_risk_scan")
        )

        query_run = AgentRun(
            run_id="run_user_20260511_001",
            agent=AgentName.USER_AGENT.value,
            source=AgentRunSource.USER_MESSAGE.value,
            user_id="emp_001",
            task_id=None,
            ontology_json={"scenario": "expense", "intent": "query"},
            route_json={"selected_agent": AgentName.USER_AGENT.value, "route_reason": "user query"},
            permission_level=AgentPermissionLevel.READ.value,
            status=AgentRunStatus.SUCCEEDED.value,
            result_summary="已返回本周报销金额和风险摘要。",
            started_at=datetime(2026, 5, 11, 8, 35, tzinfo=UTC),
            finished_at=datetime(2026, 5, 11, 8, 35, 2, tzinfo=UTC),
        )
        scan_run = AgentRun(
            run_id="run_hermes_20260511_001",
            agent=AgentName.HERMES.value,
            source=AgentRunSource.SCHEDULE.value,
            user_id=None,
            task_id=task_asset.id if task_asset else None,
            ontology_json={"scenario": "expense", "intent": "risk_check"},
            route_json={
                "selected_agent": AgentName.HERMES.value,
                "route_reason": "scheduled risk scan",
            },
            permission_level=AgentPermissionLevel.READ.value,
            status=AgentRunStatus.SUCCEEDED.value,
            result_summary="Hermes 已生成今日风险巡检摘要。",
            started_at=datetime(2026, 5, 11, 9, 0, tzinfo=UTC),
            finished_at=datetime(2026, 5, 11, 9, 0, 4, tzinfo=UTC),
        )
        payment_run = AgentRun(
            run_id="run_user_20260511_002",
            agent=AgentName.ORCHESTRATOR.value,
            source=AgentRunSource.USER_MESSAGE.value,
            user_id="emp_002",
            task_id=None,
            ontology_json={"scenario": "accounts_payable", "intent": "operate"},
            route_json={
                "selected_agent": AgentName.USER_AGENT.value,
                "route_reason": "payment request",
            },
            permission_level=AgentPermissionLevel.APPROVAL_REQUIRED.value,
            status=AgentRunStatus.BLOCKED.value,
            result_summary="动作需要人工确认。",
            error_message="直接付款属于高风险动作,已阻断自动执行。",
            started_at=datetime(2026, 5, 11, 10, 5, tzinfo=UTC),
            finished_at=datetime(2026, 5, 11, 10, 5, 1, tzinfo=UTC),
        )
        self.db.add_all([query_run, scan_run, payment_run])
        # Flush the runs before adding the rows that reference their run_id.
        self.db.flush()

        tool_calls = [
            AgentToolCall(
                run_id=query_run.run_id,
                tool_type=AgentToolType.DATABASE.value,
                tool_name="expense_claims.lookup",
                request_json={"time_range": "this_week", "employee": "all"},
                response_json={"claim_count": 3, "total_amount": "7420.00"},
                status="succeeded",
                duration_ms=48,
            ),
            AgentToolCall(
                run_id=scan_run.run_id,
                tool_type=AgentToolType.MCP.value,
                tool_name="invoice.verify_mock",
                request_json={"claim_no": "EXP-202605-003"},
                response_json={
                    "warning": "external service degraded",
                    "fallback": "used mock response",
                },
                status="failed",
                duration_ms=132,
                error_message="mock upstream timeout",
            ),
            AgentToolCall(
                run_id=payment_run.run_id,
                tool_type=AgentToolType.RULE_ENGINE.value,
                tool_name="permission.guard",
                request_json={"action": "direct_payment"},
                response_json={"requires_confirmation": True},
                status="succeeded",
                duration_ms=5,
            ),
        ]
        parse_logs = [
            SemanticParseLog(
                run_id=query_run.run_id,
                user_id="emp_001",
                raw_query="查一下本周报销超标风险",
                scenario="expense",
                intent="risk_check",
                entities_json=[],
                time_range_json={"start_date": "2026-05-11", "end_date": "2026-05-17"},
                metrics_json=["amount"],
                constraints_json=[],
                risk_flags_json=["amount_over_limit"],
                permission_json={"level": AgentPermissionLevel.READ.value},
                confidence=0.93,
            ),
            SemanticParseLog(
                run_id=payment_run.run_id,
                user_id="emp_002",
                raw_query="帮我直接付款给供应商B",
                scenario="accounts_payable",
                intent="operate",
                entities_json=[{"type": "vendor", "value": "供应商B"}],
                time_range_json={},
                metrics_json=["amount"],
                constraints_json=[],
                risk_flags_json=["ap_overdue"],
                permission_json={"level": AgentPermissionLevel.APPROVAL_REQUIRED.value},
                confidence=0.96,
            ),
        ]
        self.db.add_all([*tool_calls, *parse_logs])

        # Audit logs are seeded independently: skip them when any audit row exists.
        if self.db.scalar(select(AuditLog.id).limit(1)) is not None:
            return
        audit_logs = [
            AuditLog(
                actor="系统初始化",
                action="save_rule_markdown",
                resource_type="rule",
                resource_id=ATTACHMENT_RULE_ASSET_CODE,
                before_json=None,
                after_json={"version": "v1.0.0"},
                request_id="seed-audit-001",
            ),
            AuditLog(
                actor="高嘉禾",
                action="review_rule",
                resource_type="rule",
                resource_id=ATTACHMENT_RULE_ASSET_CODE,
                before_json={"review_status": "pending"},
                after_json={"review_status": "pending"},
                request_id="seed-audit-002",
            ),
            AuditLog(
                actor="系统初始化",
                action="activate_rule",
                resource_type="rule",
                resource_id="rule.expense.scene_submission_standard",
                before_json={"status": "draft"},
                after_json={"status": "active"},
                request_id="seed-audit-003",
            ),
            AuditLog(
                actor="Hermes",
                action="update_task_status",
                resource_type="task",
                resource_id="task.hermes.daily_risk_scan",
                before_json={"status": "idle"},
                after_json={"status": "succeeded"},
                request_id="seed-audit-004",
            ),
        ]
        self.db.add_all(audit_logs)

View File

@@ -0,0 +1,202 @@
from __future__ import annotations
import hashlib
import json
from datetime import UTC, date, datetime
from decimal import Decimal
from pathlib import Path
from sqlalchemy import inspect, select, text
from app.core.agent_enums import (
AgentAssetContentType,
AgentAssetDomain,
AgentAssetStatus,
AgentAssetType,
AgentName,
AgentPermissionLevel,
AgentReviewStatus,
AgentRunSource,
AgentRunStatus,
AgentToolType,
)
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.models.audit_log import AuditLog
from app.models.financial_record import (
AccountsPayableRecord,
AccountsReceivableRecord,
ExpenseClaim,
ExpenseClaimItem,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import (
AgentAssetSpreadsheetManager,
COMPANY_COMMUNICATION_EXPENSE_RULE_CODE,
COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME,
COMPANY_TRAVEL_EXPENSE_RULE_CODE,
COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
FINANCE_RULES_LIBRARY,
RISK_RULES_LIBRARY,
)
from app.services.expense_rule_runtime import (
build_scene_submission_standard_markdown,
build_travel_risk_control_standard_markdown,
)
from app.services.agent_foundation_constants import (
ATTACHMENT_RULE_ASSET_CODE,
ATTACHMENT_RULE_RUNTIME_CONFIG,
COMPANY_COMMUNICATION_RULE_SCENARIO_JSON,
COMPANY_COMMUNICATION_RULE_VERSION,
COMPANY_TRAVEL_RULE_SCENARIO_JSON,
COMPANY_TRAVEL_RULE_VERSION,
DEMO_EXPENSE_CLAIM_SIGNATURES,
DEMO_PAYABLE_SIGNATURES,
DEMO_RECEIVABLE_SIGNATURES,
LEGACY_RULE_CODES,
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
)
from app.core.logging import get_logger
logger = get_logger("app.services.agent_foundation")
class AgentFoundationMarkdownMixin:
    """Builders for the Markdown and JSON content stored on seeded asset versions."""

    def _attachment_submission_requirement_markdown(
        self,
        *,
        version_note: str,
        include_review_note: bool,
    ) -> str:
        """Render the attachment completeness rule as a Markdown document.

        Args:
            version_note: Text inserted into the "current version" bullet.
            include_review_note: When true, a fenced ``expense-rule`` block holding
                the JSON-serialised ``ATTACHMENT_RULE_RUNTIME_CONFIG`` is appended.

        Returns:
            The document, with its lines joined by ``"\\n"``.
        """
        lines = [
            "# 报销附件与单据完整性规则",
            "",
            "## 模板信息",
            "",
            "- 模板键:`attachment_requirement_v1`",
            "- 来源文档:报销制度 / 单据与附件要求",
            "- 审核状态:待审核",
            "",
            "## 目标",
            "",
            "统一约束报销提交时的票据、附件与替代凭证要求,避免缺件、错件和无依据流转。",
            "",
            "## 适用范围",
            "",
            "适用于员工报销提交场景,重点覆盖差旅、住宿、交通、餐费、办公和其他费用的附件校验。",
            "",
            "## 输入字段",
            "",
            "- expense_type",
            "- attachments",
            "- invoice_count",
            "- reason",
            "",
            "## 判断规则",
            "",
            "- 报销提交前至少需要 1 份有效附件。",
            "- 金额类报销原则上应提供合法票据;特殊场景无发票时,必须补充收据与情况说明。",
            "- 差旅交通报销需提供行程单或等效凭证;住宿报销需提供酒店票据或等效住宿凭证。",
            "- 缺少必要附件时直接拦截,并提示补件后重新提交。",
            "",
            "## 输出",
            "",
            "- 风险编码:`invoice_anomaly`",
            "- 默认动作:`block`",
            "- 处理说明:附件或单据不完整时退回补充。",
            "",
            "## 来源依据",
            "",
            "- 报销制度对票据、附件、替代凭证和补件要求的统一约束。",
            "",
            "## 审核约束",
            "",
            "- 当前规则属于真实业务规则,但仍处于待审核状态。",
            "- 上线前需由制度管理员确认收据替代、补件时限和特殊场景豁免口径。",
            f"- 当前版本说明:{version_note}",
            "",
            "## 管理员备注",
            "",
            "需要结合公司正式报销制度,补充各场景附件替代口径与例外审批要求。",
        ]
        if not include_review_note:
            return "\n".join(lines)

        runtime_config = json.dumps(
            ATTACHMENT_RULE_RUNTIME_CONFIG,
            ensure_ascii=False,
            indent=2,
        )
        lines += ["", "```expense-rule", runtime_config, "```"]
        return "\n".join(lines)

    def _scene_submission_standard_markdown(self) -> str:
        """Return the scene submission standard produced by the rule runtime builder."""
        document = build_scene_submission_standard_markdown()
        return self._markdown_content(document)

    def _travel_risk_control_standard_markdown(self, *, version: str = "v1.1.0") -> str:
        """Return the travel risk control standard produced by the rule runtime builder.

        ``version`` is accepted but not used when building the document.
        """
        document = build_travel_risk_control_standard_markdown()
        return self._markdown_content(document)

    @staticmethod
    def _markdown_content(content: str) -> str:
        """Return ``content`` unchanged."""
        return content

    @staticmethod
    def _json_content(content: dict[str, object]) -> str:
        """Serialise ``content`` as JSON with sorted keys, 2-space indent and raw non-ASCII text."""
        return json.dumps(content, indent=2, sort_keys=True, ensure_ascii=False)

View File

@@ -0,0 +1,474 @@
from __future__ import annotations
import hashlib
import json
from datetime import UTC, date, datetime
from decimal import Decimal
from pathlib import Path
from sqlalchemy import inspect, select, text
from app.core.agent_enums import (
AgentAssetContentType,
AgentAssetDomain,
AgentAssetStatus,
AgentAssetType,
AgentName,
AgentPermissionLevel,
AgentReviewStatus,
AgentRunSource,
AgentRunStatus,
AgentToolType,
)
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.models.audit_log import AuditLog
from app.models.financial_record import (
AccountsPayableRecord,
AccountsReceivableRecord,
ExpenseClaim,
ExpenseClaimItem,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import (
AgentAssetSpreadsheetManager,
COMPANY_COMMUNICATION_EXPENSE_RULE_CODE,
COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME,
COMPANY_TRAVEL_EXPENSE_RULE_CODE,
COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
FINANCE_RULES_LIBRARY,
RISK_RULES_LIBRARY,
)
from app.services.expense_rule_runtime import (
build_scene_submission_standard_markdown,
build_travel_risk_control_standard_markdown,
)
from app.services.agent_foundation_constants import (
ATTACHMENT_RULE_ASSET_CODE,
ATTACHMENT_RULE_RUNTIME_CONFIG,
COMPANY_COMMUNICATION_RULE_SCENARIO_JSON,
COMPANY_COMMUNICATION_RULE_VERSION,
COMPANY_TRAVEL_RULE_SCENARIO_JSON,
COMPANY_TRAVEL_RULE_VERSION,
DEMO_EXPENSE_CLAIM_SIGNATURES,
DEMO_PAYABLE_SIGNATURES,
DEMO_RECEIVABLE_SIGNATURES,
LEGACY_RULE_CODES,
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
)
from app.core.logging import get_logger
logger = get_logger("app.services.agent_foundation")
class AgentFoundationRiskRuleMixin:
def _iter_platform_risk_manifests(self) -> list[tuple[str, dict[str, object]]]:
manager = AgentAssetRuleLibraryManager()
manifests: list[tuple[str, dict[str, object]]] = []
for file_name in sorted(manager.list_rule_library_json_files(library=RISK_RULES_LIBRARY)):
payload = manager.read_rule_library_json(library=RISK_RULES_LIBRARY, file_name=file_name)
if payload.get("enabled") is False:
continue
manifests.append((file_name, payload))
return manifests
@staticmethod
def _resolve_platform_risk_category(manifest: dict[str, object]) -> str:
explicit = str(manifest.get("risk_category") or "").strip()
if explicit:
return explicit
rule_code = str(manifest.get("rule_code") or "").strip().lower()
applies_to = manifest.get("applies_to") if isinstance(manifest.get("applies_to"), dict) else {}
domains = {str(item or "").strip().lower() for item in applies_to.get("domains") or []}
expense_types = {
str(item or "").strip().lower() for item in applies_to.get("expense_types") or []
}
if rule_code.startswith("risk.invoice."):
return "发票"
if "meal" in domains or "entertainment" in expense_types:
return "餐饮招待"
if "transport" in expense_types or "consecutive_transport" in rule_code:
return "交通出行"
if "office" in expense_types:
return "办公物料"
if "travel" in domains or rule_code.startswith("risk.travel."):
return "差旅"
if rule_code.startswith("risk.expense."):
return "费用科目"
return "通用"
def _platform_risk_scenario_json(self, manifest: dict[str, object]) -> list[str]:
category = self._resolve_platform_risk_category(manifest)
return [category] if category else ["通用"]
def _platform_risk_config_json(self, file_name: str, manifest: dict[str, object]) -> dict[str, object]:
outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {}
fail_outcome = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {}
risk_category = self._resolve_platform_risk_category(manifest)
return {
"severity": str(fail_outcome.get("severity") or "medium"),
"enabled": True,
"tag": "风险规则",
"detail_mode": "json_risk",
"risk_category": risk_category,
"rule_library": RISK_RULES_LIBRARY,
"rule_document": {
"file_name": file_name,
"storage_key": f"rules/{RISK_RULES_LIBRARY}/{file_name}",
},
"ontology_signal": str(manifest.get("ontology_signal") or "").strip(),
"evaluator": str(manifest.get("evaluator") or "").strip(),
"source_ref": (
(manifest.get("metadata") or {}).get("source_ref")
if isinstance(manifest.get("metadata"), dict)
else ""
),
}
def _build_platform_risk_seed_assets(self) -> list[AgentAsset]:
assets: list[AgentAsset] = []
for file_name, manifest in self._iter_platform_risk_manifests():
rule_code = str(manifest.get("rule_code") or "").strip()
if not rule_code:
continue
metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {}
source_ref = str(metadata.get("source_ref") or "").strip()
rule_description = str(manifest.get("description") or "").strip()
assets.append(
AgentAsset(
asset_type=AgentAssetType.RULE.value,
code=rule_code,
name=str(manifest.get("name") or rule_code),
description=rule_description
or f"平台通用风险规则:{source_ref or manifest.get('name') or rule_code}",
domain=AgentAssetDomain.EXPENSE.value,
scenario_json=self._platform_risk_scenario_json(manifest),
owner=str(metadata.get("owner") or "风控与审计部"),
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
published_version="v1.0.0",
working_version="v1.0.0",
config_json=self._platform_risk_config_json(file_name, manifest),
)
)
return assets
def sync_platform_risk_rules_from_library(self) -> int:
existing_codes = set(self.db.scalars(select(AgentAsset.code)).all())
before_count = len(existing_codes)
self._ensure_platform_risk_rules_from_library(existing_codes)
self.db.flush()
after_codes = set(self.db.scalars(select(AgentAsset.code)).all())
synced = max(len(after_codes) - before_count, 0)
manifest_count = len(self._iter_platform_risk_manifests())
logger.info(
"Platform risk rules synced from library",
extra={"manifest_count": manifest_count, "created_count": synced, "total": len(after_codes)},
)
return manifest_count
def _ensure_platform_risk_rules_from_library(self, existing_codes: set[str]) -> None:
for file_name, manifest in self._iter_platform_risk_manifests():
rule_code = str(manifest.get("rule_code") or "").strip()
if not rule_code:
continue
metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {}
source_ref = str(metadata.get("source_ref") or "").strip()
rule_description = str(manifest.get("description") or "").strip()
config_json = self._platform_risk_config_json(file_name, manifest)
scenario_json = self._platform_risk_scenario_json(manifest)
asset = self.db.scalar(select(AgentAsset).where(AgentAsset.code == rule_code))
if asset is None and rule_code not in existing_codes:
asset = self._create_seed_asset(
asset_type=AgentAssetType.RULE.value,
code=rule_code,
name=str(manifest.get("name") or rule_code),
description=rule_description
or f"平台通用风险规则:{source_ref or manifest.get('name') or rule_code}",
domain=AgentAssetDomain.EXPENSE.value,
scenario_json=scenario_json,
owner=str(metadata.get("owner") or "风控与审计部"),
reviewer="顾承宇",
status=AgentAssetStatus.ACTIVE.value,
current_version="v1.0.0",
config_json=config_json,
)
if asset is None:
continue
if not str(asset.current_version or "").strip():
asset.current_version = "v1.0.0"
if not str(asset.working_version or "").strip():
asset.working_version = asset.current_version
if not str(asset.published_version or "").strip():
asset.published_version = asset.current_version
asset.status = asset.status or AgentAssetStatus.ACTIVE.value
asset.name = str(manifest.get("name") or asset.name or rule_code)
if rule_description:
asset.description = rule_description
asset.config_json = config_json
asset.scenario_json = scenario_json
self._ensure_asset_version(
asset,
version="v1.0.0",
content=self._platform_risk_rule_markdown(asset, manifest=manifest, file_name=file_name),
content_type=AgentAssetContentType.MARKDOWN.value,
change_note=f"平台通用风险规则:{asset.name}",
created_by="系统初始化",
)
self._ensure_asset_review(
asset,
version="v1.0.0",
reviewer="顾承宇",
review_status=AgentReviewStatus.APPROVED.value,
review_note="平台内置风险规则,供提交验审与风险问答共用。",
reviewed_at=datetime.now(UTC),
)
@staticmethod
def _platform_risk_rule_markdown(
asset: AgentAsset,
*,
manifest: dict[str, object] | None = None,
file_name: str = "",
) -> str:
config = asset.config_json if isinstance(asset.config_json, dict) else {}
rule_document = config.get("rule_document") if isinstance(config.get("rule_document"), dict) else {}
resolved_file_name = file_name or str(rule_document.get("file_name") or "").strip()
evaluator = str(config.get("evaluator") or (manifest or {}).get("evaluator") or "").strip()
ontology_signal = str(config.get("ontology_signal") or (manifest or {}).get("ontology_signal") or "").strip()
source_ref = str(config.get("source_ref") or "").strip()
if not source_ref and isinstance(manifest, dict):
metadata = manifest.get("metadata") if isinstance(manifest.get("metadata"), dict) else {}
source_ref = str(metadata.get("source_ref") or "").strip()
lines = [
f"# {asset.name}",
"",
"## 规则类型",
"",
"- 平台内置通用风险规则(`json_risk`",
]
if evaluator:
lines.append(f"- 检查器:`{evaluator}`")
if ontology_signal:
lines.append(f"- 本体信号:`{ontology_signal}`")
if source_ref:
lines.extend(["", "## 来源", "", f"- {source_ref}"])
if resolved_file_name:
lines.extend(
[
"",
"## 配置文件",
"",
f"- `rules/{RISK_RULES_LIBRARY}/{resolved_file_name}`",
]
)
return "\n".join(lines)
@staticmethod
def _platform_destination_location_risk_markdown() -> str:
    """Render the Markdown for the built-in destination/location consistency rule.

    Builds a transient, unsaved ``AgentAsset`` plus a fixed manifest and
    delegates to ``_platform_risk_rule_markdown``.
    """
    location_asset = AgentAsset(
        name="申报地点与票据地点一致",
        config_json={"evaluator": "location_consistency"},
    )
    location_manifest = {
        "evaluator": "location_consistency",
        "ontology_signal": "location_mismatch",
        "metadata": {"source_ref": "常用risk.txt / 一、出差类 / 行程不符"},
    }
    render = AgentFoundationRiskRuleMixin._platform_risk_rule_markdown
    return render(
        location_asset,
        manifest=location_manifest,
        file_name=PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
    )

View File

@@ -0,0 +1,400 @@
from __future__ import annotations
import hashlib
import json
from datetime import UTC, date, datetime
from decimal import Decimal
from pathlib import Path
from sqlalchemy import inspect, select, text
from app.core.agent_enums import (
AgentAssetContentType,
AgentAssetDomain,
AgentAssetStatus,
AgentAssetType,
AgentName,
AgentPermissionLevel,
AgentReviewStatus,
AgentRunSource,
AgentRunStatus,
AgentToolType,
)
from app.models.agent_asset import AgentAsset, AgentAssetReview, AgentAssetVersion
from app.models.agent_run import AgentRun, AgentToolCall, SemanticParseLog
from app.models.audit_log import AuditLog
from app.models.financial_record import (
AccountsPayableRecord,
AccountsReceivableRecord,
ExpenseClaim,
ExpenseClaimItem,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import (
AgentAssetSpreadsheetManager,
COMPANY_COMMUNICATION_EXPENSE_RULE_CODE,
COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME,
COMPANY_TRAVEL_EXPENSE_RULE_CODE,
COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
FINANCE_RULES_LIBRARY,
RISK_RULES_LIBRARY,
)
from app.services.expense_rule_runtime import (
build_scene_submission_standard_markdown,
build_travel_risk_control_standard_markdown,
)
from app.services.agent_foundation_constants import (
ATTACHMENT_RULE_ASSET_CODE,
ATTACHMENT_RULE_RUNTIME_CONFIG,
COMPANY_COMMUNICATION_RULE_SCENARIO_JSON,
COMPANY_COMMUNICATION_RULE_VERSION,
COMPANY_TRAVEL_RULE_SCENARIO_JSON,
COMPANY_TRAVEL_RULE_VERSION,
DEMO_EXPENSE_CLAIM_SIGNATURES,
DEMO_PAYABLE_SIGNATURES,
DEMO_RECEIVABLE_SIGNATURES,
LEGACY_RULE_CODES,
PLATFORM_DESTINATION_LOCATION_RULE_FILENAME,
)
from app.core.logging import get_logger
logger = get_logger("app.services.agent_foundation")
class AgentFoundationSpreadsheetMixin:
    """Seed helpers that attach finance-rule spreadsheets to rule assets.

    Each ``_ensure_*`` method stores the live spreadsheet in the finance rule
    library and rewrites ``asset.config_json`` so it points at that document.
    The shared work lives in ``_store_finance_rule_spreadsheet_seed``; the
    public-facing seed methods only choose the file and how its bytes are read.
    """

    def _ensure_company_travel_rule_spreadsheet_seed(
        self,
        asset: AgentAsset,
        *,
        version: str,
        actor_name: str,
    ):
        """Seed the company travel-expense rule spreadsheet for ``asset``.

        Returns:
            The document returned by ``store_rule_library_spreadsheet``.
        """
        return self._store_finance_rule_spreadsheet_seed(
            asset,
            version=version,
            actor_name=actor_name,
            file_name=COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
            read_content=self._read_or_build_company_travel_rule_file,
        )

    def _ensure_company_communication_rule_spreadsheet_seed(
        self,
        asset: AgentAsset,
        *,
        version: str,
        actor_name: str,
    ):
        """Seed the company communication-expense rule spreadsheet for ``asset``."""
        return self._ensure_finance_rule_spreadsheet_seed(
            asset,
            version=version,
            actor_name=actor_name,
            file_name=COMPANY_COMMUNICATION_EXPENSE_RULE_FILENAME,
            fallback_sheet_name="通信费报销规则",
        )

    @staticmethod
    def _read_or_build_company_travel_rule_file(
        manager: AgentAssetSpreadsheetManager,
    ) -> bytes:
        """Return the live travel rule workbook bytes, or a blank workbook."""
        return AgentFoundationSpreadsheetMixin._read_or_build_finance_rule_file(
            manager,
            file_name=COMPANY_TRAVEL_EXPENSE_RULE_FILENAME,
            fallback_sheet_name="差旅费报销规则",
        )

    def _ensure_finance_rule_spreadsheet_seed(
        self,
        asset: AgentAsset,
        *,
        version: str,
        actor_name: str,
        file_name: str,
        fallback_sheet_name: str,
    ):
        """Seed an arbitrary finance rule spreadsheet for ``asset``.

        Args:
            asset: Rule asset whose ``config_json`` is rewritten.
            version: Asset version recorded in the rule-document config.
            actor_name: Name passed to the spreadsheet manager as the actor.
            file_name: Spreadsheet file name inside the finance rule library.
            fallback_sheet_name: Sheet name for the blank workbook that is
                built when no live file exists yet.

        Returns:
            The document returned by ``store_rule_library_spreadsheet``.
        """
        return self._store_finance_rule_spreadsheet_seed(
            asset,
            version=version,
            actor_name=actor_name,
            file_name=file_name,
            read_content=lambda manager: self._read_or_build_finance_rule_file(
                manager,
                file_name=file_name,
                fallback_sheet_name=fallback_sheet_name,
            ),
        )

    def _store_finance_rule_spreadsheet_seed(
        self,
        asset: AgentAsset,
        *,
        version: str,
        actor_name: str,
        file_name: str,
        read_content,
    ):
        """Store the live spreadsheet and point ``asset.config_json`` at it.

        ``read_content`` is a callable taking the spreadsheet manager and
        returning the workbook bytes to store.

        The earlier implementation first probed the previously recorded
        ``storage_key`` and then executed identical code whether or not that
        file existed; the probe had no effect on the result and was removed.
        """
        manager = AgentAssetSpreadsheetManager()
        manager.ensure_rule_library_dirs()
        live_document = manager.store_rule_library_spreadsheet(
            library=FINANCE_RULES_LIBRARY,
            file_name=file_name,
            content=read_content(manager),
            actor_name=actor_name,
            source="rule-library",
        )
        # Existing config keys are kept; the spreadsheet keys below override.
        asset.config_json = {
            **(asset.config_json or {}),
            "detail_mode": "spreadsheet",
            "tag": "财务规则",
            "rule_library": FINANCE_RULES_LIBRARY,
            "rule_document": {
                **AgentAssetSpreadsheetManager.build_rule_document_config(
                    live_document,
                    asset_version=version,
                ),
                "storage_key": live_document.storage_key,
            },
        }
        return live_document

    @staticmethod
    def _read_or_build_finance_rule_file(
        manager: AgentAssetSpreadsheetManager,
        *,
        file_name: str,
        fallback_sheet_name: str,
    ) -> bytes:
        """Return the bytes of ``rules/<finance library>/<file_name>``.

        When the resolved path does not exist, a blank rule workbook with a
        sheet named ``fallback_sheet_name`` is built instead.
        """
        live_key = (Path("rules") / FINANCE_RULES_LIBRARY / file_name).as_posix()
        live_path = manager.resolve_storage_path(live_key)
        if live_path.exists():
            return live_path.read_bytes()
        return AgentAssetSpreadsheetManager.build_blank_rule_workbook(fallback_sheet_name)

View File

@@ -2,178 +2,20 @@ from __future__ import annotations
import json
import re
from dataclasses import dataclass
from decimal import Decimal, InvalidOperation
from typing import Any
from pydantic import BaseModel, Field, ValidationError
from pydantic import ValidationError
from sqlalchemy.orm import Session
@dataclass(frozen=True, slots=True)
class DocumentField:
key: str
label: str
value: str
@dataclass(frozen=True, slots=True)
class DocumentInsight:
document_type: str
document_type_label: str
scene_code: str
scene_label: str
expense_type: str
fields: tuple[DocumentField, ...] = ()
classification_source: str = "rule"
classification_confidence: float = 0.0
evidence: tuple[str, ...] = ()
warnings: tuple[str, ...] = ()
@dataclass(frozen=True, slots=True)
class DocumentRule:
document_type: str
document_type_label: str
scene_code: str
scene_label: str
expense_type: str
keywords: tuple[str, ...]
score_bias: float = 0.0
@dataclass(frozen=True, slots=True)
class RuleMatch:
rule: DocumentRule | None
confidence: float
evidence: tuple[str, ...]
score: float
class LlmDocumentClassification(BaseModel):
document_type: str = Field(default="other")
scene_code: str = Field(default="other")
scene_label: str = Field(default="其他票据")
expense_type: str = Field(default="other")
confidence: float = Field(default=0.0, ge=0.0, le=1.0)
evidence: list[str] = Field(default_factory=list)
fields: list[DocumentField] = Field(default_factory=list)
DEFAULT_RULE = DocumentRule(
document_type="other",
document_type_label="其他单据",
scene_code="other",
scene_label="其他票据",
expense_type="other",
keywords=(),
score_bias=0.0,
from app.services.document_intelligence_rules import DEFAULT_RULE, DOCUMENT_RULES, DOCUMENT_TYPE_RULE_MAP, SUPPORTED_DOCUMENT_TYPES
from app.services.document_intelligence_types import (
DocumentField,
DocumentInsight,
LlmDocumentClassification,
RuleMatch,
)
DOCUMENT_RULES: tuple[DocumentRule, ...] = (
DocumentRule(
document_type="flight_itinerary",
document_type_label="机票/航班行程单",
scene_code="travel",
scene_label="差旅票据",
expense_type="travel",
keywords=("电子行程单", "航班号", "航班", "机票", "登机", "航空", "客票"),
score_bias=0.34,
),
DocumentRule(
document_type="train_ticket",
document_type_label="火车/高铁票",
scene_code="travel",
scene_label="差旅票据",
expense_type="travel",
keywords=("铁路电子客票", "电子客票", "高铁", "火车", "动车", "铁路", "车次", "检票", "二等座", "一等座", "票价"),
score_bias=0.32,
),
DocumentRule(
document_type="hotel_invoice",
document_type_label="酒店住宿票据",
scene_code="hotel",
scene_label="住宿票据",
expense_type="hotel",
keywords=("住宿", "房费", "客房", "入住", "离店", "酒店", "宾馆", "间夜"),
score_bias=0.16,
),
DocumentRule(
document_type="taxi_receipt",
document_type_label="出租车/网约车票据",
scene_code="transport",
scene_label="交通票据",
expense_type="transport",
keywords=("滴滴出行", "滴滴", "网约车", "出租车", "打车", "乘车", "用车", "叫车", "车费", "车资", "的士", "快车", "专车", "订单号", "上车", "下车", "起点", "终点", "里程", "司机"),
score_bias=0.38,
),
DocumentRule(
document_type="parking_toll_receipt",
document_type_label="停车/通行费票据",
scene_code="transport",
scene_label="交通票据",
expense_type="transport",
keywords=("停车费", "通行费", "过路费", "收费站", "停车场", "停车"),
score_bias=0.28,
),
DocumentRule(
document_type="meal_receipt",
document_type_label="餐饮票据",
scene_code="meal",
scene_label="餐饮票据",
expense_type="meal",
keywords=("餐饮", "餐费", "用餐", "饭店", "酒楼", "餐厅", "食品", "外卖", "咖啡"),
score_bias=0.14,
),
DocumentRule(
document_type="office_invoice",
document_type_label="办公用品票据",
scene_code="office",
scene_label="办公用品票据",
expense_type="office",
keywords=("办公用品", "文具", "耗材", "打印纸", "墨盒", "硒鼓", "键盘", "鼠标"),
score_bias=0.14,
),
DocumentRule(
document_type="meeting_invoice",
document_type_label="会议/会务票据",
scene_code="meeting",
scene_label="会务票据",
expense_type="meeting",
keywords=("会议", "会务", "会展", "论坛", "会议室", "会场"),
score_bias=0.12,
),
DocumentRule(
document_type="training_invoice",
document_type_label="培训票据",
scene_code="training",
scene_label="培训票据",
expense_type="training",
keywords=("培训", "课程", "讲师", "教材", "学费", "认证"),
score_bias=0.12,
),
DocumentRule(
document_type="vat_invoice",
document_type_label="增值税发票",
scene_code="other",
scene_label="通用发票",
expense_type="other",
keywords=("发票代码", "发票号码", "价税合计", "增值税", "电子发票"),
score_bias=-0.08,
),
DocumentRule(
document_type="receipt",
document_type_label="一般收据/凭证",
scene_code="other",
scene_label="其他票据",
expense_type="other",
keywords=("收据", "凭证", "票据"),
score_bias=-0.18,
),
)
DOCUMENT_TYPE_RULE_MAP = {rule.document_type: rule for rule in DOCUMENT_RULES}
SUPPORTED_DOCUMENT_TYPES = tuple(DOCUMENT_TYPE_RULE_MAP.keys()) + ("other",)
AMOUNT_PATTERNS = (
re.compile(

View File

@@ -0,0 +1,120 @@
from __future__ import annotations
from app.services.document_intelligence_types import DocumentRule
# Catch-all rule describing an unrecognised document: every code is "other",
# it carries no keywords and a neutral score bias.
# NOTE(review): presumably used as the fallback when no entry of
# DOCUMENT_RULES matches — confirm against the matcher.
DEFAULT_RULE = DocumentRule(
document_type="other",
document_type_label="其他单据",
scene_code="other",
scene_label="其他票据",
expense_type="other",
keywords=(),
score_bias=0.0,
)
# Keyword rules, one per recognised document type. Each rule ties a document
# type to its scene / expense type and lists the keywords associated with it.
# The generic "vat_invoice" and "receipt" rules carry a negative score_bias,
# the specific ticket types a positive one.
# NOTE(review): how keywords and score_bias are combined is decided by the
# matcher, which is not part of this module — confirm there before tuning.
DOCUMENT_RULES: tuple[DocumentRule, ...] = (
DocumentRule(
document_type="flight_itinerary",
document_type_label="机票/航班行程单",
scene_code="travel",
scene_label="差旅票据",
expense_type="travel",
keywords=("电子行程单", "航班号", "航班", "机票", "登机", "航空", "客票"),
score_bias=0.34,
),
DocumentRule(
document_type="train_ticket",
document_type_label="火车/高铁票",
scene_code="travel",
scene_label="差旅票据",
expense_type="travel",
keywords=("铁路电子客票", "电子客票", "高铁", "火车", "动车", "铁路", "车次", "检票", "二等座", "一等座", "票价"),
score_bias=0.32,
),
DocumentRule(
document_type="hotel_invoice",
document_type_label="酒店住宿票据",
scene_code="hotel",
scene_label="住宿票据",
expense_type="hotel",
keywords=("住宿", "房费", "客房", "入住", "离店", "酒店", "宾馆", "间夜"),
score_bias=0.16,
),
DocumentRule(
document_type="taxi_receipt",
document_type_label="出租车/网约车票据",
scene_code="transport",
scene_label="交通票据",
expense_type="transport",
keywords=("滴滴出行", "滴滴", "网约车", "出租车", "打车", "乘车", "用车", "叫车", "车费", "车资", "的士", "快车", "专车", "订单号", "上车", "下车", "起点", "终点", "里程", "司机"),
score_bias=0.38,
),
DocumentRule(
document_type="parking_toll_receipt",
document_type_label="停车/通行费票据",
scene_code="transport",
scene_label="交通票据",
expense_type="transport",
keywords=("停车费", "通行费", "过路费", "收费站", "停车场", "停车"),
score_bias=0.28,
),
DocumentRule(
document_type="meal_receipt",
document_type_label="餐饮票据",
scene_code="meal",
scene_label="餐饮票据",
expense_type="meal",
keywords=("餐饮", "餐费", "用餐", "饭店", "酒楼", "餐厅", "食品", "外卖", "咖啡"),
score_bias=0.14,
),
DocumentRule(
document_type="office_invoice",
document_type_label="办公用品票据",
scene_code="office",
scene_label="办公用品票据",
expense_type="office",
keywords=("办公用品", "文具", "耗材", "打印纸", "墨盒", "硒鼓", "键盘", "鼠标"),
score_bias=0.14,
),
DocumentRule(
document_type="meeting_invoice",
document_type_label="会议/会务票据",
scene_code="meeting",
scene_label="会务票据",
expense_type="meeting",
keywords=("会议", "会务", "会展", "论坛", "会议室", "会场"),
score_bias=0.12,
),
DocumentRule(
document_type="training_invoice",
document_type_label="培训票据",
scene_code="training",
scene_label="培训票据",
expense_type="training",
keywords=("培训", "课程", "讲师", "教材", "学费", "认证"),
score_bias=0.12,
),
DocumentRule(
document_type="vat_invoice",
document_type_label="增值税发票",
scene_code="other",
scene_label="通用发票",
expense_type="other",
keywords=("发票代码", "发票号码", "价税合计", "增值税", "电子发票"),
score_bias=-0.08,
),
DocumentRule(
document_type="receipt",
document_type_label="一般收据/凭证",
scene_code="other",
scene_label="其他票据",
expense_type="other",
keywords=("收据", "凭证", "票据"),
score_bias=-0.18,
),
)
# Lookup from document type code to its rule, in DOCUMENT_RULES order.
DOCUMENT_TYPE_RULE_MAP = dict(
    (entry.document_type, entry) for entry in DOCUMENT_RULES
)
# Every document type that has a rule, followed by the catch-all "other".
SUPPORTED_DOCUMENT_TYPES = (*DOCUMENT_TYPE_RULE_MAP, "other")

View File

@@ -0,0 +1,53 @@
from __future__ import annotations
from dataclasses import dataclass
from pydantic import BaseModel, ConfigDict, Field
@dataclass(frozen=True, slots=True)
class DocumentField:
key: str
label: str
value: str
@dataclass(frozen=True, slots=True)
class DocumentInsight:
document_type: str
document_type_label: str
scene_code: str
scene_label: str
expense_type: str
fields: tuple[DocumentField, ...] = ()
classification_source: str = "rule"
classification_confidence: float = 0.0
evidence: tuple[str, ...] = ()
warnings: tuple[str, ...] = ()
@dataclass(frozen=True, slots=True)
class DocumentRule:
document_type: str
document_type_label: str
scene_code: str
scene_label: str
expense_type: str
keywords: tuple[str, ...]
score_bias: float = 0.0
@dataclass(frozen=True, slots=True)
class RuleMatch:
rule: DocumentRule | None
confidence: float
evidence: tuple[str, ...]
score: float
class LlmDocumentClassification(BaseModel):
    """Pydantic model for a document classification payload.

    Every attribute has a default, so an empty payload validates to an
    "other" classification with zero confidence. ``confidence`` is
    constrained to the closed range [0.0, 1.0].
    """

    # DocumentField is a plain dataclass rather than a pydantic model.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    document_type: str = "other"
    scene_code: str = "other"
    scene_label: str = "其他票据"
    expense_type: str = "other"
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)
    # Mutable defaults go through a factory so instances never share a list.
    evidence: list[str] = Field(default_factory=list)
    fields: list[DocumentField] = Field(default_factory=list)

View File

@@ -20,10 +20,7 @@ from app.models.role import Role
from app.repositories.employee import EmployeeRepository
from app.schemas.employee import (
EmployeeCreate,
EmployeeHistoryRead,
EmployeeImportErrorRead,
EmployeeImportResultRead,
EmployeeImportSummaryRead,
EmployeeMetaRead,
EmployeeOrganizationRead,
EmployeeRead,
@@ -31,13 +28,12 @@ from app.schemas.employee import (
EmployeeStatusSummaryRead,
EmployeeUpdate,
)
from app.services.employee_spreadsheet import (
EmployeeImportRow,
EmployeeSpreadsheetError,
build_export_workbook_bytes,
build_import_template_bytes,
parse_employee_workbook,
from app.services.employee_import import EmployeeImportCoordinator
from app.services.employee_serialization import (
format_history_datetime as serialize_history_datetime,
serialize_employee,
)
from app.services.employee_spreadsheet import build_import_template_bytes
from app.services.employee_seed import (
EMPLOYEE_DEFINITIONS,
EMPLOYEE_PROFILE_REPAIRS,
@@ -440,288 +436,21 @@ class EmployeeService:
def export_employees(self, status: str | None = None, keyword: str | None = None) -> bytes:
self.ensure_directory_ready()
employees = self.repository.list(status=status, keyword=keyword)
rows: list[list[str]] = []
for employee in employees:
organization = employee.organization_unit
role_codes = ",".join(role.role_code for role in self._sorted_roles(list(employee.roles)))
rows.append(
[
employee.employee_no,
employee.name,
employee.email,
employee.gender or "",
self._format_date(employee.birth_date) or "",
employee.phone or "",
self._format_date(employee.join_date) or "",
employee.location or "",
employee.position,
employee.grade,
organization.unit_code if organization else "",
employee.manager.employee_no if employee.manager else "",
employee.finance_owner_name or "",
employee.cost_center or "",
employee.employment_status,
role_codes,
]
)
return build_export_workbook_bytes(rows)
return self._import_coordinator().export_employees(status=status, keyword=keyword)
def import_employees(self, content: bytes, actor: str = "系统管理员") -> EmployeeImportResultRead:
self.ensure_directory_ready()
parsed_rows, parse_errors = parse_employee_workbook(content)
if parse_errors:
return self._build_import_failure(parse_errors, total_rows=len(parsed_rows))
return self._import_coordinator().import_employees(content, actor=actor)
validation_errors = self._validate_import_rows(parsed_rows)
if validation_errors:
return self._build_import_failure(validation_errors, total_rows=len(parsed_rows))
try:
summary = self._apply_import_rows(parsed_rows, actor=actor)
except Exception:
self.db.rollback()
logger.exception("Employee import failed during database write")
raise
imported_at = self._format_datetime(datetime.now(UTC)) or ""
message = f"导入成功:新增 {summary['created']} 人,更新 {summary['updated']} 人。"
logger.info(
"Imported employees created=%d updated=%d total=%d",
summary["created"],
summary["updated"],
len(parsed_rows),
)
return EmployeeImportResultRead(
success=True,
message=message,
summary=EmployeeImportSummaryRead(
totalRows=len(parsed_rows),
created=summary["created"],
updated=summary["updated"],
errorCount=0,
),
errors=[],
importedAt=imported_at,
)
def _validate_import_rows(
self, rows: list[EmployeeImportRow]
) -> list[EmployeeSpreadsheetError]:
errors: list[EmployeeSpreadsheetError] = []
employee_nos_in_file: dict[str, int] = {}
emails_in_file: dict[str, int] = {}
roles_by_code = {role.role_code: role for role in self.repository.list_roles()}
organizations_by_code = {
unit.unit_code: unit for unit in self.repository.list_organization_units()
}
employees_by_no = {
employee.employee_no: employee for employee in self.repository.list()
}
import_employee_nos = {row.employee_no for row in rows}
for row in rows:
if row.employee_no in employee_nos_in_file:
errors.append(
EmployeeSpreadsheetError(
row=row.row_number,
column="员工编号*",
employee_no=row.employee_no,
message=f"员工编号 {row.employee_no} 在文件中重复。",
)
)
else:
employee_nos_in_file[row.employee_no] = row.row_number
if row.email in emails_in_file:
errors.append(
EmployeeSpreadsheetError(
row=row.row_number,
column="邮箱*",
employee_no=row.employee_no,
message=f"邮箱 {row.email} 在文件中重复。",
)
)
else:
emails_in_file[row.email] = row.row_number
existing_by_email = self.repository.get_by_email(row.email)
if existing_by_email is not None and existing_by_email.employee_no != row.employee_no:
errors.append(
EmployeeSpreadsheetError(
row=row.row_number,
column="邮箱*",
employee_no=row.employee_no,
message=(
f"邮箱 {row.email} 已被员工 "
f"{existing_by_email.employee_no} 使用。"
),
)
)
if row.organization_unit_code and row.organization_unit_code not in organizations_by_code:
errors.append(
EmployeeSpreadsheetError(
row=row.row_number,
column="部门编码",
employee_no=row.employee_no,
message=f"部门编码 {row.organization_unit_code} 不存在。",
)
)
if row.manager_employee_no:
manager_exists = (
row.manager_employee_no in employees_by_no
or row.manager_employee_no in import_employee_nos
)
if not manager_exists:
errors.append(
EmployeeSpreadsheetError(
row=row.row_number,
column="直属上级工号",
employee_no=row.employee_no,
message=f"直属上级工号 {row.manager_employee_no} 不存在。",
)
)
if row.manager_employee_no == row.employee_no:
errors.append(
EmployeeSpreadsheetError(
row=row.row_number,
column="直属上级工号",
employee_no=row.employee_no,
message="直属上级不能是员工本人。",
)
)
invalid_role_codes = [
code for code in row.role_codes if code not in roles_by_code
]
if invalid_role_codes:
errors.append(
EmployeeSpreadsheetError(
row=row.row_number,
column="角色编码",
employee_no=row.employee_no,
message=f"角色不存在:{''.join(invalid_role_codes)}",
)
)
return errors
def _apply_import_rows(
self,
rows: list[EmployeeImportRow],
*,
actor: str,
) -> dict[str, int]:
roles_by_code = {role.role_code: role for role in self.repository.list_roles()}
organizations_by_code = {
unit.unit_code: unit for unit in self.repository.list_organization_units()
}
employees_by_no = {
employee.employee_no: employee for employee in self.repository.list()
}
created = 0
updated = 0
now = datetime.now(UTC)
try:
for row in rows:
employee = employees_by_no.get(row.employee_no)
is_new = employee is None
if is_new:
employee = Employee(
employee_no=row.employee_no,
name=row.name,
email=row.email,
password_hash=hash_password(DEFAULT_EMPLOYEE_PASSWORD),
)
self.db.add(employee)
employees_by_no[row.employee_no] = employee
created += 1
else:
updated += 1
employee.name = row.name
employee.email = row.email
employee.gender = row.gender
employee.birth_date = row.birth_date
employee.phone = row.phone
employee.join_date = row.join_date
employee.location = row.location
employee.position = row.position
employee.grade = row.grade
employee.finance_owner_name = row.finance_owner_name
employee.cost_center = row.cost_center
employee.employment_status = row.employment_status
employee.sync_state = "已同步"
employee.last_sync_at = now
if row.organization_unit_code:
employee.organization_unit = organizations_by_code[row.organization_unit_code]
else:
employee.organization_unit = None
employee.roles = self._sorted_roles(
[roles_by_code[code] for code in row.role_codes if code in roles_by_code]
)
action = (
"通过 Excel 导入新建员工档案"
if is_new
else "通过 Excel 导入更新员工档案"
)
self._append_change_log(employee, action=action, owner=actor, occurred_at=now)
self.db.flush()
for row in rows:
employee = employees_by_no[row.employee_no]
if row.manager_employee_no:
employee.manager = employees_by_no.get(row.manager_employee_no)
else:
employee.manager = None
self.db.commit()
except Exception:
self.db.rollback()
raise
return {"created": created, "updated": updated}
def _build_import_failure(
self,
errors: list[EmployeeSpreadsheetError],
*,
total_rows: int,
) -> EmployeeImportResultRead:
error_reads = [
EmployeeImportErrorRead(
row=item.row,
column=item.column,
employeeNo=item.employee_no,
message=item.message,
)
for item in errors
]
return EmployeeImportResultRead(
success=False,
message=(
f"导入未执行:共发现 {len(error_reads)} 处错误,请修正后重新导入。"
"原有员工数据未变更。"
),
summary=EmployeeImportSummaryRead(
totalRows=total_rows,
created=0,
updated=0,
errorCount=len(error_reads),
),
errors=error_reads,
importedAt=None,
def _import_coordinator(self) -> EmployeeImportCoordinator:
return EmployeeImportCoordinator(
self.db,
self.repository,
sorted_roles=self._sorted_roles,
append_change_log=self._append_change_log,
format_date=self._format_date,
format_datetime=self._format_datetime,
default_password=DEFAULT_EMPLOYEE_PASSWORD,
)
def _seed_roles(self) -> None:
@@ -1006,78 +735,18 @@ class EmployeeService:
self.db.delete(stale)
def _serialize_employee(self, employee: Employee) -> EmployeeRead:
organization = employee.organization_unit
roles = self._sorted_roles(list(employee.roles))
role_labels = [role.name for role in roles]
role_codes = [role.role_code for role in roles]
history = [
EmployeeHistoryRead(
action=item.action,
owner=item.owner,
time=self._format_history_datetime(item.occurred_at),
occurredAt=self._format_history_datetime(item.occurred_at),
)
for item in self._sorted_change_logs(employee)[:MAX_EMPLOYEE_CHANGE_LOGS]
]
return EmployeeRead(
id=employee.id,
avatar=(employee.name or "?")[:1],
name=employee.name,
employeeNo=employee.employee_no,
department=organization.name if organization else "",
position=employee.position,
grade=employee.grade,
manager=employee.manager.name if employee.manager else "CEO",
managerEmployeeNo=employee.manager.employee_no if employee.manager else None,
financeOwner=employee.finance_owner_name or "",
roles=role_labels,
roleCodes=role_codes,
status=employee.employment_status,
statusTone=STATUS_TONE_MAP.get(employee.employment_status, "neutral"),
gender=employee.gender,
age=self._calculate_age(employee.birth_date),
birthDate=self._format_date(employee.birth_date),
email=employee.email,
phone=employee.phone,
joinDate=self._format_date(employee.join_date),
location=employee.location,
costCenter=employee.cost_center,
updatedAt=self._format_datetime(employee.updated_at or employee.created_at),
lastSync=self._format_datetime(employee.last_sync_at),
syncState=employee.sync_state,
spotlight=employee.spotlight,
permissions=self._collect_permissions(role_codes),
history=history,
organization=(
EmployeeOrganizationRead(
id=organization.id,
code=organization.unit_code,
name=organization.name,
unitType=organization.unit_type,
costCenter=organization.cost_center,
location=organization.location,
managerName=organization.manager_name,
)
if organization
else None
),
return serialize_employee(
employee,
sorted_roles=self._sorted_roles(list(employee.roles)),
sorted_change_logs=self._sorted_change_logs(employee),
format_date=self._format_date,
format_datetime=self._format_datetime,
format_history_datetime=self._format_history_datetime,
role_permission_map=ROLE_PERMISSION_MAP,
status_tone_map=STATUS_TONE_MAP,
max_change_logs=MAX_EMPLOYEE_CHANGE_LOGS,
)
def _collect_permissions(self, role_codes: list[str]) -> list[str]:
permissions: list[str] = []
seen: set[str] = set()
for role_code in role_codes:
for permission in ROLE_PERMISSION_MAP.get(role_code, []):
if permission in seen:
continue
permissions.append(permission)
seen.add(permission)
return permissions
def _sorted_roles(self, roles: list[Role]) -> list[Role]:
return sorted(roles, key=lambda item: (ROLE_DISPLAY_ORDER.get(item.role_code, 999), item.name))
@@ -1125,21 +794,7 @@ class EmployeeService:
@staticmethod
def _format_history_datetime(value: datetime | None) -> str:
if value is None:
return ""
local = EmployeeService._to_display_datetime(value)
return (
f"{local.year}{local.month}{local.day}"
f"{local.hour}{local.minute}"
return serialize_history_datetime(
value,
to_display_datetime=EmployeeService._to_display_datetime,
)
@staticmethod
def _calculate_age(birth_date: date | None) -> int | None:
if birth_date is None:
return None
today = date.today()
age = today.year - birth_date.year
if (today.month, today.day) < (birth_date.month, birth_date.day):
age -= 1
return age

View File

@@ -0,0 +1,331 @@
from __future__ import annotations
from collections.abc import Callable
from datetime import UTC, date, datetime
from sqlalchemy.orm import Session
from app.core.logging import get_logger
from app.core.security import hash_password
from app.models.employee import Employee
from app.models.role import Role
from app.repositories.employee import EmployeeRepository
from app.schemas.employee import (
EmployeeImportErrorRead,
EmployeeImportResultRead,
EmployeeImportSummaryRead,
)
from app.services.employee_spreadsheet import (
EmployeeImportRow,
EmployeeSpreadsheetError,
build_export_workbook_bytes,
parse_employee_workbook,
)
logger = get_logger("app.services.employee")
class EmployeeImportCoordinator:
def __init__(
self,
db: Session,
repository: EmployeeRepository,
*,
sorted_roles: Callable[[list[Role]], list[Role]],
append_change_log: Callable[[Employee, str, str, datetime | None], None],
format_date: Callable[[date | None], str | None],
format_datetime: Callable[[datetime | None], str | None],
default_password: str,
) -> None:
self.db = db
self.repository = repository
self.sorted_roles = sorted_roles
self.append_change_log = append_change_log
self.format_date = format_date
self.format_datetime = format_datetime
self.default_password = default_password
def export_employees(self, status: str | None = None, keyword: str | None = None) -> bytes:
    """Export the employees matching the filters as workbook bytes.

    Args:
        status: Optional status filter forwarded to the repository.
        keyword: Optional keyword filter forwarded to the repository.

    Returns:
        The bytes produced by ``build_export_workbook_bytes``; one row per
        employee, optional values rendered as empty strings.
    """

    def to_row(member) -> list[str]:
        # Column order must stay aligned with the export workbook layout.
        unit = member.organization_unit
        supervisor = member.manager
        ordered_roles = self.sorted_roles(list(member.roles))
        return [
            member.employee_no,
            member.name,
            member.email,
            member.gender or "",
            self.format_date(member.birth_date) or "",
            member.phone or "",
            self.format_date(member.join_date) or "",
            member.location or "",
            member.position,
            member.grade,
            unit.unit_code if unit else "",
            supervisor.employee_no if supervisor else "",
            member.finance_owner_name or "",
            member.cost_center or "",
            member.employment_status,
            ",".join(role.role_code for role in ordered_roles),
        ]

    matched = self.repository.list(status=status, keyword=keyword)
    rows: list[list[str]] = [to_row(member) for member in matched]
    return build_export_workbook_bytes(rows)
def import_employees(self, content: bytes, actor: str = "系统管理员") -> EmployeeImportResultRead:
    """Import employees from workbook bytes.

    The workbook is parsed, then validated, then applied. Parse errors and
    validation errors each short-circuit into a failure result without
    applying any row. An exception raised while applying rows triggers a
    session rollback, is logged and re-raised.

    Args:
        content: Raw workbook bytes.
        actor: Name passed on to the row-apply step.

    Returns:
        A success result with created / updated counts, or a failure result
        listing the errors found.
    """
    rows, parse_errors = parse_employee_workbook(content)
    row_count = len(rows)
    if parse_errors:
        return self._build_import_failure(parse_errors, total_rows=row_count)

    validation_errors = self._validate_import_rows(rows)
    if validation_errors:
        return self._build_import_failure(validation_errors, total_rows=row_count)

    try:
        summary = self._apply_import_rows(rows, actor=actor)
    except Exception:
        self.db.rollback()
        logger.exception("Employee import failed during database write")
        raise

    imported_at = self.format_datetime(datetime.now(UTC)) or ""
    created_count = summary["created"]
    updated_count = summary["updated"]
    logger.info(
        "Imported employees created=%d updated=%d total=%d",
        created_count,
        updated_count,
        row_count,
    )
    result_summary = EmployeeImportSummaryRead(
        totalRows=row_count,
        created=created_count,
        updated=updated_count,
        errorCount=0,
    )
    return EmployeeImportResultRead(
        success=True,
        message=f"导入成功:新增 {created_count} 人,更新 {updated_count} 人。",
        summary=result_summary,
        errors=[],
        importedAt=imported_at,
    )
def _validate_import_rows(
    self, rows: list[EmployeeImportRow]
) -> list[EmployeeSpreadsheetError]:
    """Validate parsed import rows against each other and stored data.

    Checks performed per row, in order:
      * employee number not repeated earlier in the file;
      * email not repeated earlier in the file;
      * email not already held by a stored employee with a different number;
      * organization unit code (when given) exists;
      * manager number (when given) exists in storage or in this file;
      * manager is not the employee themself;
      * every role code exists.

    Args:
        rows: Rows produced by the workbook parser.

    Returns:
        All findings in row order; empty when the file is importable.
    """
    errors: list[EmployeeSpreadsheetError] = []

    def report(row: EmployeeImportRow, column: str, message: str) -> None:
        # Every finding shares the same row / employee coordinates.
        errors.append(
            EmployeeSpreadsheetError(
                row=row.row_number,
                column=column,
                employee_no=row.employee_no,
                message=message,
            )
        )

    seen_employee_nos: set[str] = set()
    seen_emails: set[str] = set()
    known_role_codes = {role.role_code for role in self.repository.list_roles()}
    known_unit_codes = {
        unit.unit_code for unit in self.repository.list_organization_units()
    }
    stored_employee_nos = {
        employee.employee_no for employee in self.repository.list()
    }
    import_employee_nos = {row.employee_no for row in rows}

    for row in rows:
        if row.employee_no in seen_employee_nos:
            report(row, "员工编号*", f"员工编号 {row.employee_no} 在文件中重复。")
        else:
            seen_employee_nos.add(row.employee_no)

        if row.email in seen_emails:
            report(row, "邮箱*", f"邮箱 {row.email} 在文件中重复。")
        else:
            seen_emails.add(row.email)

        # NOTE(review): this is one repository lookup per row; kept because
        # get_by_email's matching rules are not visible from here.
        existing_by_email = self.repository.get_by_email(row.email)
        if existing_by_email is not None and existing_by_email.employee_no != row.employee_no:
            report(
                row,
                "邮箱*",
                f"邮箱 {row.email} 已被员工 "
                f"{existing_by_email.employee_no} 使用。",
            )

        if row.organization_unit_code and row.organization_unit_code not in known_unit_codes:
            report(row, "部门编码", f"部门编码 {row.organization_unit_code} 不存在。")

        if row.manager_employee_no:
            # A manager may be created by this very import, so both sources count.
            manager_exists = (
                row.manager_employee_no in stored_employee_nos
                or row.manager_employee_no in import_employee_nos
            )
            if not manager_exists:
                report(
                    row,
                    "直属上级工号",
                    f"直属上级工号 {row.manager_employee_no} 不存在。",
                )
            if row.manager_employee_no == row.employee_no:
                report(row, "直属上级工号", "直属上级不能是员工本人。")

        invalid_role_codes = [
            code for code in row.role_codes if code not in known_role_codes
        ]
        if invalid_role_codes:
            # Separate the codes; joining on "" fused them into one unreadable token.
            report(row, "角色编码", f"角色不存在:{', '.join(invalid_role_codes)}")

    return errors
def _apply_import_rows(
    self,
    rows: list[EmployeeImportRow],
    *,
    actor: str,
) -> dict[str, int]:
    """Create or update one employee per row and commit the whole batch.

    Runs in two passes: the first writes scalar fields, organization unit,
    roles and a change-log entry, then flushes; the second links managers,
    which may themselves have been created in the first pass. Any exception
    rolls the session back and is re-raised.

    Args:
        rows: Validated import rows.
        actor: Owner recorded on each change-log entry.

    Returns:
        ``{"created": n, "updated": m}``.
    """
    copied_fields = (
        "name",
        "email",
        "gender",
        "birth_date",
        "phone",
        "join_date",
        "location",
        "position",
        "grade",
        "finance_owner_name",
        "cost_center",
        "employment_status",
    )
    role_lookup = {role.role_code: role for role in self.repository.list_roles()}
    unit_lookup = {
        unit.unit_code: unit for unit in self.repository.list_organization_units()
    }
    employee_lookup = {
        employee.employee_no: employee for employee in self.repository.list()
    }
    tally = {"created": 0, "updated": 0}
    stamp = datetime.now(UTC)
    try:
        for row in rows:
            record = employee_lookup.get(row.employee_no)
            if record is None:
                record = Employee(
                    employee_no=row.employee_no,
                    name=row.name,
                    email=row.email,
                    password_hash=hash_password(self.default_password),
                )
                self.db.add(record)
                # Register right away so the manager pass can resolve it.
                employee_lookup[row.employee_no] = record
                tally["created"] += 1
                action = "通过 Excel 导入新建员工档案"
            else:
                tally["updated"] += 1
                action = "通过 Excel 导入更新员工档案"

            for field_name in copied_fields:
                setattr(record, field_name, getattr(row, field_name))
            record.sync_state = "已同步"
            record.last_sync_at = stamp
            record.organization_unit = (
                unit_lookup[row.organization_unit_code]
                if row.organization_unit_code
                else None
            )
            matched_roles = [
                role_lookup[code] for code in row.role_codes if code in role_lookup
            ]
            record.roles = self.sorted_roles(matched_roles)
            self.append_change_log(record, action=action, owner=actor, occurred_at=stamp)

        self.db.flush()

        for row in rows:
            record = employee_lookup[row.employee_no]
            record.manager = (
                employee_lookup.get(row.manager_employee_no)
                if row.manager_employee_no
                else None
            )
        self.db.commit()
    except Exception:
        self.db.rollback()
        raise
    return tally
def _build_import_failure(
    self,
    errors: list[EmployeeSpreadsheetError],
    *,
    total_rows: int,
) -> EmployeeImportResultRead:
    """Wrap spreadsheet errors into an unsuccessful import result.

    Args:
        errors: Findings from parsing or validation.
        total_rows: Row count reported in the summary.

    Returns:
        A result with ``success=False``, zero created/updated counts, the
        converted errors and no import timestamp.
    """
    error_reads: list[EmployeeImportErrorRead] = []
    for issue in errors:
        error_reads.append(
            EmployeeImportErrorRead(
                row=issue.row,
                column=issue.column,
                employeeNo=issue.employee_no,
                message=issue.message,
            )
        )

    failure_message = (
        f"导入未执行:共发现 {len(error_reads)} 处错误,请修正后重新导入。"
        "原有员工数据未变更。"
    )
    failure_summary = EmployeeImportSummaryRead(
        totalRows=total_rows,
        created=0,
        updated=0,
        errorCount=len(error_reads),
    )
    return EmployeeImportResultRead(
        success=False,
        message=failure_message,
        summary=failure_summary,
        errors=error_reads,
        importedAt=None,
    )

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,112 @@
from __future__ import annotations
# Organization unit records: one root company ("ORG-ROOT", parent_code None)
# followed by departments that all name "ORG-ROOT" as their parent.
# Each record carries unit_code, name, unit_type, parent_code, cost_center,
# location and manager_name.
ORGANIZATION_DEFINITIONS = [
{
"unit_code": "ORG-ROOT",
"name": "星海科技",
"unit_type": "company",
"parent_code": None,
"cost_center": "CC-0000",
"location": "上海",
"manager_name": "李文静",
},
{
"unit_code": "EXEC-OFFICE",
"name": "总经办",
"unit_type": "department",
"parent_code": "ORG-ROOT",
"cost_center": "CC-1001",
"location": "上海",
"manager_name": "李文静",
},
{
"unit_code": "FIN-SSC",
"name": "财务共享中心",
"unit_type": "department",
"parent_code": "ORG-ROOT",
"cost_center": "CC-2108",
"location": "上海",
"manager_name": "张晓晴",
},
{
"unit_code": "HR-OD",
"name": "人力与组织",
"unit_type": "department",
"parent_code": "ORG-ROOT",
"cost_center": "CC-3206",
"location": "杭州",
"manager_name": "陈硕",
},
{
"unit_code": "SALES-SOUTH",
"name": "华南销售部",
"unit_type": "department",
"parent_code": "ORG-ROOT",
"cost_center": "CC-4102",
"location": "深圳",
"manager_name": "陈嘉",
},
{
"unit_code": "SALES-EAST",
"name": "华东销售部",
"unit_type": "department",
"parent_code": "ORG-ROOT",
"cost_center": "CC-4108",
"location": "上海",
"manager_name": "秦墨然",
},
{
"unit_code": "MKT-BRAND",
"name": "市场品牌部",
"unit_type": "department",
"parent_code": "ORG-ROOT",
"cost_center": "CC-5203",
"location": "北京",
"manager_name": "刘思雨",
},
{
"unit_code": "RND-CENTER",
"name": "产品研发中心",
"unit_type": "department",
"parent_code": "ORG-ROOT",
"cost_center": "CC-6105",
"location": "北京",
"manager_name": "吴磊",
},
{
"unit_code": "OPS-ADMIN",
"name": "行政采购部",
"unit_type": "department",
"parent_code": "ORG-ROOT",
"cost_center": "CC-7204",
"location": "南京",
"manager_name": "梁雨辰",
},
{
"unit_code": "AUDIT-RISK",
"name": "风控与审计部",
"unit_type": "department",
"parent_code": "ORG-ROOT",
"cost_center": "CC-8102",
"location": "上海",
"manager_name": "顾承宇",
},
]
# Profile records keyed by employee_no; organization_unit_code and
# manager_employee_no refer to other records by code.
# NOTE(review): the name suggests these overwrite existing employee profiles,
# but the consumer is not visible here — confirm how they are applied.
EMPLOYEE_PROFILE_REPAIRS = [
{
"employee_no": "E90919",
"name": "曹笑竹",
"email": "caoxiaozhu@xf.com",
"location": "武汉",
"position": "财务智能化产品经理",
"grade": "P5",
"organization_unit_code": "RND-CENTER",
"manager_employee_no": "E11745",
"finance_owner_name": "研发财务BP",
"cost_center": "CC-6112",
"employment_status": "在职",
"sync_state": "已同步",
"role_codes": ["user"],
},
]

View File

@@ -0,0 +1,434 @@
from __future__ import annotations
# First part of the employee records. Each record carries identity and contact
# fields, position/grade, organization_unit_code, manager_employee_no (None
# for the top of the reporting line), finance/cost-center fields, status and
# sync fields, role_codes, and optionally a "history" list of change entries
# (action, owner, occurred_at).
# NOTE(review): every record here has an empty "gender" string — confirm that
# this is intentional.
EMPLOYEE_DEFINITIONS_PART_1 = [
{
"employee_no": "E10018",
"name": "李文静",
"gender": "",
"birth_date": "1987-03-26",
"phone": "13900187688",
"email": "wenjing.li@xfinance.com",
"join_date": "2018-06-21",
"location": "上海",
"position": "高级财务总监",
"grade": "D2",
"organization_unit_code": "EXEC-OFFICE",
"manager_employee_no": None,
"finance_owner_name": "集团财务",
"cost_center": "CC-1001",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-05 16:20",
"last_sync_at": "2026-05-05 16:20",
"role_codes": ["executive", "approver"],
},
{
"employee_no": "E10234",
"name": "张晓晴",
"gender": "",
"birth_date": "1994-08-12",
"phone": "13810234567",
"email": "xiaoqing.zhang@xfinance.com",
"join_date": "2021-03-15",
"location": "上海",
"position": "费用运营经理",
"grade": "M3",
"organization_unit_code": "FIN-SSC",
"manager_employee_no": "E10018",
"finance_owner_name": "华东财务组",
"cost_center": "CC-2108",
"employment_status": "在职",
"sync_state": "待生效",
"spotlight": True,
"updated_at": "2026-05-06 10:24",
"last_sync_at": "2026-05-06 10:24",
"role_codes": ["manager", "finance", "approver"],
"history": [
{
"action": "新增“审批负责人”角色",
"owner": "系统管理员 · 王敏",
"occurred_at": "2026-05-06 10:24",
},
{
"action": "调整财务归口为华东财务组",
"owner": "组织管理员 · 陈硕",
"occurred_at": "2026-05-05 18:10",
},
],
},
{
"employee_no": "E10258",
"name": "孙楠",
"gender": "",
"birth_date": "1992-09-17",
"phone": "13722580312",
"email": "nan.sun@xfinance.com",
"join_date": "2020-11-09",
"location": "上海",
"position": "财务分析师",
"grade": "P5",
"organization_unit_code": "FIN-SSC",
"manager_employee_no": "E10234",
"finance_owner_name": "华东财务组",
"cost_center": "CC-2111",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-04 15:18",
"last_sync_at": "2026-05-04 15:18",
"role_codes": ["finance"],
},
{
"employee_no": "E10271",
"name": "周悦宁",
"gender": "",
"birth_date": "1993-04-21",
"phone": "13622711986",
"email": "yuening.zhou@xfinance.com",
"join_date": "2021-07-05",
"location": "上海",
"position": "财务系统专员",
"grade": "P5",
"organization_unit_code": "FIN-SSC",
"manager_employee_no": "E10234",
"finance_owner_name": "华东财务组",
"cost_center": "CC-2112",
"employment_status": "在职",
"sync_state": "同步中",
"spotlight": False,
"updated_at": "2026-05-07 09:35",
"last_sync_at": "2026-05-07 09:10",
"role_codes": ["finance", "auditor"],
},
{
"employee_no": "E10289",
"name": "高嘉禾",
"gender": "",
"birth_date": "1996-02-14",
"phone": "13522895642",
"email": "jiahe.gao@xfinance.com",
"join_date": "2023-01-10",
"location": "上海",
"position": "差旅合规专员",
"grade": "P4",
"organization_unit_code": "FIN-SSC",
"manager_employee_no": "E10234",
"finance_owner_name": "华东财务组",
"cost_center": "CC-2115",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-03 11:42",
"last_sync_at": "2026-05-03 11:42",
"role_codes": ["finance"],
},
{
"employee_no": "E10867",
"name": "王敏",
"gender": "",
"birth_date": "1996-11-05",
"phone": "13688671200",
"email": "min.wang@xfinance.com",
"join_date": "2022-08-08",
"location": "杭州",
"position": "组织发展主管",
"grade": "P6",
"organization_unit_code": "HR-OD",
"manager_employee_no": "E11618",
"finance_owner_name": "总部财务BP",
"cost_center": "CC-3206",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-05 09:18",
"last_sync_at": "2026-05-05 09:18",
"role_codes": ["manager", "auditor"],
},
{
"employee_no": "E11618",
"name": "陈硕",
"gender": "",
"birth_date": "1990-05-09",
"phone": "13816186540",
"email": "shuo.chen@xfinance.com",
"join_date": "2019-09-16",
"location": "杭州",
"position": "人力资源经理",
"grade": "M2",
"organization_unit_code": "HR-OD",
"manager_employee_no": "E10018",
"finance_owner_name": "总部财务BP",
"cost_center": "CC-3201",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-04 17:08",
"last_sync_at": "2026-05-04 17:08",
"role_codes": ["manager", "approver"],
},
{
"employee_no": "E12311",
"name": "何思成",
"gender": "",
"birth_date": "1998-07-19",
"phone": "13723117654",
"email": "sicheng.he@xfinance.com",
"join_date": "2026-02-17",
"location": "杭州",
"position": "HRBP",
"grade": "P4",
"organization_unit_code": "HR-OD",
"manager_employee_no": "E11618",
"finance_owner_name": "总部财务BP",
"cost_center": "CC-3208",
"employment_status": "试用中",
"sync_state": "待生效",
"spotlight": False,
"updated_at": "2026-05-07 08:42",
"last_sync_at": "2026-05-07 08:42",
"role_codes": ["user"],
},
{
"employee_no": "E11026",
"name": "刘思雨",
"gender": "",
"birth_date": "1991-12-03",
"phone": "13921036540",
"email": "siyu.liu@xfinance.com",
"join_date": "2020-04-13",
"location": "北京",
"position": "品牌市场经理",
"grade": "M2",
"organization_unit_code": "MKT-BRAND",
"manager_employee_no": "E10018",
"finance_owner_name": "市场财务BP",
"cost_center": "CC-5203",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-06 14:36",
"last_sync_at": "2026-05-06 14:36",
"role_codes": ["user", "approver"],
},
{
"employee_no": "E12408",
"name": "冯可欣",
"gender": "",
"birth_date": "1997-10-28",
"phone": "13624085542",
"email": "kexin.feng@xfinance.com",
"join_date": "2024-03-11",
"location": "北京",
"position": "品牌策划",
"grade": "P4",
"organization_unit_code": "MKT-BRAND",
"manager_employee_no": "E11026",
"finance_owner_name": "市场财务BP",
"cost_center": "CC-5207",
"employment_status": "在职",
"sync_state": "同步中",
"spotlight": False,
"updated_at": "2026-05-07 10:02",
"last_sync_at": "2026-05-07 09:48",
"role_codes": ["user"],
},
{
"employee_no": "E12419",
"name": "许泽航",
"gender": "",
"birth_date": "1995-05-15",
"phone": "13524199508",
"email": "zehang.xu@xfinance.com",
"join_date": "2023-06-19",
"location": "北京",
"position": "数字营销专员",
"grade": "P4",
"organization_unit_code": "MKT-BRAND",
"manager_employee_no": "E11026",
"finance_owner_name": "市场财务BP",
"cost_center": "CC-5209",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-03 16:52",
"last_sync_at": "2026-05-03 16:52",
"role_codes": ["user"],
},
{
"employee_no": "E11602",
"name": "陈嘉",
"gender": "",
"birth_date": "1997-02-18",
"phone": "13716029901",
"email": "jia.chen@xfinance.com",
"join_date": "2026-03-01",
"location": "深圳",
"position": "区域销售经理",
"grade": "M2",
"organization_unit_code": "SALES-SOUTH",
"manager_employee_no": "E10018",
"finance_owner_name": "华南财务组",
"cost_center": "CC-4102",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-04 14:12",
"last_sync_at": "2026-05-04 14:12",
"role_codes": ["user", "approver"],
},
{
"employee_no": "E12476",
"name": "马骁然",
"gender": "",
"birth_date": "1994-01-08",
"phone": "13824760139",
"email": "xiaoran.ma@xfinance.com",
"join_date": "2022-09-05",
"location": "深圳",
"position": "销售运营专家",
"grade": "P5",
"organization_unit_code": "SALES-SOUTH",
"manager_employee_no": "E11602",
"finance_owner_name": "华南财务组",
"cost_center": "CC-4106",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-06 18:15",
"last_sync_at": "2026-05-06 18:15",
"role_codes": ["user"],
},
{
"employee_no": "E12508",
"name": "唐子墨",
"gender": "",
"birth_date": "1996-06-11",
"phone": "13925088761",
"email": "zimo.tang@xfinance.com",
"join_date": "2024-02-26",
"location": "深圳",
"position": "大客户代表",
"grade": "P4",
"organization_unit_code": "SALES-SOUTH",
"manager_employee_no": "E11602",
"finance_owner_name": "华南财务组",
"cost_center": "CC-4109",
"employment_status": "停用",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-01 11:06",
"last_sync_at": "2026-05-01 11:06",
"role_codes": ["user"],
},
{
"employee_no": "E12514",
"name": "罗欣怡",
"gender": "",
"birth_date": "2000-03-02",
"phone": "13625141227",
"email": "xinyi.luo@xfinance.com",
"join_date": "2026-02-24",
"location": "深圳",
"position": "销售协调专员",
"grade": "P3",
"organization_unit_code": "SALES-SOUTH",
"manager_employee_no": "E11602",
"finance_owner_name": "华南财务组",
"cost_center": "CC-4112",
"employment_status": "试用中",
"sync_state": "待生效",
"spotlight": False,
"updated_at": "2026-05-05 15:42",
"last_sync_at": "2026-05-05 15:42",
"role_codes": ["user"],
},
{
"employee_no": "E11745",
"name": "吴磊",
"gender": "",
"birth_date": "1989-09-27",
"phone": "13817459812",
"email": "lei.wu@xfinance.com",
"join_date": "2019-12-09",
"location": "北京",
"position": "研发平台主管",
"grade": "M3",
"organization_unit_code": "RND-CENTER",
"manager_employee_no": "E10018",
"finance_owner_name": "研发财务BP",
"cost_center": "CC-6105",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-06 13:08",
"last_sync_at": "2026-05-06 13:08",
"role_codes": ["user", "approver", "auditor"],
},
{
"employee_no": "E11991",
"name": "赵明",
"gender": "",
"birth_date": "1994-06-09",
"phone": "13519913300",
"email": "ming.zhao@xfinance.com",
"join_date": "2023-11-18",
"location": "北京",
"position": "产品经理",
"grade": "P5",
"organization_unit_code": "RND-CENTER",
"manager_employee_no": "E11745",
"finance_owner_name": "研发财务BP",
"cost_center": "CC-6112",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-02 11:32",
"last_sync_at": "2026-05-02 11:32",
"role_codes": ["user"],
},
{
"employee_no": "E12611",
"name": "彭一凡",
"gender": "",
"birth_date": "1995-02-03",
"phone": "13726114588",
"email": "yifan.peng@xfinance.com",
"join_date": "2022-04-18",
"location": "北京",
"position": "后端工程师",
"grade": "P5",
"organization_unit_code": "RND-CENTER",
"manager_employee_no": "E11745",
"finance_owner_name": "研发财务BP",
"cost_center": "CC-6114",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-06 09:44",
"last_sync_at": "2026-05-06 09:44",
"role_codes": ["user"],
},
{
"employee_no": "E12618",
"name": "苏清禾",
"gender": "",
"birth_date": "1994-12-25",
"phone": "13626188763",
"email": "qinghe.su@xfinance.com",
"join_date": "2022-05-16",
"location": "北京",
"position": "数据工程师",
"grade": "P5",
"organization_unit_code": "RND-CENTER",
"manager_employee_no": "E11745",
"finance_owner_name": "研发财务BP",
"cost_center": "CC-6116",
"employment_status": "在职",
"sync_state": "同步中",
"spotlight": False,
"updated_at": "2026-05-07 10:26",
"last_sync_at": "2026-05-07 10:18",
"role_codes": ["user"],
},
]

View File

@@ -0,0 +1,412 @@
from __future__ import annotations
# Second part of the employee records. Each record carries identity and
# contact fields, position/grade, organization_unit_code, manager_employee_no,
# finance/cost-center fields, status and sync fields, role_codes, and
# optionally a "history" list of change entries (action, owner, occurred_at).
# NOTE(review): every record here has an empty "gender" string — confirm that
# this is intentional.
EMPLOYEE_DEFINITIONS_PART_2 = [
{
"employee_no": "E12624",
"name": "沈知远",
"gender": "",
"birth_date": "1992-11-06",
"phone": "13926241855",
"email": "zhiyuan.shen@xfinance.com",
"join_date": "2021-11-22",
"location": "北京",
"position": "测试负责人",
"grade": "P6",
"organization_unit_code": "RND-CENTER",
"manager_employee_no": "E11745",
"finance_owner_name": "研发财务BP",
"cost_center": "CC-6119",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-05 13:12",
"last_sync_at": "2026-05-05 13:12",
"role_codes": ["user"],
},
{
"employee_no": "E11852",
"name": "周晓彤",
"gender": "",
"birth_date": "1997-05-27",
"phone": "13818529954",
"email": "xiaotong.zhou@xfinance.com",
"join_date": "2022-06-30",
"location": "南京",
"position": "行政采购专员",
"grade": "P4",
"organization_unit_code": "OPS-ADMIN",
"manager_employee_no": "E12653",
"finance_owner_name": "行政财务BP",
"cost_center": "CC-7204",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-05 11:22",
"last_sync_at": "2026-05-05 11:22",
"role_codes": ["user"],
},
{
"employee_no": "E12653",
"name": "梁雨辰",
"gender": "",
"birth_date": "1991-08-30",
"phone": "13726539876",
"email": "yuchen.liang@xfinance.com",
"join_date": "2021-01-04",
"location": "南京",
"position": "行政运营经理",
"grade": "M1",
"organization_unit_code": "OPS-ADMIN",
"manager_employee_no": "E10018",
"finance_owner_name": "行政财务BP",
"cost_center": "CC-7201",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-06 17:44",
"last_sync_at": "2026-05-06 17:44",
"role_codes": ["user", "approver"],
},
{
"employee_no": "E12661",
"name": "顾承宇",
"gender": "",
"birth_date": "1988-04-16",
"phone": "13926614528",
"email": "chengyu.gu@xfinance.com",
"join_date": "2020-02-03",
"location": "上海",
"position": "风控审计经理",
"grade": "M2",
"organization_unit_code": "AUDIT-RISK",
"manager_employee_no": "E10018",
"finance_owner_name": "集团财务",
"cost_center": "CC-8102",
"employment_status": "在职",
"sync_state": "待生效",
"spotlight": True,
"updated_at": "2026-05-07 09:52",
"last_sync_at": "2026-05-07 09:52",
"role_codes": ["auditor", "finance"],
"history": [
{
"action": "更新审计观察范围",
"owner": "系统管理员 · 张晓晴",
"occurred_at": "2026-05-07 09:52",
},
{
"action": "补充高风险费用抽样规则",
"owner": "审计管理员 · 王敏",
"occurred_at": "2026-05-06 18:30",
},
],
},
{
"employee_no": "E12679",
"name": "郑若彤",
"gender": "",
"birth_date": "1997-09-13",
"phone": "13626794520",
"email": "ruotong.zheng@xfinance.com",
"join_date": "2024-01-08",
"location": "上海",
"position": "审计专员",
"grade": "P4",
"organization_unit_code": "AUDIT-RISK",
"manager_employee_no": "E12661",
"finance_owner_name": "集团财务",
"cost_center": "CC-8105",
"employment_status": "在职",
"sync_state": "同步中",
"spotlight": False,
"updated_at": "2026-05-07 08:58",
"last_sync_at": "2026-05-07 08:40",
"role_codes": ["auditor"],
},
{
"employee_no": "E12688",
"name": "方逸晨",
"gender": "",
"birth_date": "1995-01-20",
"phone": "13526881142",
"email": "yichen.fang@xfinance.com",
"join_date": "2023-08-14",
"location": "南京",
"position": "采购合规分析师",
"grade": "P4",
"organization_unit_code": "OPS-ADMIN",
"manager_employee_no": "E12653",
"finance_owner_name": "行政财务BP",
"cost_center": "CC-7208",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-03 14:16",
"last_sync_at": "2026-05-03 14:16",
"role_codes": ["user", "finance"],
},
{
"employee_no": "E12067",
"name": "秦墨然",
"gender": "",
"birth_date": "1990-10-10",
"phone": "13820674519",
"email": "moran.qin@xfinance.com",
"join_date": "2020-07-20",
"location": "上海",
"position": "华东销售总监",
"grade": "M2",
"organization_unit_code": "SALES-EAST",
"manager_employee_no": "E10018",
"finance_owner_name": "华东财务组",
"cost_center": "CC-4108",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-06 12:40",
"last_sync_at": "2026-05-06 12:40",
"role_codes": ["user", "approver"],
},
{
"employee_no": "E12703",
"name": "宋知夏",
"gender": "",
"birth_date": "1994-07-07",
"phone": "13727031129",
"email": "zhixia.song@xfinance.com",
"join_date": "2022-12-12",
"location": "上海",
"position": "重点客户经理",
"grade": "P5",
"organization_unit_code": "SALES-EAST",
"manager_employee_no": "E12067",
"finance_owner_name": "华东财务组",
"cost_center": "CC-4111",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-04 10:58",
"last_sync_at": "2026-05-04 10:58",
"role_codes": ["user"],
},
{
"employee_no": "E12716",
"name": "杜嘉宁",
"gender": "",
"birth_date": "1999-11-16",
"phone": "13627161248",
"email": "jianing.du@xfinance.com",
"join_date": "2026-01-19",
"location": "上海",
"position": "销售代表",
"grade": "P3",
"organization_unit_code": "SALES-EAST",
"manager_employee_no": "E12067",
"finance_owner_name": "华东财务组",
"cost_center": "CC-4114",
"employment_status": "试用中",
"sync_state": "待生效",
"spotlight": False,
"updated_at": "2026-05-05 12:26",
"last_sync_at": "2026-05-05 12:26",
"role_codes": ["user"],
},
{
"employee_no": "E12722",
"name": "邵宁远",
"gender": "",
"birth_date": "1998-12-01",
"phone": "13527221506",
"email": "ningyuan.shao@xfinance.com",
"join_date": "2026-02-08",
"location": "北京",
"position": "数据分析师",
"grade": "P4",
"organization_unit_code": "RND-CENTER",
"manager_employee_no": "E11745",
"finance_owner_name": "研发财务BP",
"cost_center": "CC-6122",
"employment_status": "试用中",
"sync_state": "同步中",
"spotlight": False,
"updated_at": "2026-05-07 09:06",
"last_sync_at": "2026-05-07 08:55",
"role_codes": ["user"],
},
{
"employee_no": "E12739",
"name": "林可昕",
"gender": "",
"birth_date": "1996-10-23",
"phone": "13827394510",
"email": "kexin.lin@xfinance.com",
"join_date": "2023-04-17",
"location": "上海",
"position": "费用核算专员",
"grade": "P4",
"organization_unit_code": "FIN-SSC",
"manager_employee_no": "E10234",
"finance_owner_name": "华东财务组",
"cost_center": "CC-2118",
"employment_status": "停用",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-04-30 18:05",
"last_sync_at": "2026-04-30 18:05",
"role_codes": ["finance"],
},
{
"employee_no": "E12744",
"name": "赵予安",
"gender": "",
"birth_date": "1993-01-30",
"phone": "13727442139",
"email": "yuan.zhao@xfinance.com",
"join_date": "2021-10-11",
"location": "上海",
"position": "预算控制经理",
"grade": "M1",
"organization_unit_code": "FIN-SSC",
"manager_employee_no": "E10234",
"finance_owner_name": "集团财务",
"cost_center": "CC-2120",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-06 15:34",
"last_sync_at": "2026-05-06 15:34",
"role_codes": ["finance", "approver"],
},
{
"employee_no": "E12750",
"name": "谢知行",
"gender": "",
"birth_date": "1995-09-14",
"phone": "13627501386",
"email": "zhixing.xie@xfinance.com",
"join_date": "2022-07-25",
"location": "深圳",
"position": "渠道销售经理",
"grade": "P5",
"organization_unit_code": "SALES-SOUTH",
"manager_employee_no": "E11602",
"finance_owner_name": "华南财务组",
"cost_center": "CC-4116",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-04 09:48",
"last_sync_at": "2026-05-04 09:48",
"role_codes": ["user"],
},
{
"employee_no": "E12758",
"name": "顾南枝",
"gender": "",
"birth_date": "1994-04-12",
"phone": "13827584522",
"email": "nanzhi.gu@xfinance.com",
"join_date": "2022-05-09",
"location": "北京",
"position": "内容运营经理",
"grade": "P5",
"organization_unit_code": "MKT-BRAND",
"manager_employee_no": "E11026",
"finance_owner_name": "市场财务BP",
"cost_center": "CC-5211",
"employment_status": "在职",
"sync_state": "同步中",
"spotlight": False,
"updated_at": "2026-05-07 11:08",
"last_sync_at": "2026-05-07 10:50",
"role_codes": ["user"],
},
{
"employee_no": "E12763",
"name": "孟书言",
"gender": "",
"birth_date": "1992-02-09",
"phone": "13527633148",
"email": "shuyan.meng@xfinance.com",
"join_date": "2021-06-28",
"location": "北京",
"position": "架构工程师",
"grade": "P6",
"organization_unit_code": "RND-CENTER",
"manager_employee_no": "E11745",
"finance_owner_name": "研发财务BP",
"cost_center": "CC-6125",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-06 19:05",
"last_sync_at": "2026-05-06 19:05",
"role_codes": ["user"],
},
{
"employee_no": "E12771",
"name": "孔令谦",
"gender": "",
"birth_date": "1993-07-18",
"phone": "13627711572",
"email": "lingqian.kong@xfinance.com",
"join_date": "2021-09-13",
"location": "南京",
"position": "供应商管理专员",
"grade": "P4",
"organization_unit_code": "OPS-ADMIN",
"manager_employee_no": "E12653",
"finance_owner_name": "行政财务BP",
"cost_center": "CC-7210",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-02 17:22",
"last_sync_at": "2026-05-02 17:22",
"role_codes": ["user"],
},
{
"employee_no": "E12782",
"name": "乔语岚",
"gender": "",
"birth_date": "1996-05-06",
"phone": "13727823045",
"email": "yulan.qiao@xfinance.com",
"join_date": "2023-03-06",
"location": "上海",
"position": "风控策略分析师",
"grade": "P4",
"organization_unit_code": "AUDIT-RISK",
"manager_employee_no": "E12661",
"finance_owner_name": "集团财务",
"cost_center": "CC-8108",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-03 13:18",
"last_sync_at": "2026-05-03 13:18",
"role_codes": ["auditor"],
},
{
"employee_no": "E12790",
"name": "邹闻韬",
"gender": "",
"birth_date": "1991-03-11",
"phone": "13827903167",
"email": "wentao.zou@xfinance.com",
"join_date": "2020-10-26",
"location": "上海",
"position": "合规产品负责人",
"grade": "P7",
"organization_unit_code": "RND-CENTER",
"manager_employee_no": "E11745",
"finance_owner_name": "研发财务BP",
"cost_center": "CC-6128",
"employment_status": "在职",
"sync_state": "已同步",
"spotlight": False,
"updated_at": "2026-05-06 08:56",
"last_sync_at": "2026-05-06 08:56",
"role_codes": ["user", "auditor"],
},
]

View File

@@ -0,0 +1,52 @@
from __future__ import annotations
# Integer rank per role code (1 through 6).
# NOTE(review): presumably a sort key where a lower rank is shown first —
# confirm against the code that sorts roles.
ROLE_DISPLAY_ORDER = {
"manager": 1,
"finance": 2,
"approver": 3,
"executive": 4,
"auditor": 5,
"user": 6,
}
# One record per role: its code, display name and description.
ROLE_DEFINITIONS = [
{
"role_code": "user",
"name": "使用者",
"description": "可以发起报销、查看个人单据和使用 AI 助手。",
},
{
"role_code": "finance",
"name": "财务人员",
"description": "可以处理复核、查看财务知识与风险校验结果。",
},
{
"role_code": "manager",
"name": "管理员",
"description": "可以维护员工档案、组织结构和角色权限。",
},
{
"role_code": "executive",
"name": "高级管理人员",
"description": "可以查看跨部门数据看板与关键审批结果。",
},
{
"role_code": "approver",
"name": "审批负责人",
"description": "可以处理审批中心中的待审单据。",
},
{
"role_code": "auditor",
"name": "审计观察员",
"description": "可以查看变更记录和权限调整历史。",
},
]
# Permission label strings granted by each role code.
ROLE_PERMISSION_MAP = {
"user": ["可发起差旅申请与报销", "可查看个人单据与票据识别结果"],
"finance": ["可处理财务复核任务", "可查看风险校验与财务知识库"],
"manager": ["可维护员工档案与组织结构", "可配置系统角色与访问边界"],
"executive": ["可查看跨部门经营看板", "可处理高金额报销最终审批"],
"approver": ["可处理本部门待审单据", "可查看审批链路与 SLA 状态"],
"auditor": ["可查看权限变更与审计留痕", "可导出员工权限观察记录"],
}

View File

@@ -0,0 +1,126 @@
from __future__ import annotations
from collections.abc import Callable
from datetime import date, datetime
from app.models.employee import Employee
from app.models.employee_change_log import EmployeeChangeLog
from app.models.role import Role
from app.schemas.employee import (
EmployeeHistoryRead,
EmployeeOrganizationRead,
EmployeeRead,
)
def serialize_employee(
    employee: Employee,
    *,
    sorted_roles: list[Role],
    sorted_change_logs: list[EmployeeChangeLog],
    format_date: Callable[[date | None], str | None],
    format_datetime: Callable[[datetime | None], str | None],
    format_history_datetime: Callable[[datetime | None], str],
    role_permission_map: dict[str, list[str]],
    status_tone_map: dict[str, str],
    max_change_logs: int,
) -> EmployeeRead:
    """Convert an ``Employee`` into its ``EmployeeRead`` schema.

    Args:
        employee: The record to serialize.
        sorted_roles: The employee's roles, already ordered for display.
        sorted_change_logs: Change-log entries, already ordered; only the
            first ``max_change_logs`` are included.
        format_date: Formatter for date fields.
        format_datetime: Formatter for the updated / last-sync timestamps.
        format_history_datetime: Formatter for change-log timestamps.
        role_permission_map: Permission labels per role code.
        status_tone_map: Tone per employment status; unknown statuses map
            to ``"neutral"``.
        max_change_logs: Upper bound on history entries.

    Returns:
        The populated ``EmployeeRead``.
    """
    unit = employee.organization_unit
    supervisor = employee.manager

    role_labels: list[str] = []
    role_codes: list[str] = []
    for role in sorted_roles:
        role_labels.append(role.name)
        role_codes.append(role.role_code)

    history: list[EmployeeHistoryRead] = []
    for entry in sorted_change_logs[:max_change_logs]:
        history.append(
            EmployeeHistoryRead(
                action=entry.action,
                owner=entry.owner,
                # Both fields carry the same formatted timestamp.
                time=format_history_datetime(entry.occurred_at),
                occurredAt=format_history_datetime(entry.occurred_at),
            )
        )

    organization_read = None
    if unit:
        organization_read = EmployeeOrganizationRead(
            id=unit.id,
            code=unit.unit_code,
            name=unit.name,
            unitType=unit.unit_type,
            costCenter=unit.cost_center,
            location=unit.location,
            managerName=unit.manager_name,
        )

    return EmployeeRead(
        id=employee.id,
        avatar=(employee.name or "?")[:1],
        name=employee.name,
        employeeNo=employee.employee_no,
        department=unit.name if unit else "",
        position=employee.position,
        grade=employee.grade,
        # An employee without a manager is displayed as reporting to "CEO".
        manager=supervisor.name if supervisor else "CEO",
        managerEmployeeNo=supervisor.employee_no if supervisor else None,
        financeOwner=employee.finance_owner_name or "",
        roles=role_labels,
        roleCodes=role_codes,
        status=employee.employment_status,
        statusTone=status_tone_map.get(employee.employment_status, "neutral"),
        gender=employee.gender,
        age=calculate_age(employee.birth_date),
        birthDate=format_date(employee.birth_date),
        email=employee.email,
        phone=employee.phone,
        joinDate=format_date(employee.join_date),
        location=employee.location,
        costCenter=employee.cost_center,
        updatedAt=format_datetime(employee.updated_at or employee.created_at),
        lastSync=format_datetime(employee.last_sync_at),
        syncState=employee.sync_state,
        spotlight=employee.spotlight,
        permissions=collect_permissions(role_codes, role_permission_map),
        history=history,
        organization=organization_read,
    )
def collect_permissions(
    role_codes: list[str],
    role_permission_map: dict[str, list[str]],
) -> list[str]:
    """Return the permissions of the given roles, de-duplicated in order.

    Roles are visited in the order given; a permission keeps the position
    of its first occurrence. Role codes missing from the map contribute
    nothing.
    """
    # dict keys preserve first-insertion order, which gives ordered de-duplication.
    ordered = dict.fromkeys(
        permission
        for role_code in role_codes
        for permission in role_permission_map.get(role_code, [])
    )
    return list(ordered)
def format_history_datetime(
    value: datetime | None,
    *,
    to_display_datetime: Callable[[datetime], datetime],
) -> str:
    """Format a change-log timestamp as ``YYYY-MM-DD HH:MM``.

    Args:
        value: The timestamp, or ``None``.
        to_display_datetime: Converts ``value`` to the datetime to display.

    Returns:
        The formatted string, or ``""`` when ``value`` is ``None``.
    """
    if value is None:
        return ""
    local = to_display_datetime(value)
    # Separators and zero padding keep the text unambiguous: bare
    # concatenation mapped 2026-1-11 1:05 and 2026-11-1 1:15 to the same string.
    return (
        f"{local.year:04d}-{local.month:02d}-{local.day:02d} "
        f"{local.hour:02d}:{local.minute:02d}"
    )
def calculate_age(birth_date: date | None) -> int | None:
    """Return the age in whole years as of today, or ``None`` without a birth date."""
    if birth_date is None:
        return None
    today = date.today()
    # One year less until this year's birthday has been reached.
    birthday_reached = (today.month, today.day) >= (birth_date.month, birth_date.day)
    year_gap = today.year - birth_date.year
    return year_gap if birthday_reached else year_gap - 1

View File

@@ -0,0 +1,401 @@
from __future__ import annotations
import re
from typing import Any
from sqlalchemy import and_, func, or_, select
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.models.organization import OrganizationUnit
# Role codes that count as privileged claim access (see has_privileged_claim_access).
PRIVILEGED_CLAIM_ROLE_CODES = {"finance", "executive"}
# Role codes required before the direct-manager return rules in can_return_claim apply.
APPROVAL_VISIBLE_CLAIM_ROLE_CODES = {"manager", "approver"}
# Role codes allowed to delete claims (see has_claim_delete_access).
CLAIM_DELETE_ROLE_CODES = {"executive"}
class ExpenseClaimAccessPolicy:
    """Access rules for expense claims: visibility, approval, return and deletion.

    Role checks work on the normalized role codes of the current user, while
    employee lookups are executed through the SQLAlchemy session given to the
    constructor.
    """

    def __init__(self, db: Session) -> None:
        # Session used by the employee lookup helpers of this policy.
        self.db = db
@staticmethod
def has_privileged_claim_access(current_user: CurrentUserContext) -> bool:
    """Return True for admins and for users holding a privileged claim role."""
    if current_user.is_admin:
        return True
    held_roles = ExpenseClaimAccessPolicy.normalize_role_codes(current_user)
    return not held_roles.isdisjoint(PRIVILEGED_CLAIM_ROLE_CODES)
@staticmethod
def has_claim_delete_access(current_user: CurrentUserContext) -> bool:
    """Return True for admins and for users holding a claim-deletion role."""
    if current_user.is_admin:
        return True
    held_roles = ExpenseClaimAccessPolicy.normalize_role_codes(current_user)
    return not held_roles.isdisjoint(CLAIM_DELETE_ROLE_CODES)
def can_return_claim(self, current_user: CurrentUserContext, claim: ExpenseClaim) -> bool:
    """Tell whether ``current_user`` may send ``claim`` back to its submitter.

    Privileged users (see ``has_privileged_claim_access``) may always return a
    claim. Everyone else must be the current direct-manager approver of the
    claim; that rule lives in ``is_current_direct_manager_approver`` and is
    reused here instead of being duplicated, so both checks cannot drift apart.

    Args:
        current_user: Context of the user attempting the return.
        claim: Claim that would be returned.

    Returns:
        True when the return is allowed, otherwise False.
    """
    if self.has_privileged_claim_access(current_user):
        return True
    return self.is_current_direct_manager_approver(current_user, claim)
def can_approve_claim(self, current_user: CurrentUserContext, claim: ExpenseClaim) -> bool:
    """Tell whether ``current_user`` may approve ``claim`` at its current stage.

    The finance stage is open to admins and holders of the ``finance`` role;
    the direct-manager stage is delegated to
    ``is_current_direct_manager_approver``. Any other stage is never approvable.
    """
    current_stage = str(claim.approval_stage or "").strip()
    if current_stage == "财务审批":
        held_roles = self.normalize_role_codes(current_user)
        return current_user.is_admin or "finance" in held_roles
    if current_stage == "直属领导审批":
        return self.is_current_direct_manager_approver(current_user, claim)
    return False
def is_current_direct_manager_approver(self, current_user: CurrentUserContext, claim: ExpenseClaim) -> bool:
    """Tell whether ``current_user`` is the direct manager expected to approve ``claim``.

    The user must hold an approval-visible role, the claim must be submitted
    and waiting at the direct-manager stage, and users never approve their own
    claim. The manager relation is matched by employee id first and, failing
    that, by comparing the approver's name with the claim's manager name.
    """
    held_roles = self.normalize_role_codes(current_user)
    if held_roles.isdisjoint(APPROVAL_VISIBLE_CLAIM_ROLE_CODES):
        return False
    if (
        str(claim.status or "").strip().lower() != "submitted"
        or str(claim.approval_stage or "").strip() != "直属领导审批"
    ):
        return False

    approver = self.resolve_current_employee(current_user)
    if approver is not None and str(claim.employee_id or "").strip() == approver.id:
        # The submitter is the approver: self-approval is not allowed.
        return False

    submitter = claim.employee
    if approver is not None and submitter is not None:
        if submitter.manager_id == approver.id:
            return True
        submitter_manager = submitter.manager
        if submitter_manager is not None and submitter_manager.id == approver.id:
            return True

    # No id-based match: fall back to comparing names.
    if approver is not None and approver.name:
        approver_name = str(approver.name).strip()
    else:
        approver_name = str(current_user.name or "").strip()
    return bool(approver_name) and self.resolve_claim_manager_name(claim) == approver_name
@staticmethod
def normalize_role_codes(current_user: CurrentUserContext) -> set[str]:
return {
str(item).strip().lower()
for item in current_user.role_codes
if str(item).strip()
}
def resolve_current_employee(self, current_user: CurrentUserContext) -> Employee | None:
    """Look up the employee record for the current user via username, then display name."""
    identity_candidates = [
        str(raw or "").strip()
        for raw in (current_user.username, current_user.name)
    ]
    return self.resolve_employee_by_identity_candidates(identity_candidates)
def resolve_current_user_display_name(self, current_user: CurrentUserContext) -> str:
    """Pick a human-readable name for the current user.

    Preference order: the matched employee's name, then the first of the
    user's name / username that is not email-like, then username or name as
    is, and finally ``"anonymous"``.
    """
    employee = self.resolve_current_employee(current_user)
    if employee is not None:
        employee_name = str(employee.name or "").strip()
        if employee_name:
            return employee_name

    trimmed = (str(raw or "").strip() for raw in (current_user.name, current_user.username))
    readable = next(
        (text for text in trimmed if text and not self.is_email_like(text)),
        None,
    )
    if readable is not None:
        return readable

    fallback = str(current_user.username or current_user.name or "anonymous").strip()
    return fallback or "anonymous"
def is_claim_owned_by_current_user(self, claim: ExpenseClaim, current_user: CurrentUserContext) -> bool:
    """Tell whether ``claim`` belongs to the current user.

    A matching employee id wins immediately. Otherwise the claim's employee
    name is compared with every known identity string of the user: employee
    name, email and employee number (when an employee record is found) plus
    the username and display name from the user context.
    """
    employee = self.resolve_current_employee(current_user)
    raw_identities: list[Any] = []
    if employee is not None:
        if str(claim.employee_id or "").strip() == employee.id:
            return True
        raw_identities.extend([employee.name, employee.email, employee.employee_no])
    raw_identities.extend([current_user.username, current_user.name])

    known_identities = {str(raw or "").strip() for raw in raw_identities}
    known_identities.discard("")  # a blank claim name must never match
    return str(claim.employee_name or "").strip() in known_identities
@staticmethod
def is_email_like(value: str) -> bool:
return bool(re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", str(value or "").strip()))
def resolve_claim_employee_for_backfill(self, claim: ExpenseClaim) -> Employee | None:
    """Find the employee record to use when back-filling identity fields on ``claim``.

    Lookup order: the employee already attached to the claim (re-queried with
    its organization unit, manager and roles loaded), then the stored employee
    id, and finally an identity lookup by the claim's employee name.
    """

    def load_employee(employee_id: Any) -> Employee | None:
        # Query one employee by id, loading the relationships used during backfill.
        return self.db.scalar(
            select(Employee)
            .options(
                selectinload(Employee.organization_unit),
                selectinload(Employee.manager),
                selectinload(Employee.roles),
            )
            .where(Employee.id == employee_id)
            .limit(1)
        )

    attached = claim.employee
    if attached is not None:
        # Fall back to the attached instance when the re-query finds nothing.
        return load_employee(attached.id) or attached

    stored_id = str(claim.employee_id or "").strip()
    if stored_id:
        found = load_employee(stored_id)
        if found is not None:
            return found

    claim_name = str(claim.employee_name or "").strip()
    return self.resolve_employee_by_identity_candidates([claim_name])
def resolve_employee_by_identity_candidates(self, candidates: list[str]) -> Employee | None:
    """Resolve an employee from loose identity strings.

    Candidates are trimmed, de-duplicated (order kept) and blanks dropped.
    Each one is first matched case-insensitively against email or employee
    number; only if no candidate matches that way are names tried, and a name
    counts only when exactly one employee carries it.

    Returns:
        The matched employee, or ``None`` when nothing matches unambiguously.
    """
    unique = dict.fromkeys(str(raw or "").strip() for raw in candidates)
    unique.pop("", None)
    identities = list(unique)
    if not identities:
        return None

    eager_options = (
        selectinload(Employee.organization_unit),
        selectinload(Employee.manager),
        selectinload(Employee.roles),
    )

    # Pass 1: email / employee number, compared in lower case.
    for identity in identities:
        by_code = self.db.scalar(
            select(Employee)
            .options(*eager_options)
            .where(
                or_(
                    func.lower(Employee.email) == identity.lower(),
                    func.lower(Employee.employee_no) == identity.lower(),
                )
            )
            .limit(1)
        )
        if by_code is not None:
            return by_code

    # Pass 2: exact name; fetching two rows is enough to detect ambiguity.
    for identity in identities:
        same_name = list(
            self.db.scalars(
                select(Employee)
                .options(*eager_options)
                .where(Employee.name == identity)
                .limit(2)
            ).all()
        )
        if len(same_name) == 1:
            return same_name[0]
    return None
def backfill_claim_identity_from_current_user(
    self,
    claim: ExpenseClaim,
    current_user: CurrentUserContext,
) -> None:
    """Fill missing employee / department identity fields on ``claim`` in place.

    When an employee record can be resolved (from the claim first, then from
    the current user) it is applied only if it does not contradict what the
    claim already stores. Without an employee record, the department and name
    from the user context fill fields that are still missing.
    """
    employee = self.resolve_claim_employee_for_backfill(claim) or self.resolve_current_employee(current_user)

    if employee is None:
        department_attrs = ("department_name", "department", "departmentName")
        context_department = str(
            next(
                (
                    value
                    for value in (getattr(current_user, attr, "") for attr in department_attrs)
                    if value
                ),
                "",
            )
        ).strip()
        if context_department and self.is_missing_value(claim.department_name):
            claim.department_name = context_department
        context_name = str(current_user.name or current_user.username or "").strip()
        if context_name and self.is_missing_value(claim.employee_name):
            claim.employee_name = context_name
        return

    stored_id = str(claim.employee_id or "").strip()
    stored_name = str(claim.employee_name or "").strip()
    employee_aliases = {
        str(raw or "").strip()
        for raw in (employee.name, employee.email, employee.employee_no)
    }
    employee_aliases.discard("")

    # Only overwrite when the claim has no identity yet or already points at this employee.
    compatible = (
        not stored_id
        or stored_id == employee.id
        or self.is_missing_value(stored_name)
        or stored_name in employee_aliases
    )
    if not compatible:
        return

    claim.employee = employee
    claim.employee_id = employee.id
    if employee.name:
        claim.employee_name = employee.name
    if employee.organization_unit is not None:
        claim.department_id = employee.organization_unit_id
        claim.department_name = employee.organization_unit.name
def employee_name_is_unique(self, employee: Employee) -> bool:
    """Return True when exactly one employee row carries this employee's (trimmed) name."""
    name = str(employee.name or "").strip()
    if not name:
        return False
    count_stmt = select(func.count()).select_from(Employee).where(Employee.name == name)
    holders = int(self.db.scalar(count_stmt) or 0)
    return holders == 1
def build_personal_claim_conditions(self, current_user: CurrentUserContext) -> list[Any]:
    """Build the filter conditions that select the current user's own claims.

    With a matched employee record the claim is matched by employee id, by the
    employee's email stored in the name column, and by the employee name only
    when that name is unique. Without a record, the username is compared with
    both the id and the name column. Blank values produce no condition.
    """
    username = str(current_user.username or "").strip()
    employee = self.resolve_current_employee(current_user)

    if employee is None:
        lookups = [
            (ExpenseClaim.employee_id, username),
            (ExpenseClaim.employee_name, username),
        ]
    else:
        lookups = [
            (ExpenseClaim.employee_id, employee.id),
            (ExpenseClaim.employee_name, employee.email),
        ]
        # A shared name would leak other people's claims, so require uniqueness.
        if self.employee_name_is_unique(employee):
            lookups.append((ExpenseClaim.employee_name, employee.name))

    conditions = []
    for column, raw_value in lookups:
        value = str(raw_value or "").strip()
        if value:
            conditions.append(column == value)
    return conditions
def build_approval_claim_conditions(self, current_user: CurrentUserContext) -> list[Any]:
    """Build the filter conditions for claims the user may approve as direct manager.

    Returns an empty list when the user holds no approval-visible role. Every
    condition requires a submitted claim at the direct-manager stage that was
    not filed by the approver, combined with one scope: claims of direct
    subordinates, or claims of departments whose manager name matches.
    """
    held_roles = self.normalize_role_codes(current_user)
    if held_roles.isdisjoint(APPROVAL_VISIBLE_CLAIM_ROLE_CODES):
        return []

    employee = self.resolve_current_employee(current_user)
    if employee is not None and employee.name:
        manager_name = str(employee.name).strip()
    else:
        manager_name = str(current_user.name or "").strip()

    pending_parts = [
        ExpenseClaim.status == "submitted",
        ExpenseClaim.approval_stage == "直属领导审批",
    ]
    # Exclude the approver's own claims, by id and by name.
    if employee is not None:
        pending_parts.append(
            or_(ExpenseClaim.employee_id.is_(None), ExpenseClaim.employee_id != employee.id)
        )
    if manager_name:
        pending_parts.append(ExpenseClaim.employee_name != manager_name)
    pending_leader_approval = and_(*pending_parts)

    scopes = []
    if employee is not None:
        subordinate_ids = select(Employee.id).where(Employee.manager_id == employee.id)
        scopes.append(ExpenseClaim.employee_id.in_(subordinate_ids))
    if manager_name:
        managed_department_ids = select(OrganizationUnit.id).where(OrganizationUnit.manager_name == manager_name)
        managed_department_names = select(OrganizationUnit.name).where(OrganizationUnit.manager_name == manager_name)
        scopes.append(ExpenseClaim.department_id.in_(managed_department_ids))
        scopes.append(ExpenseClaim.department_name.in_(managed_department_names))
    return [and_(pending_leader_approval, scope) for scope in scopes]
def apply_approval_claim_scope(self, stmt: Any, current_user: CurrentUserContext) -> Any:
    """Restrict ``stmt`` to the claims the user may currently approve.

    Admins and executives get every submitted claim, finance gets submitted
    claims at the finance stage, and everyone else gets the direct-manager
    conditions. Users without any condition get a filter that matches nothing.
    """
    held_roles = self.normalize_role_codes(current_user)
    if current_user.is_admin or "executive" in held_roles:
        return stmt.where(ExpenseClaim.status == "submitted")
    if "finance" in held_roles:
        return stmt.where(
            ExpenseClaim.status == "submitted",
            ExpenseClaim.approval_stage == "财务审批",
        )

    approval_conditions = self.build_approval_claim_conditions(current_user)
    if approval_conditions:
        return stmt.where(or_(*approval_conditions))
    # Sentinel id that no claim uses, so the query returns no rows.
    return stmt.where(ExpenseClaim.id == "__no_visible_claim__")
def apply_claim_scope(
    self,
    stmt: Any,
    current_user: CurrentUserContext,
    *,
    include_approval_scope: bool = False,
) -> Any:
    """Restrict ``stmt`` to the claims visible to the current user.

    Privileged users get the statement back untouched. Other users see their
    own claims, optionally widened with the claims they may approve. When no
    personal condition can be built the statement is filtered to match nothing.
    """
    if self.has_privileged_claim_access(current_user):
        return stmt

    personal = self.build_personal_claim_conditions(current_user)
    if not personal:
        # Sentinel id that no claim uses, so the query returns no rows.
        return stmt.where(ExpenseClaim.id == "__no_visible_claim__")

    approval = self.build_approval_claim_conditions(current_user) if include_approval_scope else []
    return stmt.where(or_(*personal, *approval))
@staticmethod
def resolve_claim_manager_name(claim: ExpenseClaim) -> str:
if claim.employee is not None:
if claim.employee.manager is not None and claim.employee.manager.name:
return str(claim.employee.manager.name).strip()
if claim.employee.organization_unit is not None and claim.employee.organization_unit.manager_name:
return str(claim.employee.organization_unit.manager_name).strip()
return ""
@staticmethod
def is_missing_value(value: Any) -> bool:
normalized = str(value or "").strip()
return not normalized or normalized in {"待补充", "待确认", "N/A", "n/a", ""}

View File

@@ -0,0 +1,668 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimAttachmentAnalysisMixin:
    """Attachment (recognized document) analysis helpers for expense claim items.

    Several methods call helpers that are not defined in this class, for
    example ``_get_expense_rule_catalog``, ``_get_expense_scene_policy`` and
    ``_build_attachment_document_info``; the class mixing this in has to
    provide them.
    """
def _build_attachment_expense_audit_points(
self,
*,
document: Any,
item: ExpenseClaimItem,
document_info: dict[str, Any],
) -> list[str]:
text = " ".join(
[
str(getattr(document, "summary", "") or "").strip(),
str(getattr(document, "text", "") or "").strip(),
]
).strip()
document_payload = {
"document_fields": document_info.get("fields") or [],
"summary": str(getattr(document, "summary", "") or ""),
"text": str(getattr(document, "text", "") or ""),
}
field_amount = self._resolve_document_field_amount(document_payload)
audited_amount = self._resolve_document_item_amount(document_payload)
item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01"))
points: list[str] = []
if (
field_amount is not None
and audited_amount is not None
and self._is_date_like_amount_candidate(field_amount, text)
and abs(field_amount - audited_amount) > Decimal("1.00")
):
points.append(
"费用核算OCR 金额疑似误取日期"
f" {self._format_decimal_amount(field_amount)}"
f"已按票据文本中的总费用 {self._format_decimal_amount(audited_amount)} 元回填,"
"请核对酒店或票据原文总额。"
)
if (
audited_amount is not None
and item_amount > Decimal("0.00")
and abs(audited_amount - item_amount) > Decimal("1.00")
):
points.append(
f"费用核算:票据文本复核金额为 {self._format_decimal_amount(audited_amount)} 元,"
f"当前明细金额为 {self._format_decimal_amount(item_amount)} 元,请确认是否需要调整。"
)
return points
def _build_attachment_travel_policy_audit(
self,
*,
document: Any,
item: ExpenseClaimItem,
document_info: dict[str, Any],
claim: ExpenseClaim | None = None,
) -> dict[str, Any]:
policy = self._get_expense_rule_catalog().travel_policy
if policy is None:
return {"points": [], "rule_basis": [], "has_high_risk": False}
item_type = str(item.item_type or "").strip().lower()
document_type = str(document_info.get("document_type") or "").strip().lower()
scene_code = str(document_info.get("scene_code") or "").strip().lower()
if not (
item_type in {"hotel", "hotel_ticket"}
or document_type == "hotel_invoice"
or scene_code == "hotel"
):
return {"points": [], "rule_basis": [], "has_high_risk": False}
item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01"))
if item_amount <= Decimal("0.00"):
return {"points": [], "rule_basis": [], "has_high_risk": False}
claim = claim or getattr(item, "claim", None)
grade_band = self._resolve_travel_policy_band(getattr(claim, "employee_grade", None))
rule_name = str(policy.standard_rule_name or policy.rule_name or "公司差旅费报销规则").strip()
rule_version = str(policy.standard_rule_version or policy.rule_version or "").strip()
version_text = f"{rule_version}" if rule_version else ""
rule_basis = [
f"依据《{rule_name}{version_text},住宿费按员工职级、出差城市和每晚金额进行差标核算。"
]
if grade_band is None:
return {
"points": ["住宿标准:当前员工职级缺失,无法匹配规则中心的住宿报销标准。"],
"rule_basis": rule_basis,
"has_high_risk": False,
}
text = " ".join(
[
str(getattr(document, "summary", "") or "").strip(),
str(getattr(document, "text", "") or "").strip(),
]
).strip()
context = {
"item": item,
"document_info": document_info,
"ocr_summary": str(getattr(document, "summary", "") or "").strip(),
"ocr_text": str(getattr(document, "text", "") or "").strip(),
}
hotel_city = self._extract_hotel_city(context, policy)
claim_city = self._extract_city_from_text(str(getattr(claim, "location", "") or ""), policy) if claim else ""
reason_city = self._extract_city_from_text(str(getattr(claim, "reason", "") or ""), policy) if claim else ""
baseline_city = hotel_city or claim_city or reason_city
if not baseline_city:
baseline_city = self._extract_city_from_text(text, policy)
if not baseline_city:
return {
"points": ["住宿标准:未能从酒店名称、出差地点或票据内容匹配到规则中心城市,无法核算住宿差标。"],
"rule_basis": rule_basis,
"has_high_risk": False,
}
standard = self._resolve_travel_policy_hotel_standard(
policy=policy,
grade_band=grade_band,
city=baseline_city,
)
if standard is None:
return {"points": [], "rule_basis": rule_basis, "has_high_risk": False}
cap, standard_label = standard
night_count = self._extract_hotel_night_count(context)
nightly_amount = (item_amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01"))
if nightly_amount <= cap:
return {"points": [], "rule_basis": rule_basis, "has_high_risk": False}
band_label = policy.band_labels.get(grade_band, str(getattr(claim, "employee_grade", "") or "当前职级").strip())
over_amount = (nightly_amount - cap).quantize(Decimal("0.01"))
return {
"points": [
(
f"住宿标准:{band_label}{standard_label}的住宿标准为 "
f"{self._format_decimal_amount(cap)} 元/晚,票据识别金额 "
f"{self._format_decimal_amount(item_amount)} 元 / {night_count} 晚,"
f"{self._format_decimal_amount(nightly_amount)} 元/晚,"
f"超出 {self._format_decimal_amount(over_amount)} 元/晚。"
)
],
"rule_basis": rule_basis,
"has_high_risk": True,
}
def _build_attachment_requirement_check(
self,
*,
item: ExpenseClaimItem,
document_info: dict[str, Any],
) -> dict[str, Any]:
expense_type = str(item.item_type or "").strip().lower() or "other"
policy = self._get_expense_scene_policy(expense_type)
expense_label = policy.label if policy is not None else self._resolve_expense_type_label(expense_type)
allowed_scenes = set(policy.allowed_scene_codes) if policy is not None else set()
allowed_document_types = set(policy.allowed_document_types) if policy is not None else set()
allowed_scene_labels = [self._resolve_document_scene_label(code) for code in sorted(allowed_scenes)]
allowed_document_type_labels = [
resolve_document_type_label(document_type)
for document_type in sorted(allowed_document_types)
]
recognized_scene_code = str(document_info.get("scene_code") or "other").strip() or "other"
recognized_scene_label = str(
document_info.get("scene_label") or self._resolve_document_scene_label(recognized_scene_code)
).strip()
recognized_document_type = str(document_info.get("document_type") or "other").strip() or "other"
recognized_document_type_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"
matches = (
(not allowed_scenes and not allowed_document_types)
or recognized_scene_code in allowed_scenes
or recognized_document_type in allowed_document_types
)
if matches:
if allowed_scene_labels or allowed_document_type_labels:
message = (
f"当前费用项目为{expense_label},已识别为{recognized_document_type_label}"
f"符合当前{expense_label}场景的附件要求。"
)
else:
message = f"当前费用项目为{expense_label},已识别为{recognized_document_type_label}"
else:
expected_parts = [label + "相关票据" for label in allowed_scene_labels]
expected_parts.extend(allowed_document_type_labels)
expected_text = "".join(dict.fromkeys(part for part in expected_parts if part)) or "对应场景票据"
message = (
f"当前费用项目为{expense_label},要求上传{expected_text}"
f"当前识别为{recognized_document_type_label},不符合当前场景,建议过滤或更换附件。"
)
return {
"matches": matches,
"current_expense_type": expense_type,
"current_expense_type_label": expense_label,
"allowed_scene_labels": allowed_scene_labels,
"allowed_document_type_labels": allowed_document_type_labels,
"recognized_scene_code": recognized_scene_code,
"recognized_scene_label": recognized_scene_label,
"recognized_document_type": recognized_document_type,
"recognized_document_type_label": recognized_document_type_label,
"mismatch_severity": policy.attachment_mismatch_severity if policy is not None else "high",
"rule_code": policy.rule_code if policy is not None else DEFAULT_SCENE_RULE_ASSET_CODE,
"rule_name": policy.rule_name if policy is not None else "报销场景提交与附件标准",
"message": message,
}
@staticmethod
def _resolve_document_scene_label(scene_code: str) -> str:
    """Map a scene code (trimmed, lower-cased) to its display label; unknown codes get a generic label."""
    lookup_key = str(scene_code or "").strip().lower()
    if lookup_key in DOCUMENT_SCENE_LABELS:
        return DOCUMENT_SCENE_LABELS[lookup_key]
    return "其他票据"
@staticmethod
def _extract_amount_candidates(text: str) -> list[Decimal]:
    """Delegate to the module-level ``extract_amount_candidates`` so it is reachable as a method."""
    return extract_amount_candidates(text)
@staticmethod
def _is_amount_match_date_fragment(
    amount: Decimal,
    text: str,
    start: int,
    end: int,
) -> bool:
    """Delegate to the module-level ``is_amount_match_date_fragment`` with the same arguments."""
    # presumably start/end delimit the amount match inside text - confirm against the helper.
    return is_amount_match_date_fragment(amount, text, start, end)
@staticmethod
def _has_date_like_text(text: str) -> bool:
return bool(re.search(r"(20\d{2}[年/\-.]\d{1,2}[月/\-.]\d{1,2}日?)", text))
@staticmethod
def _normalize_match_text(text: str) -> str:
return re.sub(r"\s+", "", str(text or "")).lower()
@staticmethod
def _resolve_expense_type_label(expense_type: str | None) -> str:
    """Map an expense type code (trimmed, lower-cased) to its display label; unknown codes get a generic label."""
    lookup_key = str(expense_type or "").strip().lower()
    if lookup_key in EXPENSE_TYPE_LABELS:
        return EXPENSE_TYPE_LABELS[lookup_key]
    return "其他"
def _resolve_allowed_document_scenes(self, expense_type: str | None) -> set[str]:
    """Return the document scenes allowed for an expense type.

    The result is the union of the scene policy's allowed scene codes (when a
    policy exists) and the static ``EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES``
    entry for the type.
    """
    type_key = str(expense_type or "").strip().lower()
    policy = self._get_expense_scene_policy(type_key)
    from_policy = set() if policy is None else set(policy.allowed_scene_codes)
    return from_policy.union(EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES.get(type_key, set()))
def _resolve_document_analysis_scenes(self, document_info: dict[str, Any], text: str) -> set[str]:
    """Determine which expense scenes a document belongs to.

    Structured recognition wins: the recognized scene code (unless it is
    ``other``) plus the scene mapped from the recognized document type. Only
    when neither yields a scene is the text scanned for scene keywords.
    """
    scene_code = str(document_info.get("scene_code") or "").strip().lower()
    document_type = str(document_info.get("document_type") or "").strip().lower()
    type_scene = DOCUMENT_TYPE_SCENE_MAP.get(document_type)

    recognized = set()
    if scene_code and scene_code != "other":
        recognized.add(scene_code)
    if type_scene:
        recognized.add(type_scene)
    if recognized:
        return recognized
    # Nothing structured was recognized: fall back to keyword detection.
    return set(self._detect_expense_scenes(text).keys())
def _detect_expense_scenes(self, text: str) -> dict[str, list[str]]:
normalized = self._normalize_match_text(text)
if not normalized:
return {}
matches: dict[str, list[str]] = {}
for scene, keywords in EXPENSE_SCENE_KEYWORDS.items():
matched = [keyword for keyword in keywords if keyword in normalized]
if matched:
matches[scene] = matched[:3]
return matches
def _format_scene_labels(self, scene_codes: set[str]) -> str:
labels = [self._resolve_expense_type_label(code) for code in scene_codes]
unique_labels = list(dict.fromkeys(label for label in labels if label))
return "".join(unique_labels) if unique_labels else "其他"
def _build_purpose_mismatch_point(
self,
*,
item: ExpenseClaimItem,
document_scenes: set[str],
) -> str | None:
if not document_scenes:
return None
allowed_scenes = self._resolve_allowed_document_scenes(item.item_type)
document_scene_labels = self._format_scene_labels(document_scenes)
if allowed_scenes and document_scenes.isdisjoint(allowed_scenes):
expense_label = self._resolve_expense_type_label(item.item_type)
return f"附件类型:当前费用项目为{expense_label},但附件内容更像{document_scene_labels}相关票据。"
return None
@staticmethod
def _is_valid_route_description(value: str) -> bool:
text = str(value or "").strip()
if not text:
return False
if DOCUMENT_DATE_PATTERN.search(text):
return False
return bool(DOCUMENT_ROUTE_FORMAT_PATTERN.match(text))
def _build_route_format_point(
    self,
    *,
    item: ExpenseClaimItem,
    document_info: dict[str, Any],
) -> str | None:
    """Ask for an origin-destination route description when one is required but missing.

    A route is required for item types in ``ROUTE_DESCRIPTION_ITEM_TYPES`` and
    for train, flight, taxi and transport documents.

    Returns:
        A finding with a format example when the item reason is not a valid
        route description, otherwise ``None``.
    """
    item_type = str(item.item_type or "").strip().lower()
    document_type = str(document_info.get("document_type") or "").strip().lower()
    route_document_types = {
        "train_ticket",
        "flight_itinerary",
        "taxi_receipt",
        "transport_receipt",
    }
    if item_type not in ROUTE_DESCRIPTION_ITEM_TYPES and document_type not in route_document_types:
        return None

    reason = str(item.item_reason or "").strip()
    if self._is_valid_route_description(reason):
        return None

    if item_type == "ride_ticket":
        example = "深圳北站-腾讯滨海大厦"
    else:
        example = "广州南-北京南"
    # Echo at most 30 characters of what the user entered.
    current = f"当前为“{reason[:30]}”," if reason else ""
    return (
        f"行程说明：{current}格式应为“起始地-目的地”,"
        f"例如“{example}”,请按票据行程补充。"
    )
def _build_fallback_attachment_analysis(
self,
*,
media_type: str | None,
item: ExpenseClaimItem,
) -> dict[str, Any]:
return {
"severity": "medium",
"label": "中风险",
"headline": "AI提示附件已上传待识别结果",
"summary": "附件已成功保存,但当前尚未拿到有效识别结果,建议人工先核对票据内容。",
"points": [
f"附件格式:{self._attachment_presentation.resolve_media_type('attachment', fallback=media_type)}",
f"费用金额:当前明细金额为 {item.item_amount}",
],
"suggestion": "建议打开附件确认金额、日期和票据类型是否完整,再继续提交审批。",
}
def _build_failed_ocr_attachment_analysis(
self,
*,
media_type: str | None,
error_message: str,
item: ExpenseClaimItem,
) -> dict[str, Any]:
return {
"severity": "medium",
"label": "中风险",
"headline": "AI提示附件已上传但识别失败",
"summary": "文件已经保存成功,但本次 AI 识别未完成,因此无法给出完整票据核验结论。",
"points": [
f"识别异常:{error_message or 'OCR 服务暂不可用'}",
f"费用金额:当前明细金额为 {item.item_amount}",
f"附件格式:{self._attachment_presentation.resolve_media_type('attachment', fallback=media_type)}",
],
"suggestion": "建议重新上传更清晰的票据图片,或稍后重试识别后再提交。",
}
def _build_attachment_analysis(
    self,
    *,
    document: Any,
    item: ExpenseClaimItem,
    claim: ExpenseClaim | None = None,
    document_info: dict[str, Any] | None = None,
    requirement_check: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Grade a recognized attachment against its claim item and explain the result.

    Collects findings ("points") about recognition quality, ticket type,
    amount, date, attachment requirements, amount re-audit, travel policy,
    purpose mismatch and route format, then derives a severity from them.

    Args:
        document: Recognition result; ``summary``, ``text``, ``warnings``,
            ``avg_score`` and ``line_count`` are read with ``getattr`` defaults.
        item: Claim item the attachment belongs to.
        claim: Owning claim, forwarded to the travel policy audit.
        document_info: Recognized document metadata; built from ``document``
            when omitted or empty.
        requirement_check: Result of the attachment requirement check;
            computed when omitted or empty.

    Returns:
        A dict with ``severity`` (``pass``, ``low``, ``medium`` or ``high``),
        ``label``, ``headline``, ``summary``, ``points``, ``rule_basis`` and
        ``suggestion``.
    """
    warnings = [str(value).strip() for value in list(getattr(document, "warnings", []) or []) if str(value).strip()]
    text = " ".join(
        [
            str(getattr(document, "summary", "") or "").strip(),
            str(getattr(document, "text", "") or "").strip(),
        ]
    ).strip()
    # Only plain spaces are removed here; used for keyword checks and emptiness.
    compact_text = text.replace(" ", "")
    avg_score = float(getattr(document, "avg_score", 0.0) or 0.0)
    line_count = int(getattr(document, "line_count", 0) or 0)
    document_info = document_info or self._build_attachment_document_info(document)
    requirement_check = requirement_check or self._build_attachment_requirement_check(
        item=item,
        document_info=document_info,
    )
    # Run the individual audits; each contributes zero or more findings below.
    document_scenes = self._resolve_document_analysis_scenes(document_info, text)
    purpose_mismatch_point = self._build_purpose_mismatch_point(
        item=item,
        document_scenes=document_scenes,
    )
    route_format_point = self._build_route_format_point(
        item=item,
        document_info=document_info,
    )
    expense_audit_points = self._build_attachment_expense_audit_points(
        document=document,
        item=item,
        document_info=document_info,
    )
    travel_policy_audit = self._build_attachment_travel_policy_audit(
        document=document,
        item=item,
        claim=claim,
        document_info=document_info,
    )
    travel_policy_points = [
        str(point).strip()
        for point in list(travel_policy_audit.get("points") or [])
        if str(point).strip()
    ]
    travel_policy_rule_basis = [
        str(point).strip()
        for point in list(travel_policy_audit.get("rule_basis") or [])
        if str(point).strip()
    ]
    travel_policy_high_risk = bool(travel_policy_audit.get("has_high_risk"))
    recognized_document_type = str(document_info.get("document_type") or "other").strip().lower() or "other"
    recognized_document_label = str(document_info.get("document_type_label") or "其他单据").strip() or "其他单据"
    requirement_matches = bool(requirement_check.get("matches"))
    mismatch_severity = str(requirement_check.get("mismatch_severity") or "high").strip().lower() or "high"
    # Any invoice / ticket keyword in the text counts as evidence of a real ticket.
    has_ticket_keyword = any(
        keyword in compact_text
        for keyword in (
            "发票",
            "票据",
            "增值税",
            "电子行程单",
            "购买方",
            "销售方",
            "税额",
            "价税",
            "票号",
            "发票代码",
            "凭证",
        )
    )
    amount_candidates = self._extract_amount_candidates(text)
    item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01"))
    # A recognized amount within 1.00 of the item amount counts as a match.
    has_matching_amount = any(abs(candidate - item_amount) <= Decimal("1.00") for candidate in amount_candidates)
    has_date_text = self._has_date_like_text(text)
    amount_mismatch = bool(amount_candidates) and item_amount > Decimal("0.00") and not has_matching_amount
    # Findings are appended in a fixed order; the severity rules below depend on their count.
    points: list[str] = []
    if warnings:
        points.append(f"识别提示：{warnings[0]}")
    if line_count == 0 or not compact_text:
        points.append("附件内容：未识别到有效文字，当前附件更像普通图片或内容过于模糊。")
    if recognized_document_type == "other" and not has_ticket_keyword:
        points.append("票据类型：未识别到发票、票据、电子行程单等关键字，暂无法判断票据类型。")
    if not amount_candidates:
        points.append("金额字段：未识别到可用于核对的金额。")
    elif amount_mismatch:
        candidate_text = "".join(str(candidate) for candidate in amount_candidates[:3])
        points.append(f"金额字段：附件识别金额 {candidate_text} 元与报销金额 {item_amount} 元不一致。")
    if not has_date_text:
        date_requirement = DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS.get(
            recognized_document_type,
            "开票日期或业务发生日期",
        )
        points.append(f"日期字段：未识别到{date_requirement}")
    if not requirement_matches:
        points.append(f"附件类型要求：{requirement_check.get('message')}")
    points.extend(expense_audit_points)
    points.extend(travel_policy_points)
    if purpose_mismatch_point:
        points.append(purpose_mismatch_point)
    if route_format_point:
        points.append(route_format_point)
    if avg_score and avg_score < 0.72:
        points.append(f"识别质量OCR 置信度偏低（{avg_score:.0%}），可能影响票据核验准确性。")
    issue_count = len(points)
    # No findings at all: report a pass with confirming points instead of issues.
    if issue_count == 0:
        return {
            "severity": "pass",
            "label": "AI提示符合条件",
            "headline": "AI提示附件符合基础校验条件",
            "summary": "已识别到票据类型和关键字段，且符合当前费用场景的附件要求。",
            "points": [
                f"票据类型：已识别为{recognized_document_label}",
                f"附件类型要求：{requirement_check.get('message')}",
                f"金额字段：已识别到与当前明细接近的金额 {item_amount} 元。",
            ],
            "rule_basis": travel_policy_rule_basis,
            "suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。",
        }
    # Default to low risk, then escalate: policy overrun -> high, structural problems -> high, others -> medium.
    severity = "low"
    label = "低风险"
    headline = "AI提示附件存在轻微待核对项"
    summary = "当前附件已识别出部分票据要素，但仍建议人工继续复核。"
    if travel_policy_high_risk:
        severity = "high"
        label = "高风险"
        headline = "AI提示住宿金额超出报销标准"
        summary = "当前住宿票据金额超过规则中心差旅住宿标准，强行提交前需补充超标原因。"
    elif (
        line_count == 0
        or not compact_text
        or (recognized_document_type == "other" and not has_ticket_keyword and issue_count >= 2)
        or (not requirement_matches and mismatch_severity == "high")
        or (purpose_mismatch_point and amount_mismatch)
    ):
        severity = "high"
        label = "高风险"
        headline = "AI提示附件不符合票据校验条件"
        summary = "当前附件存在明显异常，票据类型与当前费用场景不匹配，或无法作为有效报销材料。"
    elif (
        purpose_mismatch_point
        or route_format_point
        or expense_audit_points
        or travel_policy_points
        or amount_mismatch
        or issue_count >= 2
        or warnings
        or (avg_score and avg_score < 0.72)
        or (not requirement_matches and mismatch_severity in {"medium", "low"})
    ):
        severity = "medium"
        label = "中风险"
        headline = "AI提示附件存在明显待整改项"
        summary = "当前附件可见部分内容，但金额、用途、日期或附件类型仍有缺失或不一致。"
        # Use a more specific summary when every finding comes from a single audit.
        if route_format_point and issue_count == 1:
            summary = "票据行程已识别，但费用明细说明未按“起始地-目的地”格式填写。"
        elif expense_audit_points and issue_count == len(expense_audit_points):
            summary = "OCR 金额已完成二次核算，请按票据原文总额复核。"
        elif travel_policy_points and issue_count == len(travel_policy_points):
            summary = "住宿票据已识别，但当前缺少职级或城市信息，无法完成差旅住宿标准核算。"
    suggestion = {
        "high": "建议过滤当前不匹配的票据，重新上传符合当前费用场景的清晰原件。",
        "medium": "建议根据风险点补齐清晰票据，或修正金额、日期、费用说明后再提交。",
        "low": "建议人工再次核对金额和业务说明，确认后可继续流转。",
    }[severity]
    # A policy overrun gets its own suggestion instead of the generic high-risk one.
    if travel_policy_high_risk:
        suggestion = "请核对住宿发票金额、晚数和出差城市；如确需超标，需在附加说明中补充超标说明并提交审批重点复核。"
    return {
        "severity": severity,
        "label": label,
        "headline": headline,
        "summary": summary,
        "points": points,
        # De-duplicate while keeping order.
        "rule_basis": list(dict.fromkeys(travel_policy_rule_basis)),
        "suggestion": suggestion,
    }

View File

@@ -0,0 +1,336 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimAttachmentDocumentMixin:
    """Attachment payload building and OCR-driven back-filling of claim items.

    This is a mixin: it relies on the host class to provide
    ``_attachment_storage``, ``_attachment_presentation`` and the various
    ``_resolve_*`` / ``_repair_*`` helpers that are called below.
    """

    def _build_attachment_payload(self, item: ExpenseClaimItem) -> dict[str, Any]:
        """Return the client-facing description of the attachment bound to ``item``.

        The stored sidecar metadata is read (and passed through the PDF
        text-layer repair step) and then normalised: non-dict ``analysis`` /
        ``document_info`` / ``requirement_check`` values become ``None`` and an
        unparsable ``uploaded_at`` becomes ``None``.
        """
        file_path, media_type, filename = self._resolve_item_attachment_content(item)
        stored_meta = self._attachment_storage.read_meta(file_path)
        stored_meta = self._repair_pdf_text_layer_metadata_if_needed(
            file_path=file_path,
            metadata=stored_meta,
            item=item,
        )

        def dict_or_none(name: str) -> dict[str, Any] | None:
            candidate = stored_meta.get(name)
            return candidate if isinstance(candidate, dict) else None

        uploaded_at = None
        raw_uploaded_at = stored_meta.get("uploaded_at")
        if isinstance(raw_uploaded_at, str) and raw_uploaded_at.strip():
            try:
                uploaded_at = datetime.fromisoformat(raw_uploaded_at)
            except ValueError:
                uploaded_at = None

        presentation = self._attachment_presentation
        default_previewable = presentation.is_previewable_media_type(media_type, filename)
        previewable = bool(stored_meta.get("previewable", default_previewable))
        preview_url = ""
        if previewable:
            preview_url = presentation.build_preview_client_path(item.claim_id, item.id)

        # Fall back to the on-disk size only when the metadata has no usable value.
        size_bytes = int(stored_meta.get("size_bytes") or file_path.stat().st_size)
        preview_kind = str(stored_meta.get("preview_kind") or "").strip()
        if not preview_kind:
            preview_kind = presentation.resolve_preview_kind(media_type, filename)

        return {
            "file_name": str(stored_meta.get("file_name") or filename),
            "storage_key": str(item.invoice_id or ""),
            "media_type": str(stored_meta.get("media_type") or media_type),
            "size_bytes": size_bytes,
            "uploaded_at": uploaded_at,
            "previewable": previewable,
            "preview_kind": preview_kind,
            "preview_url": preview_url,
            "analysis": dict_or_none("analysis"),
            "document_info": dict_or_none("document_info"),
            "requirement_check": dict_or_none("requirement_check"),
        }

    def _build_attachment_document_info(self, document: Any) -> dict[str, Any]:
        """Summarise an OCR document as type / scene codes, labels and fields.

        Values already present on ``document`` win; empty or placeholder values
        are replaced by what ``build_document_insight`` derives from the
        filename, summary and text. Fields missing a key, label or value are
        dropped, and when none survive the insight's own fields are used.
        """

        def text_of(name: str) -> str:
            return str(getattr(document, name, "") or "")

        insight = build_document_insight(
            filename=text_of("filename"),
            summary=text_of("summary"),
            text=text_of("text"),
        )

        def prefer_document(name: str, placeholder: str) -> str:
            # Use the insight value when the document value is blank or a placeholder.
            current = text_of(name).strip()
            if current in {"", placeholder}:
                return getattr(insight, name)
            return current

        document_type = prefer_document("document_type", "other")
        document_type_label = prefer_document("document_type_label", "其他单据")
        scene_code = prefer_document("scene_code", "other")
        scene_label = prefer_document("scene_label", "其他票据")

        collected: list[dict[str, str]] = []
        for raw_field in list(getattr(document, "document_fields", []) or []):
            if isinstance(raw_field, dict):
                key = str(raw_field.get("key") or "").strip()
                label = str(raw_field.get("label") or "").strip()
                value = str(raw_field.get("value") or "").strip()
            else:
                key = str(getattr(raw_field, "key", "") or "").strip()
                label = str(getattr(raw_field, "label", "") or "").strip()
                value = str(getattr(raw_field, "value", "") or "").strip()
            if not (key and label and value):
                continue
            display_label = self._resolve_document_field_display_label(
                document_type=document_type,
                key=key,
                label=label,
            )
            collected.append({"key": key, "label": display_label, "value": value})

        if not collected:
            collected = [
                {"key": entry.key, "label": entry.label, "value": entry.value}
                for entry in insight.fields
                if entry.value
            ]

        return {
            "document_type": document_type,
            "document_type_label": document_type_label,
            "scene_code": scene_code,
            "scene_label": scene_label,
            "fields": collected,
        }

    @staticmethod
    def _resolve_document_field_display_label(
        *,
        document_type: str,
        key: str,
        label: str,
    ) -> str:
        """Return the label to display for one document field.

        When ``DOCUMENT_TRIP_DATE_LABELS`` has an entry for the document type,
        date-like fields (by key or by label token) are shown with that entry,
        except fields recognised as invoice dates, which keep their label.
        All other fields keep their original label.
        """
        trip_label = DOCUMENT_TRIP_DATE_LABELS.get(str(document_type or "").strip().lower())
        if not trip_label:
            return label

        compact_key = str(key or "").strip().lower().replace("_", "")
        compact_label = str(label or "").replace(" ", "")

        def label_contains_any(tokens: Any) -> bool:
            return any(token in compact_label for token in tokens)

        # Invoice dates are never relabelled.
        if compact_key in DOCUMENT_INVOICE_DATE_KEYS or label_contains_any(
            DOCUMENT_INVOICE_DATE_LABEL_TOKENS
        ):
            return label

        if compact_key in DOCUMENT_TRIP_DATE_KEYS | DOCUMENT_GENERIC_DATE_KEYS:
            return trip_label
        if label_contains_any(
            (*DOCUMENT_TRIP_DATE_LABEL_TOKENS, *DOCUMENT_GENERIC_DATE_LABEL_TOKENS)
        ):
            return trip_label
        return label

    def _backfill_item_type_from_attachment(
        self,
        *,
        item: ExpenseClaimItem,
        document_info: dict[str, Any],
    ) -> None:
        """Replace a generic item type with the one mapped from the document type."""
        existing_type = str(item.item_type or "").strip().lower()
        if existing_type not in GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES:
            return
        recognised_type = str(document_info.get("document_type") or "").strip()
        replacement = DOCUMENT_TYPE_ITEM_TYPE_MAP.get(recognised_type)
        if replacement:
            item.item_type = replacement

    def _backfill_item_amount_from_attachment(
        self,
        *,
        item: ExpenseClaimItem,
        document: Any,
        document_info: dict[str, Any],
    ) -> None:
        """Fill ``item.item_amount`` from the document when it is not yet positive."""
        zero = Decimal("0.00")
        existing_amount = Decimal(item.item_amount or zero).quantize(Decimal("0.01"))
        if existing_amount > zero:
            return
        payload = {
            "document_fields": document_info.get("fields") or [],
            "summary": str(getattr(document, "summary", "") or ""),
            "text": str(getattr(document, "text", "") or ""),
        }
        resolved_amount = self._resolve_document_item_amount(payload)
        if resolved_amount is not None and resolved_amount > zero:
            item.item_amount = resolved_amount

    def _backfill_item_date_from_attachment(
        self,
        *,
        item: ExpenseClaimItem,
        document: Any,
        document_info: dict[str, Any],
    ) -> None:
        """Set ``item.item_date`` whenever a date candidate is resolved from the document."""
        payload = {
            "document_type": str(document_info.get("document_type") or "").strip(),
            "scene_code": str(document_info.get("scene_code") or "").strip(),
            "summary": str(getattr(document, "summary", "") or "").strip(),
            "text": str(getattr(document, "text", "") or "").strip(),
            "document_fields": list(document_info.get("fields") or []),
        }
        candidate = self._resolve_document_item_date_candidate(payload)
        if candidate is None:
            return
        item.item_date = candidate

    def _backfill_item_reason_from_attachment(
        self,
        *,
        item: ExpenseClaimItem,
        document: Any,
        document_info: dict[str, Any],
    ) -> None:
        """Set ``item.item_reason`` when a non-empty reason is resolved.

        The current (stripped) reason is handed to the resolver as its fallback.
        """
        payload = {
            "document_type": str(document_info.get("document_type") or "").strip(),
            "scene_code": str(document_info.get("scene_code") or "").strip(),
            "scene_label": str(document_info.get("scene_label") or "").strip(),
            "document_fields": document_info.get("fields") or [],
            "summary": str(getattr(document, "summary", "") or ""),
            "text": str(getattr(document, "text", "") or ""),
        }
        resolved_reason = self._resolve_document_item_reason(
            payload,
            fallback=str(item.item_reason or "").strip(),
        )
        if resolved_reason:
            item.item_reason = resolved_reason

View File

@@ -0,0 +1,495 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimAttachmentOperationsMixin:
def upload_claim_item_attachment(
self,
*,
claim_id: str,
item_id: str,
filename: str,
content: bytes,
media_type: str | None,
current_user: CurrentUserContext,
) -> dict[str, Any] | None:
claim, item = self._get_claim_item_or_raise(
claim_id=claim_id,
item_id=item_id,
current_user=current_user,
)
if claim is None:
return None
self._ensure_draft_claim(claim)
self._ensure_mutable_claim_item(item)
normalized_name = self._attachment_storage.normalize_filename(filename)
if not content:
raise ValueError("上传文件不能为空。")
before_json = self._serialize_claim(claim)
attachment_dir = self._attachment_storage.build_item_dir(claim.id, item.id)
shutil.rmtree(attachment_dir, ignore_errors=True)
attachment_dir.mkdir(parents=True, exist_ok=True)
file_path = attachment_dir / normalized_name
file_path.write_bytes(content)
resolved_media_type = self._attachment_presentation.resolve_media_type(
normalized_name,
fallback=media_type,
)
attachment_analysis = self._build_fallback_attachment_analysis(
media_type=media_type,
item=item,
)
ocr_document = None
document_info = None
requirement_check = None
ocr_status = "empty"
ocr_error = ""
try:
ocr_result = OcrService(self.db).recognize_files(
[(normalized_name, content, media_type or "application/octet-stream")]
)
documents = list(ocr_result.documents or [])
if documents:
ocr_document = documents[0]
ocr_status = "recognized"
document_info = self._build_attachment_document_info(ocr_document)
self._backfill_item_type_from_attachment(
item=item,
document_info=document_info,
)
self._backfill_item_amount_from_attachment(
item=item,
document=ocr_document,
document_info=document_info,
)
self._backfill_item_date_from_attachment(
item=item,
document=ocr_document,
document_info=document_info,
)
self._backfill_item_reason_from_attachment(
item=item,
document=ocr_document,
document_info=document_info,
)
requirement_check = self._build_attachment_requirement_check(
item=item,
document_info=document_info,
)
attachment_analysis = self._build_attachment_analysis(
document=ocr_document,
item=item,
claim=claim,
document_info=document_info,
requirement_check=requirement_check,
)
except Exception as exc: # pragma: no cover - fallback path depends on OCR runtime
ocr_status = "failed"
ocr_error = str(exc)
attachment_analysis = self._build_failed_ocr_attachment_analysis(
media_type=media_type,
error_message=ocr_error,
item=item,
)
item.invoice_id = self._attachment_storage.to_storage_key(file_path)
preview_meta = self._attachment_presentation.build_preview_meta(
file_path=file_path,
media_type=resolved_media_type,
ocr_document=ocr_document,
)
meta = {
"file_name": normalized_name,
"storage_key": item.invoice_id,
"media_type": resolved_media_type,
"size_bytes": len(content),
"uploaded_at": datetime.now(UTC).isoformat(),
"previewable": bool(preview_meta["previewable"]),
"preview_kind": str(preview_meta["preview_kind"]),
"preview_storage_key": str(preview_meta["preview_storage_key"]),
"preview_media_type": str(preview_meta["preview_media_type"]),
"preview_file_name": str(preview_meta["preview_file_name"]),
"analysis": attachment_analysis,
"document_info": document_info,
"requirement_check": requirement_check,
"ocr_status": ocr_status,
"ocr_error": ocr_error,
"ocr_text": str(getattr(ocr_document, "text", "") or ""),
"ocr_summary": str(getattr(ocr_document, "summary", "") or ""),
"ocr_avg_score": float(getattr(ocr_document, "avg_score", 0.0) or 0.0),
"ocr_line_count": int(getattr(ocr_document, "line_count", 0) or 0),
"ocr_classification_source": str(getattr(ocr_document, "classification_source", "") or ""),
"ocr_classification_confidence": float(getattr(ocr_document, "classification_confidence", 0.0) or 0.0),
"ocr_classification_evidence": [
str(item)
for item in getattr(ocr_document, "classification_evidence", []) or []
if str(item).strip()
],
"ocr_warnings": [str(item) for item in getattr(ocr_document, "warnings", []) or []],
}
self._attachment_storage.write_meta(file_path, meta)
self._sync_claim_from_items(claim)
self.db.commit()
self.db.refresh(claim)
self.audit_service.log_action(
actor=current_user.name or current_user.username,
action="expense_claim.attachment_upload",
resource_type="expense_claim",
resource_id=claim.id,
before_json=before_json,
after_json=self._serialize_claim(claim),
)
return {
"message": f"{normalized_name} 已上传并关联到当前费用明细。",
"claim_id": claim.id,
"item_id": item.id,
"invoice_id": item.invoice_id,
"item_date": item.item_date.isoformat() if item.item_date else None,
"item_type": item.item_type,
"item_reason": item.item_reason,
"item_location": item.item_location,
"item_amount": item.item_amount,
"claim_amount": claim.amount,
"attachment": self._build_attachment_payload(item),
}
def get_claim_item_attachment_meta(
self,
*,
claim_id: str,
item_id: str,
current_user: CurrentUserContext,
) -> dict[str, Any] | None:
claim, item = self._get_claim_item_or_raise(
claim_id=claim_id,
item_id=item_id,
current_user=current_user,
)
if claim is None:
return None
return self._build_attachment_payload(item)
def get_claim_item_attachment_content(
self,
*,
claim_id: str,
item_id: str,
current_user: CurrentUserContext,
) -> tuple[Path, str, str] | None:
claim, item = self._get_claim_item_or_raise(
claim_id=claim_id,
item_id=item_id,
current_user=current_user,
)
if claim is None:
return None
return self._resolve_item_attachment_content(item)
def get_claim_item_attachment_preview_content(
self,
*,
claim_id: str,
item_id: str,
current_user: CurrentUserContext,
) -> tuple[Path, str, str] | None:
claim, item = self._get_claim_item_or_raise(
claim_id=claim_id,
item_id=item_id,
current_user=current_user,
)
if claim is None:
return None
return self._resolve_item_attachment_preview_content(item)
def delete_claim_item_attachment(
self,
*,
claim_id: str,
item_id: str,
current_user: CurrentUserContext,
) -> dict[str, Any] | None:
claim, item = self._get_claim_item_or_raise(
claim_id=claim_id,
item_id=item_id,
current_user=current_user,
)
if claim is None:
return None
self._ensure_draft_claim(claim)
self._ensure_mutable_claim_item(item)
before_json = self._serialize_claim(claim)
previous_name = self._attachment_presentation.resolve_display_name(item.invoice_id)
self._attachment_storage.delete_item_files(item)
item.invoice_id = None
self._sync_claim_from_items(claim)
self.db.commit()
self.db.refresh(claim)
self.audit_service.log_action(
actor=current_user.name or current_user.username,
action="expense_claim.attachment_delete",
resource_type="expense_claim",
resource_id=claim.id,
before_json=before_json,
after_json=self._serialize_claim(claim),
)
return {
"message": f"{previous_name or '附件'} 已删除。",
"claim_id": claim.id,
"item_id": item.id,
"invoice_id": item.invoice_id,
"attachment": None,
}
def _get_claim_item_or_raise(
self,
*,
claim_id: str,
item_id: str,
current_user: CurrentUserContext,
) -> tuple[ExpenseClaim | None, ExpenseClaimItem]:
claim = self.get_claim(claim_id, current_user)
if claim is None:
return None, None # type: ignore[return-value]
item = next((entry for entry in claim.items if entry.id == item_id), None)
if item is None:
raise LookupError("Item not found")
return claim, item
def _resolve_item_attachment_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]:
file_path = self._attachment_storage.resolve_item_path(item)
if file_path is None or not file_path.exists():
raise FileNotFoundError("Attachment not found")
metadata = self._attachment_storage.read_meta(file_path)
filename = str(metadata.get("file_name") or file_path.name)
media_type = self._attachment_presentation.resolve_media_type(
filename,
fallback=str(metadata.get("media_type") or ""),
)
return file_path, media_type, filename
def _repair_pdf_text_layer_metadata_if_needed(
self,
*,
file_path: Path,
metadata: dict[str, Any],
item: ExpenseClaimItem | None = None,
) -> dict[str, Any]:
if not metadata:
return metadata
media_type = str(metadata.get("media_type") or self._attachment_presentation.resolve_media_type(file_path.name)).strip()
if media_type != "application/pdf":
return metadata
ocr_text = str(metadata.get("ocr_text") or "")
ocr_summary = str(metadata.get("ocr_summary") or "")
if OcrService._placeholder_ratio(f"{ocr_summary}\n{ocr_text}") < 0.12:
return metadata
text_layer = OcrService(self.db)._extract_pdf_text_layer(file_path)
repaired_text, used_text_layer = OcrService._choose_document_text(
ocr_text=ocr_text,
text_layer=text_layer,
)
if not used_text_layer or not repaired_text:
return metadata
repaired_summary = OcrService._summarize_text(repaired_text)
document = SimpleNamespace(
filename=str(metadata.get("file_name") or file_path.name),
text=repaired_text,
summary=repaired_summary,
avg_score=float(metadata.get("ocr_avg_score") or 0.0),
line_count=int(metadata.get("ocr_line_count") or 0),
document_type="",
document_type_label="",
scene_code="",
scene_label="",
document_fields=[],
warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()],
)
document_info = self._build_attachment_document_info(document)
document.document_type = document_info.get("document_type", "")
document.document_type_label = document_info.get("document_type_label", "")
document.scene_code = document_info.get("scene_code", "")
document.scene_label = document_info.get("scene_label", "")
document.document_fields = list(document_info.get("fields") or [])
metadata["ocr_text"] = repaired_text
metadata["ocr_summary"] = repaired_summary
metadata["document_info"] = document_info
metadata["previewable"] = True
metadata["preview_kind"] = "pdf"
metadata["preview_storage_key"] = str(
metadata.get("storage_key") or self._attachment_storage.to_storage_key(file_path)
)
metadata["preview_media_type"] = "application/pdf"
metadata["preview_file_name"] = str(metadata.get("file_name") or file_path.name)
if item is not None:
requirement_check = self._build_attachment_requirement_check(
item=item,
document_info=document_info,
)
metadata["requirement_check"] = requirement_check
metadata["analysis"] = self._build_attachment_analysis(
document=document,
item=item,
claim=getattr(item, "claim", None),
document_info=document_info,
requirement_check=requirement_check,
)
self._attachment_storage.write_meta(file_path, metadata)
return metadata
def _resolve_item_attachment_preview_content(self, item: ExpenseClaimItem) -> tuple[Path, str, str]:
file_path, media_type, filename = self._resolve_item_attachment_content(item)
metadata = self._attachment_storage.read_meta(file_path)
metadata = self._repair_pdf_text_layer_metadata_if_needed(
file_path=file_path,
metadata=metadata,
item=item,
)
preview_storage_key = str(metadata.get("preview_storage_key") or "").strip()
preview_file_name = str(metadata.get("preview_file_name") or "").strip()
preview_media_type = str(metadata.get("preview_media_type") or "").strip()
if preview_storage_key:
preview_path = self._attachment_storage.resolve_path(preview_storage_key)
if preview_path is not None and preview_path.exists():
resolved_name = preview_file_name or preview_path.name
resolved_media_type = self._attachment_presentation.resolve_media_type(
resolved_name,
fallback=preview_media_type,
)
return preview_path, resolved_media_type, resolved_name
if self._attachment_presentation.is_previewable_media_type(media_type, filename):
return file_path, media_type, filename
raise FileNotFoundError("Attachment preview not found")

View File

@@ -0,0 +1,138 @@
from __future__ import annotations
import base64
import binascii
import mimetypes
import re
from pathlib import Path
from typing import Any
from urllib.parse import quote
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
class ExpenseClaimAttachmentPresentation:
    """Derives preview metadata, media types and display names for claim attachments."""

    # ``data:<type>/<subtype>;base64,<body>``; DOTALL lets the body span several lines.
    _DATA_URL_PATTERN = re.compile(
        r"^data:(?P<media>[\w.+-]+/[\w.+-]+);base64,(?P<body>.+)$",
        flags=re.DOTALL,
    )

    def __init__(self, storage: ExpenseClaimAttachmentStorage) -> None:
        """Keep a reference to the storage used to turn paths into storage keys."""
        self.storage = storage

    def build_preview_meta(
        self,
        *,
        file_path: Path,
        media_type: str,
        ocr_document: Any | None,
    ) -> dict[str, Any]:
        """Describe how the attachment at ``file_path`` can be previewed.

        Preference order:
        1. an image preview supplied by the OCR document as a data URL, which
           is written next to the original file;
        2. the original file itself when it is an image or a PDF;
        3. otherwise a "not previewable" description with empty values.

        Returns a dict with the keys ``previewable``, ``preview_kind``,
        ``preview_storage_key``, ``preview_media_type`` and ``preview_file_name``.
        """
        filename = file_path.name
        storage_key = self.storage.to_storage_key(file_path)
        preview_kind = self.resolve_preview_kind(media_type, filename)
        preview_data_url = str(getattr(ocr_document, "preview_data_url", "") or "").strip()
        preview_source_kind = str(getattr(ocr_document, "preview_kind", "") or "").strip()

        if preview_source_kind == "image" and preview_data_url:
            preview_asset = self._write_preview_asset_from_data_url(
                attachment_dir=file_path.parent,
                original_filename=filename,
                preview_data_url=preview_data_url,
            )
            # An undecodable data URL falls through to the original-file rules.
            if preview_asset is not None:
                preview_path, preview_media_type, preview_file_name = preview_asset
                return {
                    "previewable": True,
                    "preview_kind": "image",
                    "preview_storage_key": self.storage.to_storage_key(preview_path),
                    "preview_media_type": preview_media_type,
                    "preview_file_name": preview_file_name,
                }

        if preview_kind:
            return {
                "previewable": True,
                "preview_kind": preview_kind,
                "preview_storage_key": storage_key,
                "preview_media_type": media_type,
                "preview_file_name": filename,
            }

        return {
            "previewable": False,
            "preview_kind": "",
            "preview_storage_key": "",
            "preview_media_type": "",
            "preview_file_name": "",
        }

    @staticmethod
    def resolve_preview_kind(media_type: str | None, filename: str) -> str:
        """Return ``"image"``, ``"pdf"`` or ``""`` for the given media type.

        When ``media_type`` is blank the type is guessed from ``filename``.
        """
        resolved = str(media_type or "").strip() or (mimetypes.guess_type(filename)[0] or "")
        if resolved.startswith("image/"):
            return "image"
        if resolved == "application/pdf":
            return "pdf"
        return ""

    @classmethod
    def decode_data_url(cls, payload: str) -> tuple[str, bytes] | None:
        """Decode a base64 ``data:`` URL into ``(media_type, content)``.

        Whitespace inside the base64 body (for example line-wrapped output) is
        ignored; any other character outside the base64 alphabet makes the
        payload invalid. Returns ``None`` for anything that is not a valid
        base64 data URL.
        """
        matched = cls._DATA_URL_PATTERN.match(str(payload or "").strip())
        if not matched:
            return None
        # Strict validation rejects newlines, so strip whitespace first; without
        # this, wrapped bodies accepted by the pattern could never decode.
        body = re.sub(r"\s+", "", matched.group("body"))
        try:
            content = base64.b64decode(body, validate=True)
        except (binascii.Error, ValueError):
            return None
        return matched.group("media"), content

    def _write_preview_asset_from_data_url(
        self,
        *,
        attachment_dir: Path,
        original_filename: str,
        preview_data_url: str,
    ) -> tuple[Path, str, str] | None:
        """Write the decoded preview next to the attachment.

        The file is named ``<original stem>.preview<ext>``, where ``<ext>`` is
        guessed from the data URL's media type (``.bin`` when unknown).
        Returns ``(path, media_type, file_name)`` or ``None`` when the data URL
        cannot be decoded.
        """
        decoded = self.decode_data_url(preview_data_url)
        if decoded is None:
            return None
        preview_media_type, preview_content = decoded
        suffix = mimetypes.guess_extension(preview_media_type) or ".bin"
        preview_name = f"{Path(original_filename).stem}.preview{suffix}"
        preview_path = attachment_dir / preview_name
        preview_path.write_bytes(preview_content)
        return preview_path, preview_media_type, preview_name

    @staticmethod
    def build_preview_client_path(claim_id: str, item_id: str) -> str:
        """Return the URL-quoted client path of an item's attachment preview endpoint."""
        quoted_claim = quote(str(claim_id or "").strip(), safe="")
        quoted_item = quote(str(item_id or "").strip(), safe="")
        return f"/reimbursements/claims/{quoted_claim}/items/{quoted_item}/attachment/preview"

    @staticmethod
    def resolve_media_type(filename: str, *, fallback: str | None = None) -> str:
        """Return the media type guessed from ``filename``.

        Falls back to ``fallback`` and finally to ``application/octet-stream``.
        """
        guessed = mimetypes.guess_type(filename)[0]
        return str(guessed or fallback or "application/octet-stream")

    @classmethod
    def is_previewable_media_type(cls, media_type: str | None, filename: str) -> bool:
        """Return ``True`` when the media type is an image or a PDF.

        Delegates to ``resolve_preview_kind`` so both methods always agree.
        """
        return bool(cls.resolve_preview_kind(media_type, filename))

    @staticmethod
    def resolve_display_name(storage_key: str | None) -> str:
        """Return the final path component of ``storage_key`` (``""`` when blank)."""
        return Path(str(storage_key or "").strip()).name

    @classmethod
    def merge_reference(cls, current_invoice_id: str | None, next_invoice_id: str | None) -> str | None:
        """Choose which attachment reference to keep.

        Returns ``None`` when the next reference is blank. The current
        reference is kept when both share the same display name; otherwise the
        next reference is returned.
        """
        normalized_next = str(next_invoice_id or "").strip()
        if not normalized_next:
            return None
        normalized_current = str(current_invoice_id or "").strip()
        if (
            normalized_current
            and cls.resolve_display_name(normalized_current) == cls.resolve_display_name(normalized_next)
        ):
            return normalized_current
        return normalized_next

View File

@@ -0,0 +1,129 @@
from __future__ import annotations
import json
import re
import shutil
from pathlib import Path
from app.core.config import get_settings
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
class ExpenseClaimAttachmentStorage:
    """Centralizes filesystem operations for expense claim attachments.

    Every path handled here lives below :meth:`root`; storage keys are POSIX
    paths relative to that root.
    """

    def root(self) -> Path:
        """Return the resolved ``expense_claims`` directory under the configured storage root."""
        return (get_settings().resolved_storage_root_dir / "expense_claims").resolve()

    def build_item_dir(self, claim_id: str, item_id: str) -> Path:
        """Return the resolved directory ``<root>/<claim_id>/<item_id>`` (not created)."""
        return (self.root() / claim_id / item_id).resolve()

    def delete_claim_files(self, claim: ExpenseClaim) -> None:
        """Delete the files of every item of ``claim`` and then the claim's directory."""
        for item in list(claim.items or []):
            self.delete_item_files(item)
        self.delete_claim_root(claim.id)

    def delete_claim_root(self, claim_id: str) -> None:
        """Delete ``<root>/<claim_id>``.

        Raises ``FileNotFoundError`` when that path is not strictly inside the root.
        """
        claim_root = self._assert_child(self.root() / claim_id)
        self._delete_path(claim_root)

    @staticmethod
    def normalize_filename(filename: str | None) -> str:
        """Return a filesystem-safe file name derived from ``filename``.

        Directory components are dropped, every run of characters other than
        word characters, ``.``, ``-`` and CJK ideographs becomes ``_``, and
        leading/trailing ``.``/``_`` are stripped. When nothing usable is left
        of the stem, the name becomes ``attachment.<ext>`` (keeping the
        extension) or ``attachment.bin`` when there is no extension either.
        """
        base_name = Path(str(filename or "").strip()).name
        sanitized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", base_name)
        normalized = sanitized.strip("._")

        # If only the extension survives sanitising (e.g. "***.pdf" -> "_.pdf"),
        # keep it as an extension instead of returning a bare "pdf".
        stem, dot, extension = sanitized.rpartition(".")
        extension = extension.strip("._")
        if dot and extension and not stem.strip("._"):
            return f"attachment.{extension}"

        return normalized or "attachment.bin"

    def resolve_path(self, storage_key: str | None) -> Path | None:
        """Resolve a storage key to an absolute path.

        Returns ``None`` for a blank key and raises ``FileNotFoundError`` when
        the key would escape the storage root.
        """
        normalized = str(storage_key or "").strip()
        if not normalized:
            return None
        root = self.root()
        path = (root / normalized).resolve()
        try:
            path.relative_to(root)
        except ValueError as exc:
            raise FileNotFoundError("Attachment path is invalid") from exc
        return path

    def resolve_item_path(self, item: ExpenseClaimItem) -> Path | None:
        """Resolve the attachment path of ``item``.

        ``item.invoice_id`` is first treated as a storage key. If no file
        exists there, the fallback is the normalized file name inside the
        item's own directory; that path is returned without an existence check.
        Returns ``None`` when the item has no ``invoice_id``.
        """
        if not str(item.invoice_id or "").strip():
            return None
        file_path = self.resolve_path(item.invoice_id)
        if file_path is not None and file_path.exists():
            return file_path
        filename = self.normalize_filename(item.invoice_id)
        if not filename:
            return file_path
        fallback_path = (self.build_item_dir(item.claim_id, item.id) / filename).resolve()
        try:
            fallback_path.relative_to(self.root())
        except ValueError as exc:
            raise FileNotFoundError("Attachment path is invalid") from exc
        return fallback_path

    def to_storage_key(self, file_path: Path) -> str:
        """Return ``file_path`` as a POSIX path relative to the storage root.

        Raises ``ValueError`` when the path is outside the root.
        """
        return file_path.resolve().relative_to(self.root()).as_posix()

    def delete_item_files(self, item: ExpenseClaimItem) -> None:
        """Delete the attachment of ``item``.

        A file stored directly in the root is removed together with its
        metadata sidecar; otherwise the whole directory containing the file is
        removed.
        """
        file_path = self.resolve_item_path(item)
        if file_path is None:
            return
        root = self.root()
        if file_path.parent == root:
            self._delete_path(file_path)
            self._delete_path(self.meta_path(file_path))
            return
        self._delete_path(file_path.parent)

    @staticmethod
    def meta_path(file_path: Path) -> Path:
        """Return the sidecar path ``<file name>.meta.json`` next to ``file_path``."""
        return file_path.with_name(f"{file_path.name}.meta.json")

    def write_meta(self, file_path: Path, payload: dict) -> None:
        """Write ``payload`` as pretty-printed UTF-8 JSON to the sidecar of ``file_path``."""
        meta_path = self.meta_path(file_path)
        meta_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    def read_meta(self, file_path: Path) -> dict:
        """Read the sidecar of ``file_path``.

        Returns ``{}`` when the sidecar is missing, unreadable, not valid JSON
        or not a JSON object.
        """
        meta_path = self.meta_path(file_path)
        if not meta_path.exists():
            return {}
        try:
            payload = json.loads(meta_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError):
            return {}
        return payload if isinstance(payload, dict) else {}

    def _assert_child(self, path: Path) -> Path:
        """Return ``path`` resolved, ensuring it lies strictly inside the storage root.

        Raises ``FileNotFoundError`` for paths outside the root and for the
        root itself, so a blank identifier can never select the whole
        attachment store for deletion.
        """
        root = self.root()
        resolved = path.resolve()
        try:
            resolved.relative_to(root)
        except ValueError as exc:
            raise FileNotFoundError("Attachment path is invalid") from exc
        if resolved == root:
            raise FileNotFoundError("Attachment path is invalid")
        return resolved

    def _delete_path(self, path: Path | None) -> None:
        """Delete a file or directory tree inside the root; missing paths are ignored.

        Raises ``OSError`` when the target still exists after deletion.
        """
        if path is None:
            return
        target = self._assert_child(path)
        if not target.exists():
            return
        if target.is_dir():
            shutil.rmtree(target)
        else:
            target.unlink()
        if target.exists():
            raise OSError(f"Attachment path was not deleted: {target}")

View File

@@ -0,0 +1,361 @@
from __future__ import annotations
import re
from decimal import Decimal
# Chinese display labels keyed by expense / line-item type code.
EXPENSE_TYPE_LABELS = {
    "travel": "差旅",
    "train_ticket": "火车票",
    "flight_ticket": "机票",
    "hotel_ticket": "住宿票",
    "ride_ticket": "乘车",
    "travel_allowance": "出差补贴",
    "hotel": "住宿",
    "transport": "交通",
    "meal": "餐费",
    "meeting": "会务",
    "entertainment": "招待",
    "office": "办公",
    "training": "培训",
    "communication": "通讯",
    "welfare": "福利",
}
# Presumably the cap on simultaneously open drafts per user; enforced elsewhere.
MAX_DRAFT_CLAIMS_PER_USER = 3
# Claim status codes; the name suggests these are the states in which a claim may be edited.
EDITABLE_CLAIM_STATUSES = ("draft", "supplement", "returned")
# Item types produced by the system rather than backed by an uploaded document (see name).
SYSTEM_GENERATED_ITEM_TYPES = {"travel_allowance"}
# Fine-grained item types that belong to a travel claim.
TRAVEL_DETAIL_ITEM_TYPES = {
    "train_ticket",
    "flight_ticket",
    "hotel_ticket",
    "ride_ticket",
    "travel_allowance",
}
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES = {"train_ticket", "flight_ticket"}
# OCR document type -> fine-grained line-item type.
# Document types absent here (e.g. "parking_toll_receipt") have no direct item type.
DOCUMENT_TYPE_ITEM_TYPE_MAP = {
    "train_ticket": "train_ticket",
    "flight_itinerary": "flight_ticket",
    "hotel_invoice": "hotel_ticket",
    "taxi_receipt": "ride_ticket",
    "transport_receipt": "ride_ticket",
}
# OCR document type -> coarse expense scene code.
DOCUMENT_TYPE_SCENE_MAP = {
    "train_ticket": "travel",
    "flight_itinerary": "travel",
    "hotel_invoice": "hotel",
    "taxi_receipt": "transport",
    "transport_receipt": "transport",
    "parking_toll_receipt": "transport",
    "meal_receipt": "meal",
    "office_invoice": "office",
    "meeting_invoice": "meeting",
    "training_invoice": "training",
}
# NOTE(review): "ship_ticket" / "ferry_ticket" appear in the two sets below but
# in none of the label or mapping tables above - confirm they are produced anywhere.
DOCUMENT_FACT_ITEM_TYPES = {"train_ticket", "flight_ticket", "hotel_ticket", "ride_ticket", "ship_ticket", "ferry_ticket"}
ROUTE_DESCRIPTION_ITEM_TYPES = {"train_ticket", "flight_ticket", "ship_ticket", "ferry_ticket", "ride_ticket"}
# Chinese label of the trip-date field, keyed by OCR document type.
DOCUMENT_TRIP_DATE_LABELS = {
    "train_ticket": "列车出发时间",
    "flight_itinerary": "起飞日期",
    "taxi_receipt": "乘车时间",
    "transport_receipt": "乘车时间",
    "parking_toll_receipt": "通行日期",
}
# Chinese wording that names the date(s) expected for each document type
# (superset of the table above: also covers "hotel_invoice").
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS = {
    "train_ticket": "列车出发时间或乘车日期",
    "flight_itinerary": "起飞日期或航班日期",
    "taxi_receipt": "乘车时间",
    "transport_receipt": "乘车时间",
    "parking_toll_receipt": "通行日期",
    "hotel_invoice": "入住或离店日期",
}
# Field keys that denote a trip date. All entries are lowercase without
# underscores - presumably compared against keys normalized the same way; confirm in the consumer.
DOCUMENT_TRIP_DATE_KEYS = {
    "traveldate",
    "tripdate",
    "journeydate",
    "departuredate",
    "departuretime",
    "departdate",
    "departtime",
    "boardingdate",
    "boardingtime",
    "traindate",
    "traintime",
    "traindeparturetime",
    "scheduleddeparturetime",
    "flightdate",
    "flighttime",
    "ridedate",
    "ridetime",
    "pickuptime",
    "starttime",
}
# Field keys for unspecific dates and for invoice-issue dates respectively.
DOCUMENT_GENERIC_DATE_KEYS = {"date", "time", "occurredat", "occurreddate", "businessdate"}
DOCUMENT_INVOICE_DATE_KEYS = {"issuedat", "issuedate", "invoicedate", "billingdate"}
# Chinese label fragments that denote a trip date.
DOCUMENT_TRIP_DATE_LABEL_TOKENS = (
    "出发日期",
    "出发时间",
    "列车出发时间",
    "发车日期",
    "发车时间",
    "开车时间",
    "乘车日期",
    "乘车时间",
    "起飞日期",
    "航班日期",
    "行程日期",
    "上车时间",
    "用车时间",
    "通行日期",
)
# Chinese label fragments for unspecific dates and for invoice-issue dates.
DOCUMENT_GENERIC_DATE_LABEL_TOKENS = ("日期", "时间", "发生时间", "业务发生日期")
DOCUMENT_INVOICE_DATE_LABEL_TOKENS = ("开票日期", "发票日期")
# Whole-string matcher for "<origin>-<destination>": each side is 2-40 characters
# drawn from ASCII letters/digits, CJK U+4E00-U+9FA5, parentheses and "·";
# optional whitespace is allowed around the single "-".
DOCUMENT_ROUTE_FORMAT_PATTERN = re.compile(
    r"^[A-Za-z0-9\u4e00-\u9fa5()·]{2,40}\s*-\s*"
    r"[A-Za-z0-9\u4e00-\u9fa5()·]{2,40}$"
)
# Finds "<origin><separator><destination>" inside free text. Group 1 is the
# origin, group 2 the destination; each is 2-40 characters drawn from ASCII
# letters/digits, CJK U+4E00-U+9FA5, parentheses and "·".
# The separator group must not contain an empty alternative: with one, any run
# of four or more such characters (e.g. an invoice title) matches as a route
# with no separator at all.
DOCUMENT_ROUTE_TEXT_PATTERN = re.compile(
    r"([A-Za-z0-9\u4e00-\u9fa5()·]{2,40})\s*(?:至|到|→|->|—|–|-)\s*"
    r"([A-Za-z0-9\u4e00-\u9fa5()·]{2,40})"
)
# Chinese field labels naming the start and the end of a route.
DOCUMENT_ROUTE_ORIGIN_LABELS = {"起点", "上车", "上车地点", "上车地址", "出发", "出发地", "出发站", "始发站", "乘车起点"}
DOCUMENT_ROUTE_DESTINATION_LABELS = {
    "终点",
    "下车",
    "下车地点",
    "下车地址",
    "到达",
    "到达地",
    "到达站",
    "目的地",
    "乘车终点",
}
# Item types considered "generic" (includes the empty string); judging by the
# name these may be overwritten when an attachment yields a more specific type - confirm in the consumer.
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES = {"", "other", "travel", "transport", "hotel"}
LOCATION_REQUIRED_EXPENSE_TYPES = {"travel", "meeting", "entertainment"}
# Chinese keywords per expense scene code.
# NOTE(review): several transport keywords (e.g. "高铁", "机票", "行程单") describe
# long-distance travel documents - check how ties with "travel" are resolved.
EXPENSE_SCENE_KEYWORDS = {
    "travel": ("差旅", "出差", "行程"),
    "hotel": ("酒店", "住宿", "房费", "客房", "入住", "离店"),
    "transport": (
        "交通",
        "打车",
        "出租车",
        "网约车",
        "滴滴",
        "出行",
        "乘车",
        "用车",
        "叫车",
        "车费",
        "车资",
        "的士",
        "高铁",
        "动车",
        "火车",
        "机票",
        "航班",
        "行程单",
        "登机",
        "客票",
        "公交",
        "地铁",
        "过路费",
        "通行费",
        "停车",
    ),
    "meal": ("餐饮", "餐费", "用餐", "外卖", "快餐", "酒楼", "饭店", "饭馆", "食品", "咖啡"),
    "entertainment": ("招待", "宴请", "接待", "客户餐", "商务餐", "业务招待"),
    "office": ("办公", "办公用品", "文具", "耗材", "打印", "纸张", "硒鼓", "墨盒", "鼠标", "键盘", "电脑"),
    "meeting": ("会议", "会务", "会展", "会议室", "会场", "场地费", "论坛"),
    "training": ("培训", "课程", "讲师", "教材", "学费", "认证"),
}
# Expense / item type -> document scene codes accepted for it.
# "travel_allowance" maps to an empty set: no document scene is accepted.
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES = {
    "travel": {"travel", "hotel", "transport", "meal"},
    "train_ticket": {"travel"},
    "flight_ticket": {"travel"},
    "hotel_ticket": {"hotel"},
    "ride_ticket": {"transport"},
    "travel_allowance": set(),
    "hotel": {"hotel"},
    "transport": {"transport", "travel"},
    "meal": {"meal", "entertainment"},
    "entertainment": {"entertainment", "meal"},
    "office": {"office"},
    "meeting": {"meeting"},
    "training": {"training"},
}
# Chinese display labels keyed by document scene code.
DOCUMENT_SCENE_LABELS = {
    "travel": "差旅",
    "hotel": "住宿",
    "transport": "交通",
    "meal": "餐饮",
    "entertainment": "业务招待",
    "office": "办公用品",
    "meeting": "会务",
    "training": "培训",
    "other": "其他票据",
}
# Review action codes that decide where uploaded documents are attached.
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS = {
    "link_to_existing_draft",
    "create_new_claim_from_documents",
}
# Superset of the association actions plus "save_draft" and "next_step".
PERSISTENT_EXPENSE_REVIEW_ACTIONS = {
    "save_draft",
    "next_step",
    *DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
}
# Return-reason code -> Chinese description.
RETURN_REASON_OPTIONS = {
    "missing_attachment": "附件缺失或不清晰",
    "invoice_mismatch": "票据类型/金额与明细不一致",
    "over_policy": "超出制度标准或缺少超标说明",
    "business_explanation": "业务事由/地点/人员信息不完整",
    "duplicate_or_abnormal": "疑似重复或异常票据",
    "approval_question": "审批人需要补充说明",
}
MAX_CLAIM_NO_RETRY_ATTEMPTS = 3
# Captures one date: a 19xx/20xx year, month 1-12, day 1-31, separated by any of
# "-", "/", "." or the Chinese markers 年 / 月, with an optional trailing 日.
DOCUMENT_DATE_PATTERN = re.compile(r"((?:20\d{2}|19\d{2})[-/年.](?:1[0-2]|0?[1-9])[-/月.](?:3[01]|[12]\d|0?[1-9])日?)")
# Chinese sentence openers; judging by the name, reasons starting with one of
# these are system-written prompts rather than user-entered reasons.
SYSTEM_GENERATED_REASON_PREFIXES = (
    "我上传了",
    "请按当前已识别信息",
    "请把当前上传的票据",
    "请基于当前上传的多张票据",
    "我已核对右侧识别结果",
    "请同步修正逐票据识别结果",
    "我已修改识别信息",
    "查看报销草稿",
    "请解释一下当前这笔报销的合规风险和待补充项",
)
# Patterns anchored at the start of a reason text that match a leading date or
# date range: the first requires a time keyword (optionally preceded by
# "识别事项"), the second matches a bare date that is followed by punctuation.
# NOTE(review): the range-separator group "(?:至|到|~||—|-)" contains an empty
# alternative, so two dates separated only by whitespace are consumed as a
# range too - this looks like a lost separator character; confirm.
LEADING_REASON_TIME_PATTERNS = (
    re.compile(
        r"^\s*(?:识别事项(?:有)?[:]\s*)?"
        r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[:]?\s*"
        r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
        r"(?:\s*(?:至|到|~||—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
        r"\s*[,。;;、]?\s*"
    ),
    re.compile(
        r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
        r"(?:\s*(?:至|到|~||—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
        r"\s*[,。;;、]\s*"
    ),
)
# AI review tuning values. Judging by the names: a history window in days and
# the repeat-risk counts at which a warning / a block is raised - confirm in the consumer.
AI_REVIEW_LOOKBACK_DAYS = 90
AI_REVIEW_REPEAT_RISK_WARNING_COUNT = 1
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT = 2
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES = {"travel", "hotel", "transport"}
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES = {"flight_itinerary", "train_ticket"}
# City name -> policy tier. Only "tier_1" and "tier_2" cities are listed.
TRAVEL_POLICY_CITY_TIERS = {
    "北京": "tier_1",
    "上海": "tier_1",
    "广州": "tier_1",
    "深圳": "tier_1",
    "杭州": "tier_2",
    "南京": "tier_2",
    "苏州": "tier_2",
    "武汉": "tier_2",
    "成都": "tier_2",
    "重庆": "tier_2",
    "西安": "tier_2",
    "天津": "tier_2",
    "宁波": "tier_2",
    "厦门": "tier_2",
    "青岛": "tier_2",
    "长沙": "tier_2",
    "郑州": "tier_2",
    "合肥": "tier_2",
    "济南": "tier_2",
    "沈阳": "tier_2",
    "大连": "tier_2",
    "福州": "tier_2",
    "昆明": "tier_2",
    "海口": "tier_2",
    "三亚": "tier_2",
    "无锡": "tier_2",
    "东莞": "tier_2",
    "佛山": "tier_2",
}
# City names ordered longest first, so that a first-match scan prefers the longer name.
TRAVEL_POLICY_CITY_MATCH_ORDER = tuple(
    sorted(TRAVEL_POLICY_CITY_TIERS.keys(), key=lambda item: len(item), reverse=True)
)
# Grade band code -> display label of the grades it covers.
TRAVEL_POLICY_BAND_LABELS = {
    "junior": "P1-P3",
    "mid": "P4-P5",
    "senior": "P6-P7",
    "manager": "M1-M2",
    "executive": "M3及以上 / D序列",
}
# Hotel limit per grade band and city tier (Decimal; presumably a per-night cap).
# "tier_3" has no entry in TRAVEL_POLICY_CITY_TIERS - presumably the tier of
# every unlisted city; confirm in the consumer.
TRAVEL_POLICY_HOTEL_LIMITS = {
    "junior": {
        "tier_1": Decimal("450.00"),
        "tier_2": Decimal("380.00"),
        "tier_3": Decimal("320.00"),
    },
    "mid": {
        "tier_1": Decimal("550.00"),
        "tier_2": Decimal("480.00"),
        "tier_3": Decimal("380.00"),
    },
    "senior": {
        "tier_1": Decimal("700.00"),
        "tier_2": Decimal("620.00"),
        "tier_3": Decimal("520.00"),
    },
    "manager": {
        "tier_1": Decimal("900.00"),
        "tier_2": Decimal("820.00"),
        "tier_3": Decimal("720.00"),
    },
    "executive": {
        "tier_1": Decimal("1200.00"),
        "tier_2": Decimal("1000.00"),
        "tier_3": Decimal("900.00"),
    },
}
# Highest allowed class level per grade band; the levels are the integers used
# in the two *_CLASS_PATTERNS tables below.
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS = {
    "junior": {"flight": 1, "train": 1},
    "mid": {"flight": 1, "train": 1},
    "senior": {"flight": 2, "train": 2},
    "manager": {"flight": 3, "train": 3},
    "executive": {"flight": 4, "train": 3},
}
# Chinese keywords; judging by the names they mark an explained route
# exception and an explained over-standard exception respectively.
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS = (
    "中转",
    "转机",
    "经停",
    "改签",
    "多地出差",
    "多城市",
    "多站",
    "异地返程",
    "异地结束",
    "临时变更",
    "继续前往",
    "第二站",
)
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS = (
    "超标说明",
    "无直达",
    "展会高峰",
    "会议高峰",
    "协议酒店满房",
    "客户指定",
    "临时改签",
    "行程变更",
    "红眼航班",
    "晚到店",
)
# (keyword, level) pairs. Order matters for substring scans: the specific
# "...经济舱" variants come before the plain "经济舱" that they contain.
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS = (
    ("头等舱", 4),
    ("公务舱", 3),
    ("商务舱", 3),
    ("超级经济舱", 2),
    ("高端经济舱", 2),
    ("明珠经济舱", 2),
    ("经济舱", 1),
)
# (keyword, level) pairs for train seat / berth classes.
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS = (
    ("商务座", 3),
    ("一等座", 2),
    ("软卧", 2),
    ("二等座", 1),
    ("二等卧", 1),
    ("硬卧", 1),
)
# Captures the number in "<n>晚" / "<n>间夜" (nights / room-nights).
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)")

View File

@@ -0,0 +1,560 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimDocumentItemBuilderMixin:
    """Builds expense-claim line-item specs from OCR documents and attachments.

    This is a mixin: it calls helpers that are not defined here and must be
    supplied by the host class (``_resolve_document_item_date``,
    ``_resolve_document_item_amount``, ``_resolve_document_route_value``,
    ``_resolve_document_fact_field``, ``_format_document_route``,
    ``_resolve_document_stay_range``,
    ``_collect_invoice_keys_from_document_info``) and reads ``self.db``.
    """

    def _resolve_context_documents(self, context_json: dict[str, Any]) -> list[dict[str, Any]]:
        """Normalize ``context_json["ocr_documents"]`` and apply reviewer overrides.

        At most the first ten entries are considered; non-dict entries are
        skipped but still consume their 1-based ``index``. Entries of
        ``context_json["review_document_form_values"]`` are matched by
        ``(index, filename)``, falling back to ``(index, "")``, and may replace
        ``summary``, ``scene_label`` and ``document_fields``.
        """
        documents = context_json.get("ocr_documents")
        if not isinstance(documents, list):
            documents = []
        normalized: list[dict[str, Any]] = []
        for index, item in enumerate(documents[:10], start=1):
            if not isinstance(item, dict):
                continue
            normalized.append(
                {
                    "index": index,
                    "filename": str(item.get("filename") or "").strip(),
                    "summary": str(item.get("summary") or "").strip(),
                    "text": str(item.get("text") or "").strip(),
                    "document_type": str(item.get("document_type") or "").strip(),
                    "scene_code": str(item.get("scene_code") or "").strip(),
                    "scene_label": str(item.get("scene_label") or "").strip(),
                    "document_fields": self._normalize_document_fields(item.get("document_fields")),
                }
            )
        overrides = context_json.get("review_document_form_values")
        if not isinstance(overrides, list) or not normalized:
            return normalized
        override_map: dict[tuple[int, str], dict[str, Any]] = {}
        for item in overrides:
            if not isinstance(item, dict):
                continue
            filename = str(item.get("filename") or "").strip()
            try:
                index = int(item.get("index") or 0)
            except (TypeError, ValueError):
                # The override payload comes from the client; a non-numeric
                # index is treated as "no index" instead of aborting the request.
                index = 0
            if not filename and index <= 0:
                continue
            override_map[(index, filename)] = item
        for item in normalized:
            override = override_map.get((int(item["index"]), str(item["filename"])))
            if override is None:
                override = override_map.get((int(item["index"]), ""))
            if override is None:
                continue
            summary = str(override.get("summary") or "").strip()
            scene_label = str(override.get("scene_label") or "").strip()
            fields = override.get("fields")
            if summary:
                item["summary"] = summary
            if scene_label:
                item["scene_label"] = scene_label
            if isinstance(fields, list):
                item["document_fields"] = self._normalize_document_fields(fields)
        return normalized

    @staticmethod
    def _normalize_document_fields(raw_fields: Any) -> list[dict[str, str]]:
        """Return ``[{"key", "label", "value"}]`` for every usable field dict.

        Entries without a label or without a value are dropped; ``key``
        defaults to the label. Anything that is not a list yields ``[]``.
        """
        if not isinstance(raw_fields, list):
            return []
        normalized: list[dict[str, str]] = []
        for field in raw_fields:
            if not isinstance(field, dict):
                continue
            label = str(field.get("label") or "").strip()
            value = str(field.get("value") or "").strip()
            key = str(field.get("key") or label or "").strip()
            if not label or not value:
                continue
            normalized.append(
                {
                    "key": key,
                    "label": label,
                    "value": value,
                }
            )
        return normalized

    def _build_context_item_specs(
        self,
        *,
        context_documents: list[dict[str, Any]],
        attachment_names: list[str],
        occurred_at: datetime,
        expense_type: str,
        amount: Decimal,
        reason: str,
        location: str,
        context_json: dict[str, Any],
        employee_grade: str | None = None,
        user_id: str = "",
    ) -> list[dict[str, Any]]:
        """Build one item spec per document, or per attachment name if there are no documents.

        Specs without a recognized amount are filled in afterwards: the first
        one receives ``amount`` minus the recognized total (when positive), the
        others ``0.00``. If a travel allowance can be computed, any existing
        ``travel_allowance`` spec is replaced by the computed one.
        Returns ``[]`` when there are neither documents nor attachments.
        """
        specs: list[dict[str, Any]] = []
        if context_documents:
            for document in context_documents:
                specs.append(
                    {
                        "item_date": self._resolve_document_item_date(document, fallback=occurred_at.date()),
                        "item_type": self._resolve_document_item_type(document, fallback=expense_type),
                        "item_reason": self._resolve_document_item_reason(document, fallback=reason),
                        "item_location": location,
                        "item_amount": self._resolve_document_item_amount(document),
                        "invoice_id": str(document.get("filename") or "").strip() or None,
                    }
                )
        elif attachment_names:
            for attachment_name in attachment_names:
                specs.append(
                    {
                        "item_date": occurred_at.date(),
                        "item_type": expense_type,
                        "item_reason": reason,
                        "item_location": location,
                        "item_amount": None,
                        "invoice_id": attachment_name,
                    }
                )
        if not specs:
            return []
        total_recognized = sum(
            spec["item_amount"] for spec in specs if isinstance(spec.get("item_amount"), Decimal)
        )
        missing_specs = [spec for spec in specs if spec.get("item_amount") is None]
        if missing_specs:
            remaining = (amount - total_recognized).quantize(Decimal("0.01"))
            if remaining > Decimal("0.00"):
                missing_specs[0]["item_amount"] = remaining
        for spec in specs:
            if spec.get("item_amount") is None:
                spec["item_amount"] = Decimal("0.00")
        allowance_spec = self._build_travel_allowance_item_spec(
            context_documents=context_documents,
            specs=specs,
            occurred_at=occurred_at,
            expense_type=expense_type,
            location=location,
            context_json=context_json,
            employee_grade=employee_grade,
            user_id=user_id,
        )
        if allowance_spec is not None:
            specs = [spec for spec in specs if str(spec.get("item_type") or "").strip() != "travel_allowance"]
            specs.append(allowance_spec)
        return specs

    def _build_travel_allowance_item_spec(
        self,
        *,
        context_documents: list[dict[str, Any]],
        specs: list[dict[str, Any]],
        occurred_at: datetime,
        expense_type: str,
        location: str,
        context_json: dict[str, Any],
        employee_grade: str | None,
        user_id: str,
    ) -> dict[str, Any] | None:
        """Return a system-generated ``travel_allowance`` spec, or ``None``.

        ``None`` is returned when the claim is not travel-related, the grade or
        the destination is unknown, the calculator raises ``ValueError``, or
        the computed amount or daily rate is not positive. ``specs`` is
        accepted for interface compatibility and is not read.
        """
        if not self._should_add_travel_allowance_item(
            expense_type=expense_type,
            context_documents=context_documents,
            context_json=context_json,
        ):
            return None
        grade = str(employee_grade or context_json.get("grade") or "").strip()
        if not grade:
            return None
        days, _, end_date = self._resolve_travel_allowance_days(
            context_json=context_json,
            occurred_at=occurred_at,
        )
        allowance_location = self._resolve_travel_allowance_location(
            location=location,
            context_documents=context_documents,
        )
        if days < 1 or not allowance_location:
            return None
        try:
            # Imported here rather than at module level, as in the original
            # (presumably to avoid an import cycle - confirm).
            from app.services.travel_reimbursement_calculator import (
                TravelReimbursementCalculatorService,
            )

            result = TravelReimbursementCalculatorService(self.db).calculate(
                TravelReimbursementCalculatorRequest(
                    days=days,
                    location=allowance_location,
                    grade=grade,
                ),
                CurrentUserContext(
                    username=user_id,
                    name="",
                    role_codes=[],
                    is_admin=False,
                ),
            )
        except ValueError:
            return None
        allowance_amount = Decimal(result.allowance_amount or Decimal("0.00")).quantize(Decimal("0.01"))
        allowance_rate = Decimal(result.total_allowance_rate or Decimal("0.00")).quantize(Decimal("0.01"))
        if allowance_amount <= Decimal("0.00") or allowance_rate <= Decimal("0.00"):
            return None
        return {
            "item_date": end_date,
            "item_type": "travel_allowance",
            "item_reason": (
                f"系统自动计算出差补贴:{result.matched_city}{days}天,"
                f"{allowance_rate:.2f}元/天"
            ),
            "item_location": str(result.allowance_region or allowance_location).strip(),
            "item_amount": allowance_amount,
            "invoice_id": None,
        }

    @staticmethod
    def _should_add_travel_allowance_item(
        *,
        expense_type: str,
        context_documents: list[dict[str, Any]],
        context_json: dict[str, Any],
    ) -> bool:
        """Return True when the claim looks travel-related.

        That is: the expense type is ``travel``, the review form mentions
        "差旅"/"出差", or any document is a train ticket, a flight itinerary or
        has scene code ``travel``.
        """
        normalized_expense_type = str(expense_type or "").strip().lower()
        if normalized_expense_type == "travel":
            return True
        review_form_values = context_json.get("review_form_values")
        if isinstance(review_form_values, dict):
            review_type = str(
                review_form_values.get("expense_type")
                or review_form_values.get("scene_label")
                or review_form_values.get("reason_value")
                or ""
            )
            if any(keyword in review_type for keyword in ("差旅", "出差")):
                return True
        for document in context_documents:
            document_type = str(document.get("document_type") or "").strip()
            scene_code = str(document.get("scene_code") or "").strip()
            if document_type in {"train_ticket", "flight_itinerary"} or scene_code == "travel":
                return True
        return False

    def _resolve_travel_allowance_days(
        self,
        *,
        context_json: dict[str, Any],
        occurred_at: datetime,
    ) -> tuple[int, date, date]:
        """Return ``(days, start_date, end_date)`` of the trip.

        Dates come from ``business_time_context`` when it is a dict, otherwise
        from ISO dates found in the review form's time fields, otherwise from
        ``occurred_at``. A day count stated explicitly in the free text wins
        over the date span and moves ``end_date`` accordingly.
        """
        start_date = occurred_at.date()
        end_date = start_date
        explicit_days = self._extract_travel_allowance_days_from_context(context_json)
        business_time_context = context_json.get("business_time_context")
        if isinstance(business_time_context, dict):
            start_date = self._parse_iso_date_or_default(business_time_context.get("start_date"), start_date)
            end_date = self._parse_iso_date_or_default(business_time_context.get("end_date"), start_date)
        else:
            review_form_values = context_json.get("review_form_values")
            if isinstance(review_form_values, dict):
                time_text = str(
                    review_form_values.get("time_range")
                    or review_form_values.get("business_time")
                    or review_form_values.get("occurred_date")
                    or ""
                ).strip()
                matched_dates = re.findall(r"\d{4}-\d{2}-\d{2}", time_text)
                if matched_dates:
                    start_date = self._parse_iso_date_or_default(matched_dates[0], start_date)
                    end_date = self._parse_iso_date_or_default(matched_dates[-1], start_date)
        if end_date < start_date:
            end_date = start_date
        if explicit_days > 0:
            return explicit_days, start_date, start_date + timedelta(days=explicit_days - 1)
        days = (end_date - start_date).days + 1
        return max(1, days), start_date, end_date

    @staticmethod
    def _extract_travel_allowance_days_from_context(context_json: dict[str, Any]) -> int:
        """Search the review form and raw user/OCR text for an explicit day count (0 if none)."""
        review_form_values = context_json.get("review_form_values")
        text_parts: list[str] = []
        if isinstance(review_form_values, dict):
            text_parts.extend(
                str(review_form_values.get(key) or "")
                for key in (
                    "reason",
                    "business_reason",
                    "reason_value",
                    "scene_label",
                    "time_range",
                    "business_time",
                )
            )
        text_parts.extend(
            str(context_json.get(key) or "")
            for key in ("user_input_text", "message", "raw_text", "ocr_summary")
        )
        return ExpenseClaimDocumentItemBuilderMixin._extract_travel_day_count(" ".join(text_parts))

    @staticmethod
    def _extract_travel_day_count(text: str) -> int:
        """Return the day count from phrases such as "出差3天" or "3天出差".

        Spaces are removed before matching. Returns 0 when no phrase is found;
        a matched count is never less than 1.
        """
        normalized = str(text or "").replace(" ", "")
        if not normalized:
            return 0
        patterns = (
            r"(?:出差|差旅|行程|支撑|支持|部署|项目|业务)\D{0,12}?(\d{1,2})天",
            r"(\d{1,2})天(?:出差|差旅|行程)",
        )
        for pattern in patterns:
            match = re.search(pattern, normalized)
            if not match:
                continue
            try:
                return max(1, int(match.group(1)))
            except ValueError:
                continue
        return 0

    @staticmethod
    def _parse_iso_date_or_default(value: Any, fallback: date) -> date:
        """Parse ``value`` as an ISO date; return ``fallback`` when it is not one."""
        try:
            return date.fromisoformat(str(value or "").strip())
        except ValueError:
            return fallback

    @staticmethod
    def _resolve_travel_allowance_location(
        *,
        location: str,
        context_documents: list[dict[str, Any]],
    ) -> str:
        """Return the destination used for the travel allowance.

        ``location`` wins unless it is blank or a placeholder. Otherwise the
        document fields are scanned in order: a route field yields the part
        after its last separator, a destination field yields its value.
        Returns ``""`` when nothing is found.
        """
        normalized_location = str(location or "").strip()
        if normalized_location and normalized_location not in {"待补充", "未知", "暂无"}:
            return normalized_location
        # Multi-character and arrow separators are tested before the plain
        # hyphen so that "A->B" is not split on "-". The tuple must never
        # contain "": `"" in value` is always true and `value.split("")` raises
        # ValueError.
        separators = ("->", "→", "—", "–", "-", "至", "到")
        for document in context_documents:
            for field in list(document.get("document_fields") or []):
                if not isinstance(field, dict):
                    continue
                key = str(field.get("key") or "").strip().lower()
                label = str(field.get("label") or "").strip()
                value = str(field.get("value") or "").strip()
                if key == "route" or "行程" in label:
                    for separator in separators:
                        if separator in value:
                            return value.split(separator)[-1].strip()
                if key in {"destination", "arrival_city"} or label in {"目的地", "到达城市"}:
                    return value
        return ""

    def _collect_invoice_keys_from_incoming_document(self, document: dict[str, Any]) -> list[str]:
        """Collect invoice keys from an incoming document, exposing ``document_fields`` as ``fields``."""
        document_info = dict(document or {})
        if "fields" not in document_info and isinstance(document_info.get("document_fields"), list):
            document_info["fields"] = document_info.get("document_fields")
        return self._collect_invoice_keys_from_document_info(document_info)

    def _resolve_document_item_type(self, document: dict[str, Any], *, fallback: str) -> str:
        """Derive the line-item type of ``document``.

        Precedence: the fine-grained mapping of the document type, the scene
        code, a coarse mapping of the document type, keywords in the scene
        label, then ``fallback`` (or ``"other"``).
        """
        document_type = str(document.get("document_type") or "").strip()
        mapped_type = DOCUMENT_TYPE_ITEM_TYPE_MAP.get(document_type)
        if mapped_type:
            return mapped_type
        scene_code = str(document.get("scene_code") or "").strip()
        if scene_code in {"travel", "hotel", "transport", "meal", "office", "meeting", "training"}:
            return scene_code
        if document_type in {"flight_itinerary", "train_ticket"}:
            return "travel"
        if document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"}:
            return "transport"
        if document_type == "hotel_invoice":
            return "hotel"
        if document_type == "meal_receipt":
            return "meal"
        if document_type == "office_invoice":
            return "office"
        if document_type == "meeting_invoice":
            return "meeting"
        if document_type == "training_invoice":
            return "training"
        scene_label = str(document.get("scene_label") or "").strip()
        if "交通" in scene_label:
            return "transport"
        if "住宿" in scene_label:
            return "hotel"
        # The keyword must be non-empty: `"" in scene_label` is always true and
        # would classify every remaining document as a meal.
        if "餐" in scene_label:
            return "meal"
        if "会务" in scene_label or "会议" in scene_label:
            return "meeting"
        if "培训" in scene_label:
            return "training"
        return fallback or "other"

    def _resolve_document_item_reason(self, document: dict[str, Any], *, fallback: str) -> str:
        """Derive a short reason text for the item built from ``document``.

        Tickets use the route (plus trip number), rides use the route, hotel
        invoices use the merchant and/or stay range; everything else uses the
        merchant, then the document summary, then ``fallback``.
        """
        document_type = str(document.get("document_type") or "").strip().lower()
        item_type = self._resolve_document_item_type(document, fallback="")
        if document_type in {"train_ticket", "flight_itinerary"} or item_type in {"train_ticket", "flight_ticket"}:
            route = self._resolve_document_route_value(document)
            trip_no = self._resolve_document_fact_field(
                document,
                keys={"trip_no", "flight_no", "train_no"},
                labels={"车次", "航班"},
            )
            if route and trip_no:
                # NOTE(review): route and trip number are joined without a separator - confirm this is intended.
                return f"{self._format_document_route(route)}{trip_no}"
            if route:
                return self._format_document_route(route)
        if document_type in {"taxi_receipt", "transport_receipt"} or item_type == "ride_ticket":
            route = self._resolve_document_route_value(document)
            if route:
                return self._format_document_route(route)
        if document_type == "hotel_invoice" or item_type == "hotel_ticket":
            merchant = self._resolve_document_fact_field(
                document,
                keys={"merchant_name", "merchant", "seller_name", "vendor_name", "hotel_name"},
                labels={"商户", "酒店", "宾馆", "销售方", "开票方"},
            )
            stay_range = self._resolve_document_stay_range(document)
            if merchant and stay_range:
                # NOTE(review): merchant and stay range are joined without a separator - confirm this is intended.
                return f"{merchant}{stay_range}"
            if merchant:
                return merchant
            if stay_range:
                return stay_range
        merchant = self._resolve_document_fact_field(
            document,
            keys={"merchant_name", "merchant", "seller_name", "vendor_name"},
            labels={"商户", "销售方", "开票方", "收款方"},
        )
        if merchant:
            return merchant
        summary = str(document.get("summary") or "").strip()
        return summary or fallback or ""

View File

@@ -0,0 +1,396 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimDocumentParsingMixin:
def _resolve_document_route_value(self, document: dict[str, Any]) -> str:
route = self._resolve_document_fact_field(
document,
keys={"route", "trip_route"},
labels={"行程", "路线"},
)
if route:
return route
origin = self._resolve_document_fact_field(
document,
keys={
"origin",
"from",
"from_city",
"departure",
"departure_city",
"start",
"start_location",
"start_address",
"pickup_location",
"pickup_address",
"boarding_station",
},
labels=DOCUMENT_ROUTE_ORIGIN_LABELS,
)
destination = self._resolve_document_fact_field(
document,
keys={
"destination",
"to",
"to_city",
"arrival",
"arrival_city",
"end",
"end_location",
"end_address",
"dropoff_location",
"dropoff_address",
"alighting_station",
},
labels=DOCUMENT_ROUTE_DESTINATION_LABELS,
)
if origin and destination:
return f"{origin}-{destination}"
text = " ".join(
[
str(document.get("summary") or "").strip(),
str(document.get("text") or "").strip(),
]
).strip()
text_route = self._extract_document_route_from_text(text)
if text_route:
return text_route
text_origin = self._extract_document_labeled_text_value(text, DOCUMENT_ROUTE_ORIGIN_LABELS)
text_destination = self._extract_document_labeled_text_value(text, DOCUMENT_ROUTE_DESTINATION_LABELS)
if text_origin and text_destination:
return f"{text_origin}-{text_destination}"
return ""
@staticmethod
def _resolve_document_fact_field(
document: dict[str, Any],
*,
keys: set[str],
labels: set[str],
) -> str:
raw_fields = document.get("document_fields")
if not isinstance(raw_fields, list):
raw_fields = document.get("fields")
if not isinstance(raw_fields, list):
return ""
normalized_keys = {str(key or "").strip().lower().replace("_", "") for key in keys}
for field in raw_fields:
if not isinstance(field, dict):
continue
field_key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if field_key in normalized_keys or any(token in label for token in labels):
return value
return ""
@staticmethod
def _format_document_route(route: str) -> str:
# Normalize a route string into "origin-destination" form.
# Returns the stripped input unchanged when the normalized text contains no
# "-", when either side of the first "-" is empty, or when both sides are equal.
# NOTE(review): several replace()/removeprefix() arguments below appear as
# empty strings in this view; the original separator/prefix characters may
# have been lost in extraction. As written, replace("", "-") would insert a
# hyphen between every character - confirm against the repository file.
normalized = (
str(route or "")
.strip()
.replace("->", "-")
.replace("", "-")
.replace("", "-")
.replace("", "-")
.replace("", "-")
.replace("", "-")
)
if "-" not in normalized:
return str(route or "").strip()
# Split on the first "-" only, so the destination may itself contain "-".
origin, destination = [part.strip() for part in normalized.split("-", 1)]
origin = origin.removeprefix("").strip()
destination = destination.removeprefix("").removeprefix("").strip()
if not origin or not destination or origin == destination:
return str(route or "").strip()
return f"{origin}-{destination}"
@staticmethod
def _extract_document_route_from_text(text: str) -> str:
    """Return the first acceptable ``origin-destination`` pair found in text.

    Candidates come from ``DOCUMENT_ROUTE_TEXT_PATTERN`` (groups 1 and 2).
    A candidate is rejected when either side is empty, both sides are equal,
    both sides are purely digits, or the joined pair matches
    ``DOCUMENT_DATE_PATTERN``. Returns ``""`` when nothing qualifies.
    """
    for hit in DOCUMENT_ROUTE_TEXT_PATTERN.finditer(str(text or "")):
        start = str(hit.group(1) or "").strip()
        end = str(hit.group(2) or "").strip()
        if not (start and end) or start == end:
            continue
        if start.isdigit() and end.isdigit():
            continue
        candidate = f"{start}-{end}"
        if DOCUMENT_DATE_PATTERN.search(candidate):
            continue
        return candidate
    return ""
@staticmethod
def _extract_document_labeled_text_value(text: str, labels: set[str]) -> str:
for label in sorted(labels, key=len, reverse=True):
pattern = re.compile(
rf"{re.escape(label)}[:\s]*"
r"([A-Za-z0-9\u4e00-\u9fa5()·\-路街道号弄区县市省园桥站机场中心]{2,50})"
)
match = pattern.search(str(text or ""))
if match:
return str(match.group(1) or "").strip()
return ""
def _resolve_document_stay_range(self, document: dict[str, Any]) -> str:
check_in = self._resolve_document_fact_field(
document,
keys={"check_in", "checkin", "arrival_date", "start_date"},
labels={"入住", "入住日期", "到店", "开始日期"},
)
check_out = self._resolve_document_fact_field(
document,
keys={"check_out", "checkout", "departure_date", "end_date"},
labels={"离店", "退房", "离店日期", "结束日期"},
)
if check_in and check_out:
return f"{check_in}{check_out}"
nights = self._resolve_document_fact_field(
document,
keys={"nights", "night_count", "room_nights"},
labels={"间夜", "晚数", "入住天数"},
)
if nights:
return f"{nights}"
return ""
def _resolve_document_item_amount(self, document: dict[str, Any]) -> Decimal | None:
    """Delegate to the module-level ``resolve_document_item_amount`` helper."""
    resolved = resolve_document_item_amount(document)
    return resolved
def _resolve_document_field_amount(self, document: dict[str, Any]) -> Decimal | None:
    """Delegate to the module-level ``resolve_document_field_amount`` helper."""
    resolved = resolve_document_field_amount(document)
    return resolved
def _resolve_document_text_amount(self, text: str) -> Decimal | None:
    """Delegate to the module-level ``resolve_document_text_amount`` helper."""
    resolved = resolve_document_text_amount(text)
    return resolved
def _parse_document_amount_value(self, value: str) -> Decimal | None:
    """Delegate to the module-level ``parse_document_amount_value`` helper."""
    parsed = parse_document_amount_value(value)
    return parsed
@staticmethod
def _parse_plain_document_amount_value(value: str) -> Decimal | None:
    """Delegate to the module-level ``parse_plain_document_amount_value`` helper."""
    parsed = parse_plain_document_amount_value(value)
    return parsed
@staticmethod
def _is_probable_year_amount(amount: Decimal | None) -> bool:
    """Delegate to the module-level ``is_probable_year_amount`` helper."""
    verdict = is_probable_year_amount(amount)
    return verdict
@classmethod
def _is_date_like_amount_candidate(cls, amount: Decimal | None, text: str) -> bool:
    """Delegate to the module-level ``is_date_like_amount_candidate`` helper.

    ``cls`` is not used; the classmethod form is kept for existing callers.
    """
    verdict = is_date_like_amount_candidate(amount, text)
    return verdict
@staticmethod
def _format_decimal_amount(amount: Decimal | None) -> str:
    """Delegate to the module-level ``format_decimal_amount`` helper."""
    rendered = format_decimal_amount(amount)
    return rendered
def _resolve_document_item_date(self, document: dict[str, Any], *, fallback: date) -> date:
    """Return the date resolved from the document, or ``fallback`` if none."""
    candidate = self._resolve_document_item_date_candidate(document)
    if candidate:
        return candidate
    return fallback
def _resolve_document_item_date_candidate(self, document: dict[str, Any]) -> date | None:
# Resolve the date an expense item should carry from a recognized document,
# or None when no date can be parsed.
# NOTE(review): indentation is not visible in this view. Depending on the
# real nesting, the statements after the bare `return None` below are either
# unreachable or form the lookup used for documents whose type is NOT in
# DOCUMENT_TRIP_DATE_LABELS - confirm against the repository file.
document_type = str(document.get("document_type") or "").strip().lower()
if document_type in DOCUMENT_TRIP_DATE_LABELS:
# First preference: a field recognized as a trip date.
parsed = self._resolve_document_date_from_fields(
document,
keys=DOCUMENT_TRIP_DATE_KEYS,
labels=DOCUMENT_TRIP_DATE_LABEL_TOKENS,
)
if parsed is not None:
return parsed
# Next: a generic date field, skipping labels listed as invoice-date tokens.
parsed = self._resolve_document_date_from_fields(
document,
keys=DOCUMENT_GENERIC_DATE_KEYS,
labels=DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
excluded_labels=DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
)
if parsed is not None:
return parsed
# Next: the first date found in the summary and text joined together.
parsed = self._parse_document_date(
" ".join(
[
str(document.get("summary") or "").strip(),
str(document.get("text") or "").strip(),
]
).strip()
)
if parsed is not None:
return parsed
return None
# Field scan that, unlike the lookups above, also accepts invoice-date keys
# and labels ("invoicedate", "开票日期").
for field in list(document.get("document_fields") or []):
if not isinstance(field, dict):
continue
key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if key in {"date", "time", "issuedat", "issuedate", "invoicedate"} or any(
token in label for token in ("日期", "时间", "开票日期", "发生时间")
):
parsed = self._parse_document_date(value)
if parsed is not None:
return parsed
# Last resort: parse a date out of the summary and text.
parsed = self._parse_document_date(
" ".join(
[
str(document.get("summary") or "").strip(),
str(document.get("text") or "").strip(),
]
).strip()
)
return parsed
def _resolve_document_date_from_fields(
self,
document: dict[str, Any],
*,
keys: set[str],
labels: tuple[str, ...],
excluded_labels: tuple[str, ...] = (),
) -> date | None:
for field in list(document.get("document_fields") or []):
if not isinstance(field, dict):
continue
key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
if excluded_labels and any(token in label for token in excluded_labels):
continue
if key not in keys and not any(token in label for token in labels):
continue
parsed = self._parse_document_date(str(field.get("value") or ""))
if parsed is not None:
return parsed
return None
@staticmethod
def _parse_document_date(value: str) -> date | None:
    """Parse the first date found in ``value``.

    The text is searched with ``DOCUMENT_DATE_PATTERN`` and group 1 is taken
    as the raw date. Chinese date markers and ``/`` or ``.`` separators are
    mapped to ``-`` before the year, month and day are read.

    Returns ``None`` when nothing matches, when the match does not split into
    exactly three parts, or when the parts do not form a valid calendar date.
    """
    match = DOCUMENT_DATE_PATTERN.search(str(value or ""))
    if not match:
        return None
    raw_value = str(match.group(1) or "").strip()
    # The search strings must be non-empty: str.replace("", "-") inserts "-"
    # between every character, which broke the three-part split below.
    normalized = raw_value.replace("年", "-").replace("月", "-").replace("日", "")
    normalized = normalized.replace("/", "-").replace(".", "-")
    parts = [part for part in normalized.split("-") if part]
    if len(parts) != 3:
        return None
    try:
        return date(int(parts[0]), int(parts[1]), int(parts[2]))
    except ValueError:
        # Non-numeric parts or an impossible date such as month 13.
        return None

View File

@@ -0,0 +1,612 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimDraftFlowMixin:
def upsert_draft_from_ontology(
self,
*,
run_id: str,
user_id: str | None,
message: str,
ontology: OntologyParseResult,
context_json: dict[str, Any],
) -> dict[str, Any]:
# Create or update a draft expense claim from a parsed user message.
# Visible flow: resolve inputs from context_json/ontology -> possibly ask the
# caller to choose an association target -> enforce the per-user draft limit
# for new claims -> build item specs -> persist -> audit-log -> return a
# summary dict. The returned keys differ per outcome (see each return site).
# Exceptions raised while persisting are re-raised after a rollback; an
# IntegrityError identified as a claim_no conflict is retried by calling this
# method again, up to MAX_CLAIM_NO_RETRY_ATTEMPTS.
# NOTE(review): indentation is not visible in this view, so the block nesting
# described in the comments below is inferred - confirm against the repository.
self._ensure_ready()
# Work on a copy so the caller's dict is not mutated.
context_json = dict(context_json or {})
retry_count = self._resolve_claim_no_retry_count(context_json)
review_action = str(context_json.get("review_action") or "").strip()
attachment_names = self._resolve_attachment_names(context_json)
context_documents = self._resolve_context_documents(context_json)
employee = self._resolve_employee(
ontology=ontology,
context_json=context_json,
user_id=user_id,
)
draft_owner_name = (
employee.name
if employee is not None
else self._resolve_employee_name(
ontology=ontology,
context_json=context_json,
user_id=user_id,
)
)
association_candidate = self._find_association_candidate(
ontology=ontology,
context_json=context_json,
user_id=user_id,
employee=employee,
)
# Early exit: nothing is persisted; the caller is asked to pick between
# linking to the candidate draft and creating a separate claim.
if self._should_defer_multi_document_association(
context_json=context_json,
review_action=review_action,
association_candidate=association_candidate,
context_documents=context_documents,
):
document_count = max(len(context_documents), len(attachment_names), self._resolve_attachment_count(context_json))
return {
"message": (
f"检测到你已有草稿 {association_candidate.claim_no}"
f"当前新上传了 {document_count} 张票据,请先选择关联到现有草稿,或单独建立新的报销单。"
),
"draft_only": False,
"status": "pending_association_decision",
"pending_association_decision": True,
"association_candidate_claim_id": association_candidate.id,
"association_candidate_claim_no": association_candidate.claim_no,
}
claim = self._find_target_claim(
ontology=ontology,
context_json=context_json,
review_action=review_action,
association_candidate=association_candidate,
)
is_new_claim = claim is None
# Snapshot of the pre-change state, passed to the audit log below.
before_json = self._serialize_claim(claim) if claim is not None else None
# Early exit: new drafts are refused once the owner's draft count reaches
# MAX_DRAFT_CLAIMS_PER_USER.
if is_new_claim:
existing_draft_count = self._count_draft_claims_for_owner(
employee=employee,
user_id=user_id,
)
if existing_draft_count >= MAX_DRAFT_CLAIMS_PER_USER:
return {
"message": (
f"你当前已保存 {MAX_DRAFT_CLAIMS_PER_USER} 个草稿,请先完成已保存的草稿,"
"才能再次新建草稿。"
),
"draft_limit_reached": True,
"draft_only": False,
"status": "blocked",
"draft_count": existing_draft_count,
"max_draft_count": MAX_DRAFT_CLAIMS_PER_USER,
}
amount = self._resolve_amount(ontology.entities, context_json=context_json)
occurred_at = self._resolve_occurred_at(ontology, context_json=context_json)
explicit_expense_type = self._resolve_explicit_review_expense_type(context_json)
inferred_expense_type = self._resolve_expense_type(ontology.entities, context_json=context_json)
# A "locked" expense type comes from an explicit review choice, or from the
# existing claim when documents are being associated; it overrides inference
# and is re-applied after the items have been synced.
locked_expense_type = explicit_expense_type
if not locked_expense_type and claim is not None and review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS:
locked_expense_type = str(claim.expense_type or "").strip()
expense_type = locked_expense_type or inferred_expense_type
location = self._resolve_location(message=message, context_json=context_json)
reason = self._resolve_reason(
message=message,
context_json=context_json,
allow_message_fallback=is_new_claim,
)
attachment_count = len(attachment_names) or self._resolve_attachment_count(context_json)
# Each final_* value prefers the freshly resolved value, then the existing
# claim's value, then a default.
final_amount = amount if amount is not None else (claim.amount if claim is not None else Decimal("0.00"))
final_occurred_at = (
occurred_at if occurred_at is not None else (claim.occurred_at if claim is not None else datetime.now(UTC))
)
final_expense_type = expense_type or (claim.expense_type if claim is not None else "other")
final_location = location or (claim.location if claim is not None else "待补充")
final_reason = reason or (claim.reason if claim is not None else "待补充")
final_attachment_count = (
attachment_count if attachment_count > 0 else int(claim.invoice_count or 0) if claim is not None else 0
)
final_risk_flags = self._merge_persistent_claim_risk_flags(
existing_flags=list(claim.risk_flags_json or []) if claim is not None else [],
next_flags=list(ontology.risk_flags),
)
if context_documents or attachment_names:
document_specs = self._build_context_item_specs(
context_documents=context_documents,
attachment_names=attachment_names,
occurred_at=final_occurred_at,
expense_type=final_expense_type,
amount=final_amount,
reason=final_reason,
location=final_location,
context_json=context_json,
employee_grade=str(employee.grade or "").strip() if employee is not None else "",
user_id=user_id,
)
else:
document_specs = []
# Early exit: when linking to an existing draft, a detected duplicate
# attachment returns the "blocked" result without persisting anything.
if claim is not None and review_action == "link_to_existing_draft" and document_specs:
duplicate_result = self._build_duplicate_attachment_block_result(
claim=claim,
document_specs=document_specs,
context_documents=context_documents,
)
if duplicate_result is not None:
return duplicate_result
try:
if claim is None:
claim = ExpenseClaim(
claim_no=self._generate_claim_no(final_occurred_at),
employee_id=employee.id if employee is not None else None,
employee_name=draft_owner_name,
department_id=employee.organization_unit_id if employee is not None else None,
department_name=self._resolve_department_name(
employee=employee,
context_json=context_json,
),
project_code=self._resolve_project_code(ontology.entities),
expense_type=final_expense_type,
reason=final_reason,
location=final_location,
amount=final_amount,
currency="CNY",
invoice_count=final_attachment_count,
occurred_at=final_occurred_at,
status="draft",
approval_stage="待提交",
risk_flags_json=final_risk_flags,
)
self.db.add(claim)
else:
# Existing claim: identity fields keep their current value when no
# employee record was resolved.
claim.employee_id = employee.id if employee is not None else claim.employee_id
claim.employee_name = (
employee.name
if employee is not None
else self._resolve_employee_name(
ontology=ontology,
context_json=context_json,
user_id=user_id,
fallback=claim.employee_name,
)
)
claim.department_id = employee.organization_unit_id if employee is not None else claim.department_id
claim.department_name = self._resolve_department_name(
employee=employee,
context_json=context_json,
fallback=claim.department_name,
)
claim.project_code = self._resolve_project_code(ontology.entities) or claim.project_code
claim.expense_type = final_expense_type
claim.reason = final_reason
claim.location = final_location
claim.amount = final_amount
claim.invoice_count = final_attachment_count
claim.occurred_at = final_occurred_at
claim.status = "draft"
claim.approval_stage = "待提交"
claim.risk_flags_json = final_risk_flags
self.db.flush()
# Item handling: document specs are appended when linking to a draft that
# already has items, otherwise they replace the claim's items; without
# applicable specs a single primary item is upserted instead.
if document_specs and (is_new_claim or review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS):
if review_action == "link_to_existing_draft" and claim.items:
self._append_document_items(
claim=claim,
item_specs=document_specs,
)
else:
self._replace_claim_items(
claim=claim,
item_specs=document_specs,
)
self._sync_claim_from_items(claim)
else:
self._upsert_primary_item(
claim=claim,
occurred_at=final_occurred_at,
expense_type=final_expense_type,
amount=final_amount,
reason=final_reason,
location=final_location,
attachment_names=attachment_names,
)
self._sync_claim_from_items(claim)
if locked_expense_type:
claim.expense_type = locked_expense_type
self.db.commit()
self.db.refresh(claim)
except IntegrityError as exc:
self.db.rollback()
# Only a claim_no collision on a brand-new claim is retried; the retry
# counter travels in a copy of the context under "_claim_no_retry_count".
if (
is_new_claim
and retry_count < MAX_CLAIM_NO_RETRY_ATTEMPTS
and self._is_claim_no_conflict_error(exc)
):
retry_context = dict(context_json)
retry_context["_claim_no_retry_count"] = retry_count + 1
return self.upsert_draft_from_ontology(
run_id=run_id,
user_id=user_id,
message=message,
ontology=ontology,
context_json=retry_context,
)
raise
except Exception:
self.db.rollback()
raise
self.audit_service.log_action(
actor=user_id or claim.employee_name or "anonymous",
action="expense_claim.draft_upsert",
resource_type="expense_claim",
resource_id=claim.id,
before_json=before_json,
after_json=self._serialize_claim(claim),
request_id=run_id,
)
return {
"message": (
f"{'创建' if is_new_claim else '更新'}报销草稿 {claim.claim_no},当前状态为 draft。"
"请核对识别结果,确认无误后继续提交。"
),
"draft_only": True,
"claim_id": claim.id,
"claim_no": claim.claim_no,
"status": claim.status,
"amount": float(claim.amount),
"invoice_count": int(claim.invoice_count or 0),
}
def _find_target_claim(
self,
*,
ontology: OntologyParseResult,
context_json: dict[str, Any],
review_action: str = "",
association_candidate: ExpenseClaim | None = None,
) -> ExpenseClaim | None:
if review_action == "create_new_claim_from_documents":
return None
if review_action == "link_to_existing_draft" and association_candidate is not None:
return association_candidate
draft_claim_id = str(context_json.get("draft_claim_id") or "").strip()
if draft_claim_id:
claim = self.db.get(ExpenseClaim, draft_claim_id)
if claim is not None and self._is_editable_claim_status(claim.status):
return claim
return None
claim_codes = [
item.normalized_value
for item in ontology.entities
if item.type == "expense_claim" and item.normalized_value
]
if not claim_codes:
return None
stmt = (
select(ExpenseClaim)
.where(ExpenseClaim.claim_no.in_(claim_codes))
.where(ExpenseClaim.status.in_(EDITABLE_CLAIM_STATUSES))
.limit(1)
)
return self.db.scalar(stmt)
def _find_association_candidate(
    self,
    *,
    ontology: OntologyParseResult,
    context_json: dict[str, Any],
    user_id: str | None,
    employee: Employee | None,
) -> ExpenseClaim | None:
    """Find an editable claim that new documents could be attached to.

    An editable claim named by ``draft_claim_id`` in the context wins.
    Otherwise the most recently updated editable claim owned by the
    requester is returned, where ownership comes from
    ``_build_draft_owner_filters`` or, failing that, an exact match on the
    resolved employee name. Returns ``None`` when no owner can be determined
    or no claim matches.
    """
    requested_id = str(context_json.get("draft_claim_id") or "").strip()
    if requested_id:
        requested = self.db.get(ExpenseClaim, requested_id)
        if requested is not None and self._is_editable_claim_status(requested.status):
            return requested

    ownership = self._build_draft_owner_filters(employee=employee, user_id=user_id)
    if not ownership:
        owner_name = self._resolve_employee_name(
            ontology=ontology,
            context_json=context_json,
            user_id=user_id,
            fallback="",
        )
        if not owner_name:
            return None
        ownership = [ExpenseClaim.employee_name == owner_name]

    query = select(ExpenseClaim).where(ExpenseClaim.status.in_(EDITABLE_CLAIM_STATUSES))
    query = query.where(or_(*ownership))
    query = query.order_by(ExpenseClaim.updated_at.desc(), ExpenseClaim.created_at.desc())
    return self.db.scalar(query.limit(1))
def _should_defer_multi_document_association(
self,
*,
context_json: dict[str, Any],
review_action: str,
association_candidate: ExpenseClaim | None,
context_documents: list[dict[str, Any]],
) -> bool:
if association_candidate is None:
return False
if review_action in DOCUMENT_ASSOCIATION_REVIEW_ACTIONS:
return False
document_count = max(
len(context_documents),
len(self._resolve_attachment_names(context_json)),
self._resolve_attachment_count(context_json),
)
return document_count > 1
def _replace_claim_items(
    self,
    *,
    claim: ExpenseClaim,
    item_specs: list[dict[str, Any]],
) -> None:
    """Make the claim's items mirror ``item_specs``, reusing existing rows.

    Existing items are ordered by item date (missing dates last) and then by
    creation time. The n-th spec overwrites the n-th existing item, new items
    are created when the specs outnumber them, and surplus existing items
    are removed from the claim and deleted from the session.
    """

    def _ordering(entry: ExpenseClaimItem):
        return (entry.item_date or date.max, self._normalize_sort_datetime(entry.created_at))

    ordered_items = list(claim.items)
    ordered_items.sort(key=_ordering)
    reusable = len(ordered_items)

    for position, spec in enumerate(item_specs):
        if position < reusable:
            target = ordered_items[position]
        else:
            target = ExpenseClaimItem(claim_id=claim.id)
            claim.items.append(target)
            self.db.add(target)
        target.item_date = spec["item_date"]
        target.item_type = spec["item_type"]
        target.item_reason = spec["item_reason"]
        target.item_location = spec["item_location"]
        target.item_amount = spec["item_amount"]
        # System-generated items never carry an attachment reference.
        if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES:
            target.invoice_id = None
        else:
            target.invoice_id = self._attachment_presentation.merge_reference(
                target.invoice_id, spec["invoice_id"]
            )

    for leftover in ordered_items[len(item_specs) :]:
        claim.items.remove(leftover)
        self.db.delete(leftover)
def _append_document_items(
    self,
    *,
    claim: ExpenseClaim,
    item_specs: list[dict[str, Any]],
) -> None:
    """Add newly uploaded document items to a claim that already has items.

    Non-system specs are appended unless their invoice id, or its display
    name, is already present on the claim or was added earlier in this call.
    When any system-generated specs are supplied, the claim's existing
    system-generated items are removed and replaced by them.
    """

    def _is_system(kind: Any) -> bool:
        return str(kind or "").strip() in SYSTEM_GENERATED_ITEM_TYPES

    def _attach(spec: dict[str, Any]) -> None:
        claim.items.append(
            ExpenseClaimItem(
                claim_id=claim.id,
                item_date=spec["item_date"],
                item_type=spec["item_type"],
                item_reason=spec["item_reason"],
                item_location=spec["item_location"],
                item_amount=spec["item_amount"],
                invoice_id=spec["invoice_id"],
            )
        )
        self.db.add(claim.items[-1])

    generated_specs = [spec for spec in item_specs if _is_system(spec.get("item_type"))]
    uploaded_specs = [spec for spec in item_specs if not _is_system(spec.get("item_type"))]

    known_ids = {
        str(entry.invoice_id or "").strip()
        for entry in claim.items
        if str(entry.invoice_id or "").strip()
    }
    known_names = {
        self._attachment_presentation.resolve_display_name(entry.invoice_id)
        for entry in claim.items
        if str(entry.invoice_id or "").strip()
    }

    for spec in uploaded_specs:
        incoming_id = str(spec.get("invoice_id") or "").strip()
        incoming_name = self._attachment_presentation.resolve_display_name(incoming_id)
        already_present = incoming_id in known_ids or incoming_name in known_names
        if incoming_id and already_present:
            continue
        _attach(spec)
        if incoming_id:
            known_ids.add(incoming_id)
            known_names.add(incoming_name)

    if not generated_specs:
        return
    outdated = [entry for entry in list(claim.items) if _is_system(entry.item_type)]
    for entry in outdated:
        claim.items.remove(entry)
        self.db.delete(entry)
    for spec in generated_specs:
        _attach(spec)
def _build_duplicate_attachment_block_result(
    self,
    *,
    claim: ExpenseClaim,
    document_specs: list[dict[str, Any]],
    context_documents: list[dict[str, Any]],
) -> dict[str, Any] | None:
    """Build the "blocked" response for duplicate uploads, if there are any.

    Returns ``None`` when ``_find_duplicate_attachment_matches`` reports no
    duplicates. Otherwise returns a result dict with status ``"blocked"``
    whose message names up to three distinct duplicate labels.
    """
    duplicate_matches = self._find_duplicate_attachment_matches(
        claim=claim,
        document_specs=document_specs,
        context_documents=context_documents,
    )
    if not duplicate_matches:
        return None

    # Distinct, non-empty labels in first-seen order.
    duplicate_labels: list[str] = []
    for entry in duplicate_matches:
        label = str(entry.get("incoming_label") or entry.get("existing_label") or "").strip()
        if label and label not in duplicate_labels:
            duplicate_labels.append(label)

    duplicate_text = "".join(duplicate_labels[:3]) or "本次上传票据"
    reason = (
        f"检测到本次上传的票据与草稿 {claim.claim_no} 中已有票据重复:{duplicate_text}"
        "请重新上传不同的票据后再归集。"
    )
    result: dict[str, Any] = {
        "message": reason,
        "draft_only": False,
        "status": "blocked",
        "duplicate_attachment_blocked": True,
        "duplicate_invoice_blocked": True,
        "submission_blocked": True,
        "submission_blocked_reasons": [reason],
        "missing_fields": [reason],
        "risk_flags": ["duplicate_invoice"],
        "duplicate_attachments": duplicate_matches,
    }
    result["claim_id"] = claim.id
    result["claim_no"] = claim.claim_no
    result["amount"] = float(claim.amount or Decimal("0.00"))
    result["invoice_count"] = int(claim.invoice_count or 0)
    return result

View File

@@ -0,0 +1,343 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimDraftPersistenceMixin:
def _find_duplicate_attachment_matches(
self,
*,
claim: ExpenseClaim,
document_specs: list[dict[str, Any]],
context_documents: list[dict[str, Any]],
) -> list[dict[str, str]]:
existing_tokens: dict[str, dict[str, str]] = {}
for item in list(claim.items or []):
if str(item.item_type or "").strip() in SYSTEM_GENERATED_ITEM_TYPES:
continue
invoice_id = str(item.invoice_id or "").strip()
if not invoice_id:
continue
display_name = self._attachment_presentation.resolve_display_name(invoice_id)
for token in self._build_duplicate_attachment_tokens(invoice_id):
existing_tokens.setdefault(
token,
{
"existing_label": display_name or invoice_id,
"existing_item_id": str(item.id or ""),
"match_type": "filename",
},
)
file_path = self._attachment_storage.resolve_item_path(item)
if file_path is not None and file_path.exists():
metadata = self._attachment_storage.read_meta(file_path)
document_info = metadata.get("document_info")
if isinstance(document_info, dict):
for invoice_key in self._collect_invoice_keys_from_document_info(document_info):
token = self._normalize_duplicate_attachment_token(invoice_key)
if token:
existing_tokens.setdefault(
token,
{
"existing_label": display_name or invoice_id,
"existing_item_id": str(item.id or ""),
"match_type": "invoice_key",
},
)
if not existing_tokens:
return []
document_by_filename = {
str(document.get("filename") or "").strip(): document
for document in context_documents
if isinstance(document, dict) and str(document.get("filename") or "").strip()
}
matches: list[dict[str, str]] = []
seen_tokens: set[str] = set()
for spec in document_specs:
if str(spec.get("item_type") or "").strip() in SYSTEM_GENERATED_ITEM_TYPES:
continue
invoice_id = str(spec.get("invoice_id") or "").strip()
if not invoice_id:
continue
incoming_tokens = self._build_duplicate_attachment_tokens(invoice_id)
document = document_by_filename.get(invoice_id)
if document is not None:
incoming_tokens.extend(
self._normalize_duplicate_attachment_token(invoice_key)
for invoice_key in self._collect_invoice_keys_from_incoming_document(document)
)
for token in incoming_tokens:
if not token or token in seen_tokens or token not in existing_tokens:
continue
seen_tokens.add(token)
existing = existing_tokens[token]
matches.append(
{
"incoming_label": self._attachment_presentation.resolve_display_name(invoice_id) or invoice_id,
"existing_label": existing.get("existing_label", ""),
"existing_item_id": existing.get("existing_item_id", ""),
"match_type": existing.get("match_type", "filename"),
}
)
return matches
@classmethod
def _build_duplicate_attachment_tokens(cls, value: str | None) -> list[str]:
    """Return the distinct comparison tokens for an attachment reference.

    Tokens are the normalized forms of the stripped reference and of its
    display name, in that order, with empty tokens and repeats dropped.
    """
    reference = str(value or "").strip()
    shown_name = ExpenseClaimAttachmentPresentation.resolve_display_name(reference)
    tokens: list[str] = []
    for candidate in (reference, shown_name):
        token = cls._normalize_duplicate_attachment_token(candidate)
        if token and token not in tokens:
            tokens.append(token)
    return tokens
@staticmethod
def _normalize_duplicate_attachment_token(value: str | None) -> str:
normalized = Path(str(value or "").strip()).name.lower()
normalized = re.sub(r"\s+", "", normalized)
normalized = re.sub(r"[^\w.\-\u4e00-\u9fff]+", "_", normalized).strip("._")
return normalized
def _upsert_primary_item(
self,
*,
claim: ExpenseClaim,
occurred_at: datetime,
expense_type: str,
amount: Decimal,
reason: str,
location: str,
attachment_names: list[str],
) -> None:
item = claim.items[0] if claim.items else None
if item is None:
item = ExpenseClaimItem(
claim_id=claim.id,
item_date=occurred_at.date(),
item_type=expense_type,
item_reason=reason,
item_location=location,
item_amount=amount,
invoice_id=attachment_names[0] if attachment_names else None,
)
claim.items.append(item)
self.db.add(item)
return
item.item_date = occurred_at.date()
item.item_type = expense_type
item.item_reason = reason
item.item_location = location
item.item_amount = amount
item.invoice_id = (
self._attachment_presentation.merge_reference(item.invoice_id, attachment_names[0])
if attachment_names
else item.invoice_id
)
def _generate_claim_no(self, occurred_at: datetime) -> str:
    """Return the next ``EXP-YYYYMM-NNN`` claim number for the given month.

    Loads the claim numbers that share the month prefix, takes the largest
    purely numeric suffix (0 when there is none) and returns that value plus
    one, zero-padded to at least three digits.
    """
    month_code = occurred_at.strftime("%Y%m")
    prefix = f"EXP-{month_code}-"
    query = select(ExpenseClaim.claim_no).where(ExpenseClaim.claim_no.like(f"{prefix}%"))
    highest = 0
    for stored in self.db.scalars(query):
        text = str(stored or "").strip()
        if text.startswith(prefix):
            tail = text[len(prefix):]
            if tail.isdigit():
                highest = max(highest, int(tail))
    return f"{prefix}{highest + 1:03d}"
@staticmethod
def _resolve_claim_no_retry_count(context_json: dict[str, Any]) -> int:
try:
return max(0, int(context_json.get("_claim_no_retry_count") or 0))
except (TypeError, ValueError):
return 0
@staticmethod
def _is_claim_no_conflict_error(exc: IntegrityError) -> bool:
message = str(exc).lower()
return (
"claim_no" in message
and (
"unique" in message
or "duplicate key" in message
or "ix_expense_claims_claim_no" in message
or "expense_claims.claim_no" in message
)
)
def _count_draft_claims_for_owner(
self,
*,
employee: Employee | None,
user_id: str | None,
) -> int:
owner_filters = self._build_draft_owner_filters(
employee=employee,
user_id=user_id,
)
if not owner_filters:
return 0
stmt = (
select(func.count())
.select_from(ExpenseClaim)
.where(ExpenseClaim.status == "draft")
.where(or_(*owner_filters))
)
return int(self.db.scalar(stmt) or 0)
def _build_draft_owner_filters(
self,
*,
employee: Employee | None,
user_id: str | None,
) -> list[Any]:
conditions: list[Any] = []
seen: set[tuple[str, str]] = set()
def add_condition(field_name: str, value: str | None) -> None:
normalized = str(value or "").strip()
if not normalized or normalized == "待补充":
return
marker = (field_name, normalized.lower())
if marker in seen:
return
seen.add(marker)
if field_name == "employee_id":
conditions.append(ExpenseClaim.employee_id == normalized)
return
conditions.append(ExpenseClaim.employee_name == normalized)
if employee is not None:
add_condition("employee_id", employee.id)
add_condition("employee_name", employee.email)
if self._access_policy.employee_name_is_unique(employee):
add_condition("employee_name", employee.name)
add_condition("employee_name", user_id)
return conditions

View File

@@ -0,0 +1,7 @@
from __future__ import annotations
class ExpenseClaimSubmissionBlockedError(ValueError):
    """Raised when a claim cannot be submitted because of outstanding issues.

    ``issues`` holds the stripped, non-empty issue texts; the exception
    message is the fixed prefix followed by those texts concatenated.
    """

    def __init__(self, issues: list[str]) -> None:
        cleaned: list[str] = []
        for issue in issues:
            text = str(issue or "").strip()
            if text:
                cleaned.append(text)
        self.issues = cleaned
        super().__init__("提交前请先补全信息:" + "".join(cleaned))

View File

@@ -0,0 +1,461 @@
from __future__ import annotations
import re
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal
from types import SimpleNamespace
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.expense_claim_constants import (
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
DOCUMENT_FACT_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_rule_runtime import (
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
)
class ExpenseClaimItemSyncMixin:
def _sync_travel_allowance_item(self, claim: ExpenseClaim) -> None:
    """Keep the system-generated ``travel_allowance`` item of a claim in sync.

    Non-travel claims without travel-detail items lose all allowance items.
    Otherwise the allowance is recalculated from the employee grade, the
    resolved location and the resolved day count, and written to a single
    allowance item (created if missing; extras are discarded). The method
    returns without changes when the grade, location, day count or a
    positive calculated allowance is unavailable, or when the calculation
    raises ``ValueError``.
    """

    def normalized_type(entry: ExpenseClaimItem) -> str:
        return str(entry.item_type or "").strip().lower()

    allowance_items: list[ExpenseClaimItem] = []
    business_items: list[ExpenseClaimItem] = []
    for entry in list(claim.items or []):
        if normalized_type(entry) == "travel_allowance":
            allowance_items.append(entry)
        else:
            business_items.append(entry)

    business_types = {normalized_type(entry) for entry in business_items}
    is_travel_claim = str(claim.expense_type or "").strip().lower() == "travel"
    has_travel_detail = bool(business_types & TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES)
    if not (is_travel_claim or has_travel_detail):
        for stale in allowance_items:
            self._discard_claim_item(claim, stale)
        return

    grade = str(claim.employee_grade or "").strip()
    if not grade:
        return

    allowance_location = self._resolve_travel_allowance_location_from_claim(
        claim=claim,
        business_items=business_items,
    )
    if not allowance_location:
        return

    existing_allowance = allowance_items[0] if allowance_items else None
    days, start_date, end_date = self._resolve_travel_allowance_days_from_claim(
        claim=claim,
        business_items=business_items,
        existing_allowance=existing_allowance,
    )
    if days < 1:
        return

    try:
        # Imported lazily, as in the original implementation.
        from app.services.travel_reimbursement_calculator import (
            TravelReimbursementCalculatorService,
        )

        calculator = TravelReimbursementCalculatorService(self.db)
        result = calculator.calculate(
            TravelReimbursementCalculatorRequest(
                days=days,
                location=allowance_location,
                grade=grade,
            ),
            CurrentUserContext(
                username=str(claim.employee_id or claim.employee_name or "system"),
                name=str(claim.employee_name or ""),
                role_codes=[],
                is_admin=False,
            ),
        )
    except ValueError:
        return

    cent = Decimal("0.01")
    zero = Decimal("0.00")
    allowance_amount = Decimal(result.allowance_amount or Decimal("0.00")).quantize(cent)
    allowance_rate = Decimal(result.total_allowance_rate or Decimal("0.00")).quantize(cent)
    if allowance_amount <= zero or allowance_rate <= zero:
        return

    target = existing_allowance
    if target is None:
        target = ExpenseClaimItem(claim_id=claim.id)
        claim.items.append(target)
        self.db.add(target)
    for duplicate in allowance_items[1:]:
        self._discard_claim_item(claim, duplicate)

    target.item_date = end_date
    target.item_type = "travel_allowance"
    target.item_reason = (
        f"系统自动计算出差补贴:{result.matched_city}{days}天,"
        f"{allowance_rate:.2f}元/天"
    )
    target.item_location = str(result.allowance_region or allowance_location).strip()
    target.item_amount = allowance_amount
    target.invoice_id = None
def _discard_claim_item(self, claim: ExpenseClaim, item: ExpenseClaimItem) -> None:
    """Detach ``item`` from ``claim`` and drop it from the session.

    Persistent items are scheduled for deletion; pending items are expunged;
    items in any other state are only removed from ``claim.items``.
    """
    if item in claim.items:
        claim.items.remove(item)
    state = sqlalchemy_inspect(item)
    if state.persistent:
        self.db.delete(item)
        return
    if state.pending:
        self.db.expunge(item)
def _resolve_travel_allowance_days_from_claim(
self,
*,
claim: ExpenseClaim,
business_items: list[ExpenseClaimItem],
existing_allowance: ExpenseClaimItem | None,
) -> tuple[int, date, date]:
dated_items = sorted(
[item.item_date for item in business_items if item.item_date is not None]
)
if dated_items:
start_date = dated_items[0]
end_date = dated_items[-1]
elif claim.occurred_at is not None:
start_date = claim.occurred_at.date()
end_date = start_date
else:
start_date = date.today()
end_date = start_date
days = (end_date - start_date).days + 1
explicit_days = max(
(self._extract_travel_day_count(item.item_reason) for item in business_items),
default=0,
)
if explicit_days > 0:
days = explicit_days
end_date = start_date + timedelta(days=days - 1)
return max(1, days), start_date, end_date
existing_days = self._extract_travel_allowance_days(existing_allowance)
unique_dates = {value for value in dated_items}
if existing_days > days and len(unique_dates) <= 1:
days = existing_days
end_date = start_date + timedelta(days=days - 1)
return max(1, days), start_date, end_date
@staticmethod
def _extract_travel_allowance_days(item: ExpenseClaimItem | None) -> int:
if item is None:
return 0
match = re.search(r"(\d+)\s*天", str(item.item_reason or ""))
if not match:
return 0
try:
return max(0, int(match.group(1)))
except ValueError:
return 0
def _resolve_travel_allowance_location_from_claim(
self,
*,
claim: ExpenseClaim,
business_items: list[ExpenseClaimItem],
) -> str:
claim_location = str(claim.location or "").strip()
if claim_location and claim_location not in {"待补充", "未知", "暂无", "非必填"}:
return claim_location
sorted_items = sorted(
business_items,
key=lambda item: (item.item_date or date.max, self._normalize_sort_datetime(item.created_at)),
)
for item in sorted_items:
location = str(item.item_location or "").strip()
if location and location not in {"待补充", "未知", "暂无", "非必填"}:
return location
reason = str(item.item_reason or "").strip()
for separator in ("-", "", "", "", "->"):
if separator in reason:
destination = reason.split(separator)[-1].strip()
if destination:
return destination
return ""
def _sync_claim_from_items(self, claim: ExpenseClaim) -> None:
self._sync_travel_allowance_item(claim)
if not claim.items:
claim.amount = Decimal("0.00")
claim.invoice_count = 0
claim.risk_flags_json = self._merge_claim_attachment_risk_flags(claim, [])
return
ordered_items = sorted(
claim.items,
key=lambda item: (
item.item_date or date.max,
self._normalize_sort_datetime(item.created_at),
),
)
primary_item = ordered_items[0]
total_amount = sum((item.item_amount for item in ordered_items), Decimal("0.00"))
claim.amount = total_amount.quantize(Decimal("0.01"))
claim.invoice_count = sum(1 for item in ordered_items if str(item.invoice_id or "").strip())
claim.occurred_at = datetime(
primary_item.item_date.year,
primary_item.item_date.month,
primary_item.item_date.day,
tzinfo=UTC,
)
claim.expense_type = self._resolve_claim_expense_type_from_items(
ordered_items,
fallback=str(primary_item.item_type or claim.expense_type or "other").strip() or "other",
)
primary_item_type = str(primary_item.item_type or "").strip()
if primary_item_type not in DOCUMENT_FACT_ITEM_TYPES:
claim.reason = (
self._normalize_optional_text(primary_item.item_reason, fallback=claim.reason or "待补充")
or "待补充"
)
claim.location = (
self._normalize_optional_text(primary_item.item_location, fallback=claim.location or "待补充")
or "待补充"
)
claim.risk_flags_json = self._merge_claim_attachment_risk_flags(
claim,
self._build_claim_attachment_risk_flags(ordered_items),
)
if str(claim.status or "").strip().lower() == "draft":
claim.approval_stage = "待提交"
@staticmethod
def _resolve_claim_expense_type_from_items(
    items: list[ExpenseClaimItem],
    *,
    fallback: str,
) -> str:
    """Return ``"travel"`` when any item is travel-related, else the fallback.

    An item is travel-related when its normalized type is one of the
    allowance trigger types or ``travel_allowance``. A blank fallback becomes
    ``"other"``.
    """
    travel_markers = TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES | {"travel_allowance"}
    for entry in items:
        if str(entry.item_type or "").strip().lower() in travel_markers:
            return "travel"
    return str(fallback or "").strip() or "other"
def _refresh_item_attachment_analysis(self, item: ExpenseClaimItem) -> None:
file_path = self._attachment_storage.resolve_path(item.invoice_id)
if file_path is None or not file_path.exists():
return
metadata = self._attachment_storage.read_meta(file_path)
media_type = str(metadata.get("media_type") or self._attachment_presentation.resolve_media_type(file_path.name)).strip()
ocr_status = str(metadata.get("ocr_status") or "").strip().lower()
if ocr_status == "failed":
analysis = self._build_failed_ocr_attachment_analysis(
media_type=media_type,
error_message=str(metadata.get("ocr_error") or ""),
item=item,
)
elif ocr_status == "recognized" or any(
(
str(metadata.get("ocr_text") or "").strip(),
str(metadata.get("ocr_summary") or "").strip(),
int(metadata.get("ocr_line_count") or 0),
list(metadata.get("ocr_warnings") or []),
)
):
stored_document_info = metadata.get("document_info")
if not isinstance(stored_document_info, dict):
stored_document_info = {}
document = SimpleNamespace(
filename=str(metadata.get("file_name") or file_path.name),
text=str(metadata.get("ocr_text") or ""),
summary=str(metadata.get("ocr_summary") or ""),
avg_score=float(metadata.get("ocr_avg_score") or 0.0),
line_count=int(metadata.get("ocr_line_count") or 0),
document_type=str(stored_document_info.get("document_type") or ""),
document_type_label=str(stored_document_info.get("document_type_label") or ""),
scene_code=str(stored_document_info.get("scene_code") or ""),
scene_label=str(stored_document_info.get("scene_label") or ""),
document_fields=list(stored_document_info.get("fields") or []),
warnings=[str(value) for value in list(metadata.get("ocr_warnings") or []) if str(value).strip()],
)
document_info = self._build_attachment_document_info(document)
requirement_check = self._build_attachment_requirement_check(
item=item,
document_info=document_info,
)
analysis = self._build_attachment_analysis(
document=document,
item=item,
claim=getattr(item, "claim", None),
document_info=document_info,
requirement_check=requirement_check,
)
metadata["document_info"] = document_info
metadata["requirement_check"] = requirement_check
else:
analysis = self._build_fallback_attachment_analysis(media_type=media_type, item=item)
metadata["analysis"] = analysis
self._attachment_storage.write_meta(file_path, metadata)
def _build_claim_attachment_risk_flags(
self, ordered_items: list[ExpenseClaimItem]
) -> list[dict[str, Any]]:
derived_flags: list[dict[str, Any]] = []
for index, item in enumerate(ordered_items, start=1):
file_path = self._attachment_storage.resolve_path(item.invoice_id)
if file_path is None or not file_path.exists():
continue
metadata = self._attachment_storage.read_meta(file_path)
analysis = metadata.get("analysis")
if not isinstance(analysis, dict):
continue
severity = str(analysis.get("severity") or "").strip().lower()
if severity in {"", "pass", "low"}:
continue
summary = (
str(analysis.get("summary") or analysis.get("headline") or "").strip()
or "附件存在待核对风险。"
)
points = [
str(point or "").strip()
for point in list(analysis.get("points") or [])
if str(point or "").strip()
]
message_detail = "".join(points[:3]) if points else summary
label = str(
analysis.get("label") or ("高风险" if severity == "high" else "中风险")
).strip()
derived_flags.append(
{
"source": "attachment_analysis",
"item_id": item.id,
"severity": severity,
"label": label,
"message": f"费用明细第 {index} 条:{message_detail}",
"summary": summary,
"points": points,
}
)
return derived_flags
def _get_expense_rule_catalog(self) -> Any:
cached = getattr(self, "_expense_rule_catalog", None)
if cached is not None:
return cached
db = getattr(self, "db", None)
if db is None:
catalog = build_default_expense_rule_catalog()
else:
catalog = ExpenseRuleRuntimeService(db).load_catalog()
setattr(self, "_expense_rule_catalog", catalog)
return catalog
def _get_expense_scene_policy(self, expense_type: str | None) -> Any | None:
return self._get_expense_rule_catalog().get_scene_policy(expense_type)
def _resolve_min_attachment_count(self, expense_type: str | None) -> int:
policy = self._get_expense_scene_policy(expense_type)
if policy is None:
return 1
return max(0, int(policy.min_attachment_count or 0))
def _build_scene_reason_corpus(self, claim: ExpenseClaim) -> str:
parts = [str(claim.reason or "").strip(), str(claim.location or "").strip()]
for item in claim.items:
parts.append(str(item.item_reason or "").strip())
parts.append(str(item.item_location or "").strip())
return "\n".join(part for part in parts if part)
@staticmethod
def _merge_claim_attachment_risk_flags(
claim: ExpenseClaim,
attachment_risk_flags: list[dict[str, Any]],
) -> list[Any]:
preserved_flags = [
flag
for flag in list(claim.risk_flags_json or [])
if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "attachment_analysis")
]
return preserved_flags + attachment_risk_flags
@staticmethod
def _format_submission_blocked_message(issues: list[str]) -> str:
normalized_issues = [str(issue or "").strip() for issue in issues if str(issue or "").strip()]
if not normalized_issues:
return "AI预审未通过但没有返回明确原因请刷新草稿后重试。"
return "AI预审暂未通过原因如下\n" + "\n".join(
f"{index}. {issue}" for index, issue in enumerate(normalized_issues, start=1)
)
def _validate_claim_for_submission(self, claim: ExpenseClaim) -> list[str]:
issues: list[str] = []
claim_location_required = self._is_location_required_expense_type(claim.expense_type)
claim_min_attachment_count = self._resolve_min_attachment_count(claim.expense_type)
if self._is_missing_value(claim.employee_name):
issues.append("申请人未完善")
if self._is_missing_value(claim.department_name):
issues.append("所属部门未完善")
if self._is_missing_value(claim.expense_type):
issues.append("报销类型未完善")
if self._is_missing_value(claim.reason):
issues.append("报销事由未完善")
if claim_location_required and self._is_missing_value(claim.location):
issues.append("业务地点未完善")
if claim.amount is None or claim.amount <= Decimal("0.00"):
issues.append("报销金额未完善")
if claim.occurred_at is None:
issues.append("发生时间未完善")
if int(claim.invoice_count or 0) < claim_min_attachment_count:
issues.append("票据附件数量不足")
if not claim.items:
issues.append("费用明细不能为空")
for index, item in enumerate(claim.items, start=1):
prefix = f"费用明细第 {index}"
is_system_generated = str(item.item_type or "").strip().lower() in SYSTEM_GENERATED_ITEM_TYPES
item_location_required = self._is_location_required_expense_type(item.item_type or claim.expense_type)
if item.item_date is None:
issues.append(f"{prefix}缺少日期")
if self._is_missing_value(item.item_type):
issues.append(f"{prefix}缺少费用项目")
if self._is_missing_value(item.item_reason):
issues.append(f"{prefix}缺少说明")
if item_location_required and self._is_missing_value(item.item_location):
issues.append(f"{prefix}缺少地点")
if item.item_amount is None or item.item_amount <= Decimal("0.00"):
issues.append(f"{prefix}缺少金额")
if not is_system_generated and self._is_missing_value(item.invoice_id):
issues.append(f"{prefix}缺少票据标识")
return issues
def _is_location_required_expense_type(self, expense_type: str | None) -> bool:
policy = self._get_expense_scene_policy(expense_type)
if policy is None:
return str(expense_type or "").strip().lower() in LOCATION_REQUIRED_EXPENSE_TYPES
return bool(policy.location_required)

View File

@@ -0,0 +1,392 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimOntologyResolverMixin:
def _resolve_employee(
self,
*,
ontology: OntologyParseResult,
context_json: dict[str, Any],
user_id: str | None,
) -> Employee | None:
normalized_user_id = str(user_id or "").strip()
if normalized_user_id:
stmt = (
select(Employee)
.options(selectinload(Employee.organization_unit), selectinload(Employee.manager))
.where(func.lower(Employee.email) == normalized_user_id.lower())
.limit(1)
)
employee = self.db.scalar(stmt)
if employee is not None:
return employee
employee_name = self._resolve_employee_name(
ontology=ontology,
context_json=context_json,
user_id=None,
)
if not employee_name:
return None
stmt = (
select(Employee)
.options(selectinload(Employee.organization_unit), selectinload(Employee.manager))
.where(Employee.name == employee_name)
.limit(1)
)
return self.db.scalar(stmt)
@staticmethod
def _resolve_employee_name(
*,
ontology: OntologyParseResult,
context_json: dict[str, Any],
user_id: str | None,
fallback: str = "待补充",
) -> str:
review_form_values = context_json.get("review_form_values")
if isinstance(review_form_values, dict):
for key in ("reporter_name", "employee_name", "claimant_name"):
value = str(review_form_values.get(key) or "").strip()
if value:
return value
for item in ontology.entities:
if item.type == "employee" and item.value.strip():
return item.value.strip()
for key in ("name", "user_name", "employee_name"):
value = str(context_json.get(key) or "").strip()
if value:
return value
return str(user_id or fallback).strip() or fallback
@staticmethod
def _resolve_department_name(
*,
employee: Employee | None,
context_json: dict[str, Any],
fallback: str = "待补充",
) -> str:
if employee is not None and employee.organization_unit is not None:
return employee.organization_unit.name
request_context = context_json.get("request_context")
if isinstance(request_context, dict):
for key in ("department", "department_name", "deptName"):
value = str(request_context.get(key) or "").strip()
if value:
return value
for key in ("department_name", "department"):
value = str(context_json.get(key) or "").strip()
if value:
return value
return fallback
@staticmethod
def _resolve_project_code(entities: list[OntologyEntity]) -> str | None:
for item in entities:
if item.type == "project" and item.normalized_value.strip():
return item.normalized_value.strip()
return None
@staticmethod
def _resolve_explicit_review_expense_type(context_json: dict[str, Any]) -> str | None:
review_form_values = context_json.get("review_form_values")
if isinstance(review_form_values, dict):
compact = str(
review_form_values.get("expense_type")
or review_form_values.get("reimbursement_type")
or ""
).replace(" ", "")
if compact:
if "招待" in compact or ("客户" in compact and any(word in compact for word in ("吃饭", "宴请", "请客", "用餐"))):
return "entertainment"
if any(word in compact for word in ("差旅", "出差", "机票", "行程")):
return "travel"
if any(word in compact for word in ("住宿", "酒店", "宾馆")):
return "hotel"
if any(word in compact for word in ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车")):
return "transport"
if any(word in compact for word in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")):
return "meal"
if "会务" in compact:
return "meeting"
if any(word in compact for word in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")):
return "office"
if any(word in compact for word in ("培训费", "培训", "讲师费", "课时费", "课程费")):
return "training"
if any(word in compact for word in ("通讯费", "话费", "流量费", "宽带费")):
return "communication"
if any(word in compact for word in ("福利费", "团建", "慰问", "节日福利", "体检费")):
return "welfare"
return None
@staticmethod
def _resolve_expense_type(
    entities: list[OntologyEntity],
    *,
    context_json: dict[str, Any],
) -> str | None:
    """Resolve the expense type code for a request.

    The type stated in the review form takes precedence; otherwise the first
    ``expense_type`` entity with a non-blank normalized value is used.
    """
    explicit = ExpenseClaimOntologyResolverMixin._resolve_explicit_review_expense_type(context_json)
    if explicit:
        return explicit
    for entity in entities:
        if entity.type != "expense_type":
            continue
        code = entity.normalized_value.strip()
        if code:
            return code
    return None
@staticmethod
def _resolve_reason(
*,
message: str,
context_json: dict[str, Any],
allow_message_fallback: bool,
) -> str | None:
review_form_values = context_json.get("review_form_values")
if isinstance(review_form_values, dict):
for key in ("reason", "business_reason"):
value = str(review_form_values.get(key) or "").strip()
if value:
return ExpenseClaimOntologyResolverMixin._strip_leading_time_from_reason(value)
explicit_text = context_json.get("user_input_text")
if isinstance(explicit_text, str):
normalized_explicit_text = explicit_text.strip()
if normalized_explicit_text:
return ExpenseClaimOntologyResolverMixin._strip_leading_time_from_reason(normalized_explicit_text)[:500] or None
return None
request_context = context_json.get("request_context")
if (
isinstance(request_context, dict)
and str(context_json.get("entry_source") or "").strip() == "detail"
):
for key in ("reason", "title"):
value = str(request_context.get(key) or "").strip()
if value:
return value
if not allow_message_fallback:
return None
normalized_message = str(message or "").strip()
compact_message = re.sub(r"\s+", "", normalized_message)
if compact_message.startswith(SYSTEM_GENERATED_REASON_PREFIXES):
return None
return ExpenseClaimOntologyResolverMixin._strip_leading_time_from_reason(normalized_message)[:500] or None
@staticmethod
def _strip_leading_time_from_reason(value: str) -> str:
    """Remove a leading time expression from a reason text.

    Patterns from ``LEADING_REASON_TIME_PATTERNS`` are tried in order; the
    result of the first one that changes the text is returned. When none
    does, the stripped input is returned.
    """
    original = str(value or "").strip()
    for pattern in LEADING_REASON_TIME_PATTERNS:
        stripped = pattern.sub("", original).strip()
        if stripped != original:
            return stripped
    return original
@staticmethod
def _resolve_location(*, message: str, context_json: dict[str, Any]) -> str | None:
review_form_values = context_json.get("review_form_values")
if isinstance(review_form_values, dict):
for key in ("business_location", "location"):
value = str(review_form_values.get(key) or "").strip()
if value:
return value
request_context = context_json.get("request_context")
if (
isinstance(request_context, dict)
and str(context_json.get("entry_source") or "").strip() == "detail"
):
for key in ("city", "location"):
value = str(request_context.get(key) or "").strip()
if value:
return value
compact = str(message or "").replace(" ", "")
city_match = re.search(
r"去(?P<city>[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)",
compact,
)
if city_match:
return city_match.group("city").strip()
if "客户现场" in compact:
return "客户现场"
return None
@staticmethod
def _resolve_occurred_at(
ontology: OntologyParseResult,
*,
context_json: dict[str, Any],
) -> datetime | None:
review_form_values = context_json.get("review_form_values")
if isinstance(review_form_values, dict):
for key in ("occurred_date", "time_range", "business_time"):
value = str(review_form_values.get(key) or "").strip()
if not value:
continue
try:
parsed = date.fromisoformat(value)
return datetime(parsed.year, parsed.month, parsed.day, tzinfo=UTC)
except ValueError:
continue
start_date = ontology.time_range.start_date
if start_date:
try:
parsed = date.fromisoformat(start_date)
return datetime(parsed.year, parsed.month, parsed.day, tzinfo=UTC)
except ValueError:
pass
return None
@staticmethod
def _resolve_amount(
entities: list[OntologyEntity],
*,
context_json: dict[str, Any],
) -> Decimal | None:
review_form_values = context_json.get("review_form_values")
if isinstance(review_form_values, dict):
raw_value = str(review_form_values.get("amount") or "").strip()
if raw_value:
compact = raw_value.replace("", "").replace(",", "").strip()
try:
return Decimal(compact).quantize(Decimal("0.01"))
except (InvalidOperation, ValueError):
pass
for item in entities:
if item.type != "amount" or item.role == "threshold":
continue
try:
return Decimal(item.normalized_value).quantize(Decimal("0.01"))
except (InvalidOperation, ValueError):
continue
return None
@staticmethod
def _resolve_attachment_names(context_json: dict[str, Any]) -> list[str]:
names = context_json.get("attachment_names")
if not isinstance(names, list):
return []
return [str(name).strip() for name in names if str(name).strip()]
def _resolve_attachment_count(self, context_json: dict[str, Any]) -> int:
names = self._resolve_attachment_names(context_json)
if names:
return len(names)
try:
return max(0, int(context_json.get("attachment_count") or 0))
except (TypeError, ValueError):
return 0

View File

@@ -0,0 +1,733 @@
from __future__ import annotations
import re
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal
from types import SimpleNamespace
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.expense_claim_constants import (
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
DOCUMENT_FACT_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_rule_runtime import (
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
)
class ExpenseClaimPlatformRiskMixin:
def evaluate_platform_risk_rules(
    self,
    claim: ExpenseClaim,
    *,
    rule_codes: list[str] | None = None,
) -> dict[str, list[Any]]:
    """Evaluate the loaded platform risk rules against one expense claim.

    Args:
        claim: Expense claim under review.
        rule_codes: Optional rule codes forwarded to the manifest loader.

    Returns:
        A dict with ``flags`` (every rule hit, in manifest order) and
        ``blocking_reasons`` (de-duplicated, non-empty texts of the hits whose
        severity is ``high`` or whose action is ``block``).
    """
    manifests = self._load_platform_risk_rule_manifests(rule_codes=rule_codes)
    if not manifests:
        return {"flags": [], "blocking_reasons": []}

    attachment_contexts = self._build_claim_attachment_contexts(claim)
    hits: list[dict[str, Any]] = []
    # A dict is used as an insertion-ordered set of blocking reasons.
    reasons: dict[str, None] = {}
    for manifest in manifests:
        applicable = self._risk_manifest_applies_to_claim(
            manifest,
            claim=claim,
            contexts=attachment_contexts,
        )
        if not applicable:
            continue
        hit = self._evaluate_platform_risk_manifest(
            manifest,
            claim=claim,
            contexts=attachment_contexts,
        )
        if hit is None:
            continue
        hits.append(hit)
        hit_severity = str(hit.get("severity") or "").strip().lower()
        hit_action = str(hit.get("action") or "").strip().lower()
        if hit_severity != "high" and hit_action != "block":
            continue
        reason = str(hit.get("message") or hit.get("label") or "").strip()
        if reason:
            reasons.setdefault(reason, None)
    return {"flags": hits, "blocking_reasons": list(reasons)}
def _load_platform_risk_rule_manifests(
    self,
    *,
    rule_codes: list[str] | None,
) -> list[dict[str, Any]]:
    """Load the enabled JSON risk-rule manifests, one per rule code.

    Active expense-domain rule assets whose ``config_json["detail_mode"]`` is
    ``json_risk`` are read first. Rule files listed in ``RISK_RULES_LIBRARY``
    then act as a fallback for every code no asset supplied (all codes when no
    filter is given, only the still-missing ones otherwise).

    Args:
        rule_codes: Optional allow-list of rule codes. Blank entries are
            ignored; an empty list or ``None`` disables filtering.

    Returns:
        Copies of the manifest payloads, each carrying ``_rule_version`` (and
        ``_rule_asset_id`` when it came from an asset). Unreadable, non-dict
        or explicitly disabled manifests are skipped.
    """
    code_filter = {
        normalized
        for normalized in (str(code or "").strip() for code in (rule_codes or []))
        if normalized
    }
    manifests_by_code: dict[str, dict[str, Any]] = {}
    assets = list(
        self.db.scalars(
            select(AgentAsset)
            .where(AgentAsset.asset_type == AgentAssetType.RULE.value)
            .where(AgentAsset.status == AgentAssetStatus.ACTIVE.value)
            .where(AgentAsset.domain == AgentAssetDomain.EXPENSE.value)
            .order_by(AgentAsset.updated_at.desc(), AgentAsset.created_at.desc())
        ).all()
    )
    library_manager = AgentAssetRuleLibraryManager()
    for asset in assets:
        config_json = asset.config_json if isinstance(asset.config_json, dict) else {}
        if str(config_json.get("detail_mode") or "").strip().lower() != "json_risk":
            continue
        rule_code = str(asset.code or "").strip()
        if code_filter and rule_code not in code_filter:
            continue
        rule_document = config_json.get("rule_document")
        if not isinstance(rule_document, dict):
            continue
        file_name = str(rule_document.get("file_name") or "").strip()
        if not file_name:
            continue
        rule_library = (
            str(config_json.get("rule_library") or RISK_RULES_LIBRARY).strip()
            or RISK_RULES_LIBRARY
        )
        try:
            payload = library_manager.read_rule_library_json(
                library=rule_library,
                file_name=file_name,
            )
        except (FileNotFoundError, ValueError):
            continue
        if not isinstance(payload, dict):
            # A manifest must be a JSON object; anything else is unusable.
            continue
        manifest_code = str(payload.get("rule_code") or rule_code).strip()
        if not manifest_code or (code_filter and manifest_code not in code_filter):
            continue
        if payload.get("enabled") is False:
            continue
        if manifest_code in manifests_by_code:
            # Assets arrive newest first, so the first enabled manifest seen
            # for a code is the most recent one; older assets must not
            # overwrite it.
            continue
        payload = dict(payload)
        payload.setdefault("rule_code", manifest_code)
        payload["_rule_version"] = str(
            asset.published_version or asset.current_version or "v1.0.0"
        )
        payload["_rule_asset_id"] = asset.id
        manifests_by_code[manifest_code] = payload

    missing_codes = code_filter - set(manifests_by_code)
    should_load_fallback = not code_filter or bool(missing_codes)
    if should_load_fallback:
        try:
            files = library_manager.list_rule_library_json_files(library=RISK_RULES_LIBRARY)
        except ValueError:
            files = []
        for file_name in files:
            try:
                payload = library_manager.read_rule_library_json(
                    library=RISK_RULES_LIBRARY,
                    file_name=file_name,
                )
            except (FileNotFoundError, ValueError):
                continue
            if not isinstance(payload, dict):
                continue
            rule_code = str(payload.get("rule_code") or "").strip()
            if not rule_code or rule_code in manifests_by_code:
                continue
            if code_filter and rule_code not in missing_codes:
                continue
            if payload.get("enabled") is False:
                continue
            payload = dict(payload)
            # Library files carry no asset version, so a fixed default is used.
            payload["_rule_version"] = "v1.0.0"
            manifests_by_code[rule_code] = payload
    return list(manifests_by_code.values())
def _risk_manifest_applies_to_claim(
self,
manifest: dict[str, Any],
*,
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
) -> bool:
applies_to = manifest.get("applies_to")
if not isinstance(applies_to, dict):
applies_to = {}
try:
min_attachments = int(applies_to.get("min_attachments") or 0)
except (TypeError, ValueError):
min_attachments = 0
if min_attachments and int(claim.invoice_count or 0) < min_attachments and not contexts:
return False
expense_types = {
str(claim.expense_type or "").strip().lower(),
*{
str(item.item_type or "").strip().lower()
for item in list(claim.items or [])
if str(item.item_type or "").strip()
},
}
domains = {
str(value or "").strip().lower()
for value in list(applies_to.get("domains") or [])
if str(value or "").strip()
}
configured_expense_types = {
str(value or "").strip().lower()
for value in list(applies_to.get("expense_types") or [])
if str(value or "").strip()
}
if configured_expense_types and not (expense_types & configured_expense_types):
return False
if domains and not self._risk_domains_match_claim(
domains,
expense_types=expense_types,
contexts=contexts,
):
return False
return True
def _risk_domains_match_claim(
self,
domains: set[str],
*,
expense_types: set[str],
contexts: list[dict[str, Any]],
) -> bool:
normalized_contexts: list[dict[str, str]] = []
for context in contexts:
document_info = context.get("document_info") or {}
normalized_contexts.append(
{
"scene_code": str(document_info.get("scene_code") or "").strip().lower(),
"document_type": str(
document_info.get("document_type") or ""
).strip().lower(),
"item_type": str(
getattr(context.get("item"), "item_type", "") or ""
).strip().lower(),
}
)
if "travel" in domains:
if expense_types & {"travel", "hotel", "transport"}:
return True
if any(
item["scene_code"] in {"travel", "hotel", "transport"}
or item["document_type"]
in {
"flight_itinerary",
"train_ticket",
"hotel_invoice",
"taxi_receipt",
}
for item in normalized_contexts
):
return True
if "meal" in domains:
if expense_types & {"meal", "entertainment"}:
return True
if any(
item["scene_code"] == "meal" or item["document_type"] == "meal_receipt"
for item in normalized_contexts
):
return True
return bool(domains & expense_types)
def _evaluate_platform_risk_manifest(
self,
manifest: dict[str, Any],
*,
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
) -> dict[str, Any] | None:
evaluator = str(manifest.get("evaluator") or "").strip().lower()
if evaluator == "reason_too_brief":
return self._evaluate_reason_too_brief_risk(manifest, claim=claim)
if evaluator == "entertainment_reason_missing":
return self._evaluate_entertainment_reason_missing_risk(manifest, claim=claim)
if evaluator == "document_expense_mismatch":
return self._evaluate_document_expense_mismatch_risk(
manifest,
claim=claim,
contexts=contexts,
)
if evaluator == "location_consistency":
return self._evaluate_location_consistency_risk(
manifest,
claim=claim,
contexts=contexts,
)
if evaluator == "duplicate_invoice":
return self._evaluate_duplicate_invoice_risk(manifest, claim=claim, contexts=contexts)
if evaluator == "identity_consistency":
return self._evaluate_identity_consistency_risk(
manifest,
claim=claim,
contexts=contexts,
)
if evaluator == "cross_year_invoice":
return self._evaluate_cross_year_invoice_risk(manifest, claim=claim, contexts=contexts)
if evaluator == "void_or_red_invoice":
return self._evaluate_text_keyword_risk(
manifest,
contexts=contexts,
keywords=["作废", "红冲", "红字", "冲红"],
fallback_message="票据文本中出现作废、红冲或红字发票相关信息,建议退回补充或人工复核。",
)
if evaluator == "vague_goods_description":
return self._evaluate_text_keyword_risk(
manifest,
contexts=contexts,
keywords=["详见清单", "服务费", "咨询费", "其他", "办公用品"],
fallback_message="票据商品或服务描述较笼统,建议审批人核对真实用途和明细清单。",
)
if evaluator == "multi_city_reason_required":
return self._evaluate_multi_city_reason_required_risk(
manifest,
claim=claim,
contexts=contexts,
)
return None
def _evaluate_reason_too_brief_risk(
self,
manifest: dict[str, Any],
*,
claim: ExpenseClaim,
) -> dict[str, Any] | None:
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
try:
min_reason_length = max(1, int(params.get("min_reason_length") or 6))
except (TypeError, ValueError):
min_reason_length = 6
reason_corpus = re.sub(r"\s+", "", self._build_scene_reason_corpus(claim))
if len(reason_corpus) >= min_reason_length:
return None
return self._build_platform_risk_flag(
manifest,
message=f"报销事由有效描述不足 {min_reason_length} 个字符,暂不足以支撑真实性判断。",
evidence={"reason_length": len(reason_corpus), "min_reason_length": min_reason_length},
)
def _evaluate_entertainment_reason_missing_risk(
self,
manifest: dict[str, Any],
*,
claim: ExpenseClaim,
) -> dict[str, Any] | None:
expense_types = {
str(claim.expense_type or "").strip().lower(),
*{str(item.item_type or "").strip().lower() for item in list(claim.items or [])},
}
reason_corpus = self._build_scene_reason_corpus(claim)
compact_reason = re.sub(r"\s+", "", reason_corpus)
looks_like_entertainment = (
"entertainment" in expense_types
or "招待" in compact_reason
or "客户" in compact_reason
)
if not looks_like_entertainment:
return None
required_keywords = ("客户", "项目", "参与", "人员", "对象", "商务", "会议")
has_detail = any(keyword in compact_reason for keyword in required_keywords)
if has_detail:
return None
return self._build_platform_risk_flag(
manifest,
message="招待或餐饮类费用未识别到客户、项目、参与人员等必要说明,建议补充后再流转。",
evidence={"reason": reason_corpus[:300]},
)
def _evaluate_document_expense_mismatch_risk(
self,
manifest: dict[str, Any],
*,
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
) -> dict[str, Any] | None:
mismatches: list[str] = []
for context in contexts:
item = context["item"]
item_type = (
str(item.item_type or claim.expense_type or "other").strip().lower()
or "other"
)
policy = self._get_expense_scene_policy(item_type)
if policy is None:
continue
document_info = context.get("document_info") or {}
recognized_scene_code = (
str(document_info.get("scene_code") or "other").strip().lower()
or "other"
)
recognized_document_type = (
str(document_info.get("document_type") or "other").strip().lower()
or "other"
)
if (
recognized_scene_code in set(policy.allowed_scene_codes)
or recognized_document_type in set(policy.allowed_document_types)
):
continue
recognized_label = str(
document_info.get("document_type_label")
or recognized_document_type
or "未知票据"
)
mismatches.append(f"{context['index']} 条明细为{policy.label},附件识别为{recognized_label}")
if not mismatches:
return None
return self._build_platform_risk_flag(
manifest,
message="".join(mismatches[:3]) + ",与当前费用场景不匹配。",
evidence={"mismatches": mismatches[:5]},
)
def _evaluate_location_consistency_risk(
self,
manifest: dict[str, Any],
*,
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
) -> dict[str, Any] | None:
policy = self._get_expense_rule_catalog().travel_policy
if policy is None:
return None
declared_cities = self._extract_known_cities_from_text(
" ".join(
[
str(claim.location or ""),
*[str(item.item_location or "") for item in list(claim.items or [])],
]
),
policy,
)
evidence_cities = self._collect_attachment_cities(contexts, policy)
if not declared_cities or not evidence_cities:
return None
if set(declared_cities) & set(evidence_cities):
return None
declared_text = "".join(declared_cities)
evidence_text = "".join(evidence_cities[:5])
return self._build_platform_risk_flag(
manifest,
message=f"申报地点 {declared_text} 与票据识别地点 {evidence_text} 不一致,建议补充异地说明或更换附件。",
evidence={"declared_cities": declared_cities, "evidence_cities": evidence_cities},
)
def _evaluate_duplicate_invoice_risk(
self,
manifest: dict[str, Any],
*,
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
) -> dict[str, Any] | None:
invoice_keys = self._collect_invoice_keys_from_contexts(contexts)
duplicate_keys = [
key
for key, count in self._count_values(invoice_keys).items()
if count > 1
]
if duplicate_keys:
return self._build_platform_risk_flag(
manifest,
message=f"当前报销单内存在重复票据号码:{''.join(duplicate_keys[:3])}",
evidence={"duplicate_invoice_keys": duplicate_keys[:5]},
)
if not invoice_keys:
return None
other_items = list(
self.db.scalars(
select(ExpenseClaimItem)
.where(ExpenseClaimItem.claim_id != claim.id)
.where(ExpenseClaimItem.invoice_id.is_not(None))
).all()
)
matched_claim_ids: set[str] = set()
for other_item in other_items:
other_path = self._attachment_storage.resolve_path(other_item.invoice_id)
if other_path is None or not other_path.exists():
continue
other_meta = self._attachment_storage.read_meta(other_path)
other_document_info = other_meta.get("document_info")
if not isinstance(other_document_info, dict):
continue
other_keys = self._collect_invoice_keys_from_document_info(other_document_info)
if set(invoice_keys) & set(other_keys):
matched_claim_ids.add(str(other_item.claim_id or ""))
if not matched_claim_ids:
return None
return self._build_platform_risk_flag(
manifest,
message=f"票据号码已在其他报销单中出现,疑似重复报销:{''.join(invoice_keys[:3])}",
evidence={
"invoice_keys": invoice_keys[:5],
"matched_claim_ids": sorted(matched_claim_ids)[:5],
},
)
def _evaluate_identity_consistency_risk(
self,
manifest: dict[str, Any],
*,
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
) -> dict[str, Any] | None:
params = manifest.get("params") if isinstance(manifest.get("params"), dict) else {}
allow_keywords = [
str(value)
for value in list(params.get("allow_keywords") or [])
if str(value).strip()
]
claimant = str(claim.employee_name or "").strip()
if not claimant:
return None
mismatched_buyers: list[str] = []
for context in contexts:
buyer = self._resolve_first_document_field_value(
context.get("document_info") or {},
keys={"buyer_name", "buyer", "purchaser_name", "claimant"},
labels={"购买方", "抬头", "买方", "购方"},
)
if not buyer:
continue
if claimant in buyer or any(keyword in buyer for keyword in allow_keywords):
continue
mismatched_buyers.append(buyer)
if not mismatched_buyers:
return None
return self._build_platform_risk_flag(
manifest,
message=f"发票抬头 {mismatched_buyers[0]} 与报销人 {claimant} 不一致,建议人工复核。",
evidence={"claimant": claimant, "buyers": mismatched_buyers[:5]},
)
def _evaluate_cross_year_invoice_risk(
self,
manifest: dict[str, Any],
*,
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
) -> dict[str, Any] | None:
claim_year = claim.occurred_at.year if claim.occurred_at is not None else None
if claim_year is None:
return None
issue_years: list[int] = []
for context in contexts:
text = " ".join(
[
self._resolve_first_document_field_value(
context.get("document_info") or {},
keys={"date", "issue_date", "invoice_date"},
labels={"日期", "开票日期", "发生时间"},
),
str(context.get("ocr_summary") or ""),
str(context.get("ocr_text") or ""),
]
)
for match in re.findall(r"(20\d{2}|19\d{2})[年/\-.]", text):
try:
issue_years.append(int(match))
except ValueError:
continue
mismatch_years = sorted({year for year in issue_years if year != claim_year})
if not mismatch_years:
return None
return self._build_platform_risk_flag(
manifest,
message=f"票据年份 {mismatch_years[0]} 与费用发生年份 {claim_year} 不一致,建议确认是否跨年报销。",
evidence={"claim_year": claim_year, "invoice_years": mismatch_years},
)
def _evaluate_text_keyword_risk(
self,
manifest: dict[str, Any],
*,
contexts: list[dict[str, Any]],
keywords: list[str],
fallback_message: str,
) -> dict[str, Any] | None:
matched: list[str] = []
for context in contexts:
text = f"{context.get('ocr_summary') or ''}\n{context.get('ocr_text') or ''}"
for keyword in keywords:
if keyword in text and keyword not in matched:
matched.append(keyword)
if not matched:
return None
return self._build_platform_risk_flag(
manifest,
message=fallback_message,
evidence={"matched_keywords": matched},
)
def _evaluate_multi_city_reason_required_risk(
self,
manifest: dict[str, Any],
*,
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
) -> dict[str, Any] | None:
policy = self._get_expense_rule_catalog().travel_policy
if policy is None:
return None
cities = self._collect_attachment_cities(contexts, policy)
for item in list(claim.items or []):
for city in self._extract_known_cities_from_text(str(item.item_location or ""), policy):
if city not in cities:
cities.append(city)
if len(cities) <= 2:
return None
reason_corpus = self._build_travel_reason_corpus(claim)
if self._text_contains_keywords(reason_corpus, policy.route_exception_keywords):
return None
return self._build_platform_risk_flag(
manifest,
message=f"本次报销识别到多城市行程({''.join(cities[:5])}),但事由中未说明中转、多地拜访或改签原因。",
evidence={"cities": cities[:8]},
)
def _build_platform_risk_flag(
self,
manifest: dict[str, Any],
*,
message: str,
evidence: dict[str, Any],
) -> dict[str, Any]:
outcomes = manifest.get("outcomes") if isinstance(manifest.get("outcomes"), dict) else {}
fail_outcome = outcomes.get("fail") if isinstance(outcomes.get("fail"), dict) else {}
severity = str(fail_outcome.get("severity") or "medium").strip().lower() or "medium"
default_action = "block" if severity == "high" else "manual_review"
action = str(fail_outcome.get("action") or default_action).strip()
label = str(manifest.get("name") or manifest.get("rule_code") or "风险规则命中").strip()
return {
"source": "submission_review",
"hit_source": "rule_center",
"rule_type": "risk",
"rule_code": str(manifest.get("rule_code") or "").strip(),
"rule_version": str(manifest.get("_rule_version") or "v1.0.0").strip(),
"severity": severity,
"action": action,
"label": label,
"message": message,
"evidence": evidence,
}
@staticmethod
def _count_values(values: list[str]) -> dict[str, int]:
counts: dict[str, int] = {}
for value in values:
normalized = str(value or "").strip()
if not normalized:
continue
counts[normalized] = counts.get(normalized, 0) + 1
return counts
def _collect_invoice_keys_from_contexts(self, contexts: list[dict[str, Any]]) -> list[str]:
invoice_keys: list[str] = []
for context in contexts:
document_info = context.get("document_info") or {}
for key in self._collect_invoice_keys_from_document_info(document_info):
if key not in invoice_keys:
invoice_keys.append(key)
return invoice_keys
def _collect_invoice_keys_from_document_info(self, document_info: dict[str, Any]) -> list[str]:
keys: list[str] = []
for field in list(document_info.get("fields") or []):
if not isinstance(field, dict):
continue
field_key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if field_key in {"invoiceno", "invoicenumber", "number", "code"} or any(
token in label for token in ("发票号码", "票号", "发票代码", "号码")
):
normalized = re.sub(r"\s+", "", value)
if normalized and normalized not in keys:
keys.append(normalized)
return keys
def _collect_attachment_cities(
self,
contexts: list[dict[str, Any]],
policy: RuntimeTravelPolicy,
) -> list[str]:
cities: list[str] = []
for context in contexts:
document_info = context.get("document_info") or {}
parts = [
str(context.get("ocr_summary") or ""),
str(context.get("ocr_text") or ""),
str(context.get("item").item_location if context.get("item") is not None else ""),
]
for field in list(document_info.get("fields") or []):
if isinstance(field, dict):
parts.append(str(field.get("value") or ""))
for city in self._extract_known_cities_from_text(" ".join(parts), policy):
if city not in cities:
cities.append(city)
return cities
@staticmethod
def _extract_known_cities_from_text(text: str, policy: RuntimeTravelPolicy) -> list[str]:
normalized = str(text or "").strip()
if not normalized:
return []
cities: list[str] = []
for city in sorted(policy.city_tiers.keys(), key=lambda item: len(item), reverse=True):
if city in normalized and city not in cities:
cities.append(city)
return cities
@staticmethod
def _resolve_first_document_field_value(
document_info: dict[str, Any],
*,
keys: set[str],
labels: set[str],
) -> str:
normalized_keys = {key.replace("_", "").lower() for key in keys}
for field in list(document_info.get("fields") or []):
if not isinstance(field, dict):
continue
field_key = str(field.get("key") or "").strip().lower().replace("_", "")
label = str(field.get("label") or "").replace(" ", "")
value = str(field.get("value") or "").strip()
if not value:
continue
if field_key in normalized_keys or any(token in label for token in labels):
return value
return ""

View File

@@ -0,0 +1,654 @@
from __future__ import annotations
import re
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal
from types import SimpleNamespace
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.expense_claim_constants import (
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
DOCUMENT_FACT_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_rule_runtime import (
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
)
class ExpenseClaimPolicyReviewMixin:
def _run_scene_policy_review(self, claim: ExpenseClaim) -> dict[str, list[Any]]:
catalog = self._get_expense_rule_catalog()
flags: list[dict[str, Any]] = []
blocking_reasons: list[str] = []
reason_corpus = self._build_scene_reason_corpus(claim)
scene_totals: dict[str, Decimal] = defaultdict(lambda: Decimal("0.00"))
scene_warned: set[str] = set()
for item in claim.items:
item_type = str(item.item_type or claim.expense_type or "other").strip().lower() or "other"
policy = catalog.get_scene_policy(item_type)
if policy is None:
continue
scene_totals[item_type] += Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01"))
if policy.always_warn and item_type not in scene_warned:
scene_warned.add(item_type)
flags.append(
{
"source": "submission_review",
"severity": "medium",
"label": f"{policy.label}人工重点复核",
"message": policy.always_warn_message or f"{policy.label}默认需要人工重点复核。",
"rule_code": policy.rule_code,
}
)
item_limit = policy.item_amount_limit
item_amount = Decimal(item.item_amount or Decimal("0.00")).quantize(Decimal("0.01"))
if item_limit is not None and item_amount > Decimal("0.00"):
exceeded = self._evaluate_amount_limit(
amount=item_amount,
limit_config=item_limit,
reason_text="\n".join(
part
for part in [reason_corpus, str(item.item_reason or "").strip()]
if part
),
)
if exceeded is not None:
severity, threshold = exceeded
label = (
f"{policy.label}金额超标待说明"
if severity == "high"
else f"{policy.label}金额超标提醒"
)
message = (
f"{policy.label}当前识别金额为 {item_amount} 元,"
f"已超过制度阈值 {threshold} 元。"
)
if severity == "high":
message += " 当前未识别到例外说明,请先补充原因。"
blocking_reasons.append(f"{policy.label}金额超出制度阈值,且未补充例外说明。")
else:
message += " 已识别到例外说明,请审批人重点复核。"
flags.append(
{
"source": "submission_review",
"severity": severity,
"label": label,
"message": message,
"rule_code": policy.rule_code,
}
)
for scene_code, total_amount in scene_totals.items():
policy = catalog.get_scene_policy(scene_code)
if policy is None or policy.claim_amount_limit is None or total_amount <= Decimal("0.00"):
continue
exceeded = self._evaluate_amount_limit(
amount=total_amount,
limit_config=policy.claim_amount_limit,
reason_text=reason_corpus,
)
if exceeded is None:
continue
severity, threshold = exceeded
label = f"{policy.label}合计超标待说明" if severity == "high" else f"{policy.label}合计超标提醒"
message = (
f"{policy.label}当前合计金额为 {total_amount} 元,"
f"已超过制度阈值 {threshold} 元。"
)
if severity == "high":
message += " 当前未识别到例外说明,请先补充原因。"
blocking_reasons.append(f"{policy.label}合计金额超出制度阈值,且未补充例外说明。")
else:
message += " 已识别到例外说明,请审批人重点复核。"
flags.append(
{
"source": "submission_review",
"severity": severity,
"label": label,
"message": message,
"rule_code": policy.rule_code,
}
)
return {
"flags": flags,
"blocking_reasons": list(dict.fromkeys(reason for reason in blocking_reasons if reason)),
}
def _evaluate_amount_limit(
self,
*,
amount: Decimal,
limit_config: Any,
reason_text: str,
) -> tuple[str, Decimal] | None:
block_amount = getattr(limit_config, "block_amount", None)
warn_amount = getattr(limit_config, "warn_amount", None)
exception_keywords = list(getattr(limit_config, "exception_keywords", []) or [])
has_exception = self._text_contains_keywords(reason_text, exception_keywords)
if block_amount is not None and amount > Decimal(block_amount):
return ("medium" if has_exception else "high", Decimal(block_amount))
if warn_amount is not None and amount > Decimal(warn_amount):
return ("medium", Decimal(warn_amount))
return None
def _run_travel_policy_review(self, claim: ExpenseClaim) -> dict[str, list[Any]]:
policy = self._get_expense_rule_catalog().travel_policy
if policy is None:
return {"flags": [], "blocking_reasons": []}
contexts = [
context
for context in self._build_claim_attachment_contexts(claim)
if self._is_travel_policy_relevant_context(context, policy)
]
if not contexts:
return {"flags": [], "blocking_reasons": []}
reason_corpus = self._build_travel_reason_corpus(claim)
has_route_exception = self._text_contains_keywords(
reason_corpus,
policy.route_exception_keywords,
)
has_standard_exception = self._text_contains_keywords(
reason_corpus,
policy.standard_exception_keywords,
)
grade_band = self._resolve_travel_policy_band(claim.employee_grade)
band_label = policy.band_labels.get(grade_band or "", str(claim.employee_grade or "").strip() or "当前职级")
itinerary_segments: list[dict[str, Any]] = []
itinerary_cities: list[str] = []
hotel_contexts: list[dict[str, Any]] = []
flags: list[dict[str, Any]] = []
blocking_reasons: list[str] = []
for context in contexts:
route_segment = self._extract_route_segment(context, policy)
if route_segment and self._is_long_distance_travel_context(context, policy):
itinerary_segments.append(
{
"item": context["item"],
"origin": route_segment[0],
"destination": route_segment[1],
}
)
itinerary_cities.extend([route_segment[0], route_segment[1]])
scene_code = str(context["document_info"].get("scene_code") or "").strip().lower()
document_type = str(context["document_info"].get("document_type") or "").strip().lower()
item_type = str(context["item"].item_type or "").strip().lower()
if "hotel" in {scene_code, document_type, item_type} or document_type == "hotel_invoice":
hotel_contexts.append(context)
unique_itinerary_cities = list(dict.fromkeys(city for city in itinerary_cities if city))
expected_destination_city = self._resolve_expected_travel_city(
claim,
contexts,
unique_itinerary_cities,
policy,
)
if itinerary_segments:
unique_destinations = list(
dict.fromkeys(segment["destination"] for segment in itinerary_segments if segment["destination"])
)
first_origin = str(itinerary_segments[0]["origin"] or "").strip()
last_destination = str(itinerary_segments[-1]["destination"] or "").strip()
for previous, current in zip(itinerary_segments, itinerary_segments[1:]):
previous_destination = str(previous["destination"] or "").strip()
current_origin = str(current["origin"] or "").strip()
if previous_destination and current_origin and previous_destination != current_origin:
message = (
f"差旅行程未形成连续链路:上一段到达 {previous_destination}"
f"下一段却从 {current_origin} 出发,请补充中转或改签说明。"
)
flags.append(
{
"source": "submission_review",
"severity": "high",
"label": "行程闭环异常",
"message": message,
"rule_code": policy.rule_code,
}
)
blocking_reasons.append("差旅行程未形成连续闭环,请补充中转、改签或异地出发原因。")
break
if (
expected_destination_city
and last_destination
and last_destination not in {expected_destination_city, first_origin}
):
message = (
f"差旅行程终点识别为 {last_destination}"
f"与申报目的地 {expected_destination_city} 不一致,请补充多地出差或后续行程说明。"
)
flags.append(
{
"source": "submission_review",
"severity": "high",
"label": "行程终点异常",
"message": message,
"rule_code": policy.rule_code,
}
)
blocking_reasons.append("差旅行程终点与申报目的地不一致,请补充多地出差说明或补齐后续票据。")
expected_city_set = {
city
for city in (expected_destination_city, first_origin)
if city
}
extra_destinations = [
city
for city in unique_destinations
if city and city not in expected_city_set
]
if extra_destinations and not has_route_exception:
destinations_text = "".join(extra_destinations[:3])
flags.append(
{
"source": "submission_review",
"severity": "high",
"label": "多城市行程待说明",
"message": (
f"检测到本次差旅涉及 {destinations_text} 多个目的地,"
"但当前报销事由未说明中转、多地拜访或改签原因。"
),
"rule_code": policy.rule_code,
}
)
blocking_reasons.append("检测到多城市差旅行程,但当前未补充中转或多地出差说明。")
allowed_hotel_cities = {
city
for city in [expected_destination_city, *unique_itinerary_cities]
if city
}
for context in hotel_contexts:
hotel_city = self._extract_hotel_city(context, policy)
if hotel_city and allowed_hotel_cities and hotel_city not in allowed_hotel_cities:
expected_text = "".join(sorted(allowed_hotel_cities))
flags.append(
{
"source": "submission_review",
"severity": "high",
"label": "酒店地点异常",
"message": (
f"酒店票据识别城市为 {hotel_city}"
f"与当前差旅目的地/行程城市 {expected_text} 不一致,请补充异地住宿原因。"
),
"rule_code": policy.rule_code,
}
)
blocking_reasons.append("酒店票据地点与差旅目的地不一致,请补充异地住宿原因或更换附件。")
if grade_band is None:
continue
baseline_city = hotel_city or expected_destination_city
standard = self._resolve_travel_policy_hotel_standard(
policy=policy,
grade_band=grade_band,
city=baseline_city,
)
if standard is None:
continue
cap, standard_label = standard
night_count = self._extract_hotel_night_count(context)
item_amount = Decimal(context["item"].item_amount or Decimal("0.00")).quantize(Decimal("0.01"))
nightly_amount = (item_amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01"))
if nightly_amount <= cap:
continue
hotel_message = (
f"{band_label} 职级在{standard_label}的住宿标准为 {cap} 元/晚,"
f"当前酒店识别金额约 {nightly_amount} 元/晚。"
)
item_reason = str(context["item"].item_reason or "").strip()
item_has_exception = self._text_contains_keywords(item_reason, policy.standard_exception_keywords)
if has_standard_exception or item_has_exception:
flags.append(
{
"source": "submission_review",
"severity": "medium",
"label": "住宿超标提醒",
"message": hotel_message + " 已识别到补充说明,请直属领导重点复核。",
"rule_code": policy.rule_code,
}
)
else:
flags.append(
{
"source": "submission_review",
"severity": "high",
"label": "住宿超标待说明",
"message": hotel_message + " 当前未识别到超标说明,请先补充原因。",
"rule_code": policy.rule_code,
}
)
blocking_reasons.append("住宿金额超出当前职级差标,且未补充超标说明。")
if grade_band is not None:
for context in contexts:
transport_class = self._detect_transport_class(context, policy)
if transport_class is None:
continue
transport_kind, class_label, class_level = transport_class
allowed_level = policy.transport_limits.get(grade_band, {}).get(transport_kind)
if allowed_level is None or class_level <= allowed_level:
continue
item_reason = str(context["item"].item_reason or "").strip()
item_has_exception = self._text_contains_keywords(item_reason, policy.standard_exception_keywords)
message = f"{band_label} 职级当前默认不可报销 {class_label}"
if has_standard_exception or item_has_exception:
flags.append(
{
"source": "submission_review",
"severity": "medium",
"label": "交通舱位超标提醒",
"message": message + " 已识别到补充说明,请审批人重点复核。",
"rule_code": policy.rule_code,
}
)
else:
flags.append(
{
"source": "submission_review",
"severity": "high",
"label": "交通舱位超标待说明",
"message": message + " 当前未识别到例外说明,请先补充原因。",
"rule_code": policy.rule_code,
}
)
blocking_reasons.append("交通舱位或席别超出当前职级差标,且未补充例外说明。")
return {
"flags": flags,
"blocking_reasons": list(dict.fromkeys(reason for reason in blocking_reasons if reason)),
}
def _build_claim_attachment_contexts(self, claim: ExpenseClaim) -> list[dict[str, Any]]:
contexts: list[dict[str, Any]] = []
ordered_items = sorted(
claim.items,
key=lambda item: (
item.item_date or date.max,
self._normalize_sort_datetime(item.created_at),
),
)
for index, item in enumerate(ordered_items, start=1):
file_path = self._attachment_storage.resolve_path(item.invoice_id)
if file_path is None or not file_path.exists():
continue
metadata = self._attachment_storage.read_meta(file_path)
document_info = metadata.get("document_info")
contexts.append(
{
"index": index,
"item": item,
"document_info": document_info if isinstance(document_info, dict) else {},
"ocr_text": str(metadata.get("ocr_text") or ""),
"ocr_summary": str(metadata.get("ocr_summary") or ""),
}
)
return contexts
def _is_travel_policy_relevant_context(
self,
context: dict[str, Any],
policy: RuntimeTravelPolicy,
) -> bool:
item = context.get("item")
document_info = context.get("document_info") or {}
item_type = str(getattr(item, "item_type", "") or "").strip().lower()
scene_code = str(document_info.get("scene_code") or "").strip().lower()
document_type = str(document_info.get("document_type") or "").strip().lower()
return (
item_type in set(policy.relevant_expense_types)
or scene_code in set(policy.relevant_expense_types)
or document_type in {"hotel_invoice", *set(policy.long_distance_document_types)}
)
@staticmethod
def _resolve_document_field_value(document_info: dict[str, Any], key: str) -> str:
normalized_key = str(key or "").strip().lower()
for field in list(document_info.get("fields") or []):
if not isinstance(field, dict):
continue
field_key = str(field.get("key") or "").strip().lower()
if field_key == normalized_key:
return str(field.get("value") or "").strip()
return ""
@staticmethod
def _text_contains_keywords(text: str, keywords: tuple[str, ...] | list[str]) -> bool:
compact = re.sub(r"\s+", "", str(text or ""))
if not compact:
return False
return any(keyword in compact for keyword in keywords)
def _build_travel_reason_corpus(self, claim: ExpenseClaim) -> str:
parts = [str(claim.reason or "").strip(), str(claim.location or "").strip()]
for item in claim.items:
parts.append(str(item.item_reason or "").strip())
parts.append(str(item.item_location or "").strip())
return "\n".join(part for part in parts if part)
@staticmethod
def _resolve_travel_policy_band(grade: str | None) -> str | None:
normalized = str(grade or "").strip().upper()
if not normalized:
return None
p_match = re.search(r"P(\d+)", normalized)
if p_match:
level = int(p_match.group(1))
if level <= 3:
return "junior"
if level <= 5:
return "mid"
return "senior"
m_match = re.search(r"M(\d+)", normalized)
if m_match:
level = int(m_match.group(1))
if level <= 2:
return "manager"
return "executive"
if normalized.startswith("D"):
return "executive"
return None
def _resolve_expected_travel_city(
self,
claim: ExpenseClaim,
contexts: list[dict[str, Any]],
itinerary_cities: list[str],
policy: RuntimeTravelPolicy,
) -> str:
claim_city = self._extract_city_from_text(str(claim.location or ""), policy)
if claim_city:
return claim_city
for context in contexts:
hotel_city = self._extract_hotel_city(context, policy)
if hotel_city:
return hotel_city
if len(itinerary_cities) >= 2 and itinerary_cities[1]:
return itinerary_cities[1]
for city in itinerary_cities:
if city:
return city
return ""
def _extract_route_segment(
self,
context: dict[str, Any],
policy: RuntimeTravelPolicy,
) -> tuple[str, str] | None:
document_info = context["document_info"]
route_value = self._resolve_document_field_value(document_info, "route")
if not route_value or "-" not in route_value:
return None
origin_text, destination_text = [segment.strip() for segment in route_value.split("-", 1)]
origin_city = self._extract_city_from_text(origin_text, policy)
destination_city = self._extract_city_from_text(destination_text, policy)
if not origin_city or not destination_city or origin_city == destination_city:
return None
return origin_city, destination_city
def _extract_hotel_city(self, context: dict[str, Any], policy: RuntimeTravelPolicy) -> str:
document_info = context["document_info"]
item = context["item"]
merchant_name = self._resolve_document_field_value(document_info, "merchant_name")
for candidate in (
merchant_name,
str(item.item_location or ""),
str(context.get("ocr_summary") or ""),
str(context.get("ocr_text") or ""),
):
city = self._extract_city_from_text(candidate, policy)
if city:
return city
return ""
@staticmethod
def _format_travel_policy_city_tier(city_tier: str) -> str:
return {
"tier_1": "一线城市",
"tier_2": "重点城市",
"tier_3": "其他城市",
}.get(str(city_tier or "").strip(), "当前城市")
def _resolve_travel_policy_hotel_standard(
self,
*,
policy: RuntimeTravelPolicy,
grade_band: str,
city: str,
) -> tuple[Decimal, str] | None:
normalized_city = str(city or "").strip()
city_limits = getattr(policy, "hotel_city_limits", {}) or {}
city_entry = city_limits.get(normalized_city) if normalized_city else None
if city_entry and city_entry.get(grade_band) is not None:
cap = Decimal(city_entry[grade_band]).quantize(Decimal("0.01"))
return cap, normalized_city
city_tier = (getattr(policy, "city_tiers", {}) or {}).get(normalized_city, "tier_3")
tier_entry = (getattr(policy, "hotel_limits", {}) or {}).get(grade_band, {})
tier_cap = tier_entry.get(city_tier)
if tier_cap is None:
return None
tier_label = self._format_travel_policy_city_tier(city_tier)
cap = Decimal(tier_cap).quantize(Decimal("0.01"))
return cap, tier_label
@staticmethod
def _extract_city_from_text(text: str, policy: RuntimeTravelPolicy) -> str:
normalized = str(text or "").strip()
if not normalized:
return ""
city_names = set(policy.city_tiers.keys())
city_names.update((getattr(policy, "hotel_city_limits", {}) or {}).keys())
city_match_order = sorted(city_names, key=lambda item: len(item), reverse=True)
for city in city_match_order:
if city in normalized:
return city
return ""
@staticmethod
def _extract_hotel_night_count(context: dict[str, Any]) -> int:
    """Read the number of hotel nights from the attachment's OCR text.

    The OCR summary and OCR text are joined with a space and searched with
    ``TRAVEL_POLICY_HOTEL_NIGHT_PATTERN``; capture group 1 is taken as the
    night count. The result is never below 1, and 1 is also returned when the
    pattern does not match or the captured value is not an integer.
    """
    summary = str(context.get("ocr_summary") or "").strip()
    body = str(context.get("ocr_text") or "").strip()
    haystack = f"{summary} {body}".strip()

    found = TRAVEL_POLICY_HOTEL_NIGHT_PATTERN.search(haystack)
    if not found:
        return 1
    try:
        nights = int(found.group(1))
    except (TypeError, ValueError):
        return 1
    return nights if nights > 1 else 1
def _detect_transport_class(
self,
context: dict[str, Any],
policy: RuntimeTravelPolicy,
) -> tuple[str, str, int] | None:
document_info = context["document_info"]
document_type = str(document_info.get("document_type") or "").strip().lower()
text = " ".join(
[
str(context.get("ocr_summary") or "").strip(),
str(context.get("ocr_text") or "").strip(),
]
).strip()
compact_text = re.sub(r"\s+", "", text)
if not compact_text:
return None
if document_type == "flight_itinerary":
for config in policy.flight_classes:
label = str(config.keyword or "").strip()
level = int(config.level)
if label in compact_text:
return "flight", label, level
return None
if document_type == "train_ticket":
for config in policy.train_classes:
label = str(config.keyword or "").strip()
level = int(config.level)
if label in compact_text:
return "train", label, level
return None
return None
def _is_long_distance_travel_context(
self,
context: dict[str, Any],
policy: RuntimeTravelPolicy,
) -> bool:
document_info = context["document_info"]
document_type = str(document_info.get("document_type") or "").strip().lower()
scene_code = str(document_info.get("scene_code") or "").strip().lower()
if document_type in set(policy.long_distance_document_types):
return True
return scene_code == "travel"

View File

@@ -0,0 +1,269 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimReadModelMixin:
@staticmethod
def _serialize_claim(claim: ExpenseClaim) -> dict[str, Any]:
return {
"id": claim.id,
"claim_no": claim.claim_no,
"employee_name": claim.employee_name,
"department_name": claim.department_name,
"project_code": claim.project_code,
"expense_type": claim.expense_type,
"reason": claim.reason,
"location": claim.location,
"amount": float(claim.amount),
"invoice_count": int(claim.invoice_count or 0),
"status": claim.status,
"approval_stage": claim.approval_stage,
"risk_flags_json": list(claim.risk_flags_json or []),
}
@staticmethod
def _collect_return_flags(risk_flags: Any) -> list[dict[str, Any]]:
if not isinstance(risk_flags, list):
return []
return [
flag
for flag in risk_flags
if isinstance(flag, dict) and str(flag.get("source") or "").strip() == "manual_return"
]
@staticmethod
def _normalize_return_reason_codes(reason_codes: list[str] | None) -> list[str]:
return ExpenseClaimReadModelMixin._normalize_return_reason_code_payload(reason_codes)["reason_codes"]
@staticmethod
def _normalize_return_reason_code_payload(reason_codes: list[str] | None) -> dict[str, list[str]]:
normalized_codes: list[str] = []
unknown_codes: list[str] = []
for item in reason_codes or []:
code = str(item or "").strip()
if not code:
continue
if code in RETURN_REASON_OPTIONS and code not in normalized_codes:
normalized_codes.append(code)
elif code not in RETURN_REASON_OPTIONS and code not in unknown_codes:
unknown_codes.append(code)
return {
"reason_codes": normalized_codes,
"unknown_reason_codes": unknown_codes,
}
@staticmethod
def _merge_persistent_claim_risk_flags(*, existing_flags: list[Any], next_flags: list[Any]) -> list[Any]:
if not next_flags:
return list(existing_flags or [])
merged_flags = list(next_flags or [])
next_return_markers = {
ExpenseClaimReadModelMixin._build_return_flag_marker(flag)
for flag in merged_flags
if isinstance(flag, dict) and str(flag.get("source") or "").strip() == "manual_return"
}
for flag in list(existing_flags or []):
if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "manual_return"):
continue
marker = ExpenseClaimReadModelMixin._build_return_flag_marker(flag)
if marker in next_return_markers:
continue
merged_flags.append(flag)
next_return_markers.add(marker)
return merged_flags
@staticmethod
def _build_return_flag_marker(flag: dict[str, Any]) -> tuple[str, str, str]:
event_id = str(flag.get("return_event_id") or "").strip()
if event_id:
return ("event_id", event_id, "")
return (
str(flag.get("return_count") or "").strip(),
str(flag.get("created_at") or "").strip(),
str(flag.get("message") or flag.get("reason") or "").strip(),
)
@staticmethod
def _build_default_return_message(*, operator: str, risk_points: list[str]) -> str:
if risk_points:
return f"{operator} 退回该报销单:{''.join(risk_points)}。请申请人调整后重新提交。"
return f"{operator} 已退回该报销单,请申请人调整后重新提交。"
@staticmethod
def _normalize_return_stage_key(stage: str | None) -> str:
normalized = str(stage or "").strip()
if "直属" in normalized or "领导" in normalized or "负责人" in normalized:
return "direct_manager"
if "财务" in normalized:
return "finance"
if "AI" in normalized or "预审" in normalized:
return "ai_review"
if "归档" in normalized or "入账" in normalized:
return "archive"
return "unknown"
@staticmethod
def _is_editable_claim_status(status: str | None) -> bool:
return str(status or "").strip().lower() in EDITABLE_CLAIM_STATUSES
@staticmethod
def _normalize_optional_text(value: str | None, *, fallback: str = "", allow_empty: bool = False) -> str | None:
normalized = str(value or "").strip()
if normalized:
return normalized
if allow_empty:
return None
return fallback
@staticmethod
def _normalize_sort_datetime(value: datetime | None) -> datetime:
if value is None:
return datetime.max.replace(tzinfo=UTC)
if value.tzinfo is None:
return value.replace(tzinfo=UTC)
return value
@staticmethod
def _is_missing_value(value: Any) -> bool:
text = str(value or "").strip()
if not text:
return True
compact = text.replace(" ", "")
return compact in {"待补充", "暂无", "", "未知", "处理中"}
def _ensure_draft_claim(self, claim: ExpenseClaim) -> None:
if not self._is_editable_claim_status(claim.status):
raise ValueError("只有草稿、待补充或退回待提交状态的报销单才允许执行该操作。")
@staticmethod
def _ensure_draft_pending_claim(claim: ExpenseClaim) -> None:
status = str(claim.status or "").strip().lower()
if status != "draft":
raise ValueError("只有草稿待提交状态的报销单才允许编辑附加说明。")
@staticmethod
def _ensure_mutable_claim_item(item: ExpenseClaimItem) -> None:
if str(item.item_type or "").strip().lower() in SYSTEM_GENERATED_ITEM_TYPES:
raise ValueError("系统自动计算的费用明细不可手动修改。")
def _delete_claim_assistant_sessions(self, claim_id: str | None) -> None:
from app.services.agent_conversations import AgentConversationService
AgentConversationService(self.db).delete_conversations_for_draft_claim(
claim_id=claim_id,
source="user_message",
session_type="expense",
)
def _ensure_ready(self) -> None:
AgentFoundationService(self.db).ensure_foundation_ready()

View File

@@ -0,0 +1,393 @@
from __future__ import annotations
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import UTC, date, datetime, timedelta
from decimal import Decimal, InvalidOperation
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from sqlalchemy import func, or_, select
from sqlalchemy import inspect as sqlalchemy_inspect
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session, selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetDomain, AgentAssetStatus, AgentAssetType
from app.models.agent_asset import AgentAsset
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
from app.schemas.ontology import OntologyEntity, OntologyParseResult
from app.schemas.reimbursement import (
ExpenseClaimItemCreate,
ExpenseClaimItemUpdate,
ExpenseClaimUpdate,
TravelReimbursementCalculatorRequest,
)
from app.services.agent_asset_rule_library import AgentAssetRuleLibraryManager
from app.services.agent_asset_spreadsheet import RISK_RULES_LIBRARY
from app.services.agent_foundation import AgentFoundationService
from app.services.audit import AuditLogService
from app.services.document_intelligence import build_document_insight
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
from app.services.expense_claim_attachment_presentation import ExpenseClaimAttachmentPresentation
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
from app.services.expense_claim_errors import ExpenseClaimSubmissionBlockedError
from app.services.expense_claim_constants import (
EXPENSE_TYPE_LABELS,
MAX_DRAFT_CLAIMS_PER_USER,
EDITABLE_CLAIM_STATUSES,
SYSTEM_GENERATED_ITEM_TYPES,
TRAVEL_DETAIL_ITEM_TYPES,
TRAVEL_ALLOWANCE_TRIGGER_ITEM_TYPES,
DOCUMENT_TYPE_ITEM_TYPE_MAP,
DOCUMENT_TYPE_SCENE_MAP,
DOCUMENT_FACT_ITEM_TYPES,
ROUTE_DESCRIPTION_ITEM_TYPES,
DOCUMENT_TRIP_DATE_LABELS,
DOCUMENT_TRIP_DATE_REQUIREMENT_LABELS,
DOCUMENT_TRIP_DATE_KEYS,
DOCUMENT_GENERIC_DATE_KEYS,
DOCUMENT_INVOICE_DATE_KEYS,
DOCUMENT_TRIP_DATE_LABEL_TOKENS,
DOCUMENT_GENERIC_DATE_LABEL_TOKENS,
DOCUMENT_INVOICE_DATE_LABEL_TOKENS,
DOCUMENT_ROUTE_FORMAT_PATTERN,
DOCUMENT_ROUTE_TEXT_PATTERN,
DOCUMENT_ROUTE_ORIGIN_LABELS,
DOCUMENT_ROUTE_DESTINATION_LABELS,
GENERIC_ATTACHMENT_BACKFILL_ITEM_TYPES,
LOCATION_REQUIRED_EXPENSE_TYPES,
EXPENSE_SCENE_KEYWORDS,
EXPENSE_TYPE_ALLOWED_DOCUMENT_SCENES,
DOCUMENT_SCENE_LABELS,
DOCUMENT_ASSOCIATION_REVIEW_ACTIONS,
PERSISTENT_EXPENSE_REVIEW_ACTIONS,
RETURN_REASON_OPTIONS,
MAX_CLAIM_NO_RETRY_ATTEMPTS,
DOCUMENT_DATE_PATTERN,
SYSTEM_GENERATED_REASON_PREFIXES,
LEADING_REASON_TIME_PATTERNS,
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
TRAVEL_REVIEW_RELEVANT_EXPENSE_TYPES,
TRAVEL_REVIEW_LONG_DISTANCE_DOCUMENT_TYPES,
TRAVEL_POLICY_CITY_TIERS,
TRAVEL_POLICY_CITY_MATCH_ORDER,
TRAVEL_POLICY_BAND_LABELS,
TRAVEL_POLICY_HOTEL_LIMITS,
TRAVEL_POLICY_ALLOWED_TRANSPORT_LEVELS,
TRAVEL_POLICY_ROUTE_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_STANDARD_EXCEPTION_KEYWORDS,
TRAVEL_POLICY_FLIGHT_CLASS_PATTERNS,
TRAVEL_POLICY_TRAIN_CLASS_PATTERNS,
TRAVEL_POLICY_HOTEL_NIGHT_PATTERN,
)
from app.services.expense_claim_risk_review import ExpenseClaimRiskReviewMixin
from app.services.expense_amounts import (
extract_amount_candidates,
format_decimal_amount,
is_amount_match_date_fragment,
is_date_like_amount_candidate,
is_probable_year_amount,
parse_document_amount_value,
parse_plain_document_amount_value,
resolve_document_field_amount,
resolve_document_item_amount,
resolve_document_text_amount,
)
from app.services.expense_rule_runtime import (
DEFAULT_SCENE_RULE_ASSET_CODE,
ExpenseRuleRuntimeService,
RuntimeTravelPolicy,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.ocr import OcrService
class ExpenseClaimReviewPreviewMixin:
def save_or_submit_from_ontology(
self,
*,
run_id: str,
user_id: str | None,
message: str,
ontology: OntologyParseResult,
context_json: dict[str, Any],
) -> dict[str, Any]:
review_action = str(context_json.get("review_action") or "").strip()
if review_action not in PERSISTENT_EXPENSE_REVIEW_ACTIONS:
return self._build_expense_review_preview_result(
user_id=user_id,
message=message,
ontology=ontology,
context_json=context_json,
)
result = self.upsert_draft_from_ontology(
run_id=run_id,
user_id=user_id,
message=message,
ontology=ontology,
context_json=context_json,
)
if review_action != "next_step":
return result
claim_id = str(result.get("claim_id") or "").strip()
if not claim_id or result.get("draft_limit_reached"):
return result
current_user = CurrentUserContext(
username=str(user_id or context_json.get("name") or "anonymous").strip() or "anonymous",
name=str(context_json.get("name") or user_id or "anonymous").strip() or "anonymous",
role_codes=[
str(item).strip()
for item in list(context_json.get("role_codes") or [])
if str(item).strip()
],
is_admin=bool(context_json.get("is_admin")),
department_name=str(context_json.get("department_name") or context_json.get("department") or "").strip(),
)
try:
claim = self.submit_claim(claim_id, current_user)
except ExpenseClaimSubmissionBlockedError as exc:
return {
**result,
"message": self._format_submission_blocked_message(exc.issues),
"submission_blocked": True,
"submission_blocked_reasons": exc.issues,
"missing_fields": exc.issues,
"draft_only": False,
}
except ValueError as exc:
message = str(exc)
return {
**result,
"message": message,
"submission_blocked": True,
"submission_blocked_reasons": [message] if message else [],
"missing_fields": [message] if message else [],
"draft_only": False,
}
if claim is None:
return {
**result,
"message": "未找到可提交的报销单,请刷新后重试。",
"submission_blocked": True,
"draft_only": False,
}
if str(claim.status or "").strip().lower() != "submitted":
review_message = ""
for flag in list(claim.risk_flags_json or []):
if not isinstance(flag, dict):
continue
if str(flag.get("source") or "").strip() != "submission_review":
continue
review_message = str(flag.get("message") or "").strip()
if review_message:
break
return {
"message": review_message or f"报销单 {claim.claim_no} 经 AI预审后转为待补充请先修正后再提交。",
"submission_blocked": True,
"draft_only": False,
"claim_id": claim.id,
"claim_no": claim.claim_no,
"status": claim.status,
"approval_stage": claim.approval_stage,
"amount": float(claim.amount),
"invoice_count": int(claim.invoice_count or 0),
}
return {
"message": (
f"报销单 {claim.claim_no} 已完成 AI预审"
f"当前节点为 {claim.approval_stage or '审批中'}"
),
"draft_only": False,
"claim_id": claim.id,
"claim_no": claim.claim_no,
"status": claim.status,
"approval_stage": claim.approval_stage,
"amount": float(claim.amount),
"invoice_count": int(claim.invoice_count or 0),
}
def _build_expense_review_preview_result(
self,
*,
user_id: str | None,
message: str,
ontology: OntologyParseResult,
context_json: dict[str, Any],
) -> dict[str, Any]:
attachment_count = self._resolve_attachment_count(context_json)
calculation_copy = self._build_expense_review_preview_calculation_copy(
user_id=user_id,
message=message,
ontology=ontology,
context_json=context_json,
)
return {
"message": "\n\n".join(
item
for item in [
"我已先整理出本次报销的待核对信息。下面是基于当前信息的制度测算,票据补齐后会按真实金额重新复核。",
calculation_copy,
]
if item
),
"draft_only": True,
"preview_only": True,
"status": "preview",
"invoice_count": attachment_count,
}
def _build_expense_review_preview_calculation_copy(
self,
*,
user_id: str | None,
message: str,
ontology: OntologyParseResult,
context_json: dict[str, Any],
) -> str:
expense_type = self._resolve_explicit_review_expense_type(context_json) or self._resolve_expense_type(
ontology.entities,
context_json=context_json,
)
if expense_type == "travel" or (
(not expense_type or expense_type == "other")
and self._should_preview_as_travel(message=message, context_json=context_json)
):
return self._build_travel_review_preview_calculation_copy(
user_id=user_id,
message=message,
ontology=ontology,
context_json=context_json,
)
amount = self._resolve_amount(ontology.entities, context_json=context_json) or Decimal("0.00")
expense_label = EXPENSE_TYPE_LABELS.get(str(expense_type or "").strip(), "当前费用")
return "\n".join(
[
"报销测算参考:",
"",
"| 项目 | 当前信息 | 复核口径 |",
"| --- | --- | --- |",
f"| 费用类型 | {expense_label} | 匹配规则中心对应费用标准 |",
f"| 票据金额 | {self._format_decimal_amount(amount)} 元 | 以真实票据识别金额和用户确认金额为准 |",
"| 规则校验 | 待票据和关键信息补齐 | 按费用类型、发生地点、业务事由和审批口径复核 |",
]
)
def _build_travel_review_preview_calculation_copy(
self,
*,
user_id: str | None,
message: str,
ontology: OntologyParseResult,
context_json: dict[str, Any],
) -> str:
location = self._resolve_location(message=message, context_json=context_json) or "待确认"
occurred_at = self._resolve_occurred_at(ontology, context_json=context_json) or datetime.now(UTC)
days, _, _ = self._resolve_travel_allowance_days(
context_json=context_json,
occurred_at=occurred_at,
)
amount = self._resolve_amount(ontology.entities, context_json=context_json) or Decimal("0.00")
employee = self._resolve_employee(
ontology=ontology,
context_json=context_json,
user_id=user_id,
)
grade = str(
context_json.get("employee_grade")
or context_json.get("grade")
or context_json.get("user_grade")
or (employee.grade if employee is not None else "")
or ""
).strip()
if location == "待确认" or not grade:
return "\n".join(
[
"报销测算参考:",
"",
"| 项目 | 当前信息 | 测算说明 |",
"| --- | --- | --- |",
f"| 出差地点 | {location} | 用于匹配城市住宿标准和补贴区域 |",
f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |",
f"| 职级 | {grade or '待确认'} | 补齐后才能匹配住宿标准和补贴档位 |",
f"| 交通票据 | {self._format_decimal_amount(amount)} 元 | 上传票据后按真实金额重新复核 |",
]
)
try:
from app.services.travel_reimbursement_calculator import (
TravelReimbursementCalculatorService,
)
result = TravelReimbursementCalculatorService(self.db).calculate(
TravelReimbursementCalculatorRequest(days=days, location=location, grade=grade),
CurrentUserContext(
username=str(user_id or context_json.get("name") or "anonymous").strip() or "anonymous",
name=str(context_json.get("name") or user_id or "anonymous").strip() or "anonymous",
role_codes=[],
is_admin=False,
),
)
except ValueError:
return "\n".join(
[
"报销测算参考:",
"",
"| 项目 | 当前信息 | 测算说明 |",
"| --- | --- | --- |",
f"| 出差地点 | {location} | 暂时未能匹配规则中心地点 |",
f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |",
f"| 职级 | {grade} | 暂时无法自动匹配差旅标准 |",
f"| 交通票据 | {self._format_decimal_amount(amount)} 元 | 上传票据后按真实金额重新复核 |",
]
)
ticket_amount = amount.quantize(Decimal("0.01"))
total_amount = (
ticket_amount
+ Decimal(result.hotel_amount or Decimal("0.00"))
+ Decimal(result.allowance_amount or Decimal("0.00"))
).quantize(Decimal("0.01"))
ticket_basis = "当前未上传交通票据,先按 0.00 元占位" if ticket_amount <= Decimal("0.00") else "已识别或填写的交通票据金额"
return "\n".join(
[
"报销测算参考:",
"",
f"职级 {grade},目的地 {location},匹配城市 {result.matched_city};补齐交通、酒店等票据后,我会按真实票据金额和规则中心标准重新复核。",
"",
"| 项目 | 测算口径 | 金额 |",
"| --- | --- | ---: |",
f"| 交通票据 | {ticket_basis} | {self._format_decimal_amount(ticket_amount)} 元 |",
f"| 住宿标准 | {self._format_decimal_amount(result.hotel_rate)} 元/天 × {days} 天 | {self._format_decimal_amount(result.hotel_amount)} 元 |",
f"| 出差补贴 | {self._format_decimal_amount(result.total_allowance_rate)} 元/天 × {days} 天 | {self._format_decimal_amount(result.allowance_amount)} 元 |",
f"| 参考合计 | 交通票据 + 住宿标准 + 出差补贴 | {self._format_decimal_amount(total_amount)} 元 |",
]
)
@staticmethod
def _should_preview_as_travel(*, message: str, context_json: dict[str, Any]) -> bool:
text_parts = [message]
review_form_values = context_json.get("review_form_values")
if isinstance(review_form_values, dict):
text_parts.extend(str(value or "") for value in review_form_values.values())
text_parts.extend(str(context_json.get(key) or "") for key in ("user_input_text", "raw_text", "ocr_summary"))
compact = "".join(text_parts)
return any(keyword in compact for keyword in ("差旅", "出差", "火车票", "机票", "酒店", "住宿票"))

View File

@@ -0,0 +1,177 @@
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from typing import Any
from sqlalchemy import or_, select
from app.models.financial_record import ExpenseClaim
from app.services.expense_claim_constants import (
AI_REVIEW_LOOKBACK_DAYS,
AI_REVIEW_REPEAT_RISK_BLOCK_COUNT,
AI_REVIEW_REPEAT_RISK_WARNING_COUNT,
)
from app.services.expense_claim_item_sync import ExpenseClaimItemSyncMixin
from app.services.expense_claim_platform_risk import ExpenseClaimPlatformRiskMixin
from app.services.expense_claim_policy_review import ExpenseClaimPolicyReviewMixin
class ExpenseClaimRiskReviewMixin(
    ExpenseClaimPlatformRiskMixin,
    ExpenseClaimPolicyReviewMixin,
    ExpenseClaimItemSyncMixin,
):
    """AI pre-review executed when a claim is submitted.

    Combines attachment risk, approval-chain, history, travel-policy,
    scene-policy and platform-risk checks into one set of
    ``submission_review`` flags. Reads ``self.db`` from the host class.
    """

    def _run_ai_submission_review(self, claim: ExpenseClaim) -> dict[str, Any]:
        """Run every submission check and return the resulting claim state.

        Existing ``submission_review`` flags are discarded and regenerated;
        all other existing flags are preserved ahead of the new ones.

        Returns:
            A dict with ``status``, ``approval_stage``, ``risk_flags``,
            ``message`` and ``passed``. As written, the review always reports
            ``status="submitted"`` and ``passed=True``; findings are attached
            as flags rather than blocking the submission.
        """
        base_flags = list(claim.risk_flags_json or [])
        attachment_flags = [
            flag
            for flag in base_flags
            if isinstance(flag, dict) and str(flag.get("source") or "").strip() == "attachment_analysis"
        ]
        preserved_flags = [
            flag
            for flag in base_flags
            if not (isinstance(flag, dict) and str(flag.get("source") or "").strip() == "submission_review")
        ]
        review_flags: list[dict[str, Any]] = []
        attention_reasons: list[str] = []

        # 1. Attachment risk: only the most severe level present is reported.
        high_attachment_flags = [
            flag
            for flag in attachment_flags
            if str(flag.get("severity") or "").strip().lower() == "high"
        ]
        medium_attachment_flags = [
            flag
            for flag in attachment_flags
            if str(flag.get("severity") or "").strip().lower() == "medium"
        ]
        if high_attachment_flags:
            attention_reasons.append("存在高风险票据,需审批人重点复核。")
            review_flags.append(
                {
                    "source": "submission_review",
                    "severity": "high",
                    "label": "AI预审重点复核",
                    "message": (
                        f"AI预审发现 {len(high_attachment_flags)} 条高风险附件,"
                        "已随单流转给审批人重点复核。"
                    ),
                }
            )
        elif medium_attachment_flags:
            review_flags.append(
                {
                    "source": "submission_review",
                    "severity": "medium",
                    "label": "AI预审提醒",
                    "message": f"AI预审发现 {len(medium_attachment_flags)} 条中风险附件,已随单流转给审批人复核。",
                }
            )

        # 2. Approval chain: a missing direct manager is flagged, not blocked.
        manager_name = self._resolve_claim_manager_name(claim)
        if not manager_name:
            attention_reasons.append("未识别到该员工的直属领导,需审批环节补充分配。")
            review_flags.append(
                {
                    "source": "submission_review",
                    "severity": "medium",
                    "label": "审批链待分配",
                    "message": "AI预审发现直属领导缺失已提交到审批环节等待分配或复核。",
                }
            )

        # 3. Submitter history within the look-back window.
        historical_risk_count = self._count_recent_risky_claims(claim)
        if historical_risk_count >= AI_REVIEW_REPEAT_RISK_BLOCK_COUNT:
            review_flags.append(
                {
                    "source": "submission_review",
                    "severity": "medium",
                    "label": "历史风险偏高",
                    "message": (
                        f"{AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {historical_risk_count} 笔带风险标记的报销,"
                        "本次已追加到审批链重点关注。"
                    ),
                }
            )
        elif historical_risk_count >= AI_REVIEW_REPEAT_RISK_WARNING_COUNT:
            review_flags.append(
                {
                    "source": "submission_review",
                    "severity": "low",
                    "label": "历史风险提醒",
                    "message": (
                        f"{AI_REVIEW_LOOKBACK_DAYS} 天内该员工已有 {historical_risk_count} 笔带风险标记的报销,"
                        "建议直属领导重点复核。"
                    ),
                }
            )

        # 4. Policy and platform rule reviews supplied by the parent mixins.
        travel_review = self._run_travel_policy_review(claim)
        attention_reasons.extend(travel_review["blocking_reasons"])
        review_flags.extend(travel_review["flags"])
        scene_policy_review = self._run_scene_policy_review(claim)
        attention_reasons.extend(scene_policy_review["blocking_reasons"])
        review_flags.extend(scene_policy_review["flags"])
        platform_risk_review = self.evaluate_platform_risk_rules(claim)
        attention_reasons.extend(platform_risk_review["blocking_reasons"])
        review_flags.extend(platform_risk_review["flags"])

        if attention_reasons:
            # dict.fromkeys de-duplicates the reasons while keeping their order.
            summary_message = "AI预审发现需审批重点关注事项" + "".join(
                dict.fromkeys(attention_reasons)
            )
            review_flags.insert(
                0,
                {
                    "source": "submission_review",
                    "severity": "medium",
                    "label": "AI预审重点复核",
                    "message": summary_message,
                },
            )
        return {
            "status": "submitted",
            "approval_stage": "直属领导审批",
            "risk_flags": preserved_flags + review_flags,
            "message": (
                f"报销单 {claim.claim_no} 已完成 AI预审"
                f"现已提交给直属领导 {manager_name or '审批人'} 审批。"
            ),
            "passed": True,
        }

    @staticmethod
    def _resolve_claim_manager_name(claim: ExpenseClaim) -> str:
        """Return the submitter's manager name, or ``""`` when it cannot be determined.

        The employee's own manager is preferred; the manager name recorded on
        the employee's organisation unit is the fallback.
        """
        if claim.employee is not None:
            if claim.employee.manager is not None and claim.employee.manager.name:
                return str(claim.employee.manager.name).strip()
            if claim.employee.organization_unit is not None and claim.employee.organization_unit.manager_name:
                return str(claim.employee.organization_unit.manager_name).strip()
        return ""

    def _count_recent_risky_claims(self, claim: ExpenseClaim) -> int:
        """Count the submitter's other recent claims that carry at least one risk flag.

        The submitter is matched by ``employee_id`` when set, otherwise by
        ``employee_name``; with neither, 0 is returned without querying.
        Only claims whose ``occurred_at`` falls within
        ``AI_REVIEW_LOOKBACK_DAYS`` are considered, excluding ``claim`` itself.
        """
        if claim.employee_id:
            owner_filter = ExpenseClaim.employee_id == claim.employee_id
        elif claim.employee_name:
            owner_filter = ExpenseClaim.employee_name == claim.employee_name
        else:
            return 0

        since = datetime.now(UTC) - timedelta(days=AI_REVIEW_LOOKBACK_DAYS)
        # Only the flag column is needed for the count, so select it alone
        # instead of loading full ExpenseClaim entities.
        stmt = (
            select(ExpenseClaim.risk_flags_json)
            .where(owner_filter)
            .where(ExpenseClaim.id != claim.id)
            .where(ExpenseClaim.occurred_at >= since)
        )
        return sum(1 for flags in self.db.scalars(stmt) if list(flags or []))

File diff suppressed because it is too large Load Diff

View File

@@ -2,12 +2,11 @@ from __future__ import annotations
import json
import re
from dataclasses import dataclass, field
from decimal import Decimal
from typing import Any, Literal
from typing import Any
from openpyxl import load_workbook
from pydantic import BaseModel, Field, ValidationError
from pydantic import ValidationError
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -17,558 +16,29 @@ from app.services.agent_asset_spreadsheet import (
COMPANY_TRAVEL_EXPENSE_RULE_CODE,
AgentAssetSpreadsheetManager,
)
EXPENSE_RULE_CODE_BLOCK_PATTERN = re.compile(r"```expense-rule\s*(\{.*?\})\s*```", re.DOTALL)
DOCUMENT_TYPE_LABELS = {
"flight_itinerary": "机票/航班行程单",
"train_ticket": "火车/高铁票",
"hotel_invoice": "酒店住宿票据",
"taxi_receipt": "出租车/网约车票据",
"parking_toll_receipt": "停车/通行费票据",
"meal_receipt": "餐饮票据",
"office_invoice": "办公用品票据",
"meeting_invoice": "会议/会务票据",
"training_invoice": "培训票据",
"vat_invoice": "增值税发票",
"receipt": "一般收据/凭证",
"other": "其他单据",
}
SCENE_LABELS = {
"travel": "差旅",
"hotel": "住宿",
"transport": "交通",
"meal": "餐饮",
"entertainment": "业务招待",
"office": "办公",
"meeting": "会务",
"training": "培训",
"communication": "通讯",
"welfare": "福利",
"other": "其他",
}
DEFAULT_SCENE_RULE_ASSET_CODE = "rule.expense.scene_submission_standard"
DEFAULT_TRAVEL_RULE_ASSET_CODE = "rule.expense.travel_risk_control_standard"
DEFAULT_SCENE_MATRIX_CONFIG: dict[str, Any] = {
"kind": "scene_matrix",
"version": 1,
"scenes": {
"travel": {
"label": "差旅费",
"location_required": True,
"min_attachment_count": 1,
"allowed_scene_codes": ["travel"],
"allowed_document_types": ["flight_itinerary", "train_ticket"],
"attachment_mismatch_severity": "high",
},
"hotel": {
"label": "住宿费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["hotel"],
"allowed_document_types": ["hotel_invoice", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
},
"transport": {
"label": "交通费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["transport"],
"allowed_document_types": ["taxi_receipt", "parking_toll_receipt", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"item_amount_limit": {
"scope": "item_amount",
"warn_amount": "300.00",
"block_amount": "800.00",
"exception_keywords": ["跨城", "夜间", "应急", "无公共交通", "机场", "火车站", "超标说明"],
"metric_label": "单笔交通金额",
},
},
"meal": {
"label": "餐费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["meal"],
"allowed_document_types": ["meal_receipt", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "300.00",
"block_amount": "800.00",
"exception_keywords": ["客户接待", "团队活动", "加班", "展会", "超标说明"],
"metric_label": "餐费合计",
},
},
"entertainment": {
"label": "业务招待费",
"location_required": True,
"min_attachment_count": 1,
"allowed_scene_codes": ["meal"],
"allowed_document_types": ["meal_receipt", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "2000.00",
"block_amount": "5000.00",
"exception_keywords": ["重要客户", "商务宴请", "项目签约", "超标说明"],
"metric_label": "招待费合计",
},
},
"office": {
"label": "办公费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["office"],
"allowed_document_types": ["office_invoice", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "1500.00",
"block_amount": "5000.00",
"exception_keywords": ["批量采购", "固定资产", "部门集中采购", "超标说明"],
"metric_label": "办公费合计",
},
},
"meeting": {
"label": "会务费",
"location_required": True,
"min_attachment_count": 1,
"allowed_scene_codes": ["meeting"],
"allowed_document_types": ["meeting_invoice", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "5000.00",
"block_amount": "30000.00",
"exception_keywords": ["大型会议", "外部场地", "超标说明"],
"metric_label": "会务费合计",
},
},
"training": {
"label": "培训费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["training"],
"allowed_document_types": ["training_invoice", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "3000.00",
"block_amount": "15000.00",
"exception_keywords": ["认证考试", "外部培训", "超标说明"],
"metric_label": "培训费合计",
},
},
"communication": {
"label": "通讯费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["other"],
"allowed_document_types": ["vat_invoice", "receipt"],
"attachment_mismatch_severity": "medium",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "300.00",
"block_amount": "1000.00",
"exception_keywords": ["国际漫游", "专项通信", "超标说明"],
"metric_label": "通讯费合计",
},
},
"welfare": {
"label": "福利费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["other"],
"allowed_document_types": ["vat_invoice", "receipt"],
"attachment_mismatch_severity": "medium",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "1000.00",
"block_amount": "5000.00",
"exception_keywords": ["节日福利", "团队活动", "员工关怀", "超标说明"],
"metric_label": "福利费合计",
},
},
"other": {
"label": "其他费用",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["other"],
"allowed_document_types": ["vat_invoice", "receipt"],
"attachment_mismatch_severity": "medium",
"always_warn": True,
"always_warn_message": "其他费用默认进入人工重点复核,请补充清晰用途说明并由审批人重点确认。",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "1000.00",
"block_amount": "3000.00",
"exception_keywords": ["特殊事项", "临时采购", "超标说明"],
"metric_label": "其他费用合计",
},
},
},
}
DEFAULT_TRAVEL_POLICY_CONFIG: dict[str, Any] = {
"kind": "travel_policy",
"version": 1,
"relevant_expense_types": ["travel", "hotel", "transport"],
"long_distance_document_types": ["flight_itinerary", "train_ticket"],
"route_exception_keywords": [
"中转",
"转机",
"经停",
"改签",
"多地出差",
"多城市",
"多站",
"异地返程",
"异地结束",
"临时变更",
"继续前往",
"第二站",
],
"standard_exception_keywords": [
"超标说明",
"无直达",
"展会高峰",
"会议高峰",
"协议酒店满房",
"客户指定",
"临时改签",
"行程变更",
"红眼航班",
"晚到店",
],
"band_labels": {
"junior": "P1-P3",
"mid": "P4-P5",
"senior": "P6-P7",
"manager": "M1-M2",
"executive": "M3及以上 / D序列",
},
"city_tiers": {
"北京": "tier_1",
"上海": "tier_1",
"广州": "tier_1",
"深圳": "tier_1",
"杭州": "tier_2",
"南京": "tier_2",
"苏州": "tier_2",
"武汉": "tier_2",
"成都": "tier_2",
"重庆": "tier_2",
"西安": "tier_2",
"天津": "tier_2",
"宁波": "tier_2",
"厦门": "tier_2",
"青岛": "tier_2",
"长沙": "tier_2",
"郑州": "tier_2",
"合肥": "tier_2",
"济南": "tier_2",
"沈阳": "tier_2",
"大连": "tier_2",
"福州": "tier_2",
"昆明": "tier_2",
"海口": "tier_2",
"三亚": "tier_2",
"无锡": "tier_2",
"东莞": "tier_2",
"佛山": "tier_2",
},
"hotel_limits": {
"junior": {"tier_1": "450.00", "tier_2": "380.00", "tier_3": "320.00"},
"mid": {"tier_1": "550.00", "tier_2": "480.00", "tier_3": "380.00"},
"senior": {"tier_1": "700.00", "tier_2": "620.00", "tier_3": "520.00"},
"manager": {"tier_1": "900.00", "tier_2": "820.00", "tier_3": "720.00"},
"executive": {"tier_1": "1200.00", "tier_2": "1000.00", "tier_3": "900.00"},
},
"transport_limits": {
"junior": {"flight": 1, "train": 1},
"mid": {"flight": 1, "train": 1},
"senior": {"flight": 2, "train": 2},
"manager": {"flight": 3, "train": 3},
"executive": {"flight": 4, "train": 3},
},
"flight_classes": [
{"keyword": "头等舱", "level": 4},
{"keyword": "公务舱", "level": 3},
{"keyword": "商务舱", "level": 3},
{"keyword": "超级经济舱", "level": 2},
{"keyword": "高端经济舱", "level": 2},
{"keyword": "明珠经济舱", "level": 2},
{"keyword": "经济舱", "level": 1},
],
"train_classes": [
{"keyword": "商务座", "level": 3},
{"keyword": "一等座", "level": 2},
{"keyword": "软卧", "level": 2},
{"keyword": "二等座", "level": 1},
{"keyword": "二等卧", "level": 1},
{"keyword": "硬卧", "level": 1},
],
}
class AmountLimitConfig(BaseModel):
scope: Literal["claim_total", "item_amount"] = "claim_total"
warn_amount: Decimal | None = None
block_amount: Decimal | None = None
exception_keywords: list[str] = Field(default_factory=list)
metric_label: str = "金额"
class ScenePolicyConfig(BaseModel):
label: str
location_required: bool = False
min_attachment_count: int = 1
allowed_scene_codes: list[str] = Field(default_factory=list)
allowed_document_types: list[str] = Field(default_factory=list)
attachment_mismatch_severity: Literal["low", "medium", "high"] = "high"
claim_amount_limit: AmountLimitConfig | None = None
item_amount_limit: AmountLimitConfig | None = None
always_warn: bool = False
always_warn_message: str = ""
class SceneMatrixRuleConfig(BaseModel):
kind: Literal["scene_matrix"]
version: int = 1
scenes: dict[str, ScenePolicyConfig]
class TravelClassConfig(BaseModel):
keyword: str
level: int
class TravelPolicyConfig(BaseModel):
kind: Literal["travel_policy"]
version: int = 1
relevant_expense_types: list[str] = Field(default_factory=list)
long_distance_document_types: list[str] = Field(default_factory=list)
route_exception_keywords: list[str] = Field(default_factory=list)
standard_exception_keywords: list[str] = Field(default_factory=list)
band_labels: dict[str, str] = Field(default_factory=dict)
city_tiers: dict[str, str] = Field(default_factory=dict)
hotel_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict)
hotel_city_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict)
allowance_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict)
standard_rule_code: str = ""
standard_rule_name: str = ""
standard_rule_version: str = ""
transport_limits: dict[str, dict[str, int]] = Field(default_factory=dict)
flight_classes: list[TravelClassConfig] = Field(default_factory=list)
train_classes: list[TravelClassConfig] = Field(default_factory=list)
class ExpenseScenePolicy(ScenePolicyConfig):
expense_type: str
rule_code: str
rule_name: str
rule_version: str
class RuntimeTravelPolicy(TravelPolicyConfig):
rule_code: str
rule_name: str
rule_version: str
@dataclass
class ExpenseRuleCatalog:
scene_policies: dict[str, ExpenseScenePolicy] = field(default_factory=dict)
travel_policy: RuntimeTravelPolicy | None = None
def get_scene_policy(self, expense_type: str | None) -> ExpenseScenePolicy | None:
normalized = str(expense_type or "").strip().lower() or "other"
return self.scene_policies.get(normalized)
def resolve_document_type_label(document_type: str | None) -> str:
normalized = str(document_type or "").strip().lower() or "other"
return DOCUMENT_TYPE_LABELS.get(normalized, normalized or "其他单据")
def build_default_expense_rule_catalog() -> ExpenseRuleCatalog:
catalog = ExpenseRuleCatalog()
scene_matrix = SceneMatrixRuleConfig.model_validate(DEFAULT_SCENE_MATRIX_CONFIG)
for expense_type, config in scene_matrix.scenes.items():
catalog.scene_policies[expense_type] = ExpenseScenePolicy(
expense_type=expense_type,
rule_code=DEFAULT_SCENE_RULE_ASSET_CODE,
rule_name="报销场景提交与附件标准",
rule_version="v1.0.0",
**config.model_dump(),
)
travel_policy = TravelPolicyConfig.model_validate(DEFAULT_TRAVEL_POLICY_CONFIG)
catalog.travel_policy = RuntimeTravelPolicy(
rule_code=DEFAULT_TRAVEL_RULE_ASSET_CODE,
rule_name="差旅报销风险管控制度",
rule_version="v1.1.0",
**travel_policy.model_dump(),
)
return catalog
def build_scene_submission_standard_markdown() -> str:
scene_matrix = SceneMatrixRuleConfig.model_validate(DEFAULT_SCENE_MATRIX_CONFIG)
sections: list[str] = [
"# 报销场景提交与附件标准",
"",
"## 模板信息",
"",
"- 模板类型:系统内置场景矩阵规则",
"- 运行时类型:`scene_matrix`",
"- 适用对象:报销提交与附件校验",
"",
"## 目标",
"",
"统一约束各报销场景的必填字段、附件类型和金额预警口径,在上传附件和提交审核两个时点直接输出可执行风险判断。",
"",
"## 适用范围",
"",
"适用于差旅、住宿、交通、餐费、业务招待、办公、会务、培训、通讯、福利和其他费用场景。",
"",
"## 输入字段",
"",
"- expense_type",
"- attachments",
"- location",
"- amount / item_amount",
"- reason",
"",
"## 判断规则",
"",
]
for index, (expense_type, config) in enumerate(scene_matrix.scenes.items(), start=1):
expected_document_labels = "".join(
resolve_document_type_label(item) for item in config.allowed_document_types
)
expected_scene_labels = "".join(
SCENE_LABELS.get(item, item) for item in config.allowed_scene_codes
)
sections.extend(
[
f"### 规则 {index} {config.label}`{expense_type}`",
"",
f"- 业务地点:{'必填' if config.location_required else '非必填'}",
f"- 最少附件数:{config.min_attachment_count}",
f"- 允许识别场景:{expected_scene_labels or '不限制'}",
f"- 允许附件类型:{expected_document_labels or '不限制'}",
f"- 附件不匹配处理:{config.attachment_mismatch_severity.upper()}",
]
)
if config.claim_amount_limit is not None:
sections.append(
f"- 合计金额阈值:预警 {config.claim_amount_limit.warn_amount or '-'} 元,"
f"拦截 {config.claim_amount_limit.block_amount or '-'}"
)
if config.item_amount_limit is not None:
sections.append(
f"- 单笔金额阈值:预警 {config.item_amount_limit.warn_amount or '-'} 元,"
f"拦截 {config.item_amount_limit.block_amount or '-'}"
)
if config.always_warn and config.always_warn_message:
sections.append(f"- 特殊处理:{config.always_warn_message}")
sections.append("")
sections.extend(
[
"## 输出",
"",
"- 命中高风险时退回待补充。",
"- 命中中风险时继续流转,并提示审批人重点复核。",
"- 命中 always_warn 场景时追加人工重点复核提示。",
"",
"## 来源依据",
"",
"- 公司报销制度中关于场景识别、附件要求、金额阈值和人工复核的统一口径。",
"",
"## 审核约束",
"",
"- 当前规则为系统内置真实运行规则,变更后需重新审核并评估回滚影响。",
"- 规则 JSON 与 Markdown 说明必须保持一致。",
"",
"## 管理员备注",
"",
"如后续制度调整附件类型、金额阈值或人工复核口径,应优先修改运行时 JSON 并同步更新说明。",
"",
"```expense-rule",
json.dumps(DEFAULT_SCENE_MATRIX_CONFIG, ensure_ascii=False, indent=2),
"```",
]
)
return "\n".join(sections)
def build_travel_risk_control_standard_markdown() -> str:
return "\n".join(
[
"# 差旅报销风险管控制度",
"",
"## 模板信息",
"",
"- 模板键:`travel_standard_v1`",
"- 运行时类型:`travel_policy`",
"- 适用对象:差旅、住宿、交通相关报销审核",
"",
"## 目标",
"",
"校验差旅行程闭环、酒店地点一致性、住宿标准、飞机舱位和火车席别是否符合制度,并对例外情况保留人工复核入口。",
"",
"## 适用范围",
"",
"适用于差旅费、住宿费和交通费相关报销单,重点覆盖跨城市出差、改签、中转和超标说明场景。",
"",
"## 输入字段",
"",
"- expense_type",
"- attachments / OCR routes",
"- location",
"- employee_grade",
"- reason",
"",
"## 判断规则",
"",
"- 两段及以上长途交通票据必须首尾衔接。",
"- 最终终点应与申报目的地一致,或返回首段出发城市。",
"- 检测到多城市行程但无说明时,按高风险退回待补充。",
"- 酒店城市必须落在目的地或交通链路停留城市中。",
"- 住宿标准、飞机舱位和火车席别按职级与城市分级执行。",
"- 超标但有说明时记为中风险;超标且无说明时记为高风险。",
"",
"## 输出",
"",
"- 行程异常时输出高风险退回。",
"- 差标超限但有合理说明时输出中风险提醒。",
"- 命中差旅制度规则时,保留 `rule_code` 和 `rule_version` 供审批链追踪。",
"",
"## 来源依据",
"",
"- 公司差旅制度关于行程闭环、酒店地点一致性、职级差标和例外说明的规定。",
"",
"## 审核约束",
"",
"- 当前规则为系统内置真实运行规则,修改前需确认差旅制度版本与灰度回滚方案。",
"- 规则 JSON 与 Markdown 说明必须保持一致。",
"",
"## 管理员备注",
"",
"如制度调整职级带、城市分级或交通等级,应先更新运行时 JSON再同步修改本说明。",
"",
"```expense-rule",
json.dumps(DEFAULT_TRAVEL_POLICY_CONFIG, ensure_ascii=False, indent=2),
"```",
]
)
from app.services.expense_rule_runtime_defaults import (
DEFAULT_SCENE_MATRIX_CONFIG,
DEFAULT_SCENE_RULE_ASSET_CODE,
DEFAULT_TRAVEL_POLICY_CONFIG,
DEFAULT_TRAVEL_RULE_ASSET_CODE,
DOCUMENT_TYPE_LABELS,
EXPENSE_RULE_CODE_BLOCK_PATTERN,
SCENE_LABELS,
)
from app.services.expense_rule_runtime_models import (
AmountLimitConfig,
ExpenseRuleCatalog,
ExpenseScenePolicy,
RuntimeTravelPolicy,
SceneMatrixRuleConfig,
TravelPolicyConfig,
build_default_expense_rule_catalog,
resolve_document_type_label,
)
from app.services.expense_rule_runtime_standards import (
build_scene_submission_standard_markdown,
build_travel_risk_control_standard_markdown,
)
class ExpenseRuleRuntimeService:
def __init__(self, db: Session) -> None:

View File

@@ -0,0 +1,299 @@
from __future__ import annotations
import re
from typing import Any
# Matches a fenced ```expense-rule ... ``` block and captures the brace-delimited
# payload as group 1. re.DOTALL lets the non-greedy body span multiple lines.
EXPENSE_RULE_CODE_BLOCK_PATTERN = re.compile(r"```expense-rule\s*(\{.*?\})\s*```", re.DOTALL)
# Display labels (Chinese) keyed by document-type code. The same codes appear in
# the `allowed_document_types` lists of DEFAULT_SCENE_MATRIX_CONFIG.
DOCUMENT_TYPE_LABELS = {
"flight_itinerary": "机票/航班行程单",
"train_ticket": "火车/高铁票",
"hotel_invoice": "酒店住宿票据",
"taxi_receipt": "出租车/网约车票据",
"parking_toll_receipt": "停车/通行费票据",
"meal_receipt": "餐饮票据",
"office_invoice": "办公用品票据",
"meeting_invoice": "会议/会务票据",
"training_invoice": "培训票据",
"vat_invoice": "增值税发票",
"receipt": "一般收据/凭证",
"other": "其他单据",
}
# Display labels (Chinese) keyed by scene code. The keys match the scene names
# used in DEFAULT_SCENE_MATRIX_CONFIG["scenes"] and its `allowed_scene_codes`.
SCENE_LABELS = {
"travel": "差旅",
"hotel": "住宿",
"transport": "交通",
"meal": "餐饮",
"entertainment": "业务招待",
"office": "办公",
"meeting": "会务",
"training": "培训",
"communication": "通讯",
"welfare": "福利",
"other": "其他",
}
# Rule codes of the two built-in rule sets: the scene submission matrix and the
# travel risk-control policy.
DEFAULT_SCENE_RULE_ASSET_CODE = "rule.expense.scene_submission_standard"
DEFAULT_TRAVEL_RULE_ASSET_CODE = "rule.expense.travel_risk_control_standard"
# Built-in "scene_matrix" rule configuration.
#
# "scenes" maps an expense-type code to its submission requirements:
#   - label / location_required / min_attachment_count
#   - allowed_scene_codes and allowed_document_types for uploaded attachments
#   - attachment_mismatch_severity ("medium" or "high" in these defaults)
#   - optional claim_amount_limit / item_amount_limit with warn and block
#     thresholds, exception keywords and a metric label
#   - optional always_warn / always_warn_message (only "other" sets them)
#
# Amounts are written as strings (e.g. "300.00"), not floats.
# NOTE(review): "travel" and "hotel" define no amount limit here - presumably
# those are governed by DEFAULT_TRAVEL_POLICY_CONFIG; confirm with the rule owner.
DEFAULT_SCENE_MATRIX_CONFIG: dict[str, Any] = {
"kind": "scene_matrix",
"version": 1,
"scenes": {
"travel": {
"label": "差旅费",
"location_required": True,
"min_attachment_count": 1,
"allowed_scene_codes": ["travel"],
"allowed_document_types": ["flight_itinerary", "train_ticket"],
"attachment_mismatch_severity": "high",
},
"hotel": {
"label": "住宿费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["hotel"],
"allowed_document_types": ["hotel_invoice", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
},
"transport": {
"label": "交通费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["transport"],
"allowed_document_types": ["taxi_receipt", "parking_toll_receipt", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"item_amount_limit": {
"scope": "item_amount",
"warn_amount": "300.00",
"block_amount": "800.00",
"exception_keywords": ["跨城", "夜间", "应急", "无公共交通", "机场", "火车站", "超标说明"],
"metric_label": "单笔交通金额",
},
},
"meal": {
"label": "餐费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["meal"],
"allowed_document_types": ["meal_receipt", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "300.00",
"block_amount": "800.00",
"exception_keywords": ["客户接待", "团队活动", "加班", "展会", "超标说明"],
"metric_label": "餐费合计",
},
},
"entertainment": {
"label": "业务招待费",
"location_required": True,
"min_attachment_count": 1,
"allowed_scene_codes": ["meal"],
"allowed_document_types": ["meal_receipt", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "2000.00",
"block_amount": "5000.00",
"exception_keywords": ["重要客户", "商务宴请", "项目签约", "超标说明"],
"metric_label": "招待费合计",
},
},
"office": {
"label": "办公费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["office"],
"allowed_document_types": ["office_invoice", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "1500.00",
"block_amount": "5000.00",
"exception_keywords": ["批量采购", "固定资产", "部门集中采购", "超标说明"],
"metric_label": "办公费合计",
},
},
"meeting": {
"label": "会务费",
"location_required": True,
"min_attachment_count": 1,
"allowed_scene_codes": ["meeting"],
"allowed_document_types": ["meeting_invoice", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "5000.00",
"block_amount": "30000.00",
"exception_keywords": ["大型会议", "外部场地", "超标说明"],
"metric_label": "会务费合计",
},
},
"training": {
"label": "培训费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["training"],
"allowed_document_types": ["training_invoice", "vat_invoice", "receipt"],
"attachment_mismatch_severity": "high",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "3000.00",
"block_amount": "15000.00",
"exception_keywords": ["认证考试", "外部培训", "超标说明"],
"metric_label": "培训费合计",
},
},
"communication": {
"label": "通讯费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["other"],
"allowed_document_types": ["vat_invoice", "receipt"],
"attachment_mismatch_severity": "medium",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "300.00",
"block_amount": "1000.00",
"exception_keywords": ["国际漫游", "专项通信", "超标说明"],
"metric_label": "通讯费合计",
},
},
"welfare": {
"label": "福利费",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["other"],
"allowed_document_types": ["vat_invoice", "receipt"],
"attachment_mismatch_severity": "medium",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "1000.00",
"block_amount": "5000.00",
"exception_keywords": ["节日福利", "团队活动", "员工关怀", "超标说明"],
"metric_label": "福利费合计",
},
},
"other": {
"label": "其他费用",
"location_required": False,
"min_attachment_count": 1,
"allowed_scene_codes": ["other"],
"allowed_document_types": ["vat_invoice", "receipt"],
"attachment_mismatch_severity": "medium",
"always_warn": True,
"always_warn_message": "其他费用默认进入人工重点复核,请补充清晰用途说明并由审批人重点确认。",
"claim_amount_limit": {
"scope": "claim_total",
"warn_amount": "1000.00",
"block_amount": "3000.00",
"exception_keywords": ["特殊事项", "临时采购", "超标说明"],
"metric_label": "其他费用合计",
},
},
},
}
# Built-in "travel_policy" rule configuration.
#
#   - relevant_expense_types / long_distance_document_types: expense types and
#     ticket document types the policy refers to
#   - route_exception_keywords / standard_exception_keywords: keyword lists
#   - band_labels: grade-band key -> display label
#   - city_tiers: city name -> tier key
#   - hotel_limits: grade band -> tier -> amount (written as strings)
#   - transport_limits: grade band -> maximum class level for flight / train
#   - flight_classes / train_classes: class keyword -> level; longer keywords
#     such as "超级经济舱" are listed before their substring "经济舱"
#
# NOTE(review): hotel_limits defines "tier_3" but no city in city_tiers maps to
# it - presumably it is the tier for unlisted cities; confirm in the consumer.
DEFAULT_TRAVEL_POLICY_CONFIG: dict[str, Any] = {
"kind": "travel_policy",
"version": 1,
"relevant_expense_types": ["travel", "hotel", "transport"],
"long_distance_document_types": ["flight_itinerary", "train_ticket"],
"route_exception_keywords": [
"中转",
"转机",
"经停",
"改签",
"多地出差",
"多城市",
"多站",
"异地返程",
"异地结束",
"临时变更",
"继续前往",
"第二站",
],
"standard_exception_keywords": [
"超标说明",
"无直达",
"展会高峰",
"会议高峰",
"协议酒店满房",
"客户指定",
"临时改签",
"行程变更",
"红眼航班",
"晚到店",
],
"band_labels": {
"junior": "P1-P3",
"mid": "P4-P5",
"senior": "P6-P7",
"manager": "M1-M2",
"executive": "M3及以上 / D序列",
},
"city_tiers": {
"北京": "tier_1",
"上海": "tier_1",
"广州": "tier_1",
"深圳": "tier_1",
"杭州": "tier_2",
"南京": "tier_2",
"苏州": "tier_2",
"武汉": "tier_2",
"成都": "tier_2",
"重庆": "tier_2",
"西安": "tier_2",
"天津": "tier_2",
"宁波": "tier_2",
"厦门": "tier_2",
"青岛": "tier_2",
"长沙": "tier_2",
"郑州": "tier_2",
"合肥": "tier_2",
"济南": "tier_2",
"沈阳": "tier_2",
"大连": "tier_2",
"福州": "tier_2",
"昆明": "tier_2",
"海口": "tier_2",
"三亚": "tier_2",
"无锡": "tier_2",
"东莞": "tier_2",
"佛山": "tier_2",
},
"hotel_limits": {
"junior": {"tier_1": "450.00", "tier_2": "380.00", "tier_3": "320.00"},
"mid": {"tier_1": "550.00", "tier_2": "480.00", "tier_3": "380.00"},
"senior": {"tier_1": "700.00", "tier_2": "620.00", "tier_3": "520.00"},
"manager": {"tier_1": "900.00", "tier_2": "820.00", "tier_3": "720.00"},
"executive": {"tier_1": "1200.00", "tier_2": "1000.00", "tier_3": "900.00"},
},
"transport_limits": {
"junior": {"flight": 1, "train": 1},
"mid": {"flight": 1, "train": 1},
"senior": {"flight": 2, "train": 2},
"manager": {"flight": 3, "train": 3},
"executive": {"flight": 4, "train": 3},
},
"flight_classes": [
{"keyword": "头等舱", "level": 4},
{"keyword": "公务舱", "level": 3},
{"keyword": "商务舱", "level": 3},
{"keyword": "超级经济舱", "level": 2},
{"keyword": "高端经济舱", "level": 2},
{"keyword": "明珠经济舱", "level": 2},
{"keyword": "经济舱", "level": 1},
],
"train_classes": [
{"keyword": "商务座", "level": 3},
{"keyword": "一等座", "level": 2},
{"keyword": "软卧", "level": 2},
{"keyword": "二等座", "level": 1},
{"keyword": "二等卧", "level": 1},
{"keyword": "硬卧", "level": 1},
],
}

View File

@@ -0,0 +1,116 @@
from __future__ import annotations
from dataclasses import dataclass, field
from decimal import Decimal
from typing import Literal
from pydantic import BaseModel, Field
from app.services.expense_rule_runtime_defaults import (
DEFAULT_SCENE_MATRIX_CONFIG,
DEFAULT_SCENE_RULE_ASSET_CODE,
DEFAULT_TRAVEL_POLICY_CONFIG,
DEFAULT_TRAVEL_RULE_ASSET_CODE,
DOCUMENT_TYPE_LABELS,
)
class AmountLimitConfig(BaseModel):
    """Warn/block amount thresholds attached to one expense scene."""

    # Either "claim_total" or "item_amount"; defaults to the claim total.
    scope: Literal["claim_total", "item_amount"] = "claim_total"

    # Thresholds are optional; None means the threshold is not configured.
    warn_amount: Decimal | None = None
    block_amount: Decimal | None = None

    # Keyword list carried with the limit (e.g. "超标说明" in the defaults).
    exception_keywords: list[str] = Field(default_factory=list)

    # Display name of the measured amount.
    metric_label: str = "金额"
class ScenePolicyConfig(BaseModel):
    """Submission requirements configured for a single expense scene."""

    # Display name of the scene; the only field without a default.
    label: str

    # Basic submission requirements.
    location_required: bool = False
    min_attachment_count: int = 1

    # Scene codes and document types accepted for attachments, plus the
    # severity assigned when an attachment does not match.
    allowed_scene_codes: list[str] = Field(default_factory=list)
    allowed_document_types: list[str] = Field(default_factory=list)
    attachment_mismatch_severity: Literal["low", "medium", "high"] = "high"

    # Optional amount thresholds at claim level and at item level.
    claim_amount_limit: AmountLimitConfig | None = None
    item_amount_limit: AmountLimitConfig | None = None

    # Unconditional warning switch and its message.
    always_warn: bool = False
    always_warn_message: str = ""
class SceneMatrixRuleConfig(BaseModel):
    """Top-level schema of a ``scene_matrix`` rule document."""

    # Discriminator: only the literal "scene_matrix" validates.
    kind: Literal["scene_matrix"]
    version: int = 1

    # Scene policies keyed by expense-type code.
    scenes: dict[str, ScenePolicyConfig]
class TravelClassConfig(BaseModel):
    """One cabin/seat class entry: a keyword paired with a numeric level."""

    # Text of the class name, e.g. "经济舱" or "二等座" in the defaults.
    keyword: str

    # Numeric level of the class; the defaults give higher classes larger values.
    level: int
class TravelPolicyConfig(BaseModel):
    """Top-level schema of a ``travel_policy`` rule document.

    Every field except ``kind`` has a default, so a document may supply only
    the parts it needs.
    """

    # Discriminator: only the literal "travel_policy" validates.
    kind: Literal["travel_policy"]
    version: int = 1

    # Expense types and long-distance ticket document types the policy names.
    relevant_expense_types: list[str] = Field(default_factory=list)
    long_distance_document_types: list[str] = Field(default_factory=list)

    # Keyword lists for route exceptions and standard (limit) exceptions.
    route_exception_keywords: list[str] = Field(default_factory=list)
    standard_exception_keywords: list[str] = Field(default_factory=list)

    # Grade-band labels and city -> tier mapping.
    band_labels: dict[str, str] = Field(default_factory=dict)
    city_tiers: dict[str, str] = Field(default_factory=dict)

    # Two-level amount tables (outer key -> inner key -> Decimal amount).
    hotel_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict)
    hotel_city_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict)
    allowance_limits: dict[str, dict[str, Decimal]] = Field(default_factory=dict)

    # Identity of the standard the limits come from; empty by default.
    standard_rule_code: str = ""
    standard_rule_name: str = ""
    standard_rule_version: str = ""

    # Grade band -> transport mode -> maximum class level.
    transport_limits: dict[str, dict[str, int]] = Field(default_factory=dict)

    # Keyword -> level tables for flight cabins and train seats.
    flight_classes: list[TravelClassConfig] = Field(default_factory=list)
    train_classes: list[TravelClassConfig] = Field(default_factory=list)
class ExpenseScenePolicy(ScenePolicyConfig):
    """A scene policy bound to its expense type and originating rule."""

    # Expense-type code this policy applies to.
    expense_type: str

    # Code, name and version of the rule the policy was loaded from.
    rule_code: str
    rule_name: str
    rule_version: str
class RuntimeTravelPolicy(TravelPolicyConfig):
    """A travel policy tagged with the rule it was loaded from."""

    # Code, name and version of the originating rule.
    rule_code: str
    rule_name: str
    rule_version: str
@dataclass
class ExpenseRuleCatalog:
    """Container for the scene policies and the travel policy in effect."""

    # Scene policies keyed by lower-case expense-type code.
    scene_policies: dict[str, ExpenseScenePolicy] = field(default_factory=dict)
    # Travel policy, or None when none has been set.
    travel_policy: RuntimeTravelPolicy | None = None

    def get_scene_policy(self, expense_type: str | None) -> ExpenseScenePolicy | None:
        """Look up the policy for *expense_type*.

        The code is stripped and lower-cased first; a missing or blank code
        is looked up as ``"other"``. Returns ``None`` when no policy is
        registered under the resulting key.
        """
        lookup_key = str(expense_type or "").strip().lower()
        if not lookup_key:
            lookup_key = "other"
        return self.scene_policies.get(lookup_key)
def resolve_document_type_label(document_type: str | None) -> str:
    """Return the display label for a document-type code.

    The code is stripped and lower-cased; a missing or blank code is treated
    as ``"other"``. Codes without an entry in ``DOCUMENT_TYPE_LABELS`` are
    returned as the normalized code itself.
    """
    code = str(document_type or "").strip().lower() or "other"
    if code in DOCUMENT_TYPE_LABELS:
        return DOCUMENT_TYPE_LABELS[code]
    return code or "其他单据"
def build_default_expense_rule_catalog() -> ExpenseRuleCatalog:
    """Build the catalog from the built-in default rule configurations.

    ``DEFAULT_SCENE_MATRIX_CONFIG`` is validated and each scene becomes an
    ``ExpenseScenePolicy`` stamped with the default scene rule code, name and
    version. ``DEFAULT_TRAVEL_POLICY_CONFIG`` is validated and wrapped the same
    way as the ``RuntimeTravelPolicy``.
    """
    matrix = SceneMatrixRuleConfig.model_validate(DEFAULT_SCENE_MATRIX_CONFIG)
    scene_policies = {
        scene_code: ExpenseScenePolicy(
            expense_type=scene_code,
            rule_code=DEFAULT_SCENE_RULE_ASSET_CODE,
            rule_name="报销场景提交与附件标准",
            rule_version="v1.0.0",
            **scene_config.model_dump(),
        )
        for scene_code, scene_config in matrix.scenes.items()
    }

    travel_defaults = TravelPolicyConfig.model_validate(DEFAULT_TRAVEL_POLICY_CONFIG)
    runtime_travel_policy = RuntimeTravelPolicy(
        rule_code=DEFAULT_TRAVEL_RULE_ASSET_CODE,
        rule_name="差旅报销风险管控制度",
        rule_version="v1.1.0",
        **travel_defaults.model_dump(),
    )

    return ExpenseRuleCatalog(
        scene_policies=scene_policies,
        travel_policy=runtime_travel_policy,
    )

View File

@@ -0,0 +1,166 @@
from __future__ import annotations
import json
from app.services.expense_rule_runtime_defaults import (
DEFAULT_SCENE_MATRIX_CONFIG,
DEFAULT_TRAVEL_POLICY_CONFIG,
SCENE_LABELS,
)
from app.services.expense_rule_runtime_models import (
SceneMatrixRuleConfig,
resolve_document_type_label,
)
def build_scene_submission_standard_markdown() -> str:
    """Render the built-in scene submission standard as a Markdown document.

    The document consists of a fixed header, one "规则 N" section per scene in
    ``DEFAULT_SCENE_MATRIX_CONFIG`` (in configuration order), a fixed footer,
    and finally the raw configuration as JSON inside an ``expense-rule`` fenced
    block.

    Returns:
        The Markdown text, with lines joined by ``"\\n"``.
    """
    scene_matrix = SceneMatrixRuleConfig.model_validate(DEFAULT_SCENE_MATRIX_CONFIG)
    sections: list[str] = [
        "# 报销场景提交与附件标准",
        "",
        "## 模板信息",
        "",
        "- 模板类型:系统内置场景矩阵规则",
        "- 运行时类型:`scene_matrix`",
        "- 适用对象:报销提交与附件校验",
        "",
        "## 目标",
        "",
        "统一约束各报销场景的必填字段、附件类型和金额预警口径,在上传附件和提交审核两个时点直接输出可执行风险判断。",
        "",
        "## 适用范围",
        "",
        "适用于差旅、住宿、交通、餐费、业务招待、办公、会务、培训、通讯、福利和其他费用场景。",
        "",
        "## 输入字段",
        "",
        "- expense_type",
        "- attachments",
        "- location",
        "- amount / item_amount",
        "- reason",
        "",
        "## 判断规则",
        "",
    ]
    for index, (expense_type, config) in enumerate(scene_matrix.scenes.items(), start=1):
        # Separate the labels with the enumeration comma used elsewhere in this
        # document; joining on "" ran them together into one unreadable token
        # (e.g. "机票/航班行程单火车/高铁票").
        expected_document_labels = "、".join(
            resolve_document_type_label(item) for item in config.allowed_document_types
        )
        expected_scene_labels = "、".join(
            SCENE_LABELS.get(item, item) for item in config.allowed_scene_codes
        )
        sections.extend(
            [
                f"### 规则 {index} {config.label}`{expense_type}`",
                "",
                f"- 业务地点:{'必填' if config.location_required else '非必填'}",
                f"- 最少附件数:{config.min_attachment_count}",
                f"- 允许识别场景:{expected_scene_labels or '不限制'}",
                f"- 允许附件类型:{expected_document_labels or '不限制'}",
                f"- 附件不匹配处理:{config.attachment_mismatch_severity.upper()}",
            ]
        )
        if config.claim_amount_limit is not None:
            sections.append(
                f"- 合计金额阈值:预警 {config.claim_amount_limit.warn_amount or '-'} 元,"
                f"拦截 {config.claim_amount_limit.block_amount or '-'}"
            )
        if config.item_amount_limit is not None:
            sections.append(
                f"- 单笔金额阈值:预警 {config.item_amount_limit.warn_amount or '-'} 元,"
                f"拦截 {config.item_amount_limit.block_amount or '-'}"
            )
        if config.always_warn and config.always_warn_message:
            sections.append(f"- 特殊处理:{config.always_warn_message}")
        # Blank line between scene sections.
        sections.append("")
    sections.extend(
        [
            "## 输出",
            "",
            "- 命中高风险时退回待补充。",
            "- 命中中风险时继续流转,并提示审批人重点复核。",
            "- 命中 always_warn 场景时追加人工重点复核提示。",
            "",
            "## 来源依据",
            "",
            "- 公司报销制度中关于场景识别、附件要求、金额阈值和人工复核的统一口径。",
            "",
            "## 审核约束",
            "",
            "- 当前规则为系统内置真实运行规则,变更后需重新审核并评估回滚影响。",
            "- 规则 JSON 与 Markdown 说明必须保持一致。",
            "",
            "## 管理员备注",
            "",
            "如后续制度调整附件类型、金额阈值或人工复核口径,应优先修改运行时 JSON 并同步更新说明。",
            "",
            # The raw configuration is embedded in an expense-rule fenced block.
            "```expense-rule",
            json.dumps(DEFAULT_SCENE_MATRIX_CONFIG, ensure_ascii=False, indent=2),
            "```",
        ]
    )
    return "\n".join(sections)
def build_travel_risk_control_standard_markdown() -> str:
    """Render the built-in travel risk-control standard as Markdown.

    The prose is fixed; the document ends with ``DEFAULT_TRAVEL_POLICY_CONFIG``
    serialized as JSON inside an ``expense-rule`` fenced block. Lines are
    joined with ``"\\n"``.
    """
    lines: list[str] = [
        "# 差旅报销风险管控制度",
        "",
        "## 模板信息",
        "",
        "- 模板键:`travel_standard_v1`",
        "- 运行时类型:`travel_policy`",
        "- 适用对象:差旅、住宿、交通相关报销审核",
        "",
        "## 目标",
        "",
        "校验差旅行程闭环、酒店地点一致性、住宿标准、飞机舱位和火车席别是否符合制度,并对例外情况保留人工复核入口。",
        "",
        "## 适用范围",
        "",
        "适用于差旅费、住宿费和交通费相关报销单,重点覆盖跨城市出差、改签、中转和超标说明场景。",
        "",
        "## 输入字段",
        "",
        "- expense_type",
        "- attachments / OCR routes",
        "- location",
        "- employee_grade",
        "- reason",
        "",
        "## 判断规则",
        "",
        "- 两段及以上长途交通票据必须首尾衔接。",
        "- 最终终点应与申报目的地一致,或返回首段出发城市。",
        "- 检测到多城市行程但无说明时,按高风险退回待补充。",
        "- 酒店城市必须落在目的地或交通链路停留城市中。",
        "- 住宿标准、飞机舱位和火车席别按职级与城市分级执行。",
        "- 超标但有说明时记为中风险;超标且无说明时记为高风险。",
        "",
        "## 输出",
        "",
        "- 行程异常时输出高风险退回。",
        "- 差标超限但有合理说明时输出中风险提醒。",
        "- 命中差旅制度规则时,保留 `rule_code` 和 `rule_version` 供审批链追踪。",
        "",
        "## 来源依据",
        "",
        "- 公司差旅制度关于行程闭环、酒店地点一致性、职级差标和例外说明的规定。",
        "",
        "## 审核约束",
        "",
        "- 当前规则为系统内置真实运行规则,修改前需确认差旅制度版本与灰度回滚方案。",
        "- 规则 JSON 与 Markdown 说明必须保持一致。",
        "",
        "## 管理员备注",
        "",
        "如制度调整职级带、城市分级或交通等级,应先更新运行时 JSON再同步修改本说明。",
        "",
    ]
    # Append the raw configuration inside an expense-rule fenced block.
    config_json = json.dumps(DEFAULT_TRAVEL_POLICY_CONFIG, ensure_ascii=False, indent=2)
    lines += ["```expense-rule", config_json, "```"]
    return "\n".join(lines)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,66 @@
from __future__ import annotations
# Ordered list of knowledge-base folder names.
# NOTE(review): presumably the folders the knowledge library always exposes;
# confirm against the code that consumes this list.
FIXED_KNOWLEDGE_FOLDERS = [
"财务知识库",
"制度政策",
"报销制度",
"差旅规范",
"发票管理",
"税务合规",
"预算管理",
"财务共享",
"培训资料",
"常见问答",
]
# Icon class string per file-type key. Each value is a space-separated class
# list beginning with "mdi"; every entry except "binary" also ends with a class
# equal to the type key.
# NOTE(review): presumably rendered by the frontend as CSS classes - confirm.
ICON_BY_TYPE = {
"pdf": "mdi mdi-file-document-outline-pdf pdf",
"word": "mdi mdi-file-document-outline-word word",
"excel": "mdi mdi-file-document-outline-excel excel",
"ppt": "mdi mdi-file-powerpoint-box ppt",
"image": "mdi mdi-file-image-outline image",
"text": "mdi mdi-file-document-outline text",
"archive": "mdi mdi-folder-zip-outline archive",
"binary": "mdi mdi-file-outline",
}
# File-extension groups (lower-case, without the leading dot).
# Note that "csv" belongs to both TEXT_EXTENSIONS and EXCEL_EXTENSIONS, so the
# category a csv file ends up in depends on the order in which callers test them.
TEXT_EXTENSIONS = {"txt", "md", "csv", "json", "xml", "yml", "yaml", "log"}
WORD_EXTENSIONS = {"doc", "docx"}
EXCEL_EXTENSIONS = {"xls", "xlsx", "csv"}
PPT_EXTENSIONS = {"ppt", "pptx"}
IMAGE_EXTENSIONS = {"png", "jpg", "jpeg", "gif", "bmp", "webp", "svg"}
ARCHIVE_EXTENSIONS = {"zip", "rar", "7z"}
# Derived groups: OOXML formats plus every text extension; pdf plus every image
# extension; and the three OOXML formats on their own.
STRUCTURED_PREVIEW_EXTENSIONS = {"docx", "xlsx", "pptx"} | TEXT_EXTENSIONS
INLINE_PREVIEW_EXTENSIONS = {"pdf"} | IMAGE_EXTENSIONS
ONLYOFFICE_EDITABLE_EXTENSIONS = {"docx", "xlsx", "pptx"}
# Numeric tunables for knowledge ingestion and search.
# NOTE(review): going by the names, the first is a staleness timeout in seconds
# for an in-progress ingest sync and the second caps the number of search
# results returned - confirm against the call sites.
KNOWLEDGE_INGEST_SYNC_STALE_SECONDS = 90
KNOWLEDGE_SEARCH_RESULT_LIMIT = 3
# Set of common Chinese question and filler words.
# NOTE(review): presumably removed from search queries before matching;
# confirm in the search implementation.
KNOWLEDGE_SEARCH_STOP_TERMS = {
"什么",
"怎么",
"如何",
"多少",
"是否",
"可以",
"一下",
"请问",
"帮我",
"一下子",
"这个",
"那个",
"哪些",
"一下吧",
}
# Integer codes for the ingest status of a knowledge document.
KNOWLEDGE_INGEST_STATUS_PUBLISHED = 1
KNOWLEDGE_INGEST_STATUS_SYNCING = 2
KNOWLEDGE_INGEST_STATUS_INGESTED = 3
KNOWLEDGE_INGEST_STATUS_FAILED = 4
# Status code -> (Chinese display label, tone keyword).
# NOTE(review): the tone values ("muted", "warning", "success", "danger") look
# like UI style names - confirm with the frontend.
KNOWLEDGE_INGEST_STATUS_META = {
KNOWLEDGE_INGEST_STATUS_PUBLISHED: ("待归纳", "muted"),
KNOWLEDGE_INGEST_STATUS_SYNCING: ("正归纳", "warning"),
KNOWLEDGE_INGEST_STATUS_INGESTED: ("已归纳", "success"),
KNOWLEDGE_INGEST_STATUS_FAILED: ("归纳失败", "danger"),
}

View File

@@ -0,0 +1,223 @@
from __future__ import annotations
import re
import shutil
import subprocess
from pathlib import Path
from xml.etree import ElementTree
from zipfile import BadZipFile, ZipFile
from app.services.knowledge_constants import IMAGE_EXTENSIONS, TEXT_EXTENSIONS
from app.services.knowledge_file_utils import extract_extension
def _read_text_preview(file_path: Path) -> str:
encodings = ("utf-8", "utf-8-sig", "gbk")
for encoding in encodings:
try:
return file_path.read_text(encoding=encoding)
except UnicodeDecodeError:
continue
return "当前文本文件编码暂不支持在线解析。"
def _extract_docx_text(file_path: Path) -> str:
try:
with ZipFile(file_path) as archive:
xml_content = archive.read("word/document.xml")
except (BadZipFile, KeyError):
return "当前 Word 文件解析失败。"
root = ElementTree.fromstring(xml_content)
texts = [node.text.strip() for node in root.iter() if node.tag.endswith("}t") and node.text]
return "\n".join(texts)
def _extract_document_text_from_path(
    *,
    file_path: Path,
    original_name: str,
    mime_type: str,
) -> str:
    """Return normalized plain text for a stored document, chosen by extension.

    Text-family files are read directly, .docx files are parsed from the
    archive, PDFs go through ``pdftotext`` with OCR as a fallback when no text
    comes back, and images go straight to OCR. Any other extension yields an
    empty string.

    Args:
        file_path: Location of the stored file.
        original_name: Original file name; only its extension is inspected here.
        mime_type: MIME type forwarded to the OCR helper.

    Returns:
        The extracted text after ``_normalize_extracted_text``, or ``""``.
    """
    extension = extract_extension(original_name)

    def ocr_text() -> str:
        # Shared by the PDF fallback and the image branch.
        recognized = _extract_text_with_ocr(
            file_path=file_path,
            original_name=original_name,
            mime_type=mime_type,
        )
        return _normalize_extracted_text(recognized)

    if extension in TEXT_EXTENSIONS:
        return _normalize_extracted_text(_read_text_preview(file_path))
    if extension == "docx":
        return _normalize_extracted_text(_extract_docx_text(file_path))
    if extension == "pdf":
        embedded_text = _normalize_extracted_text(_extract_pdf_text(file_path))
        # A PDF with no extractable text layer falls back to OCR.
        return embedded_text if embedded_text else ocr_text()
    if extension in IMAGE_EXTENSIONS:
        return ocr_text()
    return ""
def _normalize_extracted_text(text: str) -> str:
normalized = str(text or "").replace("\r\n", "\n").replace("\r", "\n")
normalized = re.sub(r"\n{3,}", "\n\n", normalized)
return normalized.strip()
def _extract_pdf_text(file_path: Path) -> str:
pdftotext_bin = shutil.which("pdftotext")
if not pdftotext_bin:
return ""
completed = subprocess.run(
[pdftotext_bin, "-layout", str(file_path), "-"],
capture_output=True,
text=True,
timeout=40,
check=False,
)
if completed.returncode != 0:
return ""
return str(completed.stdout or "")
def _extract_text_with_ocr(
*,
file_path: Path,
original_name: str,
mime_type: str,
) -> str:
try:
from app.services.ocr import OcrService
result = OcrService().recognize_files(
[(original_name, file_path.read_bytes(), mime_type)]
)
except Exception:
return ""
parts: list[str] = []
for document in result.documents:
text = str(getattr(document, "text", "") or "").strip()
summary = str(getattr(document, "summary", "") or "").strip()
if text:
parts.append(text)
elif summary:
parts.append(summary)
return "\n\n".join(part for part in parts if part)
def _extract_xlsx_sheets(file_path: Path) -> list[tuple[str, list[list[str]]]]:
try:
with ZipFile(file_path) as archive:
shared_strings: list[str] = []
if "xl/sharedStrings.xml" in archive.namelist():
shared_root = ElementTree.fromstring(archive.read("xl/sharedStrings.xml"))
shared_strings = [
"".join(node.itertext()).strip()
for node in shared_root.iter()
if node.tag.endswith("}si")
]
sheet_files = sorted(
name
for name in archive.namelist()
if re.fullmatch(r"xl/worksheets/sheet\d+\.xml", name)
)
if not sheet_files:
return []
relationship_targets: dict[str, str] = {}
if "xl/_rels/workbook.xml.rels" in archive.namelist():
rel_root = ElementTree.fromstring(archive.read("xl/_rels/workbook.xml.rels"))
for node in rel_root.iter():
if not node.tag.endswith("Relationship"):
continue
rel_id = node.attrib.get("Id")
target = node.attrib.get("Target")
if not rel_id or not target:
continue
normalized = target.lstrip("/")
if not normalized.startswith("xl/"):
normalized = f"xl/{normalized.lstrip('./')}"
relationship_targets[rel_id] = normalized
ordered_sheets: list[tuple[str, str]] = []
if "xl/workbook.xml" in archive.namelist():
workbook_root = ElementTree.fromstring(archive.read("xl/workbook.xml"))
for index, node in enumerate(workbook_root.iter()):
if not node.tag.endswith("sheet"):
continue
sheet_name = node.attrib.get("name") or f"Sheet {index + 1}"
relationship_id = next(
(value for key, value in node.attrib.items() if key.endswith("}id")),
None,
)
target = relationship_targets.get(relationship_id or "")
if target:
ordered_sheets.append((sheet_name, target))
if not ordered_sheets:
ordered_sheets = [
(f"Sheet {index + 1}", sheet_file)
for index, sheet_file in enumerate(sheet_files)
]
preview_sheets: list[tuple[str, list[list[str]]]] = []
for sheet_name, target in ordered_sheets:
if target not in archive.namelist():
continue
sheet_root = ElementTree.fromstring(archive.read(target))
rows: list[list[str]] = []
for row in sheet_root.iter():
if not row.tag.endswith("}row"):
continue
row_values: list[str] = []
for cell in row:
if not cell.tag.endswith("}c"):
continue
cell_type = cell.attrib.get("t")
value_node = next((item for item in cell if item.tag.endswith("}v")), None)
if cell_type == "inlineStr":
text_node = next((item for item in cell.iter() if item.tag.endswith("}t")), None)
row_values.append((text_node.text or "").strip() if text_node is not None else "")
continue
if value_node is None or value_node.text is None:
row_values.append("")
continue
raw_value = value_node.text.strip()
if cell_type == "s" and raw_value.isdigit():
index = int(raw_value)
row_values.append(shared_strings[index] if index < len(shared_strings) else raw_value)
else:
row_values.append(raw_value)
if row_values:
rows.append(row_values)
preview_sheets.append((sheet_name, rows))
return preview_sheets
except (BadZipFile, ElementTree.ParseError, KeyError, ValueError):
return []
def _extract_pptx_slides(file_path: Path) -> list[list[str]]:
try:
with ZipFile(file_path) as archive:
slide_names = sorted(
name
for name in archive.namelist()
if re.fullmatch(r"ppt/slides/slide\d+\.xml", name)
)
slides: list[list[str]] = []
for slide_name in slide_names:
root = ElementTree.fromstring(archive.read(slide_name))
texts = [node.text.strip() for node in root.iter() if node.tag.endswith("}t") and node.text]
slides.append(texts)
return slides
except (BadZipFile, ElementTree.ParseError, KeyError):
return []

View File

@@ -0,0 +1,112 @@
from __future__ import annotations
from datetime import UTC, datetime
from pathlib import Path
from uuid import uuid4
from app.services.knowledge_constants import (
ARCHIVE_EXTENSIONS,
EXCEL_EXTENSIONS,
FIXED_KNOWLEDGE_FOLDERS,
IMAGE_EXTENSIONS,
INLINE_PREVIEW_EXTENSIONS,
PPT_EXTENSIONS,
STRUCTURED_PREVIEW_EXTENSIONS,
TEXT_EXTENSIONS,
WORD_EXTENSIONS,
)
def normalize_filename(filename: str) -> str:
    """Reduce an uploaded file name to a safe base name.

    Args:
        filename: Raw file name, possibly with directory components.

    Returns:
        The trimmed final path component with any remaining ``/`` or ``\\``
        replaced by ``_``.

    Raises:
        ValueError: If nothing is left after trimming.
    """
    base_name = Path(str(filename or "").strip()).name.strip()
    for separator in ("/", "\\"):
        base_name = base_name.replace(separator, "_")
    if base_name:
        return base_name
    raise ValueError("文件名不能为空。")
def normalize_folder(folder: str) -> str:
    """Validate that a folder name is one of the preset knowledge folders.

    Args:
        folder: Folder name supplied by the caller; falsy values count as empty.

    Returns:
        The trimmed folder name.

    Raises:
        ValueError: If the trimmed name is not in ``FIXED_KNOWLEDGE_FOLDERS``.
    """
    candidate = str(folder or "").strip()
    if candidate in FIXED_KNOWLEDGE_FOLDERS:
        return candidate
    raise ValueError("只能上传到预设知识库文件夹。")
def extract_extension(filename: str) -> str:
    """Return the lower-case extension of a file name without the leading dot.

    Args:
        filename: File name or path.

    Returns:
        The final suffix in lower case, or ``""`` when there is none.
    """
    raw_suffix = Path(filename).suffix
    return raw_suffix.lstrip(".").lower()
def _build_onlyoffice_document_key(entry: dict[str, Any]) -> str:
version = int(entry.get("version_number", 1))
checksum = str(entry.get("sha256") or "")[:12]
return f"{entry['id']}-v{version}-{checksum or 'nochecksum'}"
def _build_onlyoffice_access_token(self, document_id: str) -> str:
    """Sign an HS256 token scoped to one document's ONLYOFFICE content.

    Args:
        self: Unused. NOTE(review): this looks like a leftover from when the
            function was a method; kept so the signature stays unchanged.
        document_id: Identifier embedded in the token payload.

    Returns:
        The encoded JWT, signed with the configured ONLYOFFICE secret.
    """
    # This module does not import `jwt` or `resolve_onlyoffice_settings` at the
    # top level, so they are imported here; without this the function raised
    # NameError on every call.
    import jwt

    from app.services.settings import resolve_onlyoffice_settings

    onlyoffice_settings = resolve_onlyoffice_settings()
    payload = {
        "scope": "onlyoffice-content",
        "document_id": document_id,
    }
    return jwt.encode(payload, onlyoffice_settings.jwt_secret, algorithm="HS256")
def _resolve_onlyoffice_document_type(extension: str) -> str:
    """Map a file extension to the ONLYOFFICE document type name.

    Args:
        extension: Lower-case extension without the leading dot.

    Returns:
        ``"word"``, ``"cell"``, or ``"slide"``.

    Raises:
        ValueError: If the extension belongs to none of the three families.
    """
    # Checked in this order: word, then spreadsheet, then presentation.
    families = (
        (WORD_EXTENSIONS, "word"),
        (EXCEL_EXTENSIONS, "cell"),
        (PPT_EXTENSIONS, "slide"),
    )
    for family, document_type in families:
        if extension in family:
            return document_type
    raise ValueError("当前文件格式不支持 ONLYOFFICE 预览。")
def parse_stored_name(stored_name: str) -> tuple[str, str]:
    """Split a stored file name of the form ``<document id>__<original name>``.

    Args:
        stored_name: File name as kept on disk.

    Returns:
        ``(document_id, original_name)``. A fresh random hex id is generated
        when the separator or the id part is missing, and ``stored_name``
        itself is used when the name part is empty.
    """
    document_id, separator, original_name = stored_name.partition("__")
    if not separator:
        return uuid4().hex, stored_name
    return document_id or uuid4().hex, original_name or stored_name
def format_time(value: str | None) -> str:
if not value:
return ""
try:
parsed = datetime.fromisoformat(value)
except ValueError:
return value
return parsed.astimezone(UTC).strftime("%Y-%m-%d %H:%M")
def format_size(size_bytes: int) -> str:
    """Render a byte count as B, KB, or MB.

    Args:
        size_bytes: Size in bytes.

    Returns:
        Whole bytes below 1024, otherwise the value in KB or MB with one
        decimal place (MB from 1024 * 1024 bytes upwards).
    """
    kib = 1024
    mib = kib * kib
    if size_bytes >= mib:
        return f"{size_bytes / (1024 * 1024):.1f} MB"
    if size_bytes >= kib:
        return f"{size_bytes / 1024:.1f} KB"
    return f"{size_bytes} B"
def resolve_file_type(extension: str) -> str:
    """Classify a file extension into a coarse file-type key.

    Args:
        extension: Lower-case extension without the leading dot.

    Returns:
        One of ``"pdf"``, ``"word"``, ``"excel"``, ``"ppt"``, ``"image"``,
        ``"text"``, ``"archive"``, or ``"binary"`` when nothing matches.
    """
    if extension == "pdf":
        return "pdf"
    # Order matters: an extension listed in more than one family (such as
    # "csv") is reported as the first family that contains it.
    ordered_families = (
        (WORD_EXTENSIONS, "word"),
        (EXCEL_EXTENSIONS, "excel"),
        (PPT_EXTENSIONS, "ppt"),
        (IMAGE_EXTENSIONS, "image"),
        (TEXT_EXTENSIONS, "text"),
        (ARCHIVE_EXTENSIONS, "archive"),
    )
    for family, file_type in ordered_families:
        if extension in family:
            return file_type
    return "binary"
def resolve_file_type_label(file_type: str) -> str:
    """Return the display label for a file-type key.

    Args:
        file_type: Key such as the ones returned by ``resolve_file_type``.

    Returns:
        The label for the key, or the generic file-preview label for
        ``"binary"`` and any unknown key.
    """
    generic_label = "文件预览"
    specific_labels = {
        "pdf": "PDF 预览",
        "word": "Word 预览",
        "excel": "Excel 预览",
        "ppt": "PPT 预览",
        "image": "图片预览",
        "text": "文本预览",
        "archive": "压缩包",
    }
    # "binary" carries the generic label, so it needs no entry of its own.
    return specific_labels.get(file_type, generic_label)
def can_preview(extension: str) -> bool:
    """Tell whether a file extension has an in-app preview.

    Args:
        extension: Lower-case extension without the leading dot.

    Returns:
        True when the extension is in the inline or the structured preview set.
    """
    preview_sets = (INLINE_PREVIEW_EXTENSIONS, STRUCTURED_PREVIEW_EXTENSIONS)
    return any(extension in preview_set for preview_set in preview_sets)

View File

@@ -0,0 +1,69 @@
from __future__ import annotations
from datetime import UTC, datetime
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.core.agent_enums import AgentRunStatus
from app.models.agent_run import AgentRun
from app.services.knowledge_constants import (
KNOWLEDGE_INGEST_STATUS_META,
KNOWLEDGE_INGEST_STATUS_PUBLISHED,
KNOWLEDGE_INGEST_SYNC_STALE_SECONDS,
)
def normalize_ingest_status_code(value: Any) -> int:
    """Coerce a stored ingest status into a known status code.

    Args:
        value: Raw status value of any type.

    Returns:
        The integer code when it is a key of ``KNOWLEDGE_INGEST_STATUS_META``;
        otherwise ``KNOWLEDGE_INGEST_STATUS_PUBLISHED``, including when the
        value cannot be converted to an integer.
    """
    try:
        candidate = int(value)
    except (TypeError, ValueError):
        return KNOWLEDGE_INGEST_STATUS_PUBLISHED
    if candidate in KNOWLEDGE_INGEST_STATUS_META:
        return candidate
    return KNOWLEDGE_INGEST_STATUS_PUBLISHED
def is_syncing_status_stale(entry: dict[str, Any]) -> bool:
raw_value = str(entry.get("ingest_status_updated_at") or "").strip()
if not raw_value:
return True
try:
updated_at = datetime.fromisoformat(raw_value)
except ValueError:
return True
if updated_at.tzinfo is None:
updated_at = updated_at.replace(tzinfo=UTC)
age_seconds = (datetime.now(UTC) - updated_at.astimezone(UTC)).total_seconds()
return age_seconds >= KNOWLEDGE_INGEST_SYNC_STALE_SECONDS
def should_preserve_syncing_status(entry: dict[str, Any], *, db: Session | None) -> bool:
    """Decide whether an entry should keep its "syncing" ingest status.

    When the entry links to an agent run that can be loaded, the run decides:
    a run that is not RUNNING never preserves the status, and a RUNNING run is
    judged by its ``heartbeat_at`` from ``route_json`` when one is present.
    In every other case the entry's own status timestamp is used.

    Args:
        entry: Document metadata; ``ingest_agent_run_id`` links to the run.
        db: Database session, or None to skip the run lookup.

    Returns:
        True when the relevant timestamp is not stale.
    """
    agent_run_id = str(entry.get("ingest_agent_run_id") or "").strip()
    run = None
    if agent_run_id and db is not None:
        run = db.scalar(select(AgentRun).where(AgentRun.run_id == agent_run_id))

    # The entry's own timestamp is judged unless a live heartbeat replaces it.
    probe = entry
    if run is not None:
        if run.status != AgentRunStatus.RUNNING.value:
            return False
        heartbeat_at = str((run.route_json or {}).get("heartbeat_at") or "").strip()
        if heartbeat_at:
            probe = {"ingest_status_updated_at": heartbeat_at}
    return not is_syncing_status_stale(probe)
def resolve_linked_ingest_run_status(entry: dict[str, Any], *, db: Session | None) -> str:
    """Look up the status of the agent run linked to an entry.

    Args:
        entry: Document metadata; ``ingest_agent_run_id`` links to the run.
        db: Database session, or None when no lookup is possible.

    Returns:
        The run's trimmed status string, or ``""`` when the entry has no run
        id, no session is given, or the run does not exist.
    """
    agent_run_id = str(entry.get("ingest_agent_run_id") or "").strip()
    if db is None or not agent_run_id:
        return ""
    linked_run = db.scalar(select(AgentRun).where(AgentRun.run_id == agent_run_id))
    return "" if linked_run is None else str(linked_run.status or "").strip()

View File

@@ -0,0 +1,166 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any
import jwt
from app.api.deps import CurrentUserContext
from app.core.config import get_settings
from app.core.logging import get_logger
from app.schemas.knowledge import KnowledgeOnlyOfficeConfigRead
from app.services.knowledge_constants import (
EXCEL_EXTENSIONS,
ONLYOFFICE_EDITABLE_EXTENSIONS,
PPT_EXTENSIONS,
WORD_EXTENSIONS,
)
from app.services.knowledge_file_utils import extract_extension
from app.services.settings import resolve_onlyoffice_settings
logger = get_logger("app.services.knowledge")
@dataclass(slots=True)
class OnlyOfficeCallbackPayload:
    """Fields taken from an ONLYOFFICE callback body by parse_onlyoffice_callback."""

    # Integer value of the payload's "status" field (0 when missing or falsy).
    status: int
    # Trimmed value of the payload's "url" field ("" when missing).
    download_url: str
    # Non-empty, trimmed entries of the payload's "users" list.
    users: list[str]
def parse_onlyoffice_callback(payload: dict[str, Any]) -> OnlyOfficeCallbackPayload:
    """Pick the status, download URL, and user list out of a callback body.

    Args:
        payload: Decoded callback JSON.

    Returns:
        An ``OnlyOfficeCallbackPayload`` with ``status`` defaulting to 0,
        ``download_url`` defaulting to ``""``, and blank user entries dropped.
    """
    users: list[str] = []
    for item in payload.get("users") or []:
        user = str(item).strip()
        if user:
            users.append(user)
    return OnlyOfficeCallbackPayload(
        status=int(payload.get("status") or 0),
        download_url=str(payload.get("url") or "").strip(),
        users=users,
    )
def build_onlyoffice_document_key(entry: dict[str, Any]) -> str:
    """Build a document key from the entry id, version, and checksum prefix.

    Args:
        entry: Document metadata. ``id`` is required; ``version_number``
            defaults to 1 and ``sha256`` may be missing or empty.

    Returns:
        ``"<id>-v<version>-<first 12 checksum chars>"``, with ``nochecksum``
        standing in for an empty checksum.
    """
    version = int(entry.get("version_number", 1))
    checksum_prefix = str(entry.get("sha256") or "")[:12]
    suffix = checksum_prefix if checksum_prefix else "nochecksum"
    return f"{entry['id']}-v{version}-{suffix}"
def build_onlyoffice_access_token(document_id: str) -> str:
    """Sign an HS256 token scoped to one document's ONLYOFFICE content.

    Args:
        document_id: Identifier embedded in the token payload.

    Returns:
        The encoded JWT, signed with the configured ONLYOFFICE secret.
    """
    signing_secret = resolve_onlyoffice_settings().jwt_secret
    claims = {
        "scope": "onlyoffice-content",
        "document_id": document_id,
    }
    return jwt.encode(claims, signing_secret, algorithm="HS256")
def build_onlyoffice_config(
    *,
    document_id: str,
    entry: dict[str, Any],
    current_user: CurrentUserContext,
) -> KnowledgeOnlyOfficeConfigRead:
    """Assemble the signed, view-only ONLYOFFICE editor configuration.

    Args:
        document_id: Identifier used in the content and callback URLs.
        entry: Document metadata; ``original_name`` supplies title and type.
        current_user: User shown to the editor (``username`` and ``name``).

    Returns:
        The public document-server URL together with the editor config, whose
        ``token`` field is the HS256 signature of the config itself.

    Raises:
        ValueError: If ONLYOFFICE is disabled, its URLs or JWT secret are not
            configured, or the file extension is not supported.
    """
    settings = get_settings()
    onlyoffice_settings = resolve_onlyoffice_settings()

    def warn(log_format: str) -> None:
        # All three configuration warnings log the same runtime snapshot; the
        # secret itself is never logged, only whether one is set.
        logger.warning(
            log_format,
            document_id,
            onlyoffice_settings.enabled,
            onlyoffice_settings.public_url,
            onlyoffice_settings.backend_url,
            bool(onlyoffice_settings.jwt_secret),
        )

    if not onlyoffice_settings.enabled:
        warn("ONLYOFFICE disabled in runtime config doc=%s enabled=%s public_url=%s backend_url=%s jwt_set=%s")
        raise ValueError("ONLYOFFICE 预览未启用。")
    if not onlyoffice_settings.public_url or not onlyoffice_settings.backend_url:
        warn("ONLYOFFICE config incomplete doc=%s enabled=%s public_url=%s backend_url=%s jwt_set=%s")
        raise ValueError("ONLYOFFICE 地址配置不完整。")
    if not onlyoffice_settings.jwt_secret:
        warn("ONLYOFFICE JWT missing doc=%s enabled=%s public_url=%s backend_url=%s jwt_set=%s")
        raise ValueError("ONLYOFFICE JWT 密钥未配置。")

    extension = extract_extension(entry["original_name"])
    if extension not in ONLYOFFICE_EDITABLE_EXTENSIONS:
        raise ValueError("当前文件格式不支持 ONLYOFFICE 预览。")

    backend_base_url = onlyoffice_settings.backend_url.rstrip("/")
    public_url = onlyoffice_settings.public_url.rstrip("/")
    access_token = build_onlyoffice_access_token(document_id)
    # Both endpoints live under the same per-document ONLYOFFICE route.
    onlyoffice_route = (
        f"{backend_base_url}{settings.api_v1_prefix}/knowledge/documents/{document_id}/onlyoffice"
    )
    document_url = f"{onlyoffice_route}/content?access_token={access_token}"
    callback_url = f"{onlyoffice_route}/callback"

    document_section: dict[str, Any] = {
        "fileType": extension,
        "key": build_onlyoffice_document_key(entry),
        "title": entry["original_name"],
        "url": document_url,
        "permissions": {
            "download": True,
            "edit": False,
            "print": True,
            "copy": True,
        },
    }
    editor_section: dict[str, Any] = {
        "mode": "view",
        "lang": "zh-CN",
        "callbackUrl": callback_url,
        "user": {
            "id": current_user.username,
            "name": current_user.name,
        },
        "customization": {
            "compactHeader": True,
            "compactToolbar": True,
            "toolbarNoTabs": False,
            "autosave": False,
            "forcesave": False,
        },
    }
    # Key order is kept as-is because the whole mapping is signed below.
    config: dict[str, Any] = {
        "documentType": resolve_onlyoffice_document_type(extension),
        "document": document_section,
        "editorConfig": editor_section,
        "width": "100%",
        "height": "100%",
    }
    config["token"] = jwt.encode(config, onlyoffice_settings.jwt_secret, algorithm="HS256")
    return KnowledgeOnlyOfficeConfigRead(documentServerUrl=public_url, config=config)
def validate_onlyoffice_access_token(document_id: str, access_token: str) -> None:
    """Verify that a content access token is valid for the given document.

    Args:
        document_id: Document the token must be bound to.
        access_token: HS256 token to check against the ONLYOFFICE secret.

    Raises:
        ValueError: If the token cannot be decoded, or its ``scope`` or
            ``document_id`` claim does not match.
    """
    signing_secret = resolve_onlyoffice_settings().jwt_secret
    try:
        claims = jwt.decode(
            access_token,
            signing_secret,
            algorithms=["HS256"],
        )
    except jwt.PyJWTError as exc:
        raise ValueError("ONLYOFFICE 文件访问令牌无效。") from exc
    scope_matches = claims.get("scope") == "onlyoffice-content"
    document_matches = claims.get("document_id") == document_id
    if not (scope_matches and document_matches):
        raise ValueError("ONLYOFFICE 文件访问令牌无效。")
def resolve_onlyoffice_document_type(extension: str) -> str:
    """Map a file extension to the ONLYOFFICE document type name.

    Args:
        extension: Lower-case extension without the leading dot.

    Returns:
        ``"word"``, ``"cell"``, or ``"slide"``.

    Raises:
        ValueError: If the extension belongs to none of the three families.
    """
    # Checked in this order: word, then spreadsheet, then presentation.
    families = (
        (WORD_EXTENSIONS, "word"),
        (EXCEL_EXTENSIONS, "cell"),
        (PPT_EXTENSIONS, "slide"),
    )
    for family, document_type in families:
        if extension in family:
            return document_type
    raise ValueError("当前文件格式不支持 ONLYOFFICE 预览。")

View File

@@ -0,0 +1,157 @@
from __future__ import annotations
from typing import Any
from app.schemas.knowledge import (
KnowledgePreviewBlockRead,
KnowledgePreviewPageRead,
KnowledgePreviewStatRead,
)
from app.services.knowledge_constants import IMAGE_EXTENSIONS, TEXT_EXTENSIONS
from app.services.knowledge_document_extractors import (
_extract_docx_text,
_extract_pptx_slides,
_extract_xlsx_sheets,
_read_text_preview,
)
from app.services.knowledge_file_utils import extract_extension, format_size
def build_preview(
    entry: dict[str, Any],
    *,
    resolve_document_path,
) -> tuple[str, list[KnowledgePreviewPageRead]]:
    """Choose a preview mode for a document and build its preview pages.

    Args:
        entry: Document metadata; ``original_name`` decides the mode and
            ``size_bytes`` is shown on the fallback page.
        resolve_document_path: Callable returning the stored file path for
            ``entry``.

    Returns:
        ``(mode, pages)``. ``"pdf"`` and ``"image"`` carry no pages,
        ``"text"`` carries one extracted-text page, ``"table"`` and
        ``"slides"`` carry one page per sheet or slide, and ``"unsupported"``
        carries a single explanatory page.
    """
    extension = extract_extension(entry["original_name"])
    file_path = resolve_document_path(entry)

    if extension == "pdf":
        return "pdf", []
    if extension in IMAGE_EXTENSIONS:
        return "image", []
    if extension in TEXT_EXTENSIONS or extension == "docx":
        # Plain text is read directly; .docx text comes from the archive.
        read_text = _read_text_preview if extension in TEXT_EXTENSIONS else _extract_docx_text
        return "text", [_build_text_preview_page(entry, read_text(file_path))]
    if extension == "xlsx":
        return "table", _build_xlsx_preview_pages(entry, file_path)
    if extension == "pptx":
        return "slides", _build_pptx_preview_pages(entry, file_path)

    unsupported_page = KnowledgePreviewPageRead(
        title=entry["original_name"],
        subtitle="当前格式暂不支持在线解析预览。",
        stats=[
            KnowledgePreviewStatRead(label="文件格式", value=extension.upper() or "FILE"),
            KnowledgePreviewStatRead(label="文件大小", value=format_size(entry["size_bytes"])),
            KnowledgePreviewStatRead(label="建议操作", value="下载后查看"),
        ],
        blocks=[
            KnowledgePreviewBlockRead(
                heading="预览说明",
                lines=[
                    "当前系统已支持该文件的上传、下载和权限控制。",
                    "如需在线预览,可后续接入专门的文档转换服务。",
                ],
            )
        ],
    )
    return "unsupported", [unsupported_page]
def _build_text_preview_page(
    entry: dict[str, Any], text: str
) -> KnowledgePreviewPageRead:
    """Build a single preview page from extracted text.

    Args:
        entry: Document metadata; reads ``original_name``, ``extension``, and
            ``size_bytes``.
        text: Extracted document text.

    Returns:
        A page showing at most the first 24 non-blank lines in blocks of 8.
        The line-count stat reports all non-blank lines, not only those shown.
    """
    lines_per_block = 8
    max_visible_lines = 24

    non_blank_lines = [stripped for stripped in (line.strip() for line in text.splitlines()) if stripped]
    if not non_blank_lines:
        non_blank_lines = ["文件内容为空,或当前文档未提取到可展示文本。"]

    visible_lines = non_blank_lines[:max_visible_lines]
    blocks = []
    for block_number, start in enumerate(range(0, len(visible_lines), lines_per_block), start=1):
        blocks.append(
            KnowledgePreviewBlockRead(
                heading=f"内容片段 {block_number}",
                lines=visible_lines[start : start + lines_per_block],
            )
        )

    stats = [
        KnowledgePreviewStatRead(label="文件格式", value=entry["extension"].upper() or "TEXT"),
        KnowledgePreviewStatRead(label="可见行数", value=str(len(non_blank_lines))),
        KnowledgePreviewStatRead(label="文件大小", value=format_size(entry["size_bytes"])),
    ]
    return KnowledgePreviewPageRead(
        title=entry["original_name"],
        subtitle="文本提取预览",
        stats=stats,
        blocks=blocks,
    )
def _build_xlsx_preview_pages(
    entry: dict[str, Any], file_path
) -> list[KnowledgePreviewPageRead]:
    """Build one preview page per worksheet of an .xlsx file.

    Args:
        entry: Document metadata; ``size_bytes`` is shown in the page stats.
        file_path: Location of the .xlsx archive.

    Returns:
        Pages for at most the first 8 sheets, each showing up to 12 rows with
        cells joined by `` | ``. A single placeholder sheet is shown when
        nothing could be extracted.
    """
    # Call the imported module-level extractor. The previous
    # `self._extract_xlsx_sheets` raised NameError because this is a plain
    # function with no `self`.
    sheets = _extract_xlsx_sheets(file_path)
    if not sheets:
        sheets = [("Sheet 1", [["未提取到表格内容。"]])]

    preview_pages: list[KnowledgePreviewPageRead] = []
    sheet_count = len(sheets)
    for sheet_name, rows in sheets[:8]:
        visible_rows = rows[:12] if rows else [["未提取到表格内容。"]]
        blocks = [
            KnowledgePreviewBlockRead(
                heading=f"{index + 1}",
                lines=[" | ".join((cell or "") for cell in row)],
            )
            for index, row in enumerate(visible_rows)
        ]
        preview_pages.append(
            KnowledgePreviewPageRead(
                title=sheet_name,
                subtitle="表格内容预览",
                stats=[
                    KnowledgePreviewStatRead(label="工作表数量", value=str(sheet_count)),
                    KnowledgePreviewStatRead(label="预览行数", value=str(len(visible_rows))),
                    KnowledgePreviewStatRead(label="文件大小", value=format_size(entry["size_bytes"])),
                ],
                blocks=blocks,
            )
        )
    return preview_pages
def _build_pptx_preview_pages(
    entry: dict[str, Any], file_path
) -> list[KnowledgePreviewPageRead]:
    """Build one preview page per slide of a .pptx file.

    Args:
        entry: Document metadata; ``original_name`` is used as the page title.
        file_path: Location of the .pptx archive.

    Returns:
        Pages for at most the first 8 slides, each listing the slide's text
        runs. A single placeholder slide is shown when nothing could be
        extracted.
    """
    # Call the imported module-level extractor. The previous
    # `self._extract_pptx_slides` raised NameError because this is a plain
    # function with no `self`.
    slides = _extract_pptx_slides(file_path)
    if not slides:
        slides = [["未提取到幻灯片文本。"]]

    pages: list[KnowledgePreviewPageRead] = []
    for index, slide_lines in enumerate(slides[:8]):
        pages.append(
            KnowledgePreviewPageRead(
                title=entry["original_name"],
                subtitle=f"幻灯片 {index + 1}",
                stats=[
                    KnowledgePreviewStatRead(label="页码", value=str(index + 1)),
                    KnowledgePreviewStatRead(label="文本条数", value=str(len(slide_lines))),
                    KnowledgePreviewStatRead(label="文件格式", value="PPTX"),
                ],
                blocks=[
                    KnowledgePreviewBlockRead(
                        heading="幻灯片内容",
                        lines=slide_lines or ["该页未提取到文本内容。"],
                    )
                ],
            )
        )
    return pages

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,672 @@
from __future__ import annotations
import asyncio
import json
import os
from dataclasses import dataclass
from http import HTTPStatus
from pathlib import Path
from time import perf_counter
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.parse import quote
from urllib.request import Request, urlopen
from app.core.logging import get_logger
from app.services.model_connectivity import AZURE_API_VERSION
logger = get_logger("app.services.knowledge_rag")
DEFAULT_LIGHTRAG_QUERY_MODE = "naive"
DEFAULT_LLM_TIMEOUT_SECONDS = 180
DEFAULT_EMBEDDING_TIMEOUT_SECONDS = 120
class KnowledgeRagError(RuntimeError):
    """Error raised by the knowledge RAG runtime for dependency, model, and storage failures."""

    pass
@dataclass(frozen=True, slots=True)
class RuntimeModelConfig:
    """Immutable connection settings for one model endpoint used by the RAG runtime."""

    # Label of the configuration slot; included in LLM failure log lines.
    slot: str
    # Provider name; "Azure OpenAI", "Ollama", and "Ali" select dedicated
    # request formats, and any other value uses the generic bearer-token format.
    provider: str
    # Model name sent in request payloads (used as the deployment name for Azure).
    model: str
    # Base URL of the provider endpoint.
    endpoint: str
    # Credential passed to the request header builder.
    api_key: str
    # NOTE(review): presumably the model role (chat / embedding / rerank); it is
    # not read anywhere in the visible code -- confirm against the caller.
    capability: str
class _LightRagRuntime:
def __init__(
self,
*,
working_dir: Path,
workspace: str,
qdrant_url: str,
qdrant_api_key: str,
primary_chat: RuntimeModelConfig,
backup_chat: RuntimeModelConfig | None,
embedding: RuntimeModelConfig,
reranker: RuntimeModelConfig | None,
) -> None:
self.working_dir = working_dir
self.workspace = workspace
self.qdrant_url = qdrant_url
self.qdrant_api_key = qdrant_api_key
self.primary_chat = primary_chat
self.backup_chat = backup_chat
self.embedding = embedding
self.reranker = reranker
self._rag = self._build_rag()
self._initialize()
self._graph_has_content_cache: bool | None = None
@property
def rag(self):
return self._rag
def _build_rag(self):
try:
from lightrag import LightRAG
from lightrag.utils import EmbeddingFunc
except ImportError as exc: # pragma: no cover - exercised in runtime env
raise KnowledgeRagError(
"LightRAG 依赖未安装,请先在 server 环境执行依赖安装。"
) from exc
self.working_dir.mkdir(parents=True, exist_ok=True)
if self.qdrant_url:
os.environ["QDRANT_URL"] = self.qdrant_url
if self.qdrant_api_key:
os.environ["QDRANT_API_KEY"] = self.qdrant_api_key
embedding_dim = self._probe_embedding_dimension(self.embedding)
logger.info(
"Initialize LightRAG runtime workspace=%s qdrant=%s embedding_model=%s dim=%s",
self.workspace,
self.qdrant_url,
self.embedding.model,
embedding_dim,
)
async def embedding_func(texts: list[str]) -> Any:
return await asyncio.to_thread(self._embed_sync, texts)
async def llm_model_func(
prompt: str,
system_prompt: str | None = None,
history_messages: list[dict[str, Any]] | None = None,
keyword_extraction: bool = False,
**kwargs: Any,
) -> str:
return await asyncio.to_thread(
self._complete_sync,
prompt,
system_prompt,
history_messages or [],
keyword_extraction,
kwargs,
)
async def rerank_model_func(
query: str,
documents: list[str],
top_n: int | None = None,
**_kwargs: Any,
) -> list[dict[str, Any]]:
return await asyncio.to_thread(
self._rerank_sync,
query,
documents,
top_n,
)
return LightRAG(
working_dir=str(self.working_dir),
workspace=self.workspace,
kv_storage="JsonKVStorage",
graph_storage="NetworkXStorage",
vector_storage="QdrantVectorDBStorage",
doc_status_storage="JsonDocStatusStorage",
llm_model_name=self.primary_chat.model,
llm_model_func=llm_model_func,
embedding_func=EmbeddingFunc(
embedding_dim=embedding_dim,
func=embedding_func,
max_token_size=8192,
model_name=self.embedding.model,
supports_asymmetric=False,
),
rerank_model_func=rerank_model_func if self.reranker is not None else None,
enable_llm_cache=False,
enable_llm_cache_for_entity_extract=False,
)
def _initialize(self) -> None:
from lightrag.utils import always_get_an_event_loop
loop = always_get_an_event_loop()
loop.run_until_complete(self._rag.initialize_storages())
def finalize(self) -> None:
from lightrag.utils import always_get_an_event_loop
loop = always_get_an_event_loop()
loop.run_until_complete(self._rag.finalize_storages())
def query_data(self, query: str, *, conversation_history: list[dict[str, str]] | None = None) -> dict[str, Any]:
from lightrag import QueryParam
configured_mode = os.environ.get("LIGHTRAG_QUERY_MODE", DEFAULT_LIGHTRAG_QUERY_MODE).strip() or DEFAULT_LIGHTRAG_QUERY_MODE
mode = "naive" if configured_mode != "naive" and not self._graph_has_content() else configured_mode
started_at = perf_counter()
param = QueryParam(
mode=mode,
top_k=8,
chunk_top_k=10,
only_need_context=True,
response_type="Multiple Paragraphs",
conversation_history=conversation_history or [],
include_references=True,
)
try:
result = self._rag.query_data(query, param)
logger.info("LightRAG query completed mode=%s elapsed=%.2fs", mode, perf_counter() - started_at)
return result
except Exception:
if mode == "naive":
raise
logger.warning("LightRAG query mode=%s failed, retry with naive mode", mode)
fallback_param = QueryParam(
mode="naive",
top_k=8,
chunk_top_k=10,
only_need_context=True,
response_type="Multiple Paragraphs",
conversation_history=conversation_history or [],
include_references=True,
)
result = self._rag.query_data(query, fallback_param)
logger.info("LightRAG query completed mode=naive elapsed=%.2fs", perf_counter() - started_at)
return result
def _graph_has_content(self) -> bool:
if self._graph_has_content_cache is not None:
return self._graph_has_content_cache
graph_path = self.working_dir / self.workspace / "graph_chunk_entity_relation.graphml"
try:
graph_text = graph_path.read_text(encoding="utf-8")
except OSError:
self._graph_has_content_cache = False
return False
self._graph_has_content_cache = "<node " in graph_text or "<edge " in graph_text
return self._graph_has_content_cache
def insert_documents(
self,
*,
texts: list[str],
document_ids: list[str],
file_paths: list[str],
) -> str:
return self._rag.insert(texts, ids=document_ids, file_paths=file_paths)
def get_document_statuses(self, document_ids: list[str]) -> dict[str, Any]:
from lightrag.utils import always_get_an_event_loop
loop = always_get_an_event_loop()
return loop.run_until_complete(self._rag.aget_docs_by_ids(document_ids))
def delete_document(self, document_id: str) -> None:
from lightrag.utils import always_get_an_event_loop
loop = always_get_an_event_loop()
result = loop.run_until_complete(self._rag.adelete_by_doc_id(document_id))
status = str(getattr(result, "status", "") or "")
if status not in {"success", "not_found"}:
raise KnowledgeRagError(str(getattr(result, "message", "") or "LightRAG 删除文档失败。"))
def _probe_embedding_dimension(self, config: RuntimeModelConfig) -> int:
vectors = self._request_embeddings(config, ["dimension probe"])
if not vectors or not isinstance(vectors[0], list):
raise KnowledgeRagError("无法从 embedding 模型返回结果中解析向量维度。")
dimension = len(vectors[0])
if dimension <= 0:
raise KnowledgeRagError("embedding 模型返回了无效的向量维度。")
return dimension
def _embed_sync(self, texts: list[str]) -> Any:
import numpy as np
vectors = self._request_embeddings(self.embedding, texts)
return np.array(vectors, dtype=float)
def _rerank_sync(
self,
query: str,
documents: list[str],
top_n: int | None,
) -> list[dict[str, Any]]:
if self.reranker is None:
return []
status_code, body = self._request_rerank(
self.reranker,
query=query,
documents=documents,
top_n=top_n,
)
if status_code >= HTTPStatus.BAD_REQUEST:
raise KnowledgeRagError(f"reranker 模型返回异常状态码 {status_code}")
return _extract_rerank_results(body, provider=self.reranker.provider)
def _complete_sync(
self,
prompt: str,
system_prompt: str | None,
history_messages: list[dict[str, Any]],
keyword_extraction: bool,
kwargs: dict[str, Any],
) -> str:
del keyword_extraction
last_error: Exception | None = None
for config in [self.primary_chat, self.backup_chat]:
if config is None:
continue
try:
return self._request_chat_completion(
config,
prompt=prompt,
system_prompt=system_prompt,
history_messages=history_messages,
max_tokens=int(kwargs.get("max_tokens") or 1200),
temperature=float(kwargs.get("temperature") or 0.1),
)
except Exception as exc: # pragma: no cover - runtime fallback
last_error = exc
logger.warning(
"LightRAG LLM request failed slot=%s provider=%s model=%s: %s",
config.slot,
config.provider,
config.model,
exc,
)
continue
raise KnowledgeRagError(f"LightRAG 调用知识模型失败:{last_error or '没有可用模型配置'}")
def _request_chat_completion(
self,
config: RuntimeModelConfig,
*,
prompt: str,
system_prompt: str | None,
history_messages: list[dict[str, Any]],
max_tokens: int,
temperature: float,
) -> str:
messages: list[dict[str, Any]] = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.extend(history_messages)
messages.append({"role": "user", "content": prompt})
if config.provider == "Azure OpenAI":
url = f"{_build_azure_deployment_base(config.endpoint, config.model)}/chat/completions?api-version={AZURE_API_VERSION}"
payload = {
"messages": messages,
"max_tokens": max_tokens,
"temperature": temperature,
}
status_code, body = _send_json_request(
"POST",
url,
headers=_build_headers(config.api_key, use_bearer=False, use_api_key=True),
payload=payload,
timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS,
)
elif config.provider == "Ollama":
url = _ensure_path(_normalize_endpoint(config.endpoint), "api/chat")
payload = {
"model": config.model,
"messages": messages,
"stream": False,
"options": {
"num_predict": max_tokens,
"temperature": temperature,
},
}
status_code, body = _send_json_request(
"POST",
url,
headers={"Content-Type": "application/json", "Accept": "application/json"},
payload=payload,
timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS,
)
else:
url = _ensure_path(_normalize_endpoint(config.endpoint), "chat/completions")
payload = {
"model": config.model,
"messages": messages,
"max_tokens": max_tokens,
"temperature": temperature,
}
status_code, body = _send_json_request(
"POST",
url,
headers=_build_headers(config.api_key, use_bearer=True),
payload=payload,
timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS,
)
if status_code >= HTTPStatus.BAD_REQUEST:
raise KnowledgeRagError(f"知识模型返回异常状态码 {status_code}")
return _extract_chat_text(body, provider=config.provider)
def _request_embeddings(self, config: RuntimeModelConfig, texts: list[str]) -> list[list[float]]:
if config.provider == "Azure OpenAI":
url = f"{_build_azure_deployment_base(config.endpoint, config.model)}/embeddings?api-version={AZURE_API_VERSION}"
payload = {"input": texts}
status_code, body = _send_json_request(
"POST",
url,
headers=_build_headers(config.api_key, use_bearer=False, use_api_key=True),
payload=payload,
timeout_seconds=DEFAULT_EMBEDDING_TIMEOUT_SECONDS,
)
elif config.provider == "Ollama":
url = _ensure_path(_normalize_endpoint(config.endpoint), "api/embed")
payload = {"model": config.model, "input": texts}
status_code, body = _send_json_request(
"POST",
url,
headers={"Content-Type": "application/json", "Accept": "application/json"},
payload=payload,
timeout_seconds=DEFAULT_EMBEDDING_TIMEOUT_SECONDS,
)
else:
url = _ensure_path(_normalize_endpoint(config.endpoint), "embeddings")
payload = {"model": config.model, "input": texts}
status_code, body = _send_json_request(
"POST",
url,
headers=_build_headers(config.api_key, use_bearer=True),
payload=payload,
timeout_seconds=DEFAULT_EMBEDDING_TIMEOUT_SECONDS,
)
if status_code >= HTTPStatus.BAD_REQUEST:
raise KnowledgeRagError(f"embedding 模型返回异常状态码 {status_code}")
return _extract_embedding_vectors(body, provider=config.provider)
def _request_rerank(
    self,
    config: RuntimeModelConfig,
    *,
    query: str,
    documents: list[str],
    top_n: int | None,
) -> tuple[int, Any]:
    """Send a rerank request and return ``(status_code, parsed_body)``.

    "Azure OpenAI" posts to the deployment-scoped ``/rerank`` URL with the
    api-key header, "Ali" uses the URL and payload produced by
    ``_build_ali_rerank_request``, and every other provider posts to
    ``<endpoint>/rerank`` with a bearer token. ``top_n`` is only included in
    the payload when it is not ``None``.
    """
    provider = config.provider
    if provider == "Azure OpenAI":
        target_url = f"{_build_azure_deployment_base(config.endpoint, config.model)}/rerank?api-version={AZURE_API_VERSION}"
        request_body: dict[str, Any] = {
            "query": query,
            "documents": documents,
        }
        if top_n is not None:
            request_body["top_n"] = top_n
        request_headers = _build_headers(config.api_key, use_bearer=False, use_api_key=True)
    elif provider == "Ali":
        target_url, request_body = _build_ali_rerank_request(
            config.model,
            query=query,
            documents=documents,
            top_n=top_n,
        )
        request_headers = _build_headers(config.api_key, use_bearer=True)
    else:
        target_url = _ensure_path(_normalize_endpoint(config.endpoint), "rerank")
        request_body = {
            "model": config.model,
            "query": query,
            "documents": documents,
        }
        if top_n is not None:
            request_body["top_n"] = top_n
        request_headers = _build_headers(config.api_key, use_bearer=True)

    return _send_json_request(
        "POST",
        target_url,
        headers=request_headers,
        payload=request_body,
        timeout_seconds=DEFAULT_LLM_TIMEOUT_SECONDS,
    )
def _normalize_endpoint(endpoint: str) -> str:
normalized = str(endpoint or "").strip()
if not normalized:
raise KnowledgeRagError("模型 endpoint 不能为空。")
return normalized.rstrip("/")
def _ensure_path(endpoint: str, suffix: str) -> str:
suffix = suffix.lstrip("/")
if endpoint.endswith(suffix):
return endpoint
return f"{endpoint}/{suffix}"
def _build_azure_deployment_base(endpoint: str, model: str) -> str:
    """Build the ``.../openai/deployments/<model>`` base URL for Azure OpenAI.

    Accepts an endpoint that is already deployment-scoped (returned as is),
    one that contains ``/openai/v1`` (everything from that marker on is
    replaced), one that ends in ``/openai``, or a bare resource root.
    The model name is percent-encoded with no safe characters.
    """
    base = _normalize_endpoint(endpoint)
    deployment = quote(model, safe="")
    if "/openai/deployments/" in base:
        return base
    if "/openai/v1" in base:
        resource_root, _, _ = base.partition("/openai/v1")
        return f"{resource_root}/openai/deployments/{deployment}"
    prefix = base if base.endswith("/openai") else f"{base}/openai"
    return f"{prefix}/deployments/{deployment}"
def _build_headers(
api_key: str,
*,
use_bearer: bool,
use_api_key: bool = False,
) -> dict[str, str]:
headers = {
"Content-Type": "application/json",
"Accept": "application/json",
}
normalized_key = str(api_key or "").strip()
if normalized_key:
if use_api_key:
headers["api-key"] = normalized_key
elif use_bearer:
headers["Authorization"] = f"Bearer {normalized_key}"
return headers
def _send_json_request(
    method: str,
    url: str,
    *,
    headers: dict[str, str],
    payload: dict[str, Any],
    timeout_seconds: int,
) -> tuple[int, Any]:
    """Send ``payload`` as a UTF-8 JSON body and return ``(status, parsed_body)``.

    The response body is parsed with ``_parse_json_body``; a response whose
    ``length`` is reported as 0 is treated as an empty body without reading.

    Raises:
        KnowledgeRagError: for HTTP error responses (using the message found
            in the error body when there is one), connection failures and
            timeouts. The original exception is chained as the cause.
    """
    encoded_payload = json.dumps(payload).encode("utf-8")
    http_request = Request(url=url, data=encoded_payload, headers=headers, method=method)
    try:
        with urlopen(http_request, timeout=timeout_seconds) as response:  # noqa: S310
            raw_text = "" if response.length == 0 else response.read().decode("utf-8")
            return response.status, _parse_json_body(raw_text)
    except HTTPError as exc:  # pragma: no cover - runtime path
        error_text = exc.read().decode("utf-8", errors="ignore")
        error_message = _extract_error_message(_parse_json_body(error_text))
        raise KnowledgeRagError(error_message or f"接口返回 {exc.code}") from exc
    except URLError as exc:  # pragma: no cover - runtime path
        raise KnowledgeRagError(f"无法连接模型接口:{getattr(exc, 'reason', exc)}") from exc
    except TimeoutError as exc:  # pragma: no cover - runtime path
        raise KnowledgeRagError("模型接口调用超时。") from exc
def _parse_json_body(body: str) -> Any:
if not body:
return None
try:
return json.loads(body)
except json.JSONDecodeError:
return {"message": body}
def _extract_error_message(payload: Any) -> str | None:
if payload is None:
return None
if isinstance(payload, dict):
if isinstance(payload.get("detail"), str):
return payload["detail"]
if isinstance(payload.get("message"), str):
return payload["message"]
error_payload = payload.get("error")
if isinstance(error_payload, dict) and isinstance(error_payload.get("message"), str):
return error_payload["message"]
if isinstance(payload, str):
return payload
return None
def _extract_chat_text(payload: Any, *, provider: str) -> str:
if provider == "Ollama":
message = payload.get("message") if isinstance(payload, dict) else None
if isinstance(message, dict):
return str(message.get("content") or "").strip()
return ""
if not isinstance(payload, dict):
return ""
choices = payload.get("choices")
if not isinstance(choices, list) or not choices:
return ""
first_choice = choices[0]
if not isinstance(first_choice, dict):
return ""
message = first_choice.get("message")
if isinstance(message, dict):
content = message.get("content")
if isinstance(content, str):
return content.strip()
if isinstance(content, list):
parts: list[str] = []
for item in content:
if isinstance(item, dict) and item.get("type") == "text":
parts.append(str(item.get("text") or "").strip())
return "\n".join(part for part in parts if part).strip()
text = first_choice.get("text")
if isinstance(text, str):
return text.strip()
return ""
def _extract_embedding_vectors(payload: Any, *, provider: str) -> list[list[float]]:
if provider == "Ollama":
embeddings = payload.get("embeddings") if isinstance(payload, dict) else None
if isinstance(embeddings, list):
return [[float(value) for value in item] for item in embeddings if isinstance(item, list)]
embedding = payload.get("embedding") if isinstance(payload, dict) else None
if isinstance(embedding, list):
return [[float(value) for value in embedding]]
raise KnowledgeRagError("Ollama embedding 返回格式无法识别。")
if not isinstance(payload, dict):
raise KnowledgeRagError("embedding 接口返回格式无效。")
data = payload.get("data")
if not isinstance(data, list) or not data:
raise KnowledgeRagError("embedding 接口没有返回 data。")
vectors: list[list[float]] = []
for item in data:
if not isinstance(item, dict):
continue
embedding = item.get("embedding")
if isinstance(embedding, list):
vectors.append([float(value) for value in embedding])
if not vectors:
raise KnowledgeRagError("embedding 接口返回中未找到向量数据。")
return vectors
def _build_ali_rerank_request(
model: str,
*,
query: str,
documents: list[str],
top_n: int | None,
) -> tuple[str, dict[str, Any]]:
normalized_model = str(model or "").strip()
if normalized_model == "qwen3-rerank":
payload: dict[str, Any] = {
"model": normalized_model,
"query": query,
"documents": documents,
}
if top_n is not None:
payload["top_n"] = top_n
return "https://dashscope.aliyuncs.com/compatible-api/v1/reranks", payload
payload = {
"model": normalized_model,
"input": {
"query": query,
"documents": documents,
},
"parameters": {
"return_documents": False,
},
}
if top_n is not None:
payload["parameters"]["top_n"] = top_n
return "https://dashscope.aliyuncs.com/api/v1/services/rerank/text-rerank/text-rerank", payload
def _extract_rerank_results(payload: Any, *, provider: str) -> list[dict[str, Any]]:
if not isinstance(payload, dict):
return []
if provider == "Ali" and isinstance(payload.get("output"), dict):
results = payload["output"].get("results")
else:
results = payload.get("results")
if not isinstance(results, list):
return []
normalized: list[dict[str, Any]] = []
for item in results:
if not isinstance(item, dict):
continue
try:
normalized.append(
{
"index": int(item["index"]),
"relevance_score": float(item["relevance_score"]),
}
)
except (KeyError, TypeError, ValueError):
continue
return normalized

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,451 @@
from __future__ import annotations
import json
import re
from typing import Any
from pydantic import ValidationError
from app.core.logging import get_logger
from app.schemas.ontology import (
OntologyConstraint,
OntologyEntity,
OntologyMetric,
OntologyParseRequest,
OntologyTimeRange,
)
from app.services.ontology_rules import (
AR_CORE_KEYWORDS,
AP_CORE_KEYWORDS,
COMPARE_KEYWORDS,
DRAFT_FOLLOW_UP_KEYWORDS,
DRAFT_KEYWORDS,
EXPENSE_NARRATIVE_KEYWORDS,
EXPENSE_REVIEW_ACTIONS,
EXPLAIN_KEYWORDS,
GENERIC_EXPENSE_PROMPTS,
KNOWLEDGE_INTENTS,
LlmOntologyEntityHint,
LlmOntologyParseResult,
OPERATE_KEYWORDS,
QUERY_KEYWORDS,
RISK_KEYWORDS,
SCENARIO_KEYWORDS,
STATUS_KEYWORDS,
)
logger = get_logger("app.services.ontology")
class OntologyDetectionMixin:
def _detect_scenario(self, compact_query: str) -> tuple[str, float]:
    """Score every scenario by keyword hits and return ``(scenario, confidence)``.

    Each keyword found in ``compact_query`` adds its weight to its scenario.
    With no hits at all the result is ``("expense", 0.14)`` when the query
    mentions "单据" together with a status keyword, else ``("unknown", 0.0)``.
    When "knowledge" wins but a business scenario also scored, the best
    business scenario is used instead. The confidence is capped at 0.34.
    """
    scores: dict[str, float] = dict.fromkeys(SCENARIO_KEYWORDS, 0.0)
    for scenario, weighted_keywords in SCENARIO_KEYWORDS.items():
        for keyword, weight in weighted_keywords:
            if keyword in compact_query:
                scores[scenario] += weight

    top_scenario = max(scores, key=scores.get)
    top_score = scores[top_scenario]

    if top_score <= 0:
        mentions_document_status = "单据" in compact_query and any(
            keyword in compact_query for keyword in STATUS_KEYWORDS
        )
        return ("expense", 0.14) if mentions_document_status else ("unknown", 0.0)

    if top_scenario == "knowledge":
        # Business scenarios take precedence over a generic knowledge match.
        business_names = ("expense", "accounts_receivable", "accounts_payable")
        business_scores = [scores[name] for name in business_names]
        strongest = max(business_scores)
        if strongest > 0:
            top_scenario = business_names[business_scores.index(strongest)]
            top_score = strongest

    return top_scenario, round(min(top_score, 0.34), 2)
def _detect_intent(
    self,
    compact_query: str,
    *,
    scenario: str,
    entities: list[OntologyEntity],
    time_range: OntologyTimeRange,
) -> tuple[str, float]:
    """Classify the user's intent with keyword rules; returns ``(intent, confidence)``.

    Rules are evaluated in a fixed priority order: operate keywords, expense
    status/history questions, draft keywords, generic expense prompts,
    compare, explain, risk, query keywords, then free-form expense
    narratives. The fallback is ``("query", 0.10)``.
    """

    def mentions(keywords) -> bool:
        return any(keyword in compact_query for keyword in keywords)

    if mentions(OPERATE_KEYWORDS):
        return "operate", 0.30

    # "单据" plus a status word is a lookup, unless a draft keyword other
    # than "草稿" is also present.
    status_document_query = (
        "单据" in compact_query
        and mentions(STATUS_KEYWORDS)
        and not mentions(keyword for keyword in DRAFT_KEYWORDS if keyword != "草稿")
    )
    historical_document_query = mentions(
        ("报销的单据", "报销单据", "报销过的单据", "报销记录")
    )
    expense_status_phrases = (
        "报销了吗",
        "报销了么",
        "报销了没",
        "报销了没有",
        "报销没",
        "单据状态",
        "审批状态",
        "报销进度",
        "到哪了",
        "到了哪",
        "有没有报销",
        "是否报销",
        "进行中的单据",
        "草稿单据",
        "草稿的单据",
        "待补充单据",
        "审批中的单据",
        "已提交单据",
        "已入账单据",
    )
    if scenario == "expense" and (
        mentions(expense_status_phrases)
        or status_document_query
        or historical_document_query
    ):
        return "query", 0.24

    if mentions(DRAFT_KEYWORDS):
        return "draft", 0.26
    if scenario == "expense" and self._is_generic_expense_prompt(compact_query):
        return "draft", 0.24

    keyword_rules = (
        (COMPARE_KEYWORDS, ("compare", 0.24)),
        (EXPLAIN_KEYWORDS, ("explain", 0.22)),
        (RISK_KEYWORDS, ("risk_check", 0.24)),
        (QUERY_KEYWORDS, ("query", 0.20)),
    )
    for keywords, verdict in keyword_rules:
        if mentions(keywords):
            return verdict

    if self._looks_like_expense_narrative(
        compact_query,
        scenario=scenario,
        entities=entities,
        time_range=time_range,
    ):
        return "draft", 0.22
    return "query", 0.10
@staticmethod
def _looks_like_follow_up_message(compact_query: str) -> bool:
if not compact_query:
return False
if any(keyword in compact_query for keyword in DRAFT_FOLLOW_UP_KEYWORDS):
return True
if compact_query.startswith(("", "", "", "这个", "那个")):
return True
has_domain_keyword = any(
keyword in compact_query
for keyword, _weight in (
*SCENARIO_KEYWORDS["expense"],
*SCENARIO_KEYWORDS["accounts_receivable"],
*SCENARIO_KEYWORDS["accounts_payable"],
*SCENARIO_KEYWORDS["knowledge"],
)
)
return len(compact_query) <= 12 and not has_domain_keyword
def _should_inherit_expense_draft(
    self,
    compact_query: str,
    *,
    scenario: str,
    entities: list[OntologyEntity],
    time_range: OntologyTimeRange,
    context_json: dict[str, Any],
) -> bool:
    """Decide whether this message should continue the conversation's expense draft.

    A recognised ``review_action`` always inherits. Otherwise the context
    must be an expense conversation or carry a ``draft_claim_id``; then
    follow-up keywords, an expense narrative or a follow-up-looking message
    inherit, while operate/compare/risk/query keywords do not. As a last
    resort an existing draft is inherited when the message carries a
    business entity (amount, customer, employee, expense type, project or
    invoice).
    """
    context_scenario = self._resolve_context_scenario(context_json)
    draft_claim_id = str(context_json.get("draft_claim_id") or "").strip()
    review_action = str(context_json.get("review_action") or "").strip()

    if review_action in EXPENSE_REVIEW_ACTIONS:
        return True
    if context_scenario != "expense" and not draft_claim_id:
        return False

    def mentions(keywords) -> bool:
        return any(keyword in compact_query for keyword in keywords)

    if mentions(DRAFT_FOLLOW_UP_KEYWORDS):
        return True
    if self._looks_like_expense_narrative(
        compact_query,
        scenario="expense",
        entities=entities,
        time_range=time_range,
    ):
        return True
    if self._looks_like_follow_up_message(compact_query):
        return True

    # A new operation, comparison, risk check or query starts a fresh topic.
    if (
        mentions(OPERATE_KEYWORDS)
        or mentions(COMPARE_KEYWORDS + RISK_KEYWORDS)
        or mentions(QUERY_KEYWORDS)
    ):
        return False

    if not draft_claim_id:
        return False
    draft_entity_types = {"amount", "customer", "employee", "expense_type", "project", "invoice"}
    return any(item.type in draft_entity_types for item in entities)
@staticmethod
def _is_generic_expense_prompt(compact_query: str) -> bool:
    """Return True when the whole query is one of the generic expense prompts."""
    is_generic = compact_query in GENERIC_EXPENSE_PROMPTS
    return is_generic
@staticmethod
def _looks_like_expense_narrative(
compact_query: str,
*,
scenario: str,
entities: list[OntologyEntity],
time_range: OntologyTimeRange,
) -> bool:
if scenario not in {"expense", "accounts_receivable", "accounts_payable", "unknown"}:
return False
if any(keyword in compact_query for keyword in AR_CORE_KEYWORDS + AP_CORE_KEYWORDS):
return False
entity_types = {item.type for item in entities}
has_expense_signal = any(
keyword in compact_query for keyword in EXPENSE_NARRATIVE_KEYWORDS
) or "expense_type" in entity_types
has_context_signal = bool(time_range.start_date) or "amount" in entity_types
return has_expense_signal and has_context_signal
def _parse_with_model(
self,
*,
payload: OntologyParseRequest,
query: str,
compact_query: str,
fallback_scenario: str,
fallback_intent: str,
entities: list[OntologyEntity],
time_range: OntologyTimeRange,
metrics: list[OntologyMetric],
constraints: list[OntologyConstraint],
) -> LlmOntologyParseResult | None:
messages = self._build_model_messages(
payload=payload,
query=query,
compact_query=compact_query,
fallback_scenario=fallback_scenario,
fallback_intent=fallback_intent,
entities=entities,
time_range=time_range,
metrics=metrics,
constraints=constraints,
)
response_text = self.runtime_chat_service.complete(
messages,
max_tokens=600,
temperature=0.0,
)
payload_json = self._extract_json_payload(response_text)
if payload_json is None:
return None
try:
return LlmOntologyParseResult.model_validate(payload_json)
except ValidationError as exc:
logger.warning("Semantic model output validation failed: %s", exc)
return None
@staticmethod
def _build_model_messages(
*,
payload: OntologyParseRequest,
query: str,
compact_query: str,
fallback_scenario: str,
fallback_intent: str,
entities: list[OntologyEntity],
time_range: OntologyTimeRange,
metrics: list[OntologyMetric],
constraints: list[OntologyConstraint],
) -> list[dict[str, str]]:
facts = {
"query": query,
"compact_query": compact_query,
"context": {
"entry_source": payload.context_json.get("entry_source"),
"attachment_names": payload.context_json.get("attachment_names", []),
"attachment_count": payload.context_json.get("attachment_count", 0),
"ocr_summary": payload.context_json.get("ocr_summary", ""),
"ocr_documents": payload.context_json.get("ocr_documents", []),
"request_context": payload.context_json.get("request_context"),
"role_codes": payload.context_json.get("role_codes", []),
"conversation_id": payload.context_json.get("conversation_id"),
"conversation_scenario": payload.context_json.get("conversation_scenario"),
"conversation_intent": payload.context_json.get("conversation_intent"),
"draft_claim_id": payload.context_json.get("draft_claim_id"),
"review_action": payload.context_json.get("review_action"),
"review_form_values": payload.context_json.get("review_form_values"),
"conversation_history": payload.context_json.get("conversation_history", []),
},
"rule_candidates": {
"scenario": fallback_scenario,
"intent": fallback_intent,
"entities": [item.model_dump(mode="json") for item in entities],
"time_range": time_range.model_dump(mode="json"),
"metrics": [item.model_dump(mode="json") for item in metrics],
"constraints": [item.model_dump(mode="json") for item in constraints],
},
}
system_prompt = (
"你是企业财务共享平台的语义解析器。"
"你的任务是把用户输入解析为固定 JSON用于后续路由、追问和权限判断。"
"只输出 JSON 对象,不要输出 Markdown、代码块、解释、标题或 <think>。"
"场景 scenario 只能是expense, accounts_receivable, "
"accounts_payable, knowledge, unknown。"
"意图 intent 只能是query, explain, compare, risk_check, draft, operate。"
"如果用户是在描述一笔待处理费用、待报销事项、上传票据或希望整理报销,"
"即使没有明确说“生成草稿”,也优先使用 expense + draft。"
"如果提供了 conversation_history必须把最近轮次作为当前追问的上下文"
"正确理解“这个”“那笔”“改成 800”“继续补充”这类省略表达。"
"出现“客户”不等于应收,出现“供应商”不等于应付,必须结合动作词和业务目标判断。"
"只有明确查询、统计、列出、多少、明细、对比时才优先使用 query 或 compare。"
"附件名称和 OCR 摘要只作为辅助证据,不能编造未出现的事实。"
"信息不足时 clarification_required=true并给出一句简短中文追问。"
"missing_slots 使用简短 snake_case例如 expense_type, amount, "
"customer_name, participants, attachments。"
"entity_hints 只填写你比较确定的业务对象;如果不确定,可以返回空数组。"
)
user_prompt = (
"请根据以下事实输出 JSON\n"
f"{json.dumps(facts, ensure_ascii=False, indent=2, default=str)}\n\n"
"输出格式:\n"
"{\n"
' "scenario": "expense",\n'
' "intent": "draft",\n'
' "confidence": 0.88,\n'
' "clarification_required": true,\n'
' "clarification_question": "请补充客户单位、参与人员和票据附件。",\n'
' "missing_slots": ["customer_name", "participants", "attachments"],\n'
' "ambiguity": [],\n'
' "entity_hints": [\n'
' {"type": "expense_type", "value": "招待", '
'"normalized_value": "entertainment", "role": "filter", '
'"confidence": 0.86}\n'
" ]\n"
"}"
)
return [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt},
]
@staticmethod
def _extract_json_payload(response_text: str | None) -> dict[str, Any] | None:
if not response_text:
return None
cleaned = re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL | re.IGNORECASE)
cleaned = cleaned.strip()
if not cleaned:
return None
fenced_match = re.search(r"```(?:json)?\s*(\{.*\})\s*```", cleaned, flags=re.DOTALL)
candidates = [fenced_match.group(1)] if fenced_match else []
candidates.extend([cleaned])
start = cleaned.find("{")
end = cleaned.rfind("}")
if start != -1 and end != -1 and end > start:
candidates.append(cleaned[start : end + 1])
for candidate in candidates:
try:
parsed = json.loads(candidate)
except json.JSONDecodeError:
continue
if isinstance(parsed, dict):
return parsed
return None
@staticmethod
def _resolve_scenario(
fallback_scenario: str,
model_parse: LlmOntologyParseResult | None,
) -> str:
if model_parse is None:
return fallback_scenario
if model_parse.scenario == "unknown" and fallback_scenario != "unknown":
return fallback_scenario
return model_parse.scenario
def _resolve_intent(
self,
compact_query: str,
*,
fallback_intent: str,
scenario: str,
entities: list[OntologyEntity],
time_range: OntologyTimeRange,
model_parse: LlmOntologyParseResult | None,
) -> str:
candidate = model_parse.intent if model_parse is not None else fallback_intent
if scenario == "knowledge":
if candidate in KNOWLEDGE_INTENTS:
return candidate
if fallback_intent in KNOWLEDGE_INTENTS:
return fallback_intent
return "query"
if candidate == "query" and scenario == "expense":
if self._is_generic_expense_prompt(compact_query) or fallback_intent == "draft":
return "draft"
return candidate
@staticmethod
def _merge_entities(
base_entities: list[OntologyEntity],
entity_hints: list[LlmOntologyEntityHint],
) -> list[OntologyEntity]:
merged: dict[tuple[str, str], OntologyEntity] = {
(item.type, item.normalized_value): item for item in base_entities
}
for hint in entity_hints:
value = str(hint.value or "").strip()
if not value:
continue
normalized_value = str(hint.normalized_value or value).strip()
key = (str(hint.type).strip(), normalized_value)
candidate = OntologyEntity(
type=str(hint.type).strip(),
value=value,
normalized_value=normalized_value,
role=str(hint.role or "target").strip() or "target",
confidence=float(hint.confidence),
)
existing = merged.get(key)
if existing is None or existing.confidence < candidate.confidence:
merged[key] = candidate
return list(merged.values())
@staticmethod
def _normalize_short_text_list(values: list[str]) -> list[str]:
normalized: list[str] = []
seen: set[str] = set()
for value in values:
cleaned = str(value or "").strip()
if not cleaned or cleaned in seen:
continue
normalized.append(cleaned)
seen.add(cleaned)
return normalized[:6]

View File

@@ -0,0 +1,529 @@
from __future__ import annotations
import calendar
import re
from datetime import UTC, date, datetime, timedelta
from typing import Any
from app.core.agent_enums import AgentPermissionLevel
from app.schemas.ontology import (
OntologyConstraint,
OntologyEntity,
OntologyMetric,
OntologyPermission,
OntologyTimeRange,
)
from app.services.ontology_rules import (
AMOUNT_PATTERN,
DATE_RANGE_PATTERN,
EXPLICIT_DATE_PATTERN,
EXPLICIT_MONTH_PATTERN,
EXPENSE_TYPE_KEYWORDS,
GENERIC_EXPENSE_PROMPTS,
LOCATION_KEYWORDS,
MONTH_DAY_PATTERN,
MONTH_DAY_RANGE_PATTERN,
ReferenceCatalog,
STATUS_KEYWORDS,
TOP_N_PATTERN,
)
class OntologyExtractionMixin:
def _infer_default_missing_slots(
self,
compact_query: str,
*,
scenario: str,
intent: str,
entities: list[OntologyEntity],
time_range: OntologyTimeRange,
context_json: dict[str, Any],
) -> list[str]:
if scenario != "expense" or intent != "draft":
return []
entity_types = {item.type for item in entities}
attachment_count = int(context_json.get("attachment_count") or 0)
missing_slots: list[str] = []
if self._is_generic_expense_prompt(compact_query):
if "expense_type" not in entity_types:
missing_slots.append("expense_type")
if "amount" not in entity_types:
missing_slots.append("amount")
if not time_range.start_date:
missing_slots.append("time_range")
missing_slots.append("reason")
if attachment_count <= 0:
missing_slots.append("attachments")
return missing_slots
if any(
item.normalized_value == "entertainment"
for item in entities
if item.type == "expense_type"
):
if "customer" not in entity_types:
missing_slots.append("customer_name")
missing_slots.append("participants")
if attachment_count <= 0:
missing_slots.append("attachments")
return missing_slots
@staticmethod
def _resolve_confidence(
    *,
    model_confidence: float | None,
    fallback_confidence: float,
    clarification_required: bool,
    permission: OntologyPermission,
) -> float:
    """Combine model and rule confidence into the final, rounded score.

    The model's confidence is preferred when present and is clamped to
    ``[0.0, 0.98]``. A forbidden permission raises the score to at least
    0.86; otherwise a pending clarification caps it at 0.58.
    """
    score = float(model_confidence) if model_confidence is not None else fallback_confidence
    score = max(0.0, min(score, 0.98))
    is_forbidden = permission.level == AgentPermissionLevel.FORBIDDEN.value
    if is_forbidden:
        score = max(score, 0.86)
    elif clarification_required:
        score = min(score, 0.58)
    return round(score, 2)
def _extract_entities(
    self,
    query: str,
    compact_query: str,
    reference: ReferenceCatalog,
) -> list[OntologyEntity]:
    """Extract business entities from the user's text with rules and lookups.

    Sources, in order: "客户X" / "供应商X" patterns, a labelled
    "客户名称:" field, a surname-based employee pattern, names from the
    reference catalog, document-code patterns (PRJ/EXP/AR/AP/INV/CTR),
    location and expense-type keywords, and amounts. Entities are keyed by
    ``(type, normalized_value)`` and the first detection for a key wins.

    Args:
        query: The original user text.
        compact_query: The query in the compacted form used for catalog
            name matching.
        reference: Catalog of known employees, departments, customers,
            vendors and projects.

    Returns:
        The detected entities in detection order.
    """
    found: dict[tuple[str, str], OntologyEntity] = {}

    def upsert(entity: OntologyEntity) -> None:
        # Keep the first entity seen for a key; later duplicates are ignored.
        found.setdefault((entity.type, entity.normalized_value), entity)

    def upsert_prefixed(pattern: str, prefix: str, entity_type: str) -> None:
        # "客户 A" -> normalised "客户A": the prefix plus its short suffix.
        for match in re.finditer(pattern, query):
            suffix = match.group(1).strip()
            normalized = f"{prefix}{suffix}".replace(" ", "")
            upsert(self._make_entity(entity_type, match.group(0).strip(), normalized, role="filter"))

    def mentions(keywords: tuple[str, ...]) -> bool:
        return any(keyword in query for keyword in keywords)

    upsert_prefixed(r"客户\s*([A-Za-z0-9一二三四五六七八九十]+)", "客户", "customer")

    # Accept both the ASCII and the full-width colon: form-style Chinese text
    # normally uses the full-width one, which the previous "[:]" class missed.
    labeled_customer_match = re.search(r"客户名称[::]\s*(?P<name>[^\n]+)", query)
    if labeled_customer_match:
        customer_name = labeled_customer_match.group("name").strip()
        upsert(self._make_entity("customer", customer_name, customer_name, role="filter"))

    upsert_prefixed(r"供应商\s*([A-Za-z0-9一二三四五六七八九十]+)", "供应商", "vendor")

    # A common surname followed by one or two characters, directly before a
    # date or expense word, is taken to be an employee name.
    employee_match = re.search(
        r"(?P<name>[赵钱孙李周吴郑王冯陈褚卫蒋沈韩杨朱秦许何吕施张孔曹严华金魏陶姜"
        r"戚谢邹喻柏水窦章云苏潘葛范彭郎鲁韦昌马苗凤花方俞任袁柳鲍史唐费廉岑"
        r"薛雷贺倪汤滕殷罗毕郝邬安常乐于时傅卞康伍余元卜顾孟平黄和穆萧尹姚邵"
        r"湛汪祁毛禹狄米贝明臧计成戴宋庞熊纪舒屈项祝董梁杜阮蓝闵席季强贾路江"
        r"童颜郭梅盛林钟徐邱骆高夏蔡田樊胡凌霍虞万支柯管卢莫房裘缪解应宗丁宣"
        r"邓洪包左石崔吉龚程嵇邢裴陆荣翁荀羊惠甄曲家封芮储靳汲邴糜松井段富巫"
        r"乌焦巴弓牧隗山谷车侯伊宫宁仇栾刘景詹束龙叶司黎薄印白怀蒲邰从鄂索咸"
        r"籍卓蔺屠蒙池乔阴胥能苍双闻莘党翟谭贡姬申扶堵冉宰郦雍桑桂牛寿通边扈"
        r"燕冀浦尚农温别庄晏柴瞿阎连茹习艾容向古易慎戈廖庾终暨居衡步都耿满弘"
        r"匡国文寇广禄阙东欧殳沃利蔚越夔隆师巩聂晁勾敖融冷辛阚那简饶曾关蒯相"
        r"查后荆游竺权盖益桓公][\u4e00-\u9fa5]{1,2})(?=\s*(?:\d{4}年|\d{1,2}月|本月|"
        r"上月|本周|报销|差旅|费用|申请))",
        query,
    )
    if employee_match:
        name = employee_match.group("name")
        upsert(self._make_entity("employee", name, name, role="filter"))

    # Catalog lookups: any known name that appears in the compacted query.
    catalog_sources = (
        ("employee", reference.employees),
        ("department", reference.departments),
        ("customer", reference.customers),
        ("vendor", reference.vendors),
        ("project", reference.projects),
    )
    for entity_type, names in catalog_sources:
        for name in names:
            if self._compact(name) in compact_query:
                upsert(self._make_entity(entity_type, name, name, role="filter"))

    # Document codes are matched case-insensitively and normalised to upper case.
    code_patterns = (
        (r"PRJ-[A-Z]+-\d+", "project", "filter"),
        (r"EXP-\d{6}-\d{3}", "expense_claim", "target"),
        (r"AR-\d{6}-\d{3}", "receivable", "target"),
        (r"AP-\d{6}-\d{3}", "payable", "target"),
        (r"INV-[A-Z]+-\d+", "invoice", "target"),
        (r"CTR-[A-Z]+-\d+", "contract", "target"),
    )
    for pattern, entity_type, role in code_patterns:
        for code in re.findall(pattern, query, flags=re.IGNORECASE):
            upsert(self._make_entity(entity_type, code, code.upper(), role=role))

    for location in LOCATION_KEYWORDS:
        if location in query:
            upsert(self._make_entity("location", location, location, role="filter", confidence=0.86))

    for label, normalized in EXPENSE_TYPE_KEYWORDS.items():
        if label in query:
            upsert(self._make_entity("expense_type", label, normalized, role="filter"))

    # A meal mentioned together with a customer is entertainment, not a plain meal.
    has_customer_entertainment_signal = "客户" in query and mentions(
        ("吃饭", "用餐", "餐饮", "宴请", "请客", "招待")
    )
    if has_customer_entertainment_signal:
        upsert(
            self._make_entity(
                "expense_type",
                "客户招待",
                "entertainment",
                role="filter",
                confidence=0.96,
            )
        )

    # (trigger keywords, label, normalised type, confidence, rule enabled)
    expense_type_rules = (
        (
            ("打车", "网约车", "出租车", "车费", "乘车", "用车", "叫车", "车资", "停车费", "过路费"),
            "交通",
            "transport",
            0.9,
            True,
        ),
        (("出差", "机票", "火车", "高铁", "行程单"), "差旅", "travel", 0.88, True),
        (("酒店", "住宿", "宾馆"), "住宿", "hotel", 0.86, True),
        (
            ("餐费", "用餐", "午餐", "晚餐", "早餐", "餐饮"),
            "餐费",
            "meal",
            0.84,
            not has_customer_entertainment_signal,
        ),
        (
            ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板"),
            "办公费",
            "office",
            0.87,
            True,
        ),
        (("培训", "讲师费", "课时费", "课程费"), "培训费", "training", 0.84, True),
        (("通讯费", "话费", "流量费", "宽带费"), "通讯费", "communication", 0.84, True),
        (("福利费", "团建", "慰问", "节日福利", "体检费"), "福利费", "welfare", 0.84, True),
    )
    for keywords, label, normalized, confidence, enabled in expense_type_rules:
        if enabled and mentions(keywords):
            upsert(self._make_entity("expense_type", label, normalized, role="filter", confidence=confidence))

    for amount in self._extract_amount_entities(query):
        upsert(amount)
    return list(found.values())
def _extract_amount_entities(self, query: str) -> list[OntologyEntity]:
    """Turn every amount mention in ``query`` into an ``amount`` entity.

    A match needs a numeric value plus at least a prefix or a unit. The
    prefix, when present, marks the amount as a "threshold"; otherwise it
    is a "target". The normalised value is the string form of the amount
    returned by ``_normalize_amount``.
    """
    amounts: list[OntologyEntity] = []
    for match in AMOUNT_PATTERN.finditer(query):
        raw_value = match.group("value")
        unit = match.group("unit")
        prefix = match.group("prefix")
        # A bare number with neither prefix nor unit is not treated as money.
        if raw_value is None or (prefix is None and unit is None):
            continue
        normalized_amount = self._normalize_amount(raw_value, unit)
        amounts.append(
            self._make_entity(
                "amount",
                f"{raw_value}{unit or ''}",
                str(normalized_amount),
                role="threshold" if prefix else "target",
                confidence=0.9,
            )
        )
    return amounts
@staticmethod
def _make_entity(
    entity_type: str,
    value: str,
    normalized_value: str,
    *,
    role: str = "target",
    confidence: float = 0.92,
) -> OntologyEntity:
    """Create an ``OntologyEntity``, defaulting to role "target" and confidence 0.92."""
    fields = {
        "type": entity_type,
        "value": value,
        "normalized_value": normalized_value,
        "role": role,
        "confidence": confidence,
    }
    return OntologyEntity(**fields)
@staticmethod
def _infer_scenario_from_entities(entities: list[OntologyEntity]) -> str | None:
entity_types = {item.type for item in entities}
if entity_types & {"vendor", "payable"}:
return "accounts_payable"
if entity_types & {"customer", "receivable", "contract"}:
return "accounts_receivable"
if entity_types & {"employee", "expense_claim", "expense_type"}:
return "expense"
return None
def _extract_time_range(
self,
query: str,
compact_query: str,
*,
context_json: dict[str, Any],
) -> tuple[OntologyTimeRange, float]:
today = self._resolve_reference_today(context_json)
direct_mappings = [
("大前天", self._single_day_range(today - timedelta(days=3), "大前天", "day")),
("前天", self._single_day_range(today - timedelta(days=2), "前天", "day")),
("昨日", self._single_day_range(today - timedelta(days=1), "昨日", "day")),
("昨天", self._single_day_range(today - timedelta(days=1), "昨天", "day")),
("今天", self._single_day_range(today, "今天", "day")),
("明天", self._single_day_range(today + timedelta(days=1), "明天", "day")),
("后天", self._single_day_range(today + timedelta(days=2), "后天", "day")),
("大后天", self._single_day_range(today + timedelta(days=3), "大后天", "day")),
]
for keyword, value in direct_mappings:
if keyword in query:
return value, 0.10
if "本周" in query or "这周" in query or "本星期" in query:
start = today - timedelta(days=today.weekday())
end = start + timedelta(days=6)
return self._range(start, end, "本周", "week"), 0.10
if "上周" in query:
end = today - timedelta(days=today.weekday() + 1)
start = end - timedelta(days=6)
return self._range(start, end, "上周", "week"), 0.10
if "本月" in query or "这个月" in query:
start = date(today.year, today.month, 1)
end = date(today.year, today.month, calendar.monthrange(today.year, today.month)[1])
return self._range(start, end, "本月", "month"), 0.10
if "上月" in query:
year = today.year if today.month > 1 else today.year - 1
month = today.month - 1 if today.month > 1 else 12
start = date(year, month, 1)
end = date(year, month, calendar.monthrange(year, month)[1])
return self._range(start, end, "上月", "month"), 0.10
if "本季度" in query or "这个季度" in query:
quarter = (today.month - 1) // 3
start_month = quarter * 3 + 1
end_month = start_month + 2
start = date(today.year, start_month, 1)
end = date(today.year, end_month, calendar.monthrange(today.year, end_month)[1])
return self._range(start, end, "本季度", "quarter"), 0.10
if "今年" in query:
return (
self._range(date(today.year, 1, 1), date(today.year, 12, 31), "今年", "year"),
0.10,
)
if "去年" in query or "上一年" in query:
year = today.year - 1
return (
self._range(date(year, 1, 1), date(year, 12, 31), "去年", "year"),
0.10,
)
match = DATE_RANGE_PATTERN.search(query)
if match:
start = self._parse_iso_date(match.group("start"))
end = self._parse_iso_date(match.group("end"))
if start and end:
return self._range(start, end, match.group(0), "custom"), 0.10
match = EXPLICIT_DATE_PATTERN.search(query)
if match:
explicit = date(
int(match.group("year")),
int(match.group("month")),
int(match.group("day")),
)
return self._single_day_range(explicit, match.group(0), "day"), 0.10
match = EXPLICIT_MONTH_PATTERN.search(query)
if match:
year = int(match.group("year"))
month = int(match.group("month"))
start = date(year, month, 1)
end = date(year, month, calendar.monthrange(year, month)[1])
return self._range(start, end, match.group(0), "month"), 0.10
match = MONTH_DAY_RANGE_PATTERN.search(query)
if match:
start = date(today.year, int(match.group("start_month")), int(match.group("start_day")))
end = date(today.year, int(match.group("end_month")), int(match.group("end_day")))
return self._range(start, end, match.group(0), "custom"), 0.10
match = MONTH_DAY_PATTERN.search(compact_query)
if match:
explicit = date(today.year, int(match.group("month")), int(match.group("day")))
return self._single_day_range(explicit, match.group(0), "day"), 0.08
month_match = re.search(r"(?P<month>\d{1,2})月", compact_query)
if month_match:
month = int(month_match.group("month"))
start = date(today.year, month, 1)
end = date(today.year, month, calendar.monthrange(today.year, month)[1])
return self._range(start, end, month_match.group(0), "month"), 0.08
return OntologyTimeRange(), 0.0
@staticmethod
def _resolve_reference_today(context_json: dict[str, Any]) -> date:
client_now_iso = str(context_json.get("client_now_iso") or "").strip()
if not client_now_iso:
return datetime.now(UTC).date()
normalized = client_now_iso.replace("Z", "+00:00")
try:
client_now = datetime.fromisoformat(normalized)
except ValueError:
return datetime.now(UTC).date()
if client_now.tzinfo is None:
client_now = client_now.replace(tzinfo=UTC)
try:
offset_minutes = int(context_json.get("client_timezone_offset_minutes") or 0)
except (TypeError, ValueError):
offset_minutes = 0
local_now = client_now - timedelta(minutes=offset_minutes)
return local_now.date()
@staticmethod
def _single_day_range(target: date, raw: str, granularity: str) -> OntologyTimeRange:
    """Build a time range whose start and end are both ``target``."""
    iso_day = target.isoformat()
    return OntologyTimeRange(
        raw=raw,
        granularity=granularity,
        start_date=iso_day,
        end_date=iso_day,
    )
@staticmethod
def _range(start: date, end: date, raw: str, granularity: str) -> OntologyTimeRange:
    """Build a time range from ``start`` to ``end``, stored as ISO date strings."""
    bounds = {"start_date": start.isoformat(), "end_date": end.isoformat()}
    return OntologyTimeRange(raw=raw, granularity=granularity, **bounds)
@staticmethod
def _parse_iso_date(value: str) -> date | None:
try:
return date.fromisoformat(value)
except ValueError:
return None
def _extract_metrics(self, compact_query: str) -> list[OntologyMetric]:
    """Derive the requested metrics from keywords found in ``compact_query``.

    At most one metric is kept per metric name.  When a top-N cue is present
    the ``amount`` metric is replaced by a ranked sum: ascending if the
    query contains "最低", otherwise descending.
    """
    collected: dict[str, OntologyMetric] = {}

    def mentions(*keywords: str) -> bool:
        return any(keyword in compact_query for keyword in keywords)

    def register(metric: OntologyMetric) -> None:
        collected[metric.name] = metric

    if mentions("多少钱", "金额", "总额", "支出", "回款", "应收", "应付"):
        register(OntologyMetric(name="amount", aggregation="sum", unit="CNY"))
    if mentions("多少笔", "几笔", "数量", "条数", "单数"):
        register(OntologyMetric(name="count", aggregation="count", unit="records"))

    # Flag-style metrics carry only a name.
    flag_metrics = (
        ("amount_over_limit", ("超标", "超预算")),
        ("overdue", ("逾期", "账龄")),
        ("duplicate_expense", ("重复",)),
    )
    for metric_name, triggers in flag_metrics:
        if mentions(*triggers):
            register(OntologyMetric(name=metric_name))

    ranking = TOP_N_PATTERN.search(compact_query)
    if ranking is not None:
        direction = "asc" if "最低" in compact_query else "desc"
        collected["amount"] = OntologyMetric(
            name="amount",
            aggregation="sum",
            unit="CNY",
            sort=direction,
            top_n=int(ranking.group("top")),
        )
    return list(collected.values())
def _extract_constraints(
    self,
    compact_query: str,
    entities: list[OntologyEntity],
) -> list[OntologyConstraint]:
    """Collect filter constraints from recognised entities and query keywords.

    Constraints are de-duplicated on (field, operator, str(value), currency);
    the first occurrence wins.  Sources, in order: filterable entities,
    status keywords, the first amount carrying a comparison prefix, and a
    top-N cue (which yields a ``top_n`` and a ``sort_by`` constraint).
    """
    unique: dict[tuple[str, str, str, str | None], OntologyConstraint] = {}

    def remember(item: OntologyConstraint) -> None:
        identity = (item.field, item.operator, str(item.value), item.currency)
        unique.setdefault(identity, item)

    filterable_types = {
        "employee",
        "department",
        "customer",
        "vendor",
        "project",
        "location",
        "expense_type",
    }
    for entity in entities:
        if entity.type not in filterable_types:
            continue
        remember(
            OntologyConstraint(
                field=entity.type,
                operator="=",
                value=entity.normalized_value,
            )
        )

    for keyword, status in STATUS_KEYWORDS.items():
        if keyword in compact_query:
            remember(OntologyConstraint(field="status", operator="=", value=status))

    # Only the first amount that carries a comparison prefix becomes a constraint.
    prefixed_amount = next(
        (found for found in AMOUNT_PATTERN.finditer(compact_query) if found.group("prefix")),
        None,
    )
    if prefixed_amount is not None:
        comparison = self._normalize_operator(prefixed_amount.group("prefix"))
        amount = self._normalize_amount(
            prefixed_amount.group("value"),
            prefixed_amount.group("unit"),
        )
        remember(
            OntologyConstraint(
                field="amount",
                operator=comparison,
                value=amount,
                currency="CNY",
            )
        )

    ranking = TOP_N_PATTERN.search(compact_query)
    if ranking is not None:
        remember(OntologyConstraint(field="top_n", operator="=", value=int(ranking.group("top"))))
        direction = "asc" if "最低" in compact_query else "desc"
        remember(OntologyConstraint(field="sort_by", operator=direction, value="amount"))
    return list(unique.values())

View File

@@ -0,0 +1,298 @@
from __future__ import annotations
import re
from dataclasses import dataclass
from pydantic import BaseModel, ConfigDict, Field
from app.schemas.ontology import OntologyIntent, OntologyScenario
# "YYYY-M-D <sep> YYYY-M-D" ranges; the separator is 到, 至, "~" or "-".
DATE_RANGE_PATTERN = re.compile(
    r"(?P<start>\d{4}-\d{1,2}-\d{1,2})\s*(?:到|至|~|-)\s*(?P<end>\d{4}-\d{1,2}-\d{1,2})"
)
# "YYYY年M月": an explicit year and month.
EXPLICIT_MONTH_PATTERN = re.compile(r"(?P<year>\d{4})年(?P<month>\d{1,2})月")
# A full date whose parts are separated by 年/月, "/" or "-", with an optional trailing 日.
EXPLICIT_DATE_PATTERN = re.compile(
    r"(?P<year>\d{4})[年/-](?P<month>\d{1,2})[月/-](?P<day>\d{1,2})日?"
)
# "M月D日 <sep> M月D日" ranges that carry no year.
MONTH_DAY_RANGE_PATTERN = re.compile(
    r"(?P<start_month>\d{1,2})月(?P<start_day>\d{1,2})日?\s*(?:到|至|~|-)\s*"
    r"(?P<end_month>\d{1,2})月(?P<end_day>\d{1,2})日?"
)
# "M月D日" without a year.
MONTH_DAY_PATTERN = re.compile(r"(?P<month>\d{1,2})月(?P<day>\d{1,2})日?")
# An optional comparison prefix, a decimal number and an optional unit (万元, 万 or 元).
# NOTE(review): the prefix alternation contains an empty alternative ("||"), so the
# "prefix" group can match the empty string; confirm whether a character was lost there.
AMOUNT_PATTERN = re.compile(
    r"(?P<prefix>超过|大于|高于|不少于|不低于|小于|低于|少于|至多|不超过|<=|>=|<|>||=)?\s*"
    r"(?P<value>\d+(?:\.\d+)?)\s*(?P<unit>万元|万|元)?"
)
# A ranking cue (top/TOP, 前, 最高[的], 最低[的]) followed by the requested count.
TOP_N_PATTERN = re.compile(r"(?:top|TOP|前|最高的?|最低的?)\s*(?P<top>\d+)")
# Scenario name -> (keyword, weight) pairs.
# NOTE(review): the weights are presumably added to a scenario score when the
# keyword occurs in the query; confirm against the scoring code.
SCENARIO_KEYWORDS = {
    "expense": (
        ("报销", 0.20),
        ("报销单", 0.20),
        ("单据报销", 0.18),
        ("报账", 0.20),
        ("差旅", 0.20),
        ("费用", 0.14),
        ("发票", 0.14),
        ("票据", 0.12),
        ("借款", 0.12),
        ("住宿", 0.10),
        ("餐费", 0.10),
        ("招待", 0.18),
        ("招待费", 0.18),
        ("花销", 0.16),
        ("花了", 0.14),
        ("支出", 0.14),
        ("垫付", 0.14),
    ),
    "accounts_receivable": (
        ("应收", 0.22),
        ("回款", 0.20),
        ("收款", 0.18),
        ("账龄", 0.18),
        ("客户欠款", 0.22),
    ),
    "accounts_payable": (
        ("应付", 0.22),
        ("付款", 0.20),
        ("请款", 0.18),
        ("供应商", 0.20),
        ("待付", 0.16),
        ("打款", 0.18),
    ),
    "knowledge": (
        ("制度", 0.20),
        ("规则", 0.20),
        ("办法", 0.18),
        ("依据", 0.18),
        ("政策", 0.16),
        ("知识库", 0.18),
    ),
}
# Keyword groups, one tuple per intent family (query / explain / compare / risk / draft).
# NOTE(review): the first QUERY_KEYWORDS entry is an empty string, and `"" in text`
# is true for every string, so a substring test over this tuple always succeeds.
# Confirm whether a keyword was lost here.
QUERY_KEYWORDS = (
    "",
    "查询",
    "查看",
    "列出",
    "统计",
    "汇总",
    "多少",
    "几笔",
    "金额",
    "明细",
)
EXPLAIN_KEYWORDS = ("为什么", "依据", "原因", "怎么处理", "是否可以", "能不能", "按什么规则")
COMPARE_KEYWORDS = ("对比", "比较", "相比", "差异", "变化")
RISK_KEYWORDS = ("风险", "异常", "重复", "超标", "超预算", "逾期", "验真", "巡检")
DRAFT_KEYWORDS = ("生成", "草稿", "起草", "拟一份", "创建", "发起", "准备")
# Phrases that continue or amend a draft (continue, confirm, "X is ..." field updates).
DRAFT_FOLLOW_UP_KEYWORDS = (
    "继续",
    "下一步",
    "核对",
    "补充",
    "补一下",
    "修改",
    "改成",
    "改为",
    "换成",
    "更新",
    "确认",
    "提交",
    "保存",
    "客户是",
    "地点是",
    "金额是",
    "日期是",
    "时间是",
)
# Accepted values of the ``review_action`` context field for expense review flows.
EXPENSE_REVIEW_ACTIONS = {
    "save_draft",
    "next_step",
    "edit_review",
    "link_to_existing_draft",
    "create_new_claim_from_documents",
}
# Phrases that ask for a business action (payment, approval, activation, deletion).
OPERATE_KEYWORDS = (
    "直接付款",
    "帮我付款",
    "安排付款",
    "发起付款",
    "直接审批",
    "审批通过",
    "帮我审批",
    "驳回",
    "上线",
    "激活",
    "停用",
    "删除",
)
# Chinese expense keyword -> normalized expense type code.
# Several keywords may share one code (for example every ride-related term
# maps to "transport").
EXPENSE_TYPE_KEYWORDS = {
    "差旅": "travel",
    "出差": "travel",
    "住宿": "hotel",
    "酒店": "hotel",
    "交通": "transport",
    "打车": "transport",
    "网约车": "transport",
    "出租车": "transport",
    "乘车": "transport",
    "乘车费": "transport",
    "用车": "transport",
    "叫车": "transport",
    "车资": "transport",
    "停车费": "transport",
    "餐费": "meal",
    "用餐": "meal",
    "会务": "meeting",
    "招待费": "entertainment",
    "招待": "entertainment",
    "宴请": "entertainment",
    "办公费": "office",
    "办公用品": "office",
    "文具": "office",
    "耗材": "office",
    "办公耗材": "office",
    "打印纸": "office",
    "办公设备": "office",
    "培训费": "training",
    "培训": "training",
    "通讯费": "communication",
    "话费": "communication",
    "福利费": "welfare",
    "团建": "welfare",
}
# Words that typically appear when a user narrates an expense (rides, meals, invoices, ...).
EXPENSE_NARRATIVE_KEYWORDS = (
    "报销",
    "报账",
    "招待",
    "招待费",
    "花销",
    "花了",
    "支出",
    "垫付",
    "打车",
    "车费",
    "乘车",
    "乘车费",
    "用车",
    "叫车",
    "车资",
    "餐费",
    "吃饭",
    "用餐",
    "宴请",
    "请客",
    "住宿",
    "发票",
    "票据",
    "差旅",
    "客户现场",
)
# Core accounts-receivable and accounts-payable vocabulary.
AR_CORE_KEYWORDS = ("应收", "回款", "收款", "账龄", "欠款", "未回款")
AP_CORE_KEYWORDS = ("应付", "付款", "请款", "待付", "打款", "未付款")
# Bare "I want to claim expenses" prompts that carry no further detail.
GENERIC_EXPENSE_PROMPTS = {
    "报销",
    "我要报销",
    "我想报销",
    "帮我报销",
    "我要申请报销",
    "发起报销",
    "提交报销",
}
# Slot identifier -> Chinese display label used when asking the user for missing data.
MISSING_SLOT_LABELS = {
    "expense_type": "费用类型",
    "amount": "金额",
    "customer_name": "客户单位",
    "vendor_name": "供应商",
    "participants": "参与人员",
    "attachments": "票据附件",
    "time_range": "发生时间",
    "reason": "事由说明",
    "document_id": "单据号",
}
# Chinese status wording -> normalized status code.
# Some keys are substrings of others (退回 / 已退回, 待审 / 待审批); each such
# pair maps to the same code.
STATUS_KEYWORDS = {
    "草稿": "draft",
    "待提交": "draft",
    "待补充": "supplement",
    "退回": "returned",
    "已退回": "returned",
    "进行中": "review",
    "审批中": "review",
    "审核中": "review",
    "流转中": "review",
    "已提交": "submitted",
    "逾期": "overdue",
    "待审批": "pending",
    "待审": "pending",
    "已审批": "approved",
    "已通过": "approved",
    "已审核": "approved",
    "已入账": "paid",
    "已付款": "paid",
    "未付款": "unpaid",
    "未回款": "unreceived",
}
# City names recognised as locations.
LOCATION_KEYWORDS = (
    "北京",
    "上海",
    "广州",
    "深圳",
    "杭州",
    "南京",
    "苏州",
    "成都",
    "重庆",
    "天津",
    "武汉",
    "西安",
    "郑州",
    "长沙",
    "青岛",
    "厦门",
    "宁波",
    "合肥",
    "济南",
    "福州",
)
# Role codes treated as privileged (compared against lower-cased role codes).
PRIVILEGED_ROLE_CODES = {"manager", "finance", "approver", "executive"}
# Scenario and intent name sets.
# NOTE(review): how these two sets are consumed is not visible in this module.
CONTEXTUAL_SCENARIOS = {"expense", "accounts_receivable", "accounts_payable", "knowledge"}
KNOWLEDGE_INTENTS = {"query", "explain", "compare"}
@dataclass(slots=True)
class ReferenceCatalog:
    """Lists of known names, one list per reference category."""

    employees: list[str]
    departments: list[str]
    customers: list[str]
    vendors: list[str]
    projects: list[str]
class LlmOntologyEntityHint(BaseModel):
    """One entity suggestion; unknown input fields are ignored.

    NOTE(review): judging by the name this is filled from LLM output; confirm
    against the producer.
    """

    model_config = ConfigDict(extra="ignore")
    type: str
    value: str
    # Optional canonical form of ``value``.
    normalized_value: str | None = None
    role: str = "target"
    # Constrained to the range [0.0, 1.0].
    confidence: float = Field(default=0.72, ge=0.0, le=1.0)
class LlmOntologyParseResult(BaseModel):
    """Structured parse result with safe defaults; unknown input fields are ignored.

    Every field has a default, so an empty payload validates to an
    ``unknown`` scenario, a ``query`` intent and zero confidence.
    """

    model_config = ConfigDict(extra="ignore")
    scenario: OntologyScenario = Field(default="unknown")
    intent: OntologyIntent = Field(default="query")
    # Constrained to the range [0.0, 1.0].
    confidence: float = Field(default=0.0, ge=0.0, le=1.0)
    clarification_required: bool = False
    clarification_question: str | None = None
    missing_slots: list[str] = Field(default_factory=list)
    ambiguity: list[str] = Field(default_factory=list)
    entity_hints: list[LlmOntologyEntityHint] = Field(default_factory=list)

View File

@@ -0,0 +1,285 @@
from __future__ import annotations
from typing import Any
from app.core.agent_enums import AgentPermissionLevel
from app.schemas.ontology import (
OntologyConstraint,
OntologyEntity,
OntologyFieldError,
OntologyMetric,
OntologyPermission,
OntologyTimeRange,
)
from app.services.ontology_rules import (
AMOUNT_PATTERN,
EXPENSE_REVIEW_ACTIONS,
MISSING_SLOT_LABELS,
OPERATE_KEYWORDS,
PRIVILEGED_ROLE_CODES,
)
class OntologyValidationMixin:
def _extract_risk_flags(self, compact_query: str, scenario: str) -> list[str]:
risk_flags: list[str] = []
def append(flag: str) -> None:
if flag not in risk_flags:
risk_flags.append(flag)
if "重复" in compact_query:
append("duplicate_expense")
if any(
keyword in compact_query
for keyword in ("发票异常", "票据异常", "验真失败", "附件缺失", "补件")
):
append("invoice_anomaly")
if any(keyword in compact_query for keyword in ("超标", "超预算", "超限")):
append("amount_over_limit")
if scenario == "accounts_receivable" and any(
keyword in compact_query for keyword in ("逾期", "账龄", "欠款", "未回款")
):
append("ar_overdue")
if scenario == "accounts_payable" and any(
keyword in compact_query for keyword in ("逾期", "待付", "付款风险", "未付款")
):
append("ap_overdue")
return risk_flags
def _resolve_permission(
    self,
    compact_query: str,
    context_json: dict,
    intent: str,
) -> OntologyPermission:
    """Decide the permission level a parsed request may run at.

    * ``query`` / ``explain`` / ``compare`` / ``risk_check`` are allowed at
      READ level.
    * ``draft`` is allowed at DRAFT_WRITE level.
    * Any other intent is never auto-allowed.  If the query contains an
      operate keyword or "付款", privileged callers (admins or holders of a
      privileged role code) get APPROVAL_REQUIRED and everyone else gets
      FORBIDDEN; otherwise the result is APPROVAL_REQUIRED.

    Args:
        compact_query: The user query searched for operate keywords.
        context_json: Request context; ``role_codes`` and ``is_admin`` are read.
        intent: The resolved intent name.
    """
    # ``role_codes`` may be present but null; treat that like an empty list
    # instead of failing while iterating ``None``.
    raw_role_codes = context_json.get("role_codes") or ()
    role_codes = {
        str(item).strip().lower()
        for item in raw_role_codes
        if str(item).strip()
    }
    is_admin = bool(context_json.get("is_admin"))
    privileged = is_admin or bool(role_codes & PRIVILEGED_ROLE_CODES)

    if intent in {"query", "explain", "compare", "risk_check"}:
        return OntologyPermission(
            level=AgentPermissionLevel.READ.value,
            allowed=True,
            reason="只读查询。",
        )
    if intent == "draft":
        return OntologyPermission(
            level=AgentPermissionLevel.DRAFT_WRITE.value,
            allowed=True,
            reason="允许生成草稿,但不会直接提交业务动作。",
        )

    asks_for_operation = (
        any(keyword in compact_query for keyword in OPERATE_KEYWORDS)
        or "付款" in compact_query
    )
    if asks_for_operation:
        if privileged:
            return OntologyPermission(
                level=AgentPermissionLevel.APPROVAL_REQUIRED.value,
                allowed=False,
                reason="涉及付款、审批或上线动作,必须进入人工审批链。",
            )
        return OntologyPermission(
            level=AgentPermissionLevel.FORBIDDEN.value,
            allowed=False,
            reason="当前账号缺少财务或审批权限,只能查看结果或生成草稿。",
        )
    return OntologyPermission(
        level=AgentPermissionLevel.APPROVAL_REQUIRED.value,
        allowed=False,
        reason="操作类请求需要人工审批确认。",
    )
def _build_field_errors(
    self,
    *,
    scenario: str,
    intent: str,
    entities: list[OntologyEntity],
    permission: OntologyPermission,
    missing_slots: list[str],
    ambiguity: list[str],
) -> list[OntologyFieldError]:
    """Collect the validation errors for a parsed request.

    One error is emitted per failed check, in this order: unknown scenario,
    a comparison with fewer than two non-amount entities, missing slots,
    ambiguity, and a FORBIDDEN permission.  Slot labels and ambiguity notes
    are joined with "、" so that the individual items stay readable.
    """
    errors: list[OntologyFieldError] = []
    if scenario == "unknown":
        errors.append(
            OntologyFieldError(
                field="scenario",
                code="scenario_unknown",
                message="未识别出明确业务场景,请补充是报销、应收、应付还是制度问题。",
            )
        )
    # Amount entities do not count as comparison targets.
    compare_targets = sum(1 for item in entities if item.type != "amount")
    if intent == "compare" and compare_targets < 2:
        errors.append(
            OntologyFieldError(
                field="entities",
                code="compare_target_missing",
                message="对比类问题请至少给出两个对象,或给出更明确的对比范围。",
            )
        )
    if missing_slots:
        slot_labels = "、".join(self._display_slot_label(item) for item in missing_slots)
        errors.append(
            OntologyFieldError(
                field="missing_slots",
                code="required_slot_missing",
                message=f"继续处理前还缺少关键信息:{slot_labels}",
            )
        )
    if ambiguity:
        errors.append(
            OntologyFieldError(
                field="ambiguity",
                code="ambiguity_detected",
                message=f"当前问题存在歧义:{'、'.join(ambiguity)}",
            )
        )
    if permission.level == AgentPermissionLevel.FORBIDDEN.value:
        errors.append(
            OntologyFieldError(
                field="permission",
                code="permission_forbidden",
                message=permission.reason,
            )
        )
    return errors
def _build_clarification(
    self,
    *,
    scenario: str,
    intent: str,
    entities: list[OntologyEntity],
    permission: OntologyPermission,
    missing_slots: list[str],
    ambiguity: list[str],
    allow_incomplete_draft: bool,
    model_clarification_required: bool,
    model_clarification_question: str | None,
) -> tuple[bool, str | None]:
    """Decide whether the user must be asked a follow-up question.

    Checks run in priority order: forbidden permission, knowledge
    query/explain (never asks), a non-empty model-supplied question, missing
    slots, ambiguity, unknown scenario, and an under-specified comparison.

    Returns:
        ``(True, question)`` when clarification is needed, else ``(False, None)``.

    NOTE(review): ``allow_incomplete_draft`` is kept for interface
    compatibility but cannot change the result.  Missing slots and ambiguity
    are handled before the point where it used to be consulted, and that
    branch returned the same ``(False, None)`` as the fall-through.  Confirm
    whether incomplete expense drafts were meant to bypass those checks.
    """
    del allow_incomplete_draft  # see NOTE(review) in the docstring

    if permission.level == AgentPermissionLevel.FORBIDDEN.value:
        return True, "当前动作超出权限范围。是否改为生成草稿或建议?"
    if scenario == "knowledge" and intent in {"query", "explain"}:
        return False, None
    if model_clarification_required:
        question = str(model_clarification_question or "").strip()
        # A blank model question falls through to the rule-based checks.
        if question:
            return True, question
    if missing_slots:
        return True, self._build_missing_slot_question(missing_slots)
    if ambiguity:
        return True, f"当前问题存在歧义,请进一步说明:{'、'.join(ambiguity)}"
    if scenario == "unknown":
        return True, "请说明这是报销、应收、应付,还是制度知识问题?"
    compare_targets = sum(1 for item in entities if item.type != "amount")
    if intent == "compare" and compare_targets < 2:
        return True, "请补充需要对比的两个对象,例如两个客户、两个供应商或两个员工。"
    return False, None
@staticmethod
def _allow_incomplete_draft(
context_json: dict[str, Any],
*,
scenario: str,
intent: str,
) -> bool:
if scenario != "expense" or intent != "draft":
return False
review_action = str(context_json.get("review_action") or "").strip()
return review_action in EXPENSE_REVIEW_ACTIONS
@staticmethod
def _display_slot_label(slot: str) -> str:
    """Return the label registered for ``slot`` in ``MISSING_SLOT_LABELS``, or ``slot`` itself."""
    return MISSING_SLOT_LABELS.get(slot, slot)
def _build_missing_slot_question(self, missing_slots: list[str]) -> str:
labels = [self._display_slot_label(item) for item in missing_slots[:4]]
if not labels:
return "请补充更多上下文后再继续。"
return f"请补充{''.join(labels)},我再继续帮你解析和处理。"
@staticmethod
def _compute_confidence(
    *,
    scenario: str,
    scenario_score: float,
    intent_score: float,
    entities: list[OntologyEntity],
    time_range: OntologyTimeRange,
    metrics: list[OntologyMetric],
    constraints: list[OntologyConstraint],
    risk_flags: list[str],
    clarification_required: bool,
    permission: OntologyPermission,
) -> float:
    """Combine the parse signals into a confidence score rounded to 2 decimals.

    A base of 0.18 plus the scenario and intent scores is raised by bonuses
    for entities (0.04 each, capped at 0.16), a resolved start date, metrics,
    constraints and risk flags.  A FORBIDDEN permission lifts the score to
    at least 0.86; an unknown scenario caps it at 0.45; a pending
    clarification caps it at 0.58 unless the permission is FORBIDDEN.  The
    result never exceeds 0.98.
    """
    forbidden = permission.level == AgentPermissionLevel.FORBIDDEN.value

    bonuses = (
        min(0.16, len(entities) * 0.04),
        0.10 if time_range.start_date else 0.0,
        0.06 if metrics else 0.0,
        0.06 if constraints else 0.0,
        0.08 if risk_flags else 0.0,
    )
    score = 0.18 + scenario_score + intent_score
    # Added one at a time, in this order, to keep the float result stable.
    for bonus in bonuses:
        score += bonus

    if forbidden:
        score = max(score, 0.86)
    if scenario == "unknown":
        score = min(score, 0.45)
    if clarification_required and not forbidden:
        score = min(score, 0.58)
    return round(min(score, 0.98), 2)
@staticmethod
def _build_result_summary(
scenario: str,
intent: str,
permission_level: str,
confidence: float,
) -> str:
return (
f"语义解析完成scenario={scenario}, intent={intent}, "
f"permission={permission_level}, confidence={confidence:.2f}"
)
@staticmethod
def _normalize_operator(value: str) -> str:
mapping = {
"超过": ">",
"大于": ">",
"高于": ">",
">": ">",
">=": ">=",
"不少于": ">=",
"不低于": ">=",
"小于": "<",
"低于": "<",
"少于": "<",
"<": "<",
"<=": "<=",
"至多": "<=",
"不超过": "<=",
"=": "=",
"": "=",
}
return mapping.get(value, value)
@staticmethod
def _normalize_amount(raw_value: str | None, unit: str | None) -> int | float:
numeric = float(raw_value or 0)
if unit in {"", "万元"}:
numeric *= 10000
return int(numeric) if numeric.is_integer() else round(numeric, 2)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,626 @@
from __future__ import annotations
from dataclasses import dataclass
from time import perf_counter
from typing import Any
from app.core.agent_enums import AgentRunSource, AgentRunStatus, AgentToolType
from app.schemas.agent_asset import AgentAssetListItem, AgentAssetRead
from app.schemas.ontology import OntologyParseResult
from app.schemas.orchestrator import OrchestratorRequest
from app.schemas.user_agent import UserAgentRequest, UserAgentResponse
@dataclass(slots=True)
class ExecutionOutcome:
    """Result of executing one orchestrated run."""

    # Run status value (the engine stores AgentRunStatus values here).
    status: str
    # Response payload for the caller.
    result: dict[str, Any]
    # True when at least one tool call fell back to its degraded response.
    degraded: bool
    # Number of tool calls made during the run.
    tool_count: int
    # Number of those tool calls that failed.
    failed_tool_count: int
class OrchestratorExecutionEngine:
def __init__(
    self,
    *,
    run_service,
    expense_claim_service,
    knowledge_service,
    user_agent_service,
    database_query_builder,
) -> None:
    """Store the collaborating services; all of them are keyword-only."""
    self.run_service = run_service
    self.expense_claim_service = expense_claim_service
    self.knowledge_service = knowledge_service
    self.user_agent_service = user_agent_service
    self.database_query_builder = database_query_builder
def _execute_user_agent(
    self,
    *,
    payload: OrchestratorRequest,
    run_id: str,
    ontology: OntologyParseResult,
    capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
    requires_confirmation: bool,
    context_json: dict[str, Any],
) -> ExecutionOutcome:
    """Run one interactive request through a single tool and the User Agent.

    When confirmation is required, only a confirmation summary is produced
    and the run is reported as BLOCKED.  Otherwise the next step selects one
    tool (database query, knowledge search, rule check, or the draft /
    expense path), the tool runs through ``_invoke_tool``, and its payload
    is handed to the User Agent to build the final answer.
    """
    selected_capability_codes = self._flatten_capability_codes(capabilities)

    if requires_confirmation:
        response, degraded = self._invoke_tool(
            run_id=run_id,
            tool_type=AgentToolType.LLM.value,
            tool_name="user_agent.confirmation_placeholder",
            request_json={
                "message": payload.message,
                "permission_level": ontology.permission.level,
            },
            context_json=context_json,
            executor=lambda: {
                "confirmation_title": "操作需要确认",
                "message": f"{ontology.permission.reason} 当前仅返回确认摘要,不直接执行动作。",
            },
            fallback_factory=lambda exc: {
                "confirmation_title": "操作需要确认",
                "message": f"确认摘要生成失败,已阻断自动执行:{exc}",
            },
        )
        return ExecutionOutcome(
            status=AgentRunStatus.BLOCKED.value,
            result={**response, "degraded": degraded},
            degraded=degraded,
            tool_count=1,
            failed_tool_count=1 if degraded else 0,
        )

    next_step = self._resolve_next_step(
        ontology,
        payload.source,
        context_json=context_json,
    )
    message = payload.message or ""

    if next_step == "query_database":
        tool_type = AgentToolType.DATABASE.value
        tool_name = self._database_tool_name(ontology.scenario)
        executor = lambda: self.database_query_builder.build_database_answer(
            ontology,
            user_id=payload.user_id,
            context_json=context_json,
            message=message,
        )
        fallback_factory = lambda exc: {
            "message": f"数据库查询暂时不可用,已返回降级说明:{exc}",
            "degraded": True,
        }
    elif next_step == "search_knowledge":
        tool_type = AgentToolType.DATABASE.value
        tool_name = "knowledge.search"
        executor = lambda: self._build_knowledge_answer(
            message=message,
            ontology=ontology,
            capabilities=capabilities,
            context_json=context_json,
        )
        fallback_factory = lambda exc: {
            "message": f"知识检索暂时不可用,建议稍后重试:{exc}",
            "degraded": True,
        }
    elif next_step == "run_rule":
        tool_type = AgentToolType.RULE_ENGINE.value
        tool_name = self._rule_tool_name(capabilities)
        executor = lambda: self._build_rule_answer(ontology)
        fallback_factory = lambda exc: {
            "message": f"规则检查暂时不可用,已返回人工复核建议:{exc}",
            "degraded": True,
        }
    else:
        # Draft path.  NOTE(review): "ask_clarification" has no dedicated
        # branch, so it lands here as well; confirm that this is intended.
        tool_type = AgentToolType.LLM.value
        tool_name = "user_agent.draft_placeholder"
        executor = lambda: {
            "message": (
                f"已生成 {ontology.scenario} 场景草稿,"
                "占位能力后续由 Day 5 User Agent 接管。"
            ),
            "draft_only": True,
        }
        fallback_factory = lambda exc: {
            "message": f"内容整理暂时不可用,请稍后再试:{exc}",
            "degraded": True,
        }
        if ontology.scenario == "expense" or self._is_expense_review_action(context_json):
            # Expense requests go to the claim service; the tool is recorded
            # as a database call only when the action persists data.
            is_persistence_action = self._is_expense_persistence_action(context_json)
            tool_type = (
                AgentToolType.DATABASE.value
                if is_persistence_action
                else AgentToolType.LLM.value
            )
            tool_name = (
                "database.expense_claims.save_or_submit"
                if is_persistence_action
                else "user_agent.expense_review_preview"
            )
            executor = lambda: self.expense_claim_service.save_or_submit_from_ontology(
                run_id=run_id,
                user_id=payload.user_id,
                message=message,
                ontology=ontology,
                context_json=context_json,
            )
            fallback_factory = lambda exc: {
                "message": (
                    f"报销草稿落库失败,请稍后再试:{exc}"
                    if is_persistence_action
                    else f"报销内容预览生成失败,请稍后再试:{exc}"
                ),
                "degraded": True,
            }

    return self._run_tool_and_respond(
        payload=payload,
        run_id=run_id,
        ontology=ontology,
        context_json=context_json,
        selected_capability_codes=selected_capability_codes,
        requires_confirmation=requires_confirmation,
        tool_type=tool_type,
        tool_name=tool_name,
        executor=executor,
        fallback_factory=fallback_factory,
    )

def _run_tool_and_respond(
    self,
    *,
    payload: OrchestratorRequest,
    run_id: str,
    ontology: OntologyParseResult,
    context_json: dict[str, Any],
    selected_capability_codes: list[str],
    requires_confirmation: bool,
    tool_type: str,
    tool_name: str,
    executor,
    fallback_factory,
) -> ExecutionOutcome:
    """Invoke one tool, pass its payload to the User Agent, and wrap the outcome."""
    tool_payload, degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=tool_type,
        tool_name=tool_name,
        request_json=self._build_ontology_json(ontology),
        context_json=context_json,
        executor=executor,
        fallback_factory=fallback_factory,
    )
    agent_response = self.user_agent_service.respond(
        UserAgentRequest(
            run_id=run_id,
            user_id=payload.user_id,
            message=payload.message or "",
            ontology=ontology,
            context_json=context_json,
            tool_payload=tool_payload,
            selected_capability_codes=selected_capability_codes,
            degraded=degraded,
            requires_confirmation=requires_confirmation,
        )
    )
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result=self._build_user_agent_result(agent_response, degraded=degraded),
        degraded=degraded,
        tool_count=1,
        failed_tool_count=1 if degraded else 0,
    )
def _execute_hermes(
    self,
    *,
    payload: OrchestratorRequest,
    run_id: str,
    ontology: OntologyParseResult,
    capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
    requires_confirmation: bool,
    task_asset: AgentAssetRead | None,
    context_json: dict[str, Any],
) -> ExecutionOutcome:
    """Run the Hermes flow: a rule check followed by an MCP call.

    Requests that need confirmation are blocked without calling any tool.
    Otherwise both tools run (rule engine first) and their messages are
    merged into one report; the run counts as degraded if either tool fell
    back.
    """
    if requires_confirmation:
        blocked_result = {
            "message": "Hermes 不会自动执行需要确认的高风险动作,已阻断。",
            "degraded": False,
        }
        return ExecutionOutcome(
            status=AgentRunStatus.BLOCKED.value,
            result=blocked_result,
            degraded=False,
            tool_count=0,
            failed_tool_count=0,
        )

    has_task = task_asset is not None

    rule_response, rule_degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.RULE_ENGINE.value,
        tool_name=self._rule_tool_name(capabilities),
        request_json=self._build_ontology_json(ontology),
        context_json=context_json,
        executor=lambda: self._build_rule_answer(ontology),
        fallback_factory=lambda exc: {
            "message": f"规则巡检失败,已降级为待人工复核:{exc}",
            "degraded": True,
        },
    )
    mcp_response, mcp_degraded = self._invoke_tool(
        run_id=run_id,
        tool_type=AgentToolType.MCP.value,
        tool_name=self._mcp_tool_name(capabilities),
        request_json={
            "task_code": task_asset.code if has_task else "",
            "scenario": ontology.scenario,
        },
        context_json=context_json,
        executor=lambda: self._build_mcp_answer(task_asset, ontology),
        fallback_factory=lambda exc: {
            "message": f"MCP 调用失败,已使用缓存快照降级:{exc}",
            "fallback": "used_cached_snapshot",
        },
    )

    any_degraded = rule_degraded or mcp_degraded
    report_message = self._build_hermes_message(
        task_asset=task_asset,
        ontology=ontology,
        rule_response=rule_response,
        mcp_response=mcp_response,
        degraded=any_degraded,
    )
    return ExecutionOutcome(
        status=AgentRunStatus.SUCCEEDED.value,
        result={
            "message": report_message,
            "report_type": task_asset.code if has_task else "hermes_runtime",
            "degraded": any_degraded,
        },
        degraded=any_degraded,
        tool_count=2,
        failed_tool_count=int(rule_degraded) + int(mcp_degraded),
    )
@staticmethod
def _resolve_next_step(
ontology: OntologyParseResult,
source: str,
*,
context_json: dict[str, Any] | None = None,
) -> str:
if OrchestratorExecutionEngine._is_expense_review_action(context_json or {}):
return "create_draft"
if ontology.clarification_required:
return "ask_clarification"
if ontology.intent == "draft":
return "create_draft"
if ontology.scenario == "knowledge" or ontology.intent == "explain":
return "search_knowledge"
if ontology.intent == "risk_check" or source == AgentRunSource.SCHEDULE.value:
return "run_rule"
if ontology.intent in {"query", "compare"}:
return "query_database"
return "create_draft"
@staticmethod
def _is_expense_review_action(context_json: dict[str, Any]) -> bool:
review_action = str((context_json or {}).get("review_action") or "").strip()
return review_action in {
"save_draft",
"next_step",
"edit_review",
"link_to_existing_draft",
"create_new_claim_from_documents",
}
@staticmethod
def _is_expense_persistence_action(context_json: dict[str, Any]) -> bool:
review_action = str((context_json or {}).get("review_action") or "").strip()
return review_action in {
"save_draft",
"next_step",
"link_to_existing_draft",
"create_new_claim_from_documents",
}
@staticmethod
def _flatten_capability_codes(
capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
) -> list[str]:
codes: list[str] = []
for items in capabilities.values():
for item in items[:2]:
if item.code not in codes:
codes.append(item.code)
return codes
def _rank_assets(
self,
items: list[AgentAssetListItem],
ontology: OntologyParseResult,
) -> list[AgentAssetListItem]:
def score(item: AgentAssetListItem) -> tuple[int, str]:
item_tags = {str(value) for value in item.scenario_json or []}
weight = 0
if ontology.scenario in item_tags:
weight += 3
if ontology.intent in item_tags:
weight += 2
for risk_flag in ontology.risk_flags:
if risk_flag in item_tags:
weight += 4
return weight, item.code
ranked = sorted(items, key=score, reverse=True)
if not ranked:
return []
scored = [item for item in ranked if score(item)[0] > 0]
return scored or ranked[:1]
def _invoke_tool(
self,
*,
run_id: str,
tool_type: str,
tool_name: str,
request_json: dict[str, Any],
context_json: dict[str, Any],
executor,
fallback_factory,
) -> tuple[dict[str, Any], bool]:
started = perf_counter()
try:
self._maybe_raise_simulated_failure(tool_type, context_json)
response = executor()
duration_ms = int((perf_counter() - started) * 1000)
self.run_service.record_tool_call(
run_id=run_id,
tool_type=tool_type,
tool_name=tool_name,
request_json=request_json,
response_json=response,
status="succeeded",
duration_ms=duration_ms,
)
return response, False
except Exception as exc:
duration_ms = int((perf_counter() - started) * 1000)
response = fallback_factory(exc)
self.run_service.record_tool_call(
run_id=run_id,
tool_type=tool_type,
tool_name=tool_name,
request_json=request_json,
response_json=response,
status="failed",
duration_ms=duration_ms,
error_message=str(exc),
)
return response, True
@staticmethod
def _maybe_raise_simulated_failure(tool_type: str, context_json: dict[str, Any]) -> None:
expected = str(context_json.get("simulate_tool_failure") or "").strip().lower()
if not expected:
return
if expected == tool_type.lower():
raise RuntimeError(f"simulated {tool_type} failure")
@staticmethod
def _build_user_query_result(
ontology: OntologyParseResult,
response: dict[str, Any],
) -> dict[str, Any]:
if ontology.scenario == "expense":
return {
"message": (
f"已路由到 User Agent占位查询结果命中 {response['record_count']} 笔报销,"
f"金额合计 {response['total_amount']} 元。"
),
"data": response,
}
if ontology.scenario == "accounts_receivable":
return {
"message": (
f"已路由到 User Agent占位查询结果命中 {response['record_count']} 条应收,"
f"未回款金额 {response['outstanding_amount']} 元。"
),
"data": response,
}
return {
"message": (
f"已路由到 User Agent占位查询结果命中 {response['record_count']} 条应付,"
f"待付金额 {response['outstanding_amount']} 元。"
),
"data": response,
}
@staticmethod
def _build_user_agent_result(
response: UserAgentResponse,
*,
degraded: bool,
) -> dict[str, Any]:
result = {
"message": response.answer,
"answer": response.answer,
"citations": [item.model_dump() for item in response.citations],
"suggested_actions": [item.model_dump() for item in response.suggested_actions],
"risk_flags": response.risk_flags,
"requires_confirmation": response.requires_confirmation,
"degraded": degraded,
}
if response.query_payload is not None:
result["query_payload"] = response.query_payload.model_dump()
if response.draft_payload is not None:
result["draft_payload"] = response.draft_payload.model_dump()
if response.review_payload is not None:
result["review_payload"] = response.review_payload.model_dump()
return result
def _build_knowledge_answer(
self,
*,
message: str,
ontology: OntologyParseResult,
capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
context_json: dict[str, Any],
) -> dict[str, Any]:
del ontology, capabilities
conversation_history = context_json.get("conversation_history")
if not isinstance(conversation_history, list):
conversation_history = None
payload = self.knowledge_service.search_knowledge(
message,
conversation_history=conversation_history,
limit=8,
)
references = [str(item).strip() for item in list(payload.get("references") or []) if str(item).strip()]
if references:
payload["references"] = references
return payload
@staticmethod
def _build_rule_answer(ontology: OntologyParseResult) -> dict[str, Any]:
risk_text = (
"".join(ontology.risk_flags)
if ontology.risk_flags
else "未识别到明确风险标签"
)
return {
"message": f"已完成占位规则检查,风险标签:{risk_text}",
"risk_flags": ontology.risk_flags,
}
@staticmethod
def _build_mcp_answer(
task_asset: AgentAssetRead | None,
ontology: OntologyParseResult,
) -> dict[str, Any]:
return {
"message": (
f"已调用占位 MCP 快照,任务={task_asset.code if task_asset else 'none'}"
f"scenario={ontology.scenario}"
),
"snapshot": "stubbed",
}
@staticmethod
def _build_hermes_message(
*,
task_asset: AgentAssetRead | None,
ontology: OntologyParseResult,
rule_response: dict[str, Any],
mcp_response: dict[str, Any],
degraded: bool,
) -> str:
task_code = task_asset.code if task_asset is not None else "task.unspecified"
suffix = ",其中部分能力已降级。" if degraded else ""
return (
f"Hermes 占位执行完成:任务 {task_code}"
f"场景 {ontology.scenario},规则结果={rule_response.get('message', '')}"
f"MCP 结果={mcp_response.get('message', '')}{suffix}"
)
@staticmethod
def _database_tool_name(scenario: str) -> str:
if scenario == "expense":
return "database.expense_claims.lookup"
if scenario == "accounts_receivable":
return "database.accounts_receivable.lookup"
return "database.accounts_payable.lookup"
@staticmethod
def _rule_tool_name(
capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
) -> str:
if capabilities["rules"]:
return capabilities["rules"][0].code
return "rule_engine.default_risk_check"
@staticmethod
def _mcp_tool_name(
capabilities: dict[str, list[AgentAssetListItem | AgentAssetRead]],
) -> str:
if capabilities["mcps"]:
return capabilities["mcps"][0].code
return "mcp.default_snapshot"
@staticmethod
def _build_ontology_json(ontology: OntologyParseResult) -> dict[str, Any]:
return {
"scenario": ontology.scenario,
"intent": ontology.intent,
"entities": [item.model_dump() for item in ontology.entities],
"time_range": ontology.time_range.model_dump(),
"metrics": [item.model_dump() for item in ontology.metrics],
"constraints": [item.model_dump() for item in ontology.constraints],
"risk_flags": ontology.risk_flags,
"permission": ontology.permission.model_dump(),
}

View File

@@ -0,0 +1,535 @@
from __future__ import annotations
from datetime import UTC, datetime, timedelta
from typing import Any
from sqlalchemy import and_, func, or_, select
from sqlalchemy.orm import Session
from app.models.employee import Employee
from app.models.financial_record import (
AccountsPayableRecord,
AccountsReceivableRecord,
ExpenseClaim,
)
from app.schemas.ontology import OntologyParseResult
# Role codes whose holders may query expense claims beyond their own.
PRIVILEGED_EXPENSE_QUERY_ROLE_CODES = {"finance"}
# Phrases that mark a message as asking about the caller's own claims.
SELF_REFERENCE_KEYWORDS = ("我的", "我自己", "本人", "我名下", "给我查", "我提交", "我申请")
# Length, in days, of the default window applied when a query names neither a
# claim number nor a time range.
EXPENSE_QUERY_RECENT_WINDOW_DAYS = 10
# Maximum number of claims returned in the preview list.
EXPENSE_QUERY_PREVIEW_LIMIT = 20
# Display labels keyed by raw claim status code.
EXPENSE_STATUS_LABELS = {
"draft": "草稿",
"submitted": "已提交",
"review": "审核中",
"approved": "已通过",
"rejected": "已驳回",
"paid": "已付款",
}
# Display labels keyed by coarse status-group key.
EXPENSE_STATUS_GROUP_LABELS = {
"draft": "草稿",
"in_progress": "审批中",
"completed": "审批完成",
"other": "其他状态",
}
# Order in which status groups are reported.
EXPENSE_STATUS_GROUP_ORDER = ("draft", "in_progress", "completed", "other")
# Display labels keyed by expense type code.
EXPENSE_TYPE_LABELS = {
"travel": "差旅费",
"hotel": "住宿费",
"transport": "交通费",
"meal": "餐费",
"meeting": "会务费",
"entertainment": "业务招待费",
"office": "办公费",
"training": "培训费",
"communication": "通讯费",
"welfare": "福利费",
"other": "其他费用",
}
# Builds database-backed answers (expense claims, accounts receivable,
# accounts payable) for orchestrator queries using the injected session.
class OrchestratorDatabaseQueryBuilder:
def __init__(self, db: Session) -> None:
# Session used by every query this builder issues.
self.db = db
def build_database_answer(
    self,
    ontology: OntologyParseResult,
    *,
    user_id: str | None,
    context_json: dict[str, Any],
    message: str,
) -> dict[str, Any]:
    """Dispatch to the scenario-specific database answer builder.

    ``expense`` queries receive the caller context; ``accounts_receivable``
    gets the receivable summary; every other scenario falls back to the
    payable summary.
    """
    scenario = ontology.scenario
    if scenario == "accounts_receivable":
        return self._build_accounts_receivable_answer()
    if scenario != "expense":
        return self._build_accounts_payable_answer()
    return self._build_expense_database_answer(
        ontology=ontology,
        user_id=user_id,
        context_json=context_json,
        message=message,
    )
def _build_expense_database_answer(
    self,
    *,
    ontology: OntologyParseResult,
    user_id: str | None,
    context_json: dict[str, Any],
    message: str,
) -> dict[str, Any]:
    """Query expense claims and assemble the list-style answer payload.

    Totals are first computed over every claim matching the query scope. When
    the query names neither a claim number nor a time range, the displayed
    figures, preview rows and status groups are narrowed to the recent window
    and the remainder is reported as ``older_record_count``.
    """
    conditions, scope_label, scoped_to_current_user = self._build_expense_query_scope(
        ontology=ontology,
        user_id=user_id,
        context_json=context_json,
        message=message,
    )

    def count_and_sum(filters: list[Any]) -> tuple[int, float]:
        # Run the COUNT and SUM(amount) aggregates under the same filters.
        count_query = select(func.count()).select_from(ExpenseClaim)
        sum_query = select(func.coalesce(func.sum(ExpenseClaim.amount), 0)).select_from(ExpenseClaim)
        for clause in filters:
            count_query = count_query.where(clause)
            sum_query = sum_query.where(clause)
        matched = int(self.db.scalar(count_query) or 0)
        amount = float(self.db.scalar(sum_query) or 0)
        return matched, amount

    total_count, total_amount = count_and_sum(conditions)

    recent_window_applied = self._should_limit_expense_query_to_recent_window(ontology)
    display_conditions = list(conditions)
    display_count, display_amount = total_count, total_amount
    older_record_count = 0
    window_start_date: str | None = None
    window_end_date: str | None = None

    if recent_window_applied:
        reference_now = self._resolve_reference_now(context_json)
        window_start, window_end = self._resolve_expense_recent_window_bounds(reference_now)
        recent_condition = self._build_expense_recent_window_condition(window_start, window_end)
        display_conditions.append(recent_condition)
        window_start_date = window_start.date().isoformat()
        # The upper bound is exclusive; step back one tick to report the last
        # calendar day that is actually inside the window.
        window_end_date = (window_end - timedelta(microseconds=1)).date().isoformat()
        display_count, display_amount = count_and_sum([recent_condition, *conditions])
        older_record_count = max(0, total_count - display_count)

    document_datetime = func.coalesce(
        ExpenseClaim.submitted_at,
        ExpenseClaim.created_at,
        ExpenseClaim.occurred_at,
    )
    preview_query = (
        select(ExpenseClaim)
        .order_by(document_datetime.desc(), ExpenseClaim.occurred_at.desc())
        .limit(EXPENSE_QUERY_PREVIEW_LIMIT)
    )
    for clause in display_conditions:
        preview_query = preview_query.where(clause)
    preview_claims = list(self.db.scalars(preview_query).all())
    status_groups = self._build_expense_status_groups(display_conditions)
    records = [self._build_expense_query_record(claim) for claim in preview_claims]

    return {
        "result_type": "expense_claim_list",
        "record_count": display_count,
        "total_amount": round(display_amount, 2),
        "scope_label": scope_label,
        "scoped_to_current_user": scoped_to_current_user,
        "recent_window_applied": recent_window_applied,
        "window_days": EXPENSE_QUERY_RECENT_WINDOW_DAYS if recent_window_applied else None,
        "window_start_date": window_start_date,
        "window_end_date": window_end_date,
        "preview_count": len(preview_claims),
        "older_record_count": older_record_count,
        "records": records,
        "status_groups": status_groups,
        "has_more_in_window": display_count > len(preview_claims),
        "total_matched_count": total_count,
    }
def _build_accounts_receivable_answer(self) -> dict[str, Any]:
    """Summarize accounts receivable: row count and total outstanding amount."""
    count_query = select(func.count()).select_from(AccountsReceivableRecord)
    record_total = self.db.scalar(count_query)
    outstanding_query = select(
        func.coalesce(func.sum(AccountsReceivableRecord.amount_outstanding), 0)
    )
    outstanding_total = self.db.scalar(outstanding_query)
    return {
        "record_count": int(record_total or 0),
        "outstanding_amount": round(float(outstanding_total or 0), 2),
    }
def _build_accounts_payable_answer(self) -> dict[str, Any]:
    """Summarize accounts payable: row count and total outstanding amount."""
    count_query = select(func.count()).select_from(AccountsPayableRecord)
    record_total = self.db.scalar(count_query)
    outstanding_query = select(
        func.coalesce(func.sum(AccountsPayableRecord.amount_outstanding), 0)
    )
    outstanding_total = self.db.scalar(outstanding_query)
    return {
        "record_count": int(record_total or 0),
        "outstanding_amount": round(float(outstanding_total or 0), 2),
    }
@staticmethod
def _should_limit_expense_query_to_recent_window(
ontology: OntologyParseResult,
) -> bool:
has_explicit_claim_no = any(
item.type == "expense_claim"
and str(item.normalized_value or item.value or "").strip()
for item in ontology.entities
)
has_explicit_time_range = bool(
ontology.time_range.start_date or ontology.time_range.end_date
)
return not has_explicit_claim_no and not has_explicit_time_range
@staticmethod
def _resolve_reference_now(context_json: dict[str, Any]) -> datetime:
raw_value = str(context_json.get("client_now_iso") or "").strip()
if raw_value:
normalized = raw_value.replace("Z", "+00:00")
try:
parsed = datetime.fromisoformat(normalized)
if parsed.tzinfo is None:
return parsed.replace(tzinfo=UTC)
return parsed.astimezone(UTC)
except ValueError:
pass
return datetime.now(UTC)
@staticmethod
def _resolve_expense_recent_window_bounds(
    reference_now: datetime,
) -> tuple[datetime, datetime]:
    """Return the ``(start, end)`` bounds of the recent-claims window in UTC.

    ``end`` is the UTC midnight following ``reference_now``; ``start`` lies
    ``EXPENSE_QUERY_RECENT_WINDOW_DAYS`` days before ``end``.
    """
    today_midnight = reference_now.astimezone(UTC).replace(
        hour=0,
        minute=0,
        second=0,
        microsecond=0,
    )
    upper_bound = today_midnight + timedelta(days=1)
    lower_bound = upper_bound - timedelta(days=EXPENSE_QUERY_RECENT_WINDOW_DAYS)
    return lower_bound, upper_bound
@staticmethod
def _build_expense_recent_window_condition(
    window_start: datetime,
    window_end: datetime,
) -> Any:
    """Build the SQL filter keeping claims dated within ``[start, end)``.

    A claim's date is its first non-null value among ``submitted_at``,
    ``created_at`` and ``occurred_at``.
    """
    claim_datetime = func.coalesce(
        ExpenseClaim.submitted_at,
        ExpenseClaim.created_at,
        ExpenseClaim.occurred_at,
    )
    not_before_start = claim_datetime >= window_start
    before_end = claim_datetime < window_end
    return and_(not_before_start, before_end)
def _build_expense_status_groups(
    self,
    conditions: list[Any],
) -> list[dict[str, Any]]:
    """Count matching claims per coarse status group.

    Returns one ``{"key", "label", "count"}`` entry per non-empty group, in
    ``EXPENSE_STATUS_GROUP_ORDER``.
    """
    query = (
        select(ExpenseClaim.status, func.count())
        .select_from(ExpenseClaim)
        .group_by(ExpenseClaim.status)
    )
    for clause in conditions:
        query = query.where(clause)

    totals = dict.fromkeys(EXPENSE_STATUS_GROUP_ORDER, 0)
    for raw_status, row_count in self.db.execute(query).all():
        group_key = self._resolve_expense_status_group(str(raw_status or "").strip())[0]
        totals[group_key] = totals.get(group_key, 0) + int(row_count or 0)

    groups: list[dict[str, Any]] = []
    for key in EXPENSE_STATUS_GROUP_ORDER:
        group_total = totals.get(key, 0)
        if group_total > 0:
            groups.append(
                {
                    "key": key,
                    "label": EXPENSE_STATUS_GROUP_LABELS[key],
                    "count": group_total,
                }
            )
    return groups
@staticmethod
def _resolve_expense_status_group(status: str) -> tuple[str, str]:
    """Map a raw claim status to its ``(group_key, group_label)`` pair.

    ``draft`` stays ``draft``; ``submitted``/``review`` are in progress;
    ``approved``/``paid`` are completed; anything else is ``other``.
    """
    normalized = str(status or "").strip().lower()
    if normalized == "draft":
        group_key = "draft"
    elif normalized in {"submitted", "review"}:
        group_key = "in_progress"
    elif normalized in {"approved", "paid"}:
        group_key = "completed"
    else:
        group_key = "other"
    return group_key, EXPENSE_STATUS_GROUP_LABELS[group_key]
@staticmethod
def _resolve_expense_query_document_datetime(
claim: ExpenseClaim,
) -> datetime | None:
return claim.submitted_at or claim.created_at or claim.occurred_at
def _build_expense_query_record(
    self,
    claim: ExpenseClaim,
) -> dict[str, Any]:
    """Serialize one expense claim into the preview-record dict.

    Adds display labels for the expense type and status, the coarse status
    group, and ISO dates for the document and occurrence datetimes (empty
    strings when unavailable).
    """
    status_group, status_group_label = self._resolve_expense_status_group(claim.status)
    document_datetime = self._resolve_expense_query_document_datetime(claim)
    document_date = document_datetime.date().isoformat() if document_datetime else ""
    occurred_date = claim.occurred_at.date().isoformat() if claim.occurred_at else ""
    expense_type_label = EXPENSE_TYPE_LABELS.get(claim.expense_type, claim.expense_type or "报销")
    status_label = EXPENSE_STATUS_LABELS.get(claim.status, claim.status or "处理中")

    record: dict[str, Any] = {
        "claim_id": claim.id,
        "claim_no": claim.claim_no,
        "employee_name": claim.employee_name,
        "expense_type": claim.expense_type,
        "expense_type_label": expense_type_label,
        "amount": round(float(claim.amount), 2),
        "status": claim.status,
        "status_label": status_label,
        "status_group": status_group,
        "status_group_label": status_group_label,
        "approval_stage": claim.approval_stage,
        "document_date": document_date,
        "occurred_at": occurred_date,
        "reason": claim.reason,
        "location": claim.location,
    }
    return record
def _build_expense_query_scope(
    self,
    *,
    ontology: OntologyParseResult,
    user_id: str | None,
    context_json: dict[str, Any],
    message: str,
) -> tuple[list[Any], str, bool]:
    """Translate the parsed query and caller identity into SQL filters.

    Args:
        ontology: Parsed request supplying entities, constraints and the
            time range.
        user_id: Identifier of the calling user, if known.
        context_json: Request context; read for role codes and forwarded to
            the owner-condition builder.
        message: Original user message, checked for self-references.

    Returns:
        ``(conditions, scope_label, scoped_to_current_user)``. ``conditions``
        are SQLAlchemy criteria to be AND-ed together.

    Raises:
        ValueError: If an amount constraint value is not numeric.
    """
    conditions: list[Any] = []

    def entity_texts(entity_type: str, *, prefer_normalized: bool) -> list[str]:
        # Distinct non-blank texts of one entity type, in first-seen order.
        texts = (
            str((item.normalized_value if prefer_normalized else None) or item.value or "").strip()
            for item in ontology.entities
            if item.type == entity_type
        )
        return list(dict.fromkeys(text for text in texts if text))

    explicit_employee_names = entity_texts("employee", prefer_normalized=False)
    # Claim numbers are compared upper-cased; dedupe after normalizing case.
    expense_claim_nos = list(
        dict.fromkeys(text.upper() for text in entity_texts("expense_claim", prefer_normalized=True))
    )
    expense_types = entity_texts("expense_type", prefer_normalized=True)
    project_values = self._collect_expense_query_filter_values(ontology, "project")
    location_values = self._collect_expense_query_filter_values(ontology, "location")
    status_values = list(
        dict.fromkeys(
            str(item.value).strip()
            for item in ontology.constraints
            if item.field == "status" and item.operator == "=" and str(item.value).strip()
        )
    )
    amount_constraints = [
        item
        for item in ontology.constraints
        if item.field == "amount" and item.operator in {">", ">=", "<", "<=", "="}
    ]

    if expense_claim_nos:
        conditions.append(ExpenseClaim.claim_no.in_(expense_claim_nos))
    if expense_types:
        conditions.append(ExpenseClaim.expense_type.in_(expense_types))
    if status_values:
        conditions.append(ExpenseClaim.status.in_(status_values))
    if project_values:
        # A project keyword may appear in the project code or the free-text reason.
        project_conditions = []
        for value in project_values:
            pattern = f"%{value}%"
            project_conditions.append(ExpenseClaim.project_code.ilike(pattern))
            project_conditions.append(ExpenseClaim.reason.ilike(pattern))
        conditions.append(or_(*project_conditions))
    if location_values:
        # Same idea for locations: dedicated column or free-text reason.
        location_conditions = []
        for value in location_values:
            pattern = f"%{value}%"
            location_conditions.append(ExpenseClaim.location.ilike(pattern))
            location_conditions.append(ExpenseClaim.reason.ilike(pattern))
        conditions.append(or_(*location_conditions))

    for item in amount_constraints:
        amount_value = float(item.value)
        if item.operator == ">":
            conditions.append(ExpenseClaim.amount > amount_value)
        elif item.operator == ">=":
            conditions.append(ExpenseClaim.amount >= amount_value)
        elif item.operator == "<":
            conditions.append(ExpenseClaim.amount < amount_value)
        elif item.operator == "<=":
            conditions.append(ExpenseClaim.amount <= amount_value)
        else:
            conditions.append(ExpenseClaim.amount == amount_value)

    if ontology.time_range.start_date:
        conditions.append(
            ExpenseClaim.occurred_at
            >= datetime.fromisoformat(f"{ontology.time_range.start_date}T00:00:00+00:00")
        )
    if ontology.time_range.end_date:
        conditions.append(
            ExpenseClaim.occurred_at
            <= datetime.fromisoformat(f"{ontology.time_range.end_date}T23:59:59.999999+00:00")
        )

    def restrict_to_current_user() -> str:
        # Limit results to the caller's own claims and return the scope label.
        owner_conditions, owner_label = self._build_current_user_claim_conditions(
            user_id=user_id,
            context_json=context_json,
        )
        if owner_conditions:
            conditions.append(or_(*owner_conditions))
            return owner_label
        # No usable identity: add a filter that matches nothing rather than
        # exposing other users' claims.
        conditions.append(ExpenseClaim.id == "__no_visible_claim__")
        return "你的报销单"

    scoped_to_current_user = False
    if not self._has_privileged_expense_query_access(context_json):
        # Unprivileged callers only ever see their own claims, even when the
        # message names other employees.
        scope_label = restrict_to_current_user()
        scoped_to_current_user = True
    elif explicit_employee_names:
        conditions.append(ExpenseClaim.employee_name.in_(explicit_employee_names))
        # Separate the names so several employees do not fuse into one token.
        scope_label = f"{'、'.join(explicit_employee_names)}的报销单"
    elif self._is_self_expense_query(message):
        scope_label = restrict_to_current_user()
        scoped_to_current_user = True
    else:
        scope_label = "全部报销单"
    return conditions, scope_label, scoped_to_current_user
@staticmethod
def _collect_expense_query_filter_values(
ontology: OntologyParseResult,
field_name: str,
) -> list[str]:
values: list[str] = []
for entity in ontology.entities:
if entity.type != field_name:
continue
value = str(entity.normalized_value or entity.value or "").strip()
if value:
values.append(value)
for constraint in ontology.constraints:
if constraint.field != field_name or constraint.operator != "=":
continue
value = str(constraint.value or "").strip()
if value:
values.append(value)
return list(dict.fromkeys(values))
def _build_current_user_claim_conditions(
    self,
    *,
    user_id: str | None,
    context_json: dict[str, Any],
) -> tuple[list[Any], str]:
    """Build the filters that identify the calling user's own claims.

    The user id is looked up as a case-insensitive employee email. When an
    employee is found, claims match by employee id, by email stored in the
    name column, and by display name if that name is unique. Otherwise the
    raw user id is matched against both the id and name columns.

    Returns:
        ``(conditions, label)``; ``conditions`` is empty when no identifier
        is available.
    """
    normalized_user_id = str(user_id or "").strip()
    employee = None
    if normalized_user_id:
        lookup = (
            select(Employee)
            .where(func.lower(Employee.email) == normalized_user_id.lower())
            .limit(1)
        )
        employee = self.db.scalar(lookup)

    # Ordered (column kind, raw value) candidates; blanks and repeats are
    # filtered out below.
    candidates: list[tuple[str, Any]] = []
    if employee is None:
        candidates.append(("employee_id", normalized_user_id))
        candidates.append(("employee_name", normalized_user_id))
    else:
        candidates.append(("employee_id", employee.id))
        candidates.append(("employee_name", employee.email))
        # Only match by display name when it cannot collide with a namesake.
        if self._employee_name_is_unique(employee):
            candidates.append(("employee_name", employee.name))

    conditions: list[Any] = []
    seen: set[tuple[str, str]] = set()
    for field_name, raw_value in candidates:
        text = str(raw_value or "").strip()
        if not text:
            continue
        marker = (field_name, text.lower())
        if marker in seen:
            continue
        seen.add(marker)
        if field_name == "employee_id":
            conditions.append(ExpenseClaim.employee_id == text)
        else:
            conditions.append(ExpenseClaim.employee_name == text)

    subject_name = (employee.name if employee is not None else "") or normalized_user_id
    label = "你的报销单" if subject_name else "当前用户的报销单"
    return conditions, label
def _employee_name_is_unique(self, employee: Employee) -> bool:
    """Return True when exactly one employee row carries this employee's name.

    A blank name is never considered unique.
    """
    name = str(employee.name or "").strip()
    if name:
        namesake_query = select(func.count()).select_from(Employee).where(Employee.name == name)
        return int(self.db.scalar(namesake_query) or 0) == 1
    return False
@staticmethod
def _has_privileged_expense_query_access(context_json: dict[str, Any]) -> bool:
role_codes = {
str(item).strip().lower()
for item in context_json.get("role_codes", [])
if str(item).strip()
}
return bool(role_codes & PRIVILEGED_EXPENSE_QUERY_ROLE_CODES)
@staticmethod
def _is_self_expense_query(message: str) -> bool:
    """Return True when the message contains a self-reference phrase.

    All whitespace is removed before matching, so phrases split by spaces
    still count.
    """
    compact_message = "".join(str(message or "").split())
    for keyword in SELF_REFERENCE_KEYWORDS:
        if keyword in compact_message:
            return True
    return False

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,179 @@
from __future__ import annotations
import re
# Short display labels keyed by scenario code.
SCENARIO_LABELS = {
"expense": "报销",
"accounts_receivable": "应收",
"accounts_payable": "应付",
"knowledge": "知识",
"unknown": "通用",
}
# Human-readable explanation for each risk flag code.
RISK_REASON_MAP = {
"duplicate_expense": "检测到同员工、同金额或近似单据存在重复提交迹象。",
"location_mismatch": "申报出差地点与票据识别地点可能不一致,需要核对行程或补充说明。",
"amount_over_limit": "金额超过当前制度或预算阈值,需要补充例外说明。",
"invoice_anomaly": "票据或附件完整性不满足当前规则要求,需要补件或人工复核。",
"ar_overdue": "应收账款已出现逾期,存在回款延迟风险。",
"ap_overdue": "应付付款已出现逾期,可能影响供应商履约或合作关系。",
}
# Bare "I want to file an expense" prompts that carry no further detail.
GENERIC_EXPENSE_PROMPTS = {
"报销",
"我要报销",
"我想报销",
"帮我报销",
"我要申请报销",
"发起报销",
"提交报销",
}
# Verbs and nouns that signal an explicit request to create a draft.
EXPLICIT_DRAFT_KEYWORDS = ("生成", "草稿", "起草", "创建", "发起", "准备")
# Display labels keyed by expense type code.
EXPENSE_TYPE_LABELS = {
"travel": "差旅费",
"hotel": "住宿费",
"transport": "交通费",
"meal": "餐费",
"meeting": "会务费",
"entertainment": "业务招待费",
"office": "办公费",
"training": "培训费",
"communication": "通讯费",
"welfare": "福利费",
"other": "其他费用",
}
# Display labels keyed by document group code. Note "meal" is labelled
# differently here than in EXPENSE_TYPE_LABELS, and "meeting" has no entry.
GROUP_SCENE_LABELS = {
"travel": "差旅费",
"entertainment": "业务招待费",
"meal": "伙食费",
"transport": "交通费",
"hotel": "住宿费",
"office": "办公费",
"training": "培训费",
"communication": "通讯费",
"welfare": "福利费",
"other": "其他费用",
}
# (code, label, description) triples for the expense scene options.
EXPENSE_SCENE_SELECTION_OPTIONS = (
("travel", "差旅费", "出差、长途交通、住宿、差旅补贴等场景。"),
("transport", "交通费", "市内打车、停车、过路费等日常交通场景。"),
("hotel", "住宿费", "单独住宿、酒店发票等场景。"),
("entertainment", "业务招待费", "客户接待、宴请、招待等场景。"),
("office", "办公费", "办公用品、耗材、办公设备等采购场景。"),
("other", "其他费用", "暂不属于以上分类的报销场景。"),
)
# Knowledge-model timeouts in seconds; the generic timeout aliases the backup value.
KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS = 3
KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS = 5
KNOWLEDGE_MODEL_TIMEOUT_SECONDS = KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS
# Display labels keyed by raw claim status code.
EXPENSE_STATUS_LABELS = {
"draft": "草稿",
"submitted": "已提交",
"review": "审核中",
"approved": "已通过",
"rejected": "已驳回",
"paid": "已付款",
}
# Display labels keyed by coarse status-group key.
EXPENSE_STATUS_GROUP_LABELS = {
"draft": "草稿",
"in_progress": "审批中",
"completed": "审批完成",
"other": "其他状态",
}
# Display labels keyed by form slot key.
SLOT_LABELS = {
"expense_type": "报销类型",
"customer_name": "客户名称",
"time_range": "发生时间",
"location": "地点",
"merchant_name": "酒店/商户",
"amount": "金额",
"reason": "事由说明",
"participants": "参与人员",
"attachments": "票据附件",
}
# Captures a date (year, month, day with 年/月/日, "/" or "-" separators),
# optionally followed by an HH:MM time.
DATE_TEXT_PATTERN = re.compile(
r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[:][0-5]\d)?)"
)
# Captures the numeric part of an amount that is followed by a currency unit,
# including common mis-recognized spellings of the unit.
AMOUNT_TEXT_PATTERN = re.compile(
r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)"
)
# Captures a number of hotel nights ("N晚" / "N间夜").
TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN = re.compile(r"(\d+)\s*(?:晚|间夜)")
# Captures two 2-12 character Chinese names joined by a route separator.
TRAVEL_ROUTE_PATTERN = re.compile(r"([\u4e00-\u9fa5]{2,12})\s*(?:至|→|->|-|—)\s*([\u4e00-\u9fa5]{2,12})")
# Display labels keyed by the source a value came from.
SOURCE_LABELS = {
"user_text": "用户描述",
"user_form": "用户修改",
"ocr": "票据识别",
"upload": "上传附件",
"detail_context": "关联单据",
"system_context": "系统上下文",
"inferred": "语义推断",
"system": "系统判断",
}
# Title keywords of review-risk entries treated as deprecated (per the name).
DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS = ("历史报销画像", "用户画像", "制度注意事项", "制度注意")
# Slot keys listed per scene; presumably extra slots required for that scene
# - confirm against the consumer of this map.
SCENE_REQUIRED_SLOT_KEYS = {
"hotel": {"merchant_name"},
"meeting": {"location"},
"entertainment": {"location", "customer_name", "participants"},
}
# Reason text keyed by expense type code, for inferred reasons (per the name).
INFERRED_REASON_LABELS = {
"travel": "出差行程",
"hotel": "住宿报销",
"transport": "交通出行",
"meal": "餐饮用餐",
"meeting": "会务活动",
"entertainment": "客户接待",
"office": "办公采购",
"training": "培训学习",
"communication": "通讯使用",
"welfare": "员工福利",
"other": "其他费用",
}
# Message prefixes that identify system-generated text (per the name), as
# opposed to a reason typed by the user.
SYSTEM_GENERATED_REASON_PREFIXES = (
"我上传了",
"请按当前已识别信息",
"请把当前上传的票据",
"请基于当前上传的多张票据",
"我已核对右侧识别结果",
"请同步修正逐票据识别结果",
"我已校正核对信息",
"查看报销草稿",
"请解释一下当前这笔报销的合规风险和待补充项",
)
# Patterns matching a date or date range at the very start of a reason text.
# The first form allows an optional "识别事项" prefix and a time/date label
# before the date; the second form is a bare date (range) that must be
# followed by a punctuation mark.
# Both accept half-width and fullwidth colons/commas, and the range separator
# must be an explicit character: an empty alternative would let two dates
# separated only by whitespace be consumed as one range.
LEADING_REASON_TIME_PATTERNS = (
    re.compile(
        r"^\s*(?:识别事项(?:有)?[::]\s*)?"
        r"(?:业务发生(?:时间|日期)|费用发生(?:时间|日期)|发生(?:时间|日期)|报销(?:时间|日期)|时间)[::]?\s*"
        r"(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
        r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
        r"\s*[,,。;;、]?\s*"
    ),
    re.compile(
        r"^\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?"
        r"(?:\s*(?:至|到|~|~|—|-)\s*(?:19|20)\d{2}[-/年.]\d{1,2}[-/月.]\d{1,2}日?)?"
        r"\s*[,,。;;、]\s*"
    ),
)
# Maps colloquial or mis-recognized amount units to their canonical unit
# ("元" or "万元"). The keys mirror the unit alternatives accepted by
# AMOUNT_TEXT_PATTERN; every alias has a distinct, non-empty key and a
# non-empty canonical value.
AMOUNT_UNIT_ALIASES = {
    "员": "元",
    "圆": "元",
    "园": "元",
    "块": "元",
    "块钱": "元",
    "元整": "元",
    "万员": "万元",
    "万圆": "万元",
    "万园": "万元",
    "万块": "万元",
    "万元整": "万元",
}

View File

@@ -0,0 +1,380 @@
from __future__ import annotations
import re
from decimal import Decimal, InvalidOperation
from typing import Mapping
from app.schemas.user_agent import UserAgentRequest, UserAgentReviewDocumentCard
# Default display labels keyed by document group code; used when the service
# is constructed without an explicit mapping.
DEFAULT_GROUP_SCENE_LABELS = {
"travel": "差旅费",
"entertainment": "业务招待费",
"meal": "伙食费",
"transport": "交通费",
"hotel": "住宿费",
"office": "办公费",
"training": "培训费",
"communication": "通讯费",
"welfare": "福利费",
"other": "其他费用",
}
# Captures a date (year, month, day with 年/月/日, "/" or "-" separators),
# optionally followed by an HH:MM time.
DOCUMENT_DATE_TEXT_PATTERN = re.compile(
r"(\d{4}[年/-]\d{1,2}[月/-]\d{1,2}日?(?:\s*[T ]?\s*(?:[01]?\d|2[0-3])[:][0-5]\d)?)"
)
# Captures the numeric part of an amount that is followed by a currency unit.
DOCUMENT_AMOUNT_TEXT_PATTERN = re.compile(
r"(\d+(?:\.\d+)?)\s*(?:万元|万员|万圆|万园|万块|万元整|元整|块钱|块|元|员|圆|园|万)"
)
# Captures the number following an amount label such as 价税合计 / 金额 / 票价;
# either "." or "," is accepted before the (up to two) trailing digits.
DOCUMENT_AMOUNT_PATTERN = re.compile(
r"(?:价税合计|合计金额|费用合计|订单(?:总)?金额|支付(?:金额)?|实付(?:金额)?|实收(?:金额)?|总(?:额|计|价)|票价|金额|车费|消费金额)"
r"[:\s¥¥人民币]*([0-9]+(?:[.,][0-9]{1,2})?)"
)
# Captures the number following a yen/yuan currency sign.
DOCUMENT_CURRENCY_AMOUNT_PATTERN = re.compile(r"[¥¥]\s*([0-9]+(?:[.,][0-9]{1,2})?)")
class UserAgentDocumentService:
"""Centralize receipt classification and OCR field extraction so the main service does not keep growing."""
def __init__(self, *, group_scene_labels: Mapping[str, str] | None = None) -> None:
# Copy into a private dict; falls back to the module defaults when no
# (or an empty) mapping is supplied.
self._group_scene_labels = dict(group_scene_labels or DEFAULT_GROUP_SCENE_LABELS)
def classify_document(
    self,
    item: dict[str, object],
    *,
    expense_type_code: str = "",
    has_customer: bool = False,
) -> dict[str, str]:
    """Classify one recognized document into type, expense type and group.

    A recognized ``document_type`` on the item wins. Otherwise the filename,
    summary and text are scanned for keywords, checked in a fixed order
    (travel, hotel, transport, meal, office). Anything left is classified
    from ``expense_type_code``.

    Args:
        item: Document dict; reads ``document_type``, ``filename``,
            ``summary`` and ``text``.
        expense_type_code: Expense type already chosen for the claim, if any.
        has_customer: Whether a customer is involved; turns meal receipts
            into entertainment.

    Returns:
        A dict with ``document_type``, ``expense_type``, ``group_code`` and
        ``scene_label``.
    """
    provided_type = str(item.get("document_type") or "").strip().lower()
    normalized_expense_type = str(expense_type_code or "").strip().lower()
    # Meal receipts count as client entertainment when the claim says so or a
    # customer is involved.
    meal_group = (
        "entertainment"
        if normalized_expense_type == "entertainment" or has_customer
        else "meal"
    )

    def classification(
        document_type: str,
        expense_type: str,
        group_code: str,
        scene_label: str,
    ) -> dict[str, str]:
        return {
            "document_type": document_type,
            "expense_type": expense_type,
            "group_code": group_code,
            "scene_label": scene_label,
        }

    # Declared document type -> (expense_type, group_code, scene_label).
    declared_types = {
        "flight_itinerary": ("travel", "travel", "差旅票据"),
        "train_ticket": ("travel", "travel", "差旅票据"),
        "hotel_invoice": ("hotel", "travel", "住宿票据"),
        "taxi_receipt": ("transport", "travel", "交通票据"),
        "parking_toll_receipt": ("transport", "travel", "交通票据"),
        "meal_receipt": (meal_group, meal_group, "餐饮票据"),
        "office_invoice": ("office", "office", "办公用品票据"),
        "meeting_invoice": ("meeting", "meeting", "会务票据"),
        "training_invoice": ("training", "training", "培训票据"),
    }
    if provided_type in declared_types:
        return classification(provided_type, *declared_types[provided_type])

    text = " ".join(
        [
            str(item.get("filename") or ""),
            str(item.get("summary") or ""),
            str(item.get("text") or ""),
        ]
    ).lower()
    compact = text.replace(" ", "")

    # Ordered keyword rules: the first rule with any keyword present wins.
    # Every keyword must be non-empty: an empty string is a substring of any
    # text and would make its rule match unconditionally, hiding later rules.
    keyword_rules = (
        (
            ("机票", "航班", "火车", "高铁", "行程单"),
            ("travel_ticket", "travel", "travel", "差旅票据"),
        ),
        (
            ("酒店", "住宿", "宾馆"),
            ("hotel_invoice", "hotel", "travel", "住宿票据"),
        ),
        (
            ("打车", "出租车", "滴滴", "网约车", "乘车", "用车", "叫车", "车费", "车资", "的士", "过路费", "停车"),
            ("transport_receipt", "transport", "travel", "交通票据"),
        ),
        (
            ("餐", "饭店", "酒楼", "酒家", "餐饮", "meal"),
            ("meal_receipt", meal_group, meal_group, "餐饮票据"),
        ),
        (
            ("办公用品", "文具", "耗材", "办公耗材", "打印纸", "键盘", "鼠标", "白板", "墨盒", "硒鼓"),
            ("other", "office", "office", "办公用品票据"),
        ),
    )
    for keywords, matched in keyword_rules:
        if any(keyword in compact for keyword in keywords):
            return classification(*matched)

    fallback_type = normalized_expense_type or "other"
    return classification(
        "other",
        fallback_type,
        self.normalize_group_code(fallback_type),
        "其他票据",
    )
@staticmethod
def normalize_group_code(expense_type_code: str) -> str:
if expense_type_code in {"travel", "hotel", "transport"}:
return "travel"
if expense_type_code in {"entertainment", "meal", "office", "training", "communication", "welfare"}:
return expense_type_code
return "other"
def extract_document_fields(self, item: dict[str, object]) -> dict[str, str]:
    """Extract normalized display fields (label -> value) from a document.

    Structured ``document_fields`` entries are normalized first and always
    take precedence. Amount, date and (for hotel documents only) merchant are
    then back-filled from the summary/text when still missing.

    Args:
        item: Document dict; reads ``document_fields``, ``document_type``,
            ``summary`` and ``text``.

    Returns:
        Mapping of display label to normalized value.
    """
    normalized_fields: dict[str, str] = {}
    document_type = str(item.get("document_type") or "").strip().lower()
    is_hotel = self.is_hotel_document_item(item)

    raw_fields = item.get("document_fields")
    for field in raw_fields if isinstance(raw_fields, list) else []:
        if not isinstance(field, dict):
            continue
        key = str(field.get("key") or "").strip()
        label = str(field.get("label") or "").strip()
        value = str(field.get("value") or "").strip()
        if not value:
            continue
        display_label = self.normalize_document_field_label(key=key, label=label) or label
        display_label = self.resolve_document_time_display_label(
            document_type=document_type,
            key=key,
            label=label,
            normalized_label=display_label,
        )
        normalized_value = self.normalize_document_field_value(
            label=display_label,
            value=value,
        )
        # Merchant fields are only kept for hotel documents.
        if display_label == "商户/酒店" and not is_hotel:
            continue
        if display_label and normalized_value:
            # First occurrence of a label wins.
            normalized_fields.setdefault(display_label, normalized_value)

    text = " ".join([str(item.get("summary") or ""), str(item.get("text") or "")]).strip()

    amount_value = self.extract_amount_text_from_value(text)
    if amount_value and "金额" not in normalized_fields:
        normalized_fields["金额"] = amount_value

    date_match = DOCUMENT_DATE_TEXT_PATTERN.search(text)
    if date_match:
        time_label = self.resolve_document_time_display_label(
            document_type=document_type,
            key="date",
            label="日期",
            normalized_label="时间",
        )
        # Check the label actually written to, not only the generic "时间":
        # a structured time field may already be stored under the
        # type-specific label and must not be overwritten by the text date.
        if time_label not in normalized_fields and "时间" not in normalized_fields:
            normalized_fields[time_label] = date_match.group(1)

    merchant = self.extract_document_merchant_name_from_text(text) if is_hotel else ""
    if merchant and "商户/酒店" not in normalized_fields:
        normalized_fields["商户/酒店"] = merchant
    return normalized_fields
@staticmethod
def resolve_document_time_display_label(
*,
document_type: str,
key: str,
label: str,
normalized_label: str,
) -> str:
if normalized_label != "时间":
return normalized_label
label_by_type = {
"train_ticket": "列车出发时间",
"flight_itinerary": "起飞日期",
"taxi_receipt": "乘车时间",
"transport_receipt": "乘车时间",
"parking_toll_receipt": "通行日期",
}
normalized_type = str(document_type or "").strip().lower()
if normalized_type not in label_by_type:
return normalized_label
compact_key = str(key or "").strip().lower().replace("_", "")
compact_label = str(label or "").replace(" ", "")
if compact_key in {"date", "time", "issuedat", "issuedate", "invoicedate"}:
return label_by_type[normalized_type]
if any(token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")):
return label_by_type[normalized_type]
return normalized_label
@staticmethod
def normalize_document_field_label(*, key: str, label: str) -> str:
compact_key = str(key or "").strip().lower().replace("_", "")
compact_label = str(label or "").replace(" ", "")
if compact_key in {
"amount",
"totalamount",
"paymentamount",
"paidamount",
"actualamount",
} or any(
token in compact_label
for token in ("金额", "价税合计", "合计", "总额", "总计", "票价", "支付金额", "实付金额", "实收金额")
):
return "金额"
if compact_key in {"date", "time", "issuedat", "invoicedate"} or any(
token in compact_label for token in ("日期", "时间", "开票日期", "发生时间")
):
return "时间"
if compact_key in {"merchant", "merchantname", "sellername", "vendorname"} or any(
token in compact_label for token in ("商户", "酒店", "销售方", "开票方", "收款方")
):
return "商户/酒店"
return label
def normalize_document_field_value(self, *, label: str, value: str) -> str:
    """Normalize a field value according to its display label.

    Amount fields are reduced to the extracted amount text and time-like
    fields to the first date found; when extraction finds nothing, or for any
    other label, the trimmed raw value is returned. A blank label or value
    yields an empty string.
    """
    field_label = str(label or "").strip()
    text = str(value or "").strip()
    if not (field_label and text):
        return ""
    if field_label == "金额":
        return self.extract_amount_text_from_value(text) or text
    time_labels = {"时间", "出发日期", "列车出发时间", "起飞日期", "乘车时间", "通行日期"}
    if field_label not in time_labels:
        return text
    found = DOCUMENT_DATE_TEXT_PATTERN.search(text)
    return found.group(1) if found else text
def extract_amount_text_from_value(self, value: str) -> str:
    """Return the largest positive amount found in ``value`` as ``"0.00"`` text.

    Every match of the labelled-amount, currency-sign and unit-suffixed
    patterns is considered; a comma in the number is read as a decimal point.
    Returns an empty string when no positive amount is found.
    """
    raw_value = str(value or "").strip()
    if not raw_value:
        return ""

    amounts: list[Decimal] = []
    patterns = (
        DOCUMENT_AMOUNT_PATTERN,
        DOCUMENT_CURRENCY_AMOUNT_PATTERN,
        DOCUMENT_AMOUNT_TEXT_PATTERN,
    )
    for pattern in patterns:
        for match in pattern.finditer(raw_value):
            try:
                amount = Decimal(str(match.group(1)).replace(",", "."))
            except (InvalidOperation, TypeError):
                continue
            if amount > Decimal("0.00"):
                amounts.append(amount)

    if not amounts:
        return ""
    largest = max(amounts)
    return f"{largest.quantize(Decimal('0.01')):.2f}"
def extract_document_merchant_name(self, item: dict[str, object]) -> str:
    """Return the merchant name for a document, or ``""`` when unknown.

    The extracted merchant field is preferred; for hotel documents the
    summary/text is used as a fallback source.
    """
    merchant_field = str(self.extract_document_fields(item).get("商户/酒店") or "").strip()
    if merchant_field:
        return merchant_field
    if self.is_hotel_document_item(item):
        summary = str(item.get("summary") or "")
        body = str(item.get("text") or "")
        return self.extract_document_merchant_name_from_text(" ".join([summary, body]).strip())
    return ""
@staticmethod
def is_hotel_document_item(item: dict[str, object]) -> bool:
document_type = str(item.get("document_type") or "").strip().lower()
scene_code = str(item.get("scene_code") or "").strip().lower()
scene_label = str(item.get("scene_label") or "").strip()
suggested_expense_type = str(item.get("suggested_expense_type") or "").strip().lower()
return (
document_type == "hotel_invoice"
or scene_code == "hotel"
or suggested_expense_type == "hotel"
or "住宿" in scene_label
or "酒店" in scene_label
)
@staticmethod
def extract_document_merchant_name_from_text(text: str) -> str:
for keyword in ("酒店", "宾馆", "饭店", "酒楼", "餐厅", "航空", "铁路", "滴滴"):
if keyword in text:
return keyword
return ""
@staticmethod
def extract_amount_from_card(card: UserAgentReviewDocumentCard) -> float:
for item in card.fields:
if item.label != "金额":
continue
try:
normalized_value = str(item.value).replace("", "").replace("", "").replace("¥", "").strip()
return float(normalized_value)
except ValueError:
return 0.0
return 0.0
@staticmethod
def resolve_amount_value(payload: UserAgentRequest) -> float:
for item in payload.ontology.entities:
if item.type == "amount" and item.role != "threshold":
try:
return float(item.normalized_value)
except ValueError:
return 0.0
return 0.0
def sum_ocr_amounts(self, ocr_documents: list[dict[str, object]]) -> float:
    """Sum the amounts extracted from a list of recognized documents.

    Args:
        ocr_documents: Document dicts passed to ``extract_document_fields``.

    Returns:
        The total of all parseable amounts; documents without an amount, or
        with a non-numeric one, are skipped.
    """
    total = 0.0
    for item in ocr_documents:
        fields = self.extract_document_fields(item)
        amount_text = str(fields.get("金额") or "")
        # Strip currency markers before parsing; stripping nothing would make
        # values such as "100.00元" be skipped as non-numeric.
        for marker in ("元", "¥", "¥"):
            amount_text = amount_text.replace(marker, "")
        amount_text = amount_text.strip()
        if not amount_text:
            continue
        try:
            total += float(amount_text)
        except ValueError:
            continue
    return total
def infer_expense_type_from_documents(
    self,
    ocr_documents: list[dict[str, object]],
    *,
    expense_type_code: str = "",
    has_customer: bool = False,
) -> str:
    """Describe the expense types implied by a set of documents.

    Each document is classified and its group code translated into a display
    label. Distinct labels are kept in first-seen order and at most three are
    joined with ``" + "``. Groups without a label are ignored.
    """
    seen_labels: list[str] = []
    for document in ocr_documents:
        group_code = self.classify_document(
            document,
            expense_type_code=expense_type_code,
            has_customer=has_customer,
        )["group_code"]
        group_label = self._group_scene_labels.get(group_code, "")
        if not group_label or group_label in seen_labels:
            continue
        seen_labels.append(group_label)
    return " + ".join(seen_labels[:3])

View File

@@ -0,0 +1,627 @@
from __future__ import annotations
import re
from typing import Any
from app.schemas.user_agent import UserAgentCitation, UserAgentRequest
from app.services.user_agent_knowledge_helpers import UserAgentKnowledgeHelpersMixin
from app.services.user_agent_knowledge_constants import (
KNOWLEDGE_ARTICLE_PATTERN,
KNOWLEDGE_DIRECT_ANSWER_HINTS,
KNOWLEDGE_LIST_ITEM_PATTERN,
KNOWLEDGE_NUMBERED_ITEM_PATTERN,
KNOWLEDGE_QUERY_STOPWORDS,
KNOWLEDGE_SECTION_HEADING_PATTERN,
MAX_KNOWLEDGE_DIRECT_EVIDENCE,
MAX_KNOWLEDGE_MODEL_HITS,
MAX_KNOWLEDGE_QUERY_TERMS,
)
class UserAgentKnowledgeMixin(UserAgentKnowledgeHelpersMixin):
@staticmethod
def _build_model_tool_payload(
    tool_payload: dict[str, Any],
    *,
    question: str | None = None,
) -> dict[str, Any]:
    """Return a shallow copy of ``tool_payload`` with compact ``hits``.

    The hits are chosen by ``_select_knowledge_model_hits`` and reduced
    to a fixed set of text fields; content is capped at 1200 characters,
    tags at 5 entries and evidence at 3 entries.
    """

    def _text(hit: dict[str, Any], key: str) -> str:
        return str(hit.get(key) or "").strip()

    compact_payload = dict(tool_payload or {})
    selected_hits = UserAgentKnowledgeMixin._select_knowledge_model_hits(
        tool_payload,
        question=question,
    )
    compact_payload["hits"] = [
        {
            "title": _text(hit, "title"),
            "document_name": _text(hit, "document_name"),
            "excerpt": _text(hit, "excerpt"),
            "content": _text(hit, "content")[:1200],
            "tags": list(hit.get("tags") or [])[:5],
            "evidence": list(hit.get("evidence") or [])[:3],
            "code": _text(hit, "code"),
        }
        for hit in selected_hits
        if isinstance(hit, dict)
    ]
    return compact_payload
@staticmethod
def _build_knowledge_evidence_blocks(
    tool_payload: dict[str, Any],
    *,
    question: str | None = None,
) -> str:
    """Render up to three selected hits as numbered, fenced evidence blocks.

    Hits without content are skipped but still consume their number, so
    the printed index always reflects the hit's position in the
    selection. Blocks are separated by a blank line.
    """
    top_hits = UserAgentKnowledgeMixin._select_knowledge_model_hits(
        tool_payload,
        question=question,
    )[:3]
    rendered_blocks: list[str] = []
    for position, hit in enumerate(top_hits, start=1):
        if not isinstance(hit, dict):
            continue
        label = str(hit.get("title") or hit.get("document_name") or f"证据 {position}").strip()
        ref_code = str(hit.get("code") or "").strip()
        body = str(hit.get("content") or "").strip()
        if not body:
            continue
        header = f"[证据 {position}] {label}"
        if ref_code:
            header += f" ({ref_code})"
        rendered_blocks.append("\n".join([header, "```text", body[:1200], "```"]))
    return "\n\n".join(rendered_blocks)
def _build_fast_knowledge_answer(
self,
payload: UserAgentRequest,
*,
citations: list[UserAgentCitation],
) -> str | None:
if payload.ontology.scenario != "knowledge":
return None
if str(payload.tool_payload.get("result_type") or "").strip() != "knowledge_search":
return None
evidence_items = self._build_knowledge_answer_evidence(payload)
if not evidence_items:
return None
question = self._resolve_knowledge_question(payload)
if not self._should_use_direct_knowledge_answer(question, evidence_items):
return None
return self._render_knowledge_direct_answer(
payload,
citations=citations,
evidence_items=evidence_items,
)
def _render_knowledge_direct_answer(
self,
payload: UserAgentRequest,
*,
citations: list[UserAgentCitation],
evidence_items: list[dict[str, Any]],
) -> str | None:
if not evidence_items:
return None
title = str(
(citations[0].title if citations else "")
or evidence_items[0].get("title")
or "相关制度"
).strip()
user_name = str(payload.context_json.get("name") or "").strip()
question = self._resolve_knowledge_question(payload)
query_terms = self._extract_knowledge_query_terms(question)
ordered_evidence_items = self._prioritize_knowledge_evidence_items(question, evidence_items)
primary_item = ordered_evidence_items[0]
primary_heading = self._format_knowledge_heading_label(
str(primary_item.get("heading") or "").strip()
)
primary_lines = self._collect_direct_knowledge_answer_lines(ordered_evidence_items)
lines: list[str] = []
if user_name:
lines.append(f"{user_name},您好。")
source_prefix = f"根据《{title}"
if primary_heading:
source_prefix = f"{source_prefix}{primary_heading}"
if str(primary_item.get("kind") or "") == "table":
lines.append(f"{source_prefix},当前能直接确认的是:")
lines.append(self._extract_relevant_table_preview(str(primary_item.get("content") or ""), query_terms))
else:
if not primary_lines:
lines.append(
f"{source_prefix},当前能直接确认的是:"
f"{self._summarize_knowledge_evidence_content(primary_item, query_terms)}"
)
elif len(primary_lines) == 1:
lines.append(f"{source_prefix},当前能直接确认的是:{primary_lines[0].strip()}")
else:
lines.append(f"{source_prefix},当前能直接确认的是:")
lines.extend(primary_lines)
notes: list[str] = []
location_note = self._build_missing_location_grounding_note(question, evidence_items)
if location_note:
notes.append(location_note)
if self._question_requires_explicit_condition(question) and not self._answer_evidence_has_numeric_or_condition(evidence_items):
notes.append("当前命中的证据更偏规则说明或流程约束,还没有直接给出可立即套用的数值或完整条件。")
if notes:
lines.append("")
lines.append("说明:")
lines.extend(f"- {note}" for note in notes)
return "\n".join(line for line in lines if line is not None).strip()
@staticmethod
def _resolve_knowledge_question(payload: UserAgentRequest) -> str:
return str(payload.context_json.get("user_input_text") or payload.message or "").strip()
@staticmethod
def _looks_like_structured_knowledge_query(question: str) -> bool:
normalized = str(question or "").strip()
if not normalized:
return False
return any(keyword in normalized for keyword in KNOWLEDGE_DIRECT_ANSWER_HINTS)
def _should_use_direct_knowledge_answer(
self,
question: str,
evidence_items: list[dict[str, Any]],
) -> bool:
if not evidence_items:
return False
if self._looks_like_structured_knowledge_query(question):
return True
return str(evidence_items[0].get("kind") or "") in {"table", "kv", "list", "clause"}
def _build_knowledge_answer_evidence(
    self,
    payload: UserAgentRequest,
) -> list[dict[str, Any]]:
    """Collect, rank and de-duplicate evidence segments for the question.

    Candidates are extracted from every selected hit, sorted by score,
    filtered against a score floor relative to the best candidate and
    de-duplicated. At most ``MAX_KNOWLEDGE_DIRECT_EVIDENCE`` items are
    returned, best first.
    """
    question = self._resolve_knowledge_question(payload)
    query_terms = self._extract_knowledge_query_terms(question)
    candidates: list[dict[str, Any]] = []
    for hit in self._select_knowledge_model_hits(
        payload.tool_payload,
        question=question,
    ):
        if not isinstance(hit, dict):
            continue
        candidates.extend(self._extract_knowledge_evidence_candidates(hit, query_terms))
    deduped: list[dict[str, Any]] = []
    seen: set[tuple[str, str, str]] = set()
    # Highest score first; on equal scores the shorter content comes first
    # (negated length combined with reverse=True).
    ranked_candidates = sorted(
        candidates,
        key=lambda value: (
            float(value.get("score") or 0),
            -len(str(value.get("content") or "")),
        ),
        reverse=True,
    )
    top_score = float(ranked_candidates[0].get("score") or 0) if ranked_candidates else 0.0
    for item in ranked_candidates:
        score = float(item.get("score") or 0)
        # The best candidate is always kept; later ones must reach the
        # floor of max(6, top_score - 14).
        if deduped and score < max(6.0, top_score - 14):
            continue
        # Identity = document title + heading + first 180 chars of cleaned text.
        key = (
            str(item.get("title") or "").strip(),
            str(item.get("heading") or "").strip(),
            self._clean_knowledge_segment_text(str(item.get("content") or ""))[:180],
        )
        if key in seen:
            continue
        seen.add(key)
        deduped.append(item)
        if len(deduped) >= MAX_KNOWLEDGE_DIRECT_EVIDENCE:
            break
    return deduped
def _extract_knowledge_evidence_candidates(
self,
hit: dict[str, Any],
query_terms: list[str],
) -> list[dict[str, Any]]:
title = str(hit.get("title") or hit.get("document_name") or "相关制度").strip()
content = str(hit.get("content") or "").strip()
if not content:
return []
raw_candidates = self._merge_knowledge_lead_in_segments(
self._split_knowledge_hit_into_segments(content)
)
candidates: list[dict[str, Any]] = []
for item in raw_candidates:
score = self._score_knowledge_evidence_candidate(item, query_terms)
if query_terms and score <= 0:
continue
normalized = dict(item)
normalized["title"] = title
normalized["score"] = score
candidates.append(normalized)
if candidates:
return candidates
fallback_text = str(hit.get("excerpt") or "").strip() or self._extract_excerpt(content)
if not fallback_text:
return []
return [
{
"title": title,
"heading": "",
"kind": "paragraph",
"content": fallback_text,
"score": 1,
}
]
def _merge_knowledge_lead_in_segments(
    self,
    segments: list[dict[str, str]],
) -> list[dict[str, str]]:
    """Attach the segments that follow a lead-in line to that line.

    A lead-in segment (as judged by ``_is_knowledge_lead_in_segment``)
    absorbs the segments after it that share its heading, so that an
    introduction and the items it introduces form one evidence unit.
    Input dicts are copied, never mutated.
    """
    if not segments:
        return []
    merged: list[dict[str, str]] = []
    index = 0
    while index < len(segments):
        current = dict(segments[index])
        if not self._is_knowledge_lead_in_segment(current):
            merged.append(current)
            index += 1
            continue
        base_heading = str(current.get("heading") or "").strip()
        current_marker = self._extract_knowledge_marker_family(str(current.get("content") or ""))
        follow_segments: list[dict[str, str]] = []
        next_index = index + 1
        while next_index < len(segments):
            candidate = segments[next_index]
            # Stop at a heading change.
            if str(candidate.get("heading") or "").strip() != base_heading:
                break
            candidate_kind = str(candidate.get("kind") or "").strip()
            candidate_content = str(candidate.get("content") or "").strip()
            candidate_marker = self._extract_knowledge_marker_family(candidate_content)
            # Empty segments and tables are never absorbed.
            if not candidate_content or candidate_kind == "table":
                break
            # A segment with the same numbering style as the lead-in is a
            # sibling item, not a child.
            if current_marker and candidate_marker == current_marker:
                break
            # A second lead-in starts its own group once something was absorbed.
            if self._is_knowledge_lead_in_segment(candidate) and follow_segments:
                break
            if candidate_kind not in {"list", "paragraph", "kv", "clause"}:
                break
            follow_segments.append(candidate)
            next_index += 1
            # Cap the group at four absorbed segments.
            if len(follow_segments) >= 4:
                break
            # A long paragraph is absorbed but ends the group.
            if candidate_kind == "paragraph" and len(candidate_content) >= 200:
                break
        if follow_segments:
            current["content"] = "\n".join(
                [str(current.get("content") or "").strip()]
                + [str(item.get("content") or "").strip() for item in follow_segments]
            )
            # The merged unit is reported as a list if it absorbed any list item.
            if any(str(item.get("kind") or "").strip() == "list" for item in follow_segments):
                current["kind"] = "list"
            merged.append(current)
            # Resume after the last absorbed segment.
            index = next_index
            continue
        merged.append(current)
        index += 1
    return merged
def _split_knowledge_hit_into_segments(self, content: str) -> list[dict[str, str]]:
    """Split hit content into typed segments, line by line.

    Each segment is a dict with ``heading`` (the current Markdown heading
    path plus section heading, joined by ``" > "``), ``kind`` and
    ``content``. Kinds are:

    * ``table``: consecutive lines containing at least two ``|``.
    * ``list``: bullet or numbered item lines.
    * ``clause``: lines matching ``KNOWLEDGE_ARTICLE_PATTERN``.
    * ``kv``: lines of at most 180 characters containing a colon
      (fullwidth or ASCII).
    * ``paragraph``: any other consecutive lines, joined by spaces.

    Blank lines and headings end the paragraph or table being collected.
    """
    segments: list[dict[str, str]] = []
    markdown_headings: list[str] = []
    section_heading = ""
    paragraph_lines: list[str] = []
    table_lines: list[str] = []

    def current_heading() -> str:
        heading_parts = [item for item in markdown_headings if item]
        if section_heading:
            heading_parts.append(section_heading)
        return " > ".join(heading_parts)

    def emit(kind: str, text: str) -> None:
        segments.append({"heading": current_heading(), "kind": kind, "content": text})

    def flush_paragraph() -> None:
        nonlocal paragraph_lines
        if not paragraph_lines:
            return
        merged = " ".join(line.strip() for line in paragraph_lines if line.strip()).strip()
        paragraph_lines = []
        if merged:
            emit("paragraph", merged)

    def flush_table() -> None:
        nonlocal table_lines
        if not table_lines:
            return
        merged = "\n".join(line.rstrip() for line in table_lines if line.strip()).strip()
        table_lines = []
        if merged:
            emit("table", merged)

    for raw_line in str(content or "").replace("\r\n", "\n").replace("\r", "\n").splitlines():
        stripped = raw_line.strip()
        if not stripped:
            flush_paragraph()
            flush_table()
            continue
        markdown_heading_match = re.match(r"^(#{1,6})\s+(.+)$", stripped)
        if markdown_heading_match:
            flush_paragraph()
            flush_table()
            level = len(markdown_heading_match.group(1))
            heading_text = markdown_heading_match.group(2).strip()
            # Keep only the ancestors of this heading level.
            markdown_headings = markdown_headings[: max(0, level - 1)]
            markdown_headings.append(heading_text)
            section_heading = ""
            continue
        if KNOWLEDGE_SECTION_HEADING_PATTERN.match(stripped) and len(stripped) <= 90:
            flush_paragraph()
            flush_table()
            section_heading = stripped.lstrip("#").strip()
            continue
        if stripped.count("|") >= 2:
            flush_paragraph()
            table_lines.append(stripped)
            continue
        flush_table()
        if KNOWLEDGE_LIST_ITEM_PATTERN.match(stripped) or KNOWLEDGE_NUMBERED_ITEM_PATTERN.match(stripped):
            flush_paragraph()
            emit("list", stripped)
            continue
        if KNOWLEDGE_ARTICLE_PATTERN.match(stripped):
            flush_paragraph()
            emit("clause", stripped)
            continue
        # A colon is required here: testing for an empty string would match
        # every line and make the paragraph branch below unreachable.
        if (":" in stripped or ":" in stripped) and len(stripped) <= 180:
            flush_paragraph()
            emit("kv", stripped)
            continue
        paragraph_lines.append(stripped)
    flush_paragraph()
    flush_table()
    return segments
def _render_knowledge_evidence_text(self, item: dict[str, Any]) -> str:
lines = self._split_clean_knowledge_lines(
str(item.get("content") or ""),
preserve_marker=True,
)
if not lines:
return ""
if len(lines) == 1:
return self._clean_knowledge_segment_text(lines[0])
return "\n".join(f" {line}" for line in lines)
def _collect_direct_knowledge_answer_lines(
self,
ordered_evidence_items: list[dict[str, Any]],
) -> list[str]:
if not ordered_evidence_items:
return []
primary_item = ordered_evidence_items[0]
primary_title = str(primary_item.get("title") or "").strip()
primary_heading = str(primary_item.get("heading") or "").strip()
primary_kind = str(primary_item.get("kind") or "").strip()
related_items = [primary_item]
if primary_kind != "table":
for item in ordered_evidence_items[1:]:
if len(related_items) >= 3:
break
if str(item.get("kind") or "").strip() != primary_kind:
continue
if str(item.get("title") or "").strip() != primary_title:
continue
if str(item.get("heading") or "").strip() != primary_heading:
continue
related_items.append(item)
lines: list[str] = []
seen: set[str] = set()
for item in related_items:
rendered = self._render_knowledge_evidence_text(item)
for line in rendered.splitlines():
normalized = str(line or "").strip()
if not normalized or normalized in seen:
continue
seen.add(normalized)
lines.append(line)
return lines
def _summarize_knowledge_evidence_content(
self,
item: dict[str, Any],
query_terms: list[str],
) -> str:
kind = str(item.get("kind") or "").strip()
content = str(item.get("content") or "").strip()
if kind == "table":
preview = self._extract_relevant_table_preview(content, query_terms)
preview_rows = [line for line in preview.splitlines() if line.strip()][:4]
if len(preview_rows) >= 3:
return "当前命中的直接依据是一张与问题强相关的标准表,已摘出最相关的表头和行。"
return "当前命中的直接依据是一张与问题强相关的标准表。"
lines = self._split_clean_knowledge_lines(content, preserve_marker=True)
if len(lines) >= 2:
return self._clean_knowledge_segment_text(f"{lines[0]} {' '.join(lines[1:4])}")
return self._clean_knowledge_segment_text(content)
def _build_missing_location_grounding_note(
self,
question: str,
evidence_items: list[dict[str, Any]],
) -> str:
location = self._extract_query_location(question)
if not location:
return ""
haystack = "\n".join(
str(item.get("heading") or "") + "\n" + str(item.get("content") or "")
for item in evidence_items
)
if location in haystack:
return ""
return (
f"当前命中的制度依据没有直接写出“{location}”对应的地区档位或映射关系,"
"因此不能直接把它套用到表格中的某一列。"
)
def _build_knowledge_search_answer(
self,
payload: UserAgentRequest,
citations: list[UserAgentCitation],
) -> str:
hits = [item for item in list(payload.tool_payload.get("hits") or []) if isinstance(item, dict)]
evidence_items = self._build_knowledge_answer_evidence(payload)
primary_citation = citations[0] if citations else None
title = str(
(primary_citation.title if primary_citation else "")
or (hits[0].get("title") if hits else "")
or "相关制度"
).strip()
user_name = str(payload.context_json.get("name") or "").strip()
prefix = f"{user_name},您好。\n" if user_name else ""
if not hits:
return (
f"{prefix}我已经从《{title}》中检索到与你这次问题相关的制度依据,"
"但本次答案生成环节暂时没有成功返回。请稍后重试一次;如果仍然失败,"
"建议先检查主对话模型的连通性。"
)
evidence_lines: list[str] = []
for item in evidence_items[:3]:
heading = str(item.get("heading") or "").strip()
heading_text = f" > {heading}" if heading else ""
if str(item.get("kind") or "") == "table":
preview = self._extract_relevant_table_preview(
str(item.get("content") or ""),
self._extract_knowledge_query_terms(self._resolve_knowledge_question(payload)),
)
evidence_lines.append(f"- 《{item.get('title') or title}{heading_text}\n{preview}")
continue
rendered = self._render_knowledge_evidence_text(item)
if rendered:
if "\n" in rendered:
evidence_lines.append(f"- 《{item.get('title') or title}{heading_text}\n{rendered}")
else:
evidence_lines.append(f"- 《{item.get('title') or title}{heading_text}{rendered}")
if not evidence_lines:
for item in hits[:2]:
item_title = str(item.get("title") or item.get("document_name") or "相关制度").strip()
excerpt = (
str(item.get("excerpt") or "").strip()
or self._extract_excerpt(str(item.get("content") or ""))
)
if not excerpt:
continue
evidence_lines.append(f"- 《{item_title}》:{excerpt}")
if not evidence_lines:
return (
f"{prefix}我已经从《{title}》中检索到与你这次问题相关的制度依据,"
"但本次答案生成环节暂时没有成功返回。请稍后重试一次;如果仍然失败,"
"建议先检查主对话模型的连通性。"
)
return "\n".join(
[
f"{prefix}我已经命中与你这次问题最相关的制度依据,但答案整理阶段本轮没有及时返回。",
"先给你当前最直接的依据:",
*evidence_lines,
"如果你希望我继续把这些依据整理成更完整的结论、步骤或对比说明,可以继续缩小问题范围后再问一次。",
]
).strip()

View File

@@ -0,0 +1,54 @@
from __future__ import annotations
import re
# Keywords that mark a question as "structured"; consumers test them by
# substring containment against the question text.
KNOWLEDGE_DIRECT_ANSWER_HINTS = (
    "是什么",
    "标准",
    "限额",
    "流程",
    "条件",
    "规则",
    "怎么",
    "如何",
    "哪些",
    "需要",
    "是否",
    "区别",
    "范围",
    "额度",
    "金额",
    "多少",
    "多少钱",
    "上限",
)
# Terms that are discarded when query terms are extracted from a question.
KNOWLEDGE_QUERY_STOPWORDS = {
    "什么",
    "多少",
    "哪些",
    "怎么",
    "如何",
    "请问",
    "一下",
    "关于",
    "规定",
    "标准",
    "可以",
    "是否",
    "一个",
    "哪些人",
    "目前",
    "当前",
    "一下子",
}
# Size limits used by the knowledge helpers.
MAX_KNOWLEDGE_QUERY_TERMS = 12
MAX_KNOWLEDGE_DIRECT_EVIDENCE = 4
MAX_KNOWLEDGE_MODEL_HITS = 5
# Lines treated as section headings: Markdown "#" headings and Chinese
# chapter/section/article numbering.
# NOTE(review): the alternative "[一二三四五六七八九十]+.*" subsumes the "、" form
# before it and matches any line starting with a Chinese numeral — it looks
# like a character was lost after "+"; confirm against the repository copy.
KNOWLEDGE_SECTION_HEADING_PATTERN = re.compile(
    r"^(#\s*.+|##\s*.+|###\s*.+|第[一二三四五六七八九十百零0-9]+[章节条]\s*.*|[一二三四五六七八九十]+、.*|[一二三四五六七八九十]+.*|\([一二三四五六七八九十]+\).*)$"
)
# Bullet list items: "-", "*" or "•" followed by whitespace and text.
KNOWLEDGE_LIST_ITEM_PATTERN = re.compile(r"^[-*•]\s+.+$")
# Numbered list items: "1." / "1)" / "1、", parenthesised numerals, or
# circled digits.
# NOTE(review): "[(]" and "[)]" are single-character classes; presumably the
# fullwidth parentheses were meant to be included as well — TODO confirm.
KNOWLEDGE_NUMBERED_ITEM_PATTERN = re.compile(
    r"^(?:(?:\d+[.)、])|(?:[(][一二三四五六七八九十百零0-9]+[)])|[①②③④⑤⑥⑦⑧⑨⑩])\s*.+$"
)
# Article lines such as "第十条 ...".
KNOWLEDGE_ARTICLE_PATTERN = re.compile(r"^(第[一二三四五六七八九十百零0-9]+条)\s*.*$")

View File

@@ -0,0 +1,322 @@
from __future__ import annotations
import re
from typing import Any
from app.services.user_agent_knowledge_constants import (
KNOWLEDGE_ARTICLE_PATTERN,
KNOWLEDGE_LIST_ITEM_PATTERN,
KNOWLEDGE_NUMBERED_ITEM_PATTERN,
KNOWLEDGE_QUERY_STOPWORDS,
KNOWLEDGE_SECTION_HEADING_PATTERN,
MAX_KNOWLEDGE_MODEL_HITS,
MAX_KNOWLEDGE_QUERY_TERMS,
)
class UserAgentKnowledgeHelpersMixin:
@staticmethod
def _select_knowledge_model_hits(
    tool_payload: dict[str, Any],
    *,
    question: str | None = None,
) -> list[dict[str, Any]]:
    """Pick the hits that are handed to the model, best first.

    Only dict hits are considered, limited to a small leading window.
    Without query terms the retrieval order is kept; otherwise hits are
    re-ranked by ``_score_knowledge_model_hit``. At most
    ``MAX_KNOWLEDGE_MODEL_HITS`` hits are returned.
    """
    hit_window = max(MAX_KNOWLEDGE_MODEL_HITS + 1, 6)
    candidate_hits = [
        hit
        for hit in list(tool_payload.get("hits") or [])
        if isinstance(hit, dict)
    ][:hit_window]
    if not candidate_hits:
        return []
    query_terms = UserAgentKnowledgeHelpersMixin._extract_knowledge_query_terms(question or "")
    if not query_terms:
        return candidate_hits[:MAX_KNOWLEDGE_MODEL_HITS]

    def _rank_key(pair: tuple[int, dict[str, Any]]) -> tuple[int, int]:
        position, hit = pair
        hit_score = UserAgentKnowledgeHelpersMixin._score_knowledge_model_hit(
            hit,
            query_terms=query_terms,
            rank_index=position,
        )
        # Negated position: with reverse sorting, earlier hits win ties.
        return (hit_score, -position)

    ranked = sorted(enumerate(candidate_hits), key=_rank_key, reverse=True)
    return [hit for _, hit in ranked[:MAX_KNOWLEDGE_MODEL_HITS]]
@staticmethod
def _score_knowledge_model_hit(
    item: dict[str, Any],
    *,
    query_terms: list[str],
    rank_index: int,
) -> int:
    """Score one retrieval hit against the query terms.

    The score starts from the hit's retrieval rank, rewards matched terms
    (more when they occur in the title), adjusts for the appendix marker
    the content starts with, and adds bonuses for structural cues in the
    content. Content that says information is missing is penalised.
    """
    title = str(item.get("title") or item.get("document_name") or "").lower()
    excerpt = str(item.get("excerpt") or "").lower()
    content = str(item.get("content") or "").lower()
    # Only the first 1400 characters of content take part in term matching.
    haystack = "\n".join([title, excerpt, content[:1400]])
    matched_terms = [term for term in query_terms if term in haystack]
    # Base score decays by 4 per rank position, never below 1.
    score = max(1, 48 - rank_index * 4)
    score += len(matched_terms) * 10
    score += sum(1 for term in matched_terms if term in title) * 8
    leading_marker = UserAgentKnowledgeHelpersMixin._leading_knowledge_appendix_marker(content)
    if leading_marker == "# 章节导航":
        score -= 22
    elif leading_marker == "# 问答线索补充":
        score += 6 if matched_terms else -8
    elif leading_marker == "# 重点章节摘录":
        score += 4 if matched_terms else -4
    elif leading_marker == "# 结构化表格补充":
        score += 8 if matched_terms else -3
    if matched_terms and "|" in content:
        score += 8
    # NOTE(review): the tuples below contain empty strings, and `"" in content`
    # is always true, so these bonuses currently apply whenever any term
    # matched. The empty entries look like characters lost in transit
    # (presumably a fullwidth colon in the first tuple) — TODO confirm.
    if matched_terms and any(marker in content for marker in ("", ":")):
        score += 10
    if matched_terms and "\n" in content:
        score += 4
    if matched_terms and any(marker in content for marker in ("附表", "", "")):
        score += 4
    if matched_terms and any(marker in content for marker in ("", "", "", "-", "")):
        score += 4
    # Content stating that no information/regulation exists is demoted.
    if re.search(r"没有.{0,8}(信息|规定|说明|依据)", content):
        score -= 12
    return score
@staticmethod
def _leading_knowledge_appendix_marker(content: str) -> str:
normalized = str(content or "").lstrip()
for marker in ("# 章节导航", "# 重点章节摘录", "# 问答线索补充", "# 结构化表格补充"):
index = normalized.find(marker)
if 0 <= index <= 220:
return marker
return ""
def _prioritize_knowledge_evidence_items(
self,
question: str,
evidence_items: list[dict[str, Any]],
) -> list[dict[str, Any]]:
if not evidence_items or not self._question_requires_explicit_condition(question):
return evidence_items
for preferred_kind in ("table", "kv", "clause", "list"):
for index, item in enumerate(evidence_items):
if str(item.get("kind") or "") != preferred_kind:
continue
return [item, *evidence_items[:index], *evidence_items[index + 1 :]]
for index, item in enumerate(evidence_items):
if re.search(r"\d", str(item.get("content") or "")):
return [item, *evidence_items[:index], *evidence_items[index + 1 :]]
return evidence_items
@staticmethod
def _is_knowledge_lead_in_segment(item: dict[str, str]) -> bool:
kind = str(item.get("kind") or "").strip()
content = str(item.get("content") or "").strip()
return kind in {"kv", "list", "clause"} and content.endswith(("", ":"))
@staticmethod
def _extract_knowledge_marker_family(content: str) -> str:
normalized = str(content or "").strip()
if not normalized:
return ""
if KNOWLEDGE_ARTICLE_PATTERN.match(normalized):
return "article"
if re.match(r"^\d+[.)、]\s*", normalized):
return "arabic"
if re.match(r"^[(][一二三四五六七八九十百零0-9]+[)]\s*", normalized):
return "paren"
if re.match(r"^[①②③④⑤⑥⑦⑧⑨⑩]\s*", normalized):
return "circled"
if KNOWLEDGE_LIST_ITEM_PATTERN.match(normalized):
return "bullet"
return ""
@staticmethod
def _format_knowledge_heading_label(heading: str) -> str:
parts = [item.strip() for item in str(heading or "").split(">") if item.strip()]
return " / ".join(parts)
def _score_knowledge_evidence_candidate(
    self,
    item: dict[str, str],
    query_terms: list[str],
) -> int:
    """Score one evidence segment against the query terms.

    Matched terms dominate the score (heading matches count extra).
    Structured kinds, appendix headings and normative wording add to it;
    navigation/table-of-contents material, dot leaders and long content
    reduce it.
    """
    heading = str(item.get("heading") or "").lower()
    content = str(item.get("content") or "").lower()
    kind = str(item.get("kind") or "").strip()
    haystack = "\n".join([heading, content])
    matched_terms = [term for term in query_terms if term in haystack]
    score = len(matched_terms) * 10
    score += sum(1 for term in matched_terms if term in heading) * 6
    # Structured segments are preferred over free paragraphs.
    if kind == "table":
        score += 10
    elif kind in {"kv", "clause", "list"}:
        score += 8
    elif kind == "paragraph":
        score += 4
    if "问答线索补充" in heading or "重点章节摘录" in heading:
        score += 8
    if "结构化表格补充" in heading:
        score += 10
    if "章节导航" in heading or "目录" in heading:
        score -= 16
    # Six or more consecutive dots/ellipses: treated like a table-of-contents leader.
    if re.search(r"[.。…]{6,}", content):
        score -= 12
    # NOTE(review): the first two hints are empty strings, and `"" in content`
    # is always true, so this +3 is currently unconditional. They look like
    # characters lost in transit — TODO confirm the intended keywords.
    if any(hint in content for hint in ("", "", "不得", "可以", "标准", "条件", "材料", "审批", "流程", "包括")):
        score += 3
    content_length = len(content)
    # Beyond 220 characters, lose 1 point per 40 extra characters, at most 8.
    if content_length > 220:
        score -= min(8, (content_length - 220) // 40)
    return score
@staticmethod
def _extract_knowledge_query_terms(question: str) -> list[str]:
normalized_question = str(question or "").strip().lower()
if not normalized_question:
return []
terms: list[str] = []
seen: set[str] = set()
def remember(term: str) -> None:
normalized = str(term or "").strip().lower()
if (
not normalized
or normalized in seen
or normalized in KNOWLEDGE_QUERY_STOPWORDS
):
return
seen.add(normalized)
terms.append(normalized)
for item in re.findall(r"[a-z0-9][a-z0-9_\-]{1,}", normalized_question):
remember(item)
for block in re.findall(r"[\u4e00-\u9fff]{2,20}", normalized_question):
if len(block) <= 4:
remember(block)
continue
for size in (4, 3, 2):
for start in range(0, len(block) - size + 1):
remember(block[start : start + size])
if len(terms) >= MAX_KNOWLEDGE_QUERY_TERMS:
return terms
return terms[:MAX_KNOWLEDGE_QUERY_TERMS]
@staticmethod
def _clean_knowledge_segment_text(content: str) -> str:
normalized = str(content or "").strip()
normalized = re.sub(r"^[-*•]\s*", "", normalized)
normalized = re.sub(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*", "", normalized)
normalized = re.sub(r"^[(][一二三四五六七八九十百零0-9]+[)]\s*", "", normalized)
normalized = re.sub(r"\s+", " ", normalized)
if len(normalized) <= 180:
return normalized
return f"{normalized[:177].rstrip()}..."
@staticmethod
def _normalize_knowledge_line(content: str, *, preserve_marker: bool) -> str:
normalized = str(content or "").strip()
normalized = re.sub(r"^[-*•]\s*", "", normalized)
if not preserve_marker:
normalized = re.sub(r"^(?:\d+[.)、]|[①②③④⑤⑥⑦⑧⑨⑩])\s*", "", normalized)
normalized = re.sub(r"^[(][一二三四五六七八九十百零0-9]+[)]\s*", "", normalized)
normalized = re.sub(r"\s+", " ", normalized)
return normalized
def _split_clean_knowledge_lines(
self,
content: str,
*,
preserve_marker: bool,
) -> list[str]:
return [
line
for line in (
self._normalize_knowledge_line(item, preserve_marker=preserve_marker)
for item in str(content or "").splitlines()
)
if line
]
@staticmethod
def _extract_relevant_table_preview(content: str, query_terms: list[str]) -> str:
lines = [line.strip() for line in str(content or "").splitlines() if line.strip()]
if len(lines) <= 3:
return "\n".join(lines)
header = lines[0]
divider = lines[1] if len(lines) > 1 else ""
body = lines[2:] if divider.count("|") >= 2 else lines[1:]
matched_rows = [
row
for row in body
if any(term in row.lower() for term in query_terms)
]
selected_rows = matched_rows[:3] or body[:2]
preview_lines = [header]
if divider:
preview_lines.append(divider)
preview_lines.extend(selected_rows)
return "\n".join(preview_lines).strip()
@staticmethod
def _question_requires_explicit_condition(question: str) -> bool:
normalized = str(question or "").strip()
return any(keyword in normalized for keyword in ("多少", "金额", "上限", "限额", "标准", "条件", "需要"))
@staticmethod
def _answer_evidence_has_numeric_or_condition(evidence_items: list[dict[str, Any]]) -> bool:
    """Tell whether any evidence item states a number or a condition.

    An item qualifies when its content contains a digit or one of the
    normative keywords listed below. Returns ``False`` for an empty list.
    """
    for item in evidence_items:
        content = str(item.get("content") or "")
        if re.search(r"\d", content):
            return True
        # NOTE(review): the first two keywords are empty strings, and
        # `"" in content` is always true, so any item currently qualifies.
        # They look like characters lost in transit — TODO confirm the
        # intended keywords.
        if any(
            keyword in content
            for keyword in ("", "", "不得", "可以", "条件", "材料", "审批", "流程", "标准", "适用")
        ):
            return True
    return False

View File

@@ -0,0 +1,726 @@
from __future__ import annotations
import json
import re
from datetime import UTC, datetime, timedelta
from decimal import Decimal, InvalidOperation
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.schemas.agent_asset import AgentAssetListItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.schemas.user_agent import (
UserAgentCitation,
UserAgentDraftPayload,
UserAgentExpenseQueryRecord,
UserAgentQueryPayload,
UserAgentQueryStatusGroup,
UserAgentReviewAction,
UserAgentReviewClaimGroup,
UserAgentReviewDocumentCard,
UserAgentReviewDocumentField,
UserAgentReviewEditField,
UserAgentReviewPayload,
UserAgentReviewRiskBrief,
UserAgentReviewSlotCard,
UserAgentRequest,
UserAgentSuggestedAction,
)
from app.services.agent_assets import AgentAssetService
from app.services.expense_claims import ExpenseClaimService
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
from app.services.user_agent_constants import *
class UserAgentResponseMixin:
def _build_fallback_answer(
self,
payload: UserAgentRequest,
*,
citations: list[UserAgentCitation],
draft_payload: UserAgentDraftPayload | None,
) -> str:
if str(payload.tool_payload.get("result_type") or "").strip() == "knowledge_search":
return self._build_explain_answer(payload, citations)
if payload.ontology.intent in {"query", "compare"}:
return self._build_query_answer(payload)
if payload.ontology.intent == "risk_check":
return self._build_risk_answer(payload, citations)
if payload.ontology.intent == "draft":
tool_message = str(payload.tool_payload.get("message") or "").strip()
if payload.tool_payload.get("draft_limit_reached"):
return tool_message or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。"
if tool_message and (
str(payload.tool_payload.get("claim_id") or "").strip()
or str(payload.tool_payload.get("claim_no") or "").strip()
):
return tool_message
if payload.ontology.intent == "draft" and draft_payload is not None:
return (
f"已生成 {draft_payload.title},当前仅返回待人工确认的草稿内容,"
"仍需人工确认后再进入正式流程。"
)
return self._build_explain_answer(payload, citations)
def _build_guided_answer(self, payload: UserAgentRequest) -> str | None:
if not self._is_generic_expense_prompt(payload):
return self._build_implicit_expense_draft_guidance(payload)
attachment_names = self._resolve_attachment_names(payload)
ocr_summary = str(payload.context_json.get("ocr_summary") or "").strip()
attachment_hint = ""
if ocr_summary:
attachment_hint = f" 我已读取附件 OCR 摘要:{ocr_summary}"
elif attachment_names:
attachment_hint = (
f" 我已带入 {len(attachment_names)} 份附件名称,但目前还不能直接读取附件内容,"
"仍需要你补充关键信息。"
)
return (
"可以帮你发起报销。请补充费用类型、发生时间、金额、事由和相关对象,"
"或者直接上传票据附件,我再继续帮你判断能否报、缺什么材料,并整理待核对信息。"
f"{attachment_hint}"
)
def _build_implicit_expense_draft_guidance(
self,
payload: UserAgentRequest,
) -> str | None:
if not self._is_implicit_expense_draft_request(payload):
return None
amount_text = next(
(item.value for item in payload.ontology.entities if item.type == "amount"),
"",
)
expense_type = next(
(
EXPENSE_TYPE_LABELS.get(item.normalized_value, item.value)
for item in payload.ontology.entities
if item.type == "expense_type"
),
"报销",
)
time_text = payload.ontology.time_range.raw or "本次"
amount_hint = f",金额 {amount_text}" if amount_text else ""
return (
f"已识别到一笔{time_text}{expense_type}支出{amount_hint}"
"如果要继续整理报销核对信息,还需要补充客户单位、参与人员、费用明细和票据附件。"
"你也可以继续上传发票或图片,我会把这些信息带入后续对话。"
)
def _generate_answer_with_model(
self,
payload: UserAgentRequest,
*,
citations: list[UserAgentCitation],
suggested_actions: list[UserAgentSuggestedAction],
risk_flags: list[str],
draft_payload: UserAgentDraftPayload | None,
fallback_answer: str,
) -> str | None:
messages = self._build_model_messages(
payload,
citations=citations,
suggested_actions=suggested_actions,
risk_flags=risk_flags,
draft_payload=draft_payload,
fallback_answer=fallback_answer,
)
answer = self._sanitize_model_answer(
self.runtime_chat_service.complete(
messages,
max_tokens=800 if payload.ontology.scenario == "knowledge" else 420,
temperature=0.2,
timeout_seconds=(
KNOWLEDGE_MODEL_TIMEOUT_SECONDS
if payload.ontology.scenario == "knowledge"
else None
),
slot_timeouts=(
{
"main": KNOWLEDGE_MODEL_MAIN_TIMEOUT_SECONDS,
"backup": KNOWLEDGE_MODEL_BACKUP_TIMEOUT_SECONDS,
}
if payload.ontology.scenario == "knowledge"
else None
),
max_attempts=1 if payload.ontology.scenario == "knowledge" else None,
)
)
return self._reject_unsupported_location_inference(payload, answer)
def _sanitize_model_answer(self, answer: str | None) -> str | None:
if not answer:
return None
cleaned = re.sub(r"<think>.*?</think>", "", answer, flags=re.DOTALL | re.IGNORECASE)
cleaned = cleaned.strip()
leaked_reasoning_markers = (
"用户问的是",
"让我分析一下",
"实体识别",
"从对话历史来看",
"从tool_payload来看",
"现在问题是",
"我需要:",
"关键是我",
)
if any(marker in cleaned[:500] for marker in leaked_reasoning_markers):
return None
return cleaned or None
@staticmethod
def _extract_query_location(message: str) -> str:
match = re.search(r"(?:去|到|前往)([\u4e00-\u9fff]{2,8})(?:出差|开会|培训)", str(message or ""))
return match.group(1) if match else ""
def _reject_unsupported_location_inference(
self,
payload: UserAgentRequest,
answer: str | None,
) -> str | None:
del payload
return answer
def _build_model_messages(
    self,
    payload: UserAgentRequest,
    *,
    citations: list[UserAgentCitation],
    suggested_actions: list[UserAgentSuggestedAction],
    risk_flags: list[str],
    draft_payload: UserAgentDraftPayload | None,
    fallback_answer: str,
) -> list[dict[str, str]]:
    """Assemble the system and user chat messages for answer generation.

    All request state the model may rely on is collected into one ``facts``
    dictionary and embedded in the user message as pretty-printed JSON. For
    the knowledge scenario two extra evidence entries are added to ``facts``
    and a detailed knowledge-answer style instruction is used; other
    scenarios get a short generic style instruction.

    Returns:
        A two-item list: the system message followed by the user message.
    """
    ctx = payload.context_json
    is_knowledge = payload.ontology.scenario == "knowledge"
    knowledge_question = self._resolve_knowledge_question(payload) if is_knowledge else ""
    # Key order matters: it is the order the model sees in the serialized JSON.
    facts = {
        "run_id": payload.run_id,
        "user_message": payload.message,
        "ontology": payload.ontology.model_dump(mode="json"),
        "context": {
            "entry_source": ctx.get("entry_source"),
            "user_name": ctx.get("name"),
            "user_role": ctx.get("role"),
            "user_department": ctx.get("department_name") or ctx.get("department"),
            "user_position": ctx.get("position"),
            "user_grade": ctx.get("grade"),
            "employee_no": ctx.get("employee_no"),
            "manager_name": ctx.get("manager_name"),
            "employee_location": ctx.get("employee_location"),
            "cost_center": ctx.get("cost_center"),
            "finance_owner_name": ctx.get("finance_owner_name"),
            "employee_risk_profile": ctx.get("employee_risk_profile", {}),
            "user_role_codes": ctx.get("role_codes", []),
            "is_admin": bool(ctx.get("is_admin")),
            "request_context": ctx.get("request_context"),
            "attachment_count": ctx.get("attachment_count"),
            "attachment_names": self._resolve_attachment_names(payload),
            "ocr_summary": ctx.get("ocr_summary", ""),
            "ocr_documents": ctx.get("ocr_documents", []),
            "conversation_id": ctx.get("conversation_id"),
            "conversation_scenario": ctx.get("conversation_scenario"),
            "conversation_intent": ctx.get("conversation_intent"),
            "draft_claim_id": ctx.get("draft_claim_id"),
            "conversation_history": self._resolve_conversation_history(payload),
        },
        "tool_payload": self._build_model_tool_payload(
            payload.tool_payload,
            question=knowledge_question,
        ),
        "citations": [citation.model_dump(mode="json") for citation in citations],
        "suggested_actions": [action.model_dump(mode="json") for action in suggested_actions],
        "risk_flags": risk_flags,
        "draft_payload": None if draft_payload is None else draft_payload.model_dump(mode="json"),
        "selected_capability_codes": payload.selected_capability_codes,
        "requires_confirmation": payload.requires_confirmation,
        "fallback_answer": fallback_answer,
    }
    if is_knowledge:
        facts["knowledge_evidence_blocks"] = self._build_knowledge_evidence_blocks(
            payload.tool_payload,
            question=knowledge_question,
        )
        facts["knowledge_answer_evidence"] = [
            {
                field: str(evidence.get(field) or "").strip()
                for field in ("title", "heading", "kind", "content")
            }
            for evidence in self._build_knowledge_answer_evidence(payload)
        ]
        answer_style_instruction = (
            "你是财务制度知识问答助手。只能依据 facts.tool_payload.hits、facts.knowledge_answer_evidence、citations 与 conversation_history 回答,"
            "不要扩展成通用助手。优先直接回答,不要复述思考过程,不要输出 JSON、代码块或 <think>。"
            "回答风格要像一位真正熟悉制度的财务伙伴:先直接回应用户的核心问题,再用一张简洁表格或短段落说明依据,"
            "最后补充最重要的注意事项。不要写成“已检索到内容”的系统回执,也不要把命中片段连缀成答案。"
            "必须优先回答用户当前这句话本身,不能把制度标题、制度全文或完整标准表当成主答案。"
            "如果用户问的是某次具体行程“一共能报多少”,就先给“当前已能确认的金额”,再用一张很短的表说明项目、"
            "适用标准、计算式和结果;如果总额还缺少住宿晚数、实际票据或其他必要条件,就明确写出“暂不能确认的部分”。"
            "只有用户明确在问“标准有哪些”或“制度全文怎么规定”时,才展开完整标准表。"
            "如果命中的知识已经足够支持计算、比较或归纳,就直接给出结论;金额、标准、天数、补贴等问题要把计算过程写清楚。"
            "适合时请使用 Markdown 二级标题、短段落和表格,让回答更清晰;表格必须保证每一行列数一致,不要出现空白残列。"
            "只能陈述 hits 中明确出现的事实,不能用常识、外部知识或主观推断补齐缺失条件。"
            "回答前先在全部 hits 中寻找与问题最直接相关的章节、表格或条目,不能只依赖排在最前面的片段。"
            "如果 facts.knowledge_answer_evidence 中已经给出更短的高相关证据,优先基于这些证据组织答案,再回看原始 hits 补上下文。"
            "如果某个表格在检索片段中已经被摊平成连续文本,只有在行、列和数值对应关系能够从片段本身明确确认时才能据此计算;"
            "如果列对应关系不清楚,必须说明表格结构在当前片段中不够清晰,不能把第一列或相邻数字想当然套给用户。"
            "如果 hits 中出现“结构化表格补充”,它表示知识归纳阶段已经把原文表格重新整理过,"
            "优先使用这类结构化表格来理解行列关系,再回看原文确认上下文。"
            "facts.knowledge_evidence_blocks 中保留了原始换行和定宽排版;遇到表格时,优先按这些证据块阅读,"
            "必须按表头从左到右逐列对应数值,不能把第一列的数值直接套给后面的列名。"
            "如果完成计算或归纳仍缺少某个关键映射关系、适用条件或数值依据,必须明确说明当前知识库还缺哪一项信息,再给出已能确认的部分。"
            "如果用户问题里没有明确给出某个套用条件,而 hits 或 evidence 里也没有明确出现,就不能自己补一个默认值。"
            "当问题涉及追问时,必须结合 conversation_history 延续上一轮上下文,而不是重新泛化成制度全文摘录。"
            "不要大段粘贴原始命中文本;只提炼与问题直接相关的规则、条件、金额和注意事项。"
            "如果依据仍然不足,明确指出缺少哪一项信息,再给出当前能确认的部分。"
        )
    else:
        answer_style_instruction = "用 2 到 4 段完成回答,优先给结论,再补充最关键的依据与下一步建议。"
    personalization_instruction = (
        "如果 context.user_name 存在,并且当前问题与员工本人适用标准、报销额度、审批权限、职级待遇有关,"
        "开头应自然称呼一次用户,例如“曹笑竹,您好”。"
        "如果需要根据员工身份判断标准,优先参考 context.user_grade 与 context.user_position。"
        "如果问题与用户身份无关,就不要生硬加入姓名、职级或岗位。"
    )
    system_prompt = (
        "你是 X-Financial 的专业财务 AI 助手。"
        "回答必须准确、自然、可执行,不要泄露中间推理。"
        "当知识问题有命中依据时,先给结论,再给结构化说明。"
        "不要把制度全文原样搬出来,不要把检索片段当作最终答案直接粘贴。"
        "如果使用表格,确保列名简洁、数值明确。"
    ) + personalization_instruction + answer_style_instruction
    serialized_facts = json.dumps(facts, ensure_ascii=False, indent=2)
    user_prompt = "请严格依据下面的 facts 生成最终答复:\n" + serialized_facts
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
def _build_query_answer(self, payload: UserAgentRequest) -> str:
    """Compose the plain-text answer for a query-style request.

    Expense queries are summarised from ``_build_query_payload``: the window
    or condition prefix, record count, total amount, non-empty status groups
    and hints about previewed or older records. Receivable and payable
    queries report the hit count and outstanding amount read from
    ``payload.tool_payload``. Any other scenario gets a generic message.

    Values that are composed into one sentence are kept apart with explicit
    separators, so dates, labels and counts never run into each other.
    """
    scenario = payload.ontology.scenario
    data = payload.tool_payload
    subject = self._resolve_subject(payload)
    if scenario == "expense":
        query_payload = self._build_query_payload(payload)
        scope_label = str(data.get("scope_label") or subject).strip() or subject
        if query_payload is None:
            return f"当前没有查到{scope_label}。你可以补充时间范围、单号或状态继续筛选。"
        if (
            query_payload.recent_window_applied
            and query_payload.window_start_date
            and query_payload.window_end_date
        ):
            # Separate the two dates; concatenated they are unreadable.
            window_prefix = f"{query_payload.window_start_date} 至 {query_payload.window_end_date}"
        elif query_payload.recent_window_applied and query_payload.window_days:
            window_prefix = f"{query_payload.window_days} 日内"
        else:
            window_prefix = "当前条件下"
        older_records_hint = ""
        if query_payload.older_record_count > 0 and query_payload.window_days:
            older_records_hint = (
                f"另有 {query_payload.older_record_count} 笔超过 {query_payload.window_days} 日的单据,"
                "请前往个人报销中心查看。"
            )
        if query_payload.record_count <= 0:
            if older_records_hint:
                return f"{window_prefix}没有查到{query_payload.scope_label}。{older_records_hint}"
            return f"{window_prefix}没有查到{query_payload.scope_label}。你可以补充时间范围、单号或状态继续筛选。"
        # Only status groups that actually contain records are mentioned.
        group_lines = [
            f"{item.label} {item.count}"
            for item in query_payload.status_groups
            if item.count > 0
        ]
        answer_parts = [
            f"我先为你列出{window_prefix}{query_payload.scope_label},"
            f"共 {query_payload.record_count} 笔,金额合计 {query_payload.total_amount:.2f} 元。"
        ]
        if group_lines:
            group_text = "、".join(group_lines)
            answer_parts.append(f"其中包括:{group_text}。")
        hint_parts: list[str] = []
        if query_payload.has_more_in_window and query_payload.preview_count < query_payload.record_count:
            hint_parts.append(
                f"下方先展示最近 {query_payload.preview_count} 笔,你可以直接点击单据查看详情。"
            )
        elif query_payload.records:
            hint_parts.append("下方已列出本次命中的真实单据,可直接点击查看详情。")
        if older_records_hint:
            hint_parts.append(older_records_hint)
        return " ".join(answer_parts + hint_parts).strip()
    if scenario == "accounts_receivable":
        record_count = int(data.get("record_count") or 0)
        outstanding_amount = float(data.get("outstanding_amount") or 0)
        return (
            f"{subject}共命中 {record_count} 条应收,未回款金额 {outstanding_amount:.2f} 元。"
            "建议结合账龄和客户分布继续排查逾期风险。"
        )
    if scenario == "accounts_payable":
        record_count = int(data.get("record_count") or 0)
        outstanding_amount = float(data.get("outstanding_amount") or 0)
        return (
            f"{subject}共命中 {record_count} 条应付,待付金额 {outstanding_amount:.2f} 元。"
            "如需推进动作,建议先生成付款建议草稿并发起人工确认。"
        )
    return "已完成当前查询,但暂时没有更多结构化结果可展示。"
def _build_query_payload(
    self,
    payload: UserAgentRequest,
) -> UserAgentQueryPayload | None:
    """Turn the raw expense-claim tool result into a typed query payload.

    Returns ``None`` unless the request is an expense ``query``/``compare``
    and the tool result is (or does not contradict) an ``expense_claim_list``.
    Non-dict entries in ``records`` and ``status_groups`` are skipped, text
    fields are stripped, and missing labels fall back to the
    ``EXPENSE_*_LABELS`` lookup tables.
    """
    ontology = payload.ontology
    if ontology.scenario != "expense" or ontology.intent not in {"query", "compare"}:
        return None
    tool = payload.tool_payload
    declared_type = str(tool.get("result_type") or "").strip()
    if declared_type and declared_type != "expense_claim_list":
        return None

    def text(source: dict, key: str) -> str:
        # Normalise a possibly-missing value to a stripped string.
        return str(source.get(key) or "").strip()

    records: list[UserAgentExpenseQueryRecord] = []
    for row in tool.get("records") or []:
        if not isinstance(row, dict):
            continue
        amount = float(row.get("amount") or 0)
        expense_type = text(row, "expense_type")
        status = text(row, "status")
        # The label lookup uses the raw group code, while the stored code
        # falls back to "other".
        raw_status_group = text(row, "status_group")
        records.append(
            UserAgentExpenseQueryRecord(
                claim_id=text(row, "claim_id"),
                claim_no=text(row, "claim_no") or "未编号",
                employee_name=text(row, "employee_name"),
                expense_type=expense_type,
                expense_type_label=text(row, "expense_type_label")
                or EXPENSE_TYPE_LABELS.get(expense_type, "报销"),
                amount=round(amount, 2),
                status=status,
                status_label=text(row, "status_label")
                or EXPENSE_STATUS_LABELS.get(status, "处理中"),
                status_group=raw_status_group or "other",
                status_group_label=text(row, "status_group_label")
                or EXPENSE_STATUS_GROUP_LABELS.get(raw_status_group, "其他状态"),
                approval_stage=text(row, "approval_stage") or None,
                document_date=text(row, "document_date"),
                occurred_at=text(row, "occurred_at"),
                reason=text(row, "reason"),
                location=text(row, "location"),
            )
        )
    status_groups: list[UserAgentQueryStatusGroup] = [
        UserAgentQueryStatusGroup(
            key=text(group, "key") or "other",
            label=text(group, "label") or "其他状态",
            count=max(0, int(group.get("count") or 0)),
        )
        for group in tool.get("status_groups") or []
        if isinstance(group, dict)
    ]
    raw_window_days = tool.get("window_days")
    window_days = None if raw_window_days in {None, ""} else int(tool["window_days"])
    return UserAgentQueryPayload(
        result_type="expense_claim_list",
        scope_label=str(tool.get("scope_label") or self._resolve_subject(payload)).strip() or "报销单",
        recent_window_applied=bool(tool.get("recent_window_applied")),
        window_days=window_days,
        window_start_date=text(tool, "window_start_date") or None,
        window_end_date=text(tool, "window_end_date") or None,
        record_count=max(0, int(tool.get("record_count") or 0)),
        preview_count=max(0, int(tool.get("preview_count") or len(records))),
        older_record_count=max(0, int(tool.get("older_record_count") or 0)),
        has_more_in_window=bool(tool.get("has_more_in_window") or tool.get("has_more")),
        total_amount=round(float(tool.get("total_amount") or 0), 2),
        status_groups=status_groups,
        records=records,
    )
def _build_explain_answer(
    self,
    payload: UserAgentRequest,
    citations: list[UserAgentCitation],
) -> str:
    """Compose the answer for an explanation-style request.

    Knowledge-search results are delegated to
    ``_build_knowledge_search_answer`` when citations exist, otherwise the
    tool's own message is returned if it has one. For other results the
    first two citation titles and the first citation's excerpt are
    summarised. Without any citation a "no matching rule" notice naming the
    scenario is returned.
    """
    if str(payload.tool_payload.get("result_type") or "").strip() == "knowledge_search":
        if citations:
            return self._build_knowledge_search_answer(payload, citations)
        tool_message = str(payload.tool_payload.get("message") or "").strip()
        if tool_message:
            return tool_message
    if citations:
        # Separate the titles so two documents are not read as one name.
        titles = "、".join(item.title for item in citations[:2])
        summary = citations[0].excerpt or "请结合制度全文进一步确认。"
        return f"已检索到相关依据:{titles}。核心说明:{summary}"
    scenario_label = SCENARIO_LABELS.get(payload.ontology.scenario, "当前问题")
    # The scenario label is quoted, so the closing quote must be emitted too.
    return (
        f"当前还没有与“{scenario_label}”"
        "强匹配的已上线规则引用,建议先人工复核或补充更具体的单据上下文。"
    )
def _build_risk_answer(
    self,
    payload: UserAgentRequest,
    citations: list[UserAgentCitation],
) -> str:
    """Compose the answer for a risk-check request.

    Risk flags from ``_resolve_risk_flags`` are explained through
    ``RISK_REASON_MAP`` and combined with the messages returned by
    ``_evaluate_platform_risk_messages``. All platform messages together
    count as one signal class. The first two citation titles are appended
    as reference rules when available.

    Each composed value is delimited: a flag is separated from its
    explanation, reasons are separated from each other, and citation titles
    are listed with a separator.
    """
    risk_flags = self._resolve_risk_flags(payload)
    platform_messages = self._evaluate_platform_risk_messages(payload)
    if not risk_flags and not platform_messages:
        return "当前未识别到明确风险标签,建议继续查看原始明细或补充更多上下文。"
    reasons = [
        f"{flag}:{RISK_REASON_MAP.get(flag, f'{flag} 需要人工进一步确认。')}"
        for flag in risk_flags
    ]
    if platform_messages:
        reasons.extend(platform_messages)
    # Trailing sentence punctuation is dropped per reason so the joined list
    # reads "a;b。" whether or not each reason already ended with a period.
    reason_text = ";".join(str(reason).strip().rstrip("。;;") for reason in reasons)
    if citations:
        citation_titles = "、".join(item.title for item in citations[:2])
        citation_text = f" 参考规则:{citation_titles}"
    else:
        citation_text = ""
    signal_count = len(risk_flags) + (1 if platform_messages else 0)
    return (
        f"本次识别到 {signal_count} 类风险信号。"
        f"触发原因:{reason_text}。"
        "建议先复核明细、附件和审批链,再决定是否继续处理。"
        f"{citation_text}"
    )
def _evaluate_platform_risk_messages(self, payload: UserAgentRequest) -> list[str]:
    """Run the platform risk rules for the referenced claim and collect messages.

    The claim is looked up by ``tool_payload["claim_id"]`` with its items
    loaded. Rule codes come from ``resolve_rule_codes_for_risk_check`` and
    the evaluation is done by ``ExpenseClaimService``.

    Returns:
        The distinct, non-empty ``message`` values of the returned flags in
        first-seen order; an empty list when no claim id is given or the
        claim does not exist.
    """
    claim_id = str(payload.tool_payload.get("claim_id") or "").strip()
    if not claim_id:
        return []
    claim_query = (
        select(ExpenseClaim)
        .where(ExpenseClaim.id == claim_id)
        .options(selectinload(ExpenseClaim.items))
    )
    claim = self.db.scalar(claim_query)
    if claim is None:
        return []
    rule_codes = resolve_rule_codes_for_risk_check(
        payload.ontology,
        query_text=payload.message,
    )
    review = ExpenseClaimService(self.db).evaluate_platform_risk_rules(
        claim,
        rule_codes=rule_codes,
    )
    flag_texts = (
        str(flag.get("message") or "").strip()
        for flag in review.get("flags") or []
        if isinstance(flag, dict)
    )
    # dict.fromkeys keeps the first occurrence of each message, in order.
    return list(dict.fromkeys(text for text in flag_texts if text))
def _build_draft_payload(self, payload: UserAgentRequest) -> UserAgentDraftPayload:
    """Build the draft card describing the claim or opinion for this request.

    Claim number, status and approval stage come from ``tool_payload``. When
    a claim id is present and any of them is missing, the claim is loaded
    from the database to fill the gaps. A claim with status ``submitted``
    produces a "submitted" summary that needs no confirmation; everything
    else produces a draft that still requires manual confirmation.
    """
    tool = payload.tool_payload

    def optional_text(value: object) -> str | None:
        # Blank or missing values collapse to None.
        return str(value or "").strip() or None

    scenario_label = SCENARIO_LABELS.get(payload.ontology.scenario, "业务")
    subject = self._resolve_subject(payload)
    claim_id = optional_text(tool.get("claim_id"))
    claim_no = optional_text(tool.get("claim_no"))
    claim_status = optional_text(tool.get("status"))
    approval_stage = optional_text(tool.get("approval_stage"))
    needs_lookup = any(value is None for value in (claim_no, claim_status, approval_stage))
    if claim_id and needs_lookup:
        claim = self.db.get(ExpenseClaim, claim_id)
        if claim is not None:
            claim_no = claim_no or optional_text(claim.claim_no)
            claim_status = claim_status or optional_text(claim.status)
            approval_stage = approval_stage or optional_text(claim.approval_stage)
    is_submitted = claim_status == "submitted"
    if claim_no:
        title = f"{scenario_label}{'报销单' if is_submitted else '草稿'} {claim_no}"
    else:
        title = f"{scenario_label}处理意见草稿"
    if is_submitted:
        body_lines = [
            f"主题:{subject}",
            f"结论:报销单已提交,当前节点为 {approval_stage or '审批中'}",
            "建议:后续可在个人报销列表中跟踪审批进度,必要时再补充说明或附件。",
        ]
    else:
        body_lines = [
            f"主题:{subject}",
            "结论:已根据当前语义解析结果生成草稿,尚未自动执行。",
            "建议:请先核对明细、规则命中和所需附件,再由人工确认是否提交正式流程。",
        ]
    body_lines.append(f"原始问题:{payload.message}")
    return UserAgentDraftPayload(
        draft_type=payload.ontology.scenario,
        title=title,
        body="\n".join(body_lines),
        confirmation_required=not is_submitted,
        claim_id=claim_id,
        claim_no=claim_no,
        status=claim_status,
        approval_stage=approval_stage,
    )
@staticmethod
def _should_build_draft_payload(payload: UserAgentRequest) -> bool:
if payload.ontology.scenario == "expense" and payload.tool_payload.get("preview_only"):
return any(
str(payload.tool_payload.get(key) or "").strip()
for key in ("claim_id", "claim_no")
)
if payload.ontology.intent == "draft":
return True
if payload.ontology.scenario != "expense":
return False
return any(
str(payload.tool_payload.get(key) or "").strip()
for key in ("claim_id", "claim_no", "status")
)
def _build_suggested_actions(
    self,
    payload: UserAgentRequest,
) -> list[UserAgentSuggestedAction]:
    """Pick the follow-up actions offered under the answer.

    Knowledge answers get none. An expense request that still needs a scene
    choice gets one ``select_expense_type`` action per configured scene. A
    generic expense prompt gets two clarification actions. Otherwise the
    pair of actions depends on the intent (query/compare, risk_check,
    draft), with a rule-lookup pair as the default.
    """
    if payload.ontology.scenario == "knowledge":
        return []
    if self._should_prompt_expense_scene_selection(payload):
        scene_actions = []
        for code, label, description in EXPENSE_SCENE_SELECTION_OPTIONS:
            scene_actions.append(
                UserAgentSuggestedAction(
                    label=label,
                    action_type="select_expense_type",
                    description=description,
                    payload={
                        "expense_type": code,
                        "expense_type_label": label,
                        "original_message": payload.message,
                    },
                )
            )
        return scene_actions

    # Each spec is (label, action_type, description).
    if self._is_generic_expense_prompt(payload):
        specs = (
            ("上传票据", "ask_clarification", "上传发票、行程单或付款截图,继续识别报销内容。"),
            ("补充报销信息", "ask_clarification", "补充费用类型、金额、时间和事由后继续处理。"),
        )
    else:
        query_specs = (
            ("查看明细", "open_detail", "继续查看命中记录和过滤条件。"),
            ("生成处理意见", "create_draft", "把当前查询结果整理成可确认草稿。"),
        )
        specs_by_intent = {
            "query": query_specs,
            "compare": query_specs,
            "risk_check": (
                ("人工复核风险", "manual_review", "优先检查明细、附件和规则命中原因。"),
                ("生成整改建议", "create_draft", "把风险说明整理成处理意见草稿。"),
            ),
            "draft": (
                ("复制草稿", "copy_draft", "复制当前草稿后交由人工确认。"),
                ("补充上下文", "ask_clarification", "补充单据编号、客户或供应商信息以完善草稿。"),
            ),
        }
        default_specs = (
            ("查看规则全文", "open_rule", "继续查看引用规则或知识内容。"),
            ("补充问题上下文", "ask_clarification", "补充业务对象、时间或单据范围,提升回答准确度。"),
        )
        specs = specs_by_intent.get(payload.ontology.intent, default_specs)
    return [
        UserAgentSuggestedAction(
            label=label,
            action_type=action_type,
            description=description,
        )
        for label, action_type, description in specs
    ]

View File

@@ -0,0 +1,528 @@
from __future__ import annotations
import json
import re
from datetime import UTC, datetime, timedelta
from decimal import Decimal, InvalidOperation
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.schemas.agent_asset import AgentAssetListItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.schemas.user_agent import (
UserAgentCitation,
UserAgentDraftPayload,
UserAgentExpenseQueryRecord,
UserAgentQueryPayload,
UserAgentQueryStatusGroup,
UserAgentReviewAction,
UserAgentReviewClaimGroup,
UserAgentReviewDocumentCard,
UserAgentReviewDocumentField,
UserAgentReviewEditField,
UserAgentReviewPayload,
UserAgentReviewRiskBrief,
UserAgentReviewSlotCard,
UserAgentRequest,
UserAgentSuggestedAction,
)
from app.services.agent_assets import AgentAssetService
from app.services.expense_claims import ExpenseClaimService
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
from app.services.user_agent_constants import *
class UserAgentReviewCoreMixin:
def _should_prompt_expense_scene_selection(self, payload: UserAgentRequest) -> bool:
    """Tell whether the user must first choose an expense scene.

    True only for an expense ``draft``/``operate`` request that has no
    pending review action, no expense type in the review form, no
    attachments or OCR documents, and no recognised ``expense_type`` entity.
    """
    ontology = payload.ontology
    if ontology.scenario != "expense":
        return False
    if ontology.intent not in {"draft", "operate"}:
        return False
    if str(payload.context_json.get("review_action") or "").strip():
        return False
    form_values = self._resolve_review_form_values(payload)
    chosen_type = form_values.get("expense_type") or form_values.get("reimbursement_type") or ""
    if str(chosen_type).strip():
        return False
    if self._resolve_attachment_count(payload) > 0 or self._resolve_ocr_documents(payload):
        return False
    for entity in ontology.entities:
        if entity.type != "expense_type":
            continue
        if str(entity.normalized_value or entity.value or "").strip():
            return False
    return True
@staticmethod
def _build_expense_scene_selection_answer(payload: UserAgentRequest) -> str:
has_time = bool(payload.ontology.time_range.start_date or payload.ontology.time_range.raw)
context_hint = "我先识别到这是一次报销申请"
if has_time:
context_hint += ",并看到了业务发生时间"
return (
f"{context_hint}。但你还没有明确这笔单据属于哪类报销。"
"请先在下面选择报销场景,我会按你选择的场景再继续识别时间、地点、事由、金额和所需票据,"
"避免系统先入为主把项目支持、部署等描述误判成差旅。"
)
def _build_review_payload(
    self,
    payload: UserAgentRequest,
    *,
    citations: list[UserAgentCitation],
    draft_payload: UserAgentDraftPayload | None,
) -> UserAgentReviewPayload | None:
    """Assemble the expense pre-submission review card.

    Returns ``None`` for non-expense requests, and for expense requests that
    are neither ``draft``/``operate`` nor carry attachments or OCR
    documents. Otherwise the document cards, claim groups, slot cards,
    travel receipt state, risk briefs, confirmation actions, edit fields and
    summary texts are built in that order and packed into one payload.

    Proceeding is disabled while an association choice is pending, the tool
    reports ``submission_blocked``, or the travel receipt state blocks the
    next step.
    """
    attachment_total = self._resolve_attachment_count(payload)
    ocr_docs = self._resolve_ocr_documents(payload)
    ontology = payload.ontology
    if ontology.scenario != "expense":
        return None
    if ontology.intent not in {"draft", "operate"} and not (attachment_total > 0 or ocr_docs):
        return None

    document_cards = self._build_review_document_cards(payload, ocr_documents=ocr_docs)
    claim_groups = self._build_review_claim_groups(payload, document_cards=document_cards)
    slot_cards = self._build_review_slot_cards(
        payload,
        ocr_documents=ocr_docs,
        claim_groups=claim_groups,
    )
    travel_receipt_state = self._build_travel_receipt_state(
        payload,
        document_cards=document_cards,
        claim_groups=claim_groups,
    )
    missing_slot_keys = self._resolve_review_missing_slot_keys(payload, slot_cards=slot_cards)
    submission_blocked = bool(payload.tool_payload.get("submission_blocked"))

    risk_briefs = self._build_review_risk_briefs(
        payload,
        citations=citations,
        document_cards=document_cards,
        claim_groups=claim_groups,
    )
    risk_briefs.extend(self._build_travel_receipt_briefs(travel_receipt_state))

    association_choice_pending = self._is_review_association_choice_pending(payload)
    if (
        association_choice_pending
        or submission_blocked
        or travel_receipt_state.get("blocks_next_step")
    ):
        can_proceed = False
    else:
        can_proceed = self._can_proceed_review(
            payload,
            missing_slot_keys=missing_slot_keys,
            claim_groups=claim_groups,
        )

    confirmation_actions = self._build_review_confirmation_actions(
        payload,
        can_proceed=can_proceed,
        claim_groups=claim_groups,
        draft_payload=draft_payload,
        missing_slot_keys=missing_slot_keys,
    )
    edit_fields = self._build_review_edit_fields(
        payload,
        draft_payload=draft_payload,
        slot_cards=slot_cards,
    )
    intent_summary = self._build_review_intent_summary(
        payload,
        slot_cards=slot_cards,
        claim_groups=claim_groups,
    )
    body_message = self._build_review_body_message(
        payload,
        slot_cards=slot_cards,
        risk_briefs=risk_briefs,
        can_proceed=can_proceed,
        document_cards=document_cards,
        travel_receipt_state=travel_receipt_state,
    )

    slot_labels = [SLOT_LABELS.get(key, key) for key in missing_slot_keys]
    receipt_labels = [
        str(label)
        for label in travel_receipt_state.get("required_missing_labels", [])
        if str(label).strip()
    ]
    # dict.fromkeys removes duplicate labels while keeping first-seen order.
    missing_slot_labels = list(dict.fromkeys(slot_labels + receipt_labels))

    return UserAgentReviewPayload(
        intent_summary=intent_summary,
        body_message=body_message,
        scenario=ontology.scenario,
        intent=ontology.intent,
        can_proceed=can_proceed,
        missing_slots=missing_slot_labels,
        risk_briefs=risk_briefs,
        slot_cards=slot_cards,
        document_cards=document_cards,
        claim_groups=claim_groups,
        confirmation_actions=confirmation_actions,
        edit_fields=edit_fields,
    )
def _build_review_slot_cards(
    self,
    payload: UserAgentRequest,
    *,
    ocr_documents: list[dict[str, object]],
    claim_groups: list[UserAgentReviewClaimGroup],
) -> list[UserAgentReviewSlotCard]:
    """Build one slot card per review field, in display order.

    Each field is resolved by its ``_build_*_slot`` helper into a dict with
    ``value``, ``raw_value``, ``normalized_value``, ``source``,
    ``confidence`` and ``evidence``. A card is marked required when its key
    is in the set returned by ``_resolve_required_review_keys``, which is
    driven by the normalised expense type and the claim groups.
    """
    entity_map = self._collect_entity_values(payload)
    time_slot = self._build_time_slot(payload)
    location_slot = self._build_location_slot(payload)
    customer_slot = self._build_customer_slot(payload, entity_map=entity_map)
    participants_slot = self._build_participants_slot(payload, entity_map=entity_map)
    amount_slot = self._build_amount_slot(payload, entity_map=entity_map, ocr_documents=ocr_documents)
    expense_type_slot = self._build_expense_type_slot(
        payload,
        entity_map=entity_map,
        ocr_documents=ocr_documents,
    )
    merchant_slot = self._build_merchant_slot(payload, ocr_documents=ocr_documents)
    reason_slot = self._build_reason_slot(payload, claim_groups=claim_groups)
    attachment_slot = self._build_attachment_slot(payload)
    required_keys = self._resolve_required_review_keys(
        payload,
        primary_expense_type=str(expense_type_slot["normalized_value"] or ""),
        claim_groups=claim_groups,
    )
    # Display order differs from the resolution order above.
    ordered_slots = (
        ("expense_type", expense_type_slot),
        ("customer_name", customer_slot),
        ("time_range", time_slot),
        ("location", location_slot),
        ("merchant_name", merchant_slot),
        ("amount", amount_slot),
        ("reason", reason_slot),
        ("participants", participants_slot),
        ("attachments", attachment_slot),
    )
    return [
        self._make_slot_card(
            key=slot_key,
            value=slot["value"],
            raw_value=slot["raw_value"],
            normalized_value=slot["normalized_value"],
            source=slot["source"],
            confidence=slot["confidence"],
            evidence=slot["evidence"],
            required=slot_key in required_keys,
        )
        for slot_key, slot in ordered_slots
    ]
def _build_review_document_cards(
    self,
    payload: UserAgentRequest,
    *,
    ocr_documents: list[dict[str, object]],
) -> list[UserAgentReviewDocumentCard]:
    """Convert OCR documents into review document cards, numbered from 1.

    Every document is classified with ``_classify_document`` and its fields
    extracted with ``_extract_document_fields``. Blank field values and
    blank warnings are dropped. The scene label prefers the
    ``GROUP_SCENE_LABELS`` entry for the classified group code.
    """
    document_cards: list[UserAgentReviewDocumentCard] = []
    for position, document in enumerate(ocr_documents, start=1):
        classification = self._classify_document(document, payload)
        extracted_fields = self._extract_document_fields(document)
        field_cards = [
            UserAgentReviewDocumentField(label=label, value=value, source="ocr")
            for label, value in extracted_fields.items()
            if str(value).strip()
        ]
        warning_texts = [
            str(warning)
            for warning in document.get("warnings", [])
            if str(warning).strip()
        ]
        scene_label = GROUP_SCENE_LABELS.get(
            classification["group_code"],
            classification["scene_label"],
        )
        document_cards.append(
            UserAgentReviewDocumentCard(
                index=position,
                filename=str(document.get("filename") or f"document-{position}"),
                document_type=classification["document_type"],
                suggested_expense_type=classification["expense_type"],
                scene_label=scene_label,
                summary=str(document.get("summary") or document.get("text") or "").strip(),
                avg_score=float(document.get("avg_score") or 0.0),
                preview_kind=str(document.get("preview_kind") or "").strip(),
                preview_data_url=str(document.get("preview_data_url") or "").strip(),
                warnings=warning_texts,
                fields=field_cards,
            )
        )
    return document_cards
def _build_review_claim_groups(
    self,
    payload: UserAgentRequest,
    *,
    document_cards: list[UserAgentReviewDocumentCard],
) -> list[UserAgentReviewClaimGroup]:
    """Group document cards into suggested claims, one per expense group.

    Cards are bucketed by the normalised group code of their suggested
    expense type; each bucket accumulates document indexes, the amount total
    and a per-document reason. When cards in one bucket disagree on the
    expense type, the bucket falls back to the group code and its label.
    Without any document card a single group is derived from the recognised
    entities and the resolved amount.

    The rationale lists the distinct per-document reasons separated by
    ``;`` so that several documents stay readable.
    """
    groups: dict[str, dict[str, object]] = {}
    for card in document_cards:
        group_code = self._normalize_group_code(card.suggested_expense_type)
        bucket = groups.setdefault(
            group_code,
            {
                "document_indexes": [],
                "amount_total": 0.0,
                "expense_type": str(card.suggested_expense_type or group_code).strip() or group_code,
                "scene_label": GROUP_SCENE_LABELS.get(
                    str(card.suggested_expense_type or group_code).strip() or group_code,
                    GROUP_SCENE_LABELS.get(group_code, "其他费用"),
                ),
                "reasons": [],
            },
        )
        bucket["document_indexes"].append(card.index)
        bucket["amount_total"] = float(bucket["amount_total"]) + self._extract_amount_from_card(card)
        bucket["reasons"].append(f"{card.filename} 识别为 {card.scene_label}")
        current_expense_type = str(bucket["expense_type"] or "").strip()
        current_card_type = str(card.suggested_expense_type or "").strip()
        # Mixed expense types inside one group: fall back to the group itself.
        if current_expense_type and current_card_type and current_expense_type != current_card_type:
            bucket["expense_type"] = group_code
            bucket["scene_label"] = GROUP_SCENE_LABELS.get(group_code, "其他费用")
    if not groups:
        expense_type_code = self._collect_entity_values(payload).get("expense_type_code", "other")
        group_code = self._normalize_group_code(expense_type_code)
        groups[group_code] = {
            "document_indexes": [],
            "amount_total": self._resolve_amount_value(payload),
            "expense_type": expense_type_code or "other",
            "scene_label": GROUP_SCENE_LABELS.get(group_code, "其他费用"),
            "reasons": ["当前主要依据用户文本和页面上下文进行分单建议。"],
        }
    claim_groups: list[UserAgentReviewClaimGroup] = []
    for index, (group_code, bucket) in enumerate(groups.items(), start=1):
        title = f"建议报销单 {index}{bucket['scene_label']}"
        if bucket["reasons"]:
            # Distinct reasons in first-seen order, with a visible separator.
            rationale = ";".join(dict.fromkeys(str(item) for item in bucket["reasons"]))
        else:
            rationale = "当前仅有单一场景,无需拆单。"
        claim_groups.append(
            UserAgentReviewClaimGroup(
                group_code=group_code,
                title=title,
                expense_type=str(bucket["expense_type"]),
                scene_label=str(bucket["scene_label"]),
                document_indexes=list(bucket["document_indexes"]),
                amount_total=round(float(bucket["amount_total"]), 2),
                rationale=rationale,
            )
        )
    return claim_groups
def _build_review_risk_briefs(
self,
payload: UserAgentRequest,
*,
citations: list[UserAgentCitation],
document_cards: list[UserAgentReviewDocumentCard],
claim_groups: list[UserAgentReviewClaimGroup],
) -> list[UserAgentReviewRiskBrief]:
briefs: list[UserAgentReviewRiskBrief] = []
for reason in self._resolve_submission_blocked_reasons(payload):
briefs.append(
UserAgentReviewRiskBrief(
title="提交风险提示",
level=self._resolve_submission_blocked_risk_level(reason),
content=reason,
detail=(
"该项属于提交审批前的阻断条件。系统会先要求补齐基础字段、附件或业务说明,"
"否则审批人无法判断成本归属、业务真实性或票据有效性。"
),
suggestion="按提示补齐对应信息;如果业务场景本身合理,请补充说明或佐证附件后再提交。",
)
)
briefs.extend(
self._build_travel_policy_precheck_briefs(
payload,
document_cards=document_cards,
claim_groups=claim_groups,
)
)
employee = self._resolve_employee_profile(payload)
employee_name = (
str(employee.name).strip()
if employee is not None and employee.name
else self._collect_entity_values(payload).get("employee_name")
or str(payload.context_json.get("name") or "").strip()
)
current_amount = self._resolve_amount_value(payload) or sum(
self._extract_amount_from_card(card) for card in document_cards
)
if employee_name and current_amount > 0:
since = datetime.now(UTC) - timedelta(days=90)
claim_identity_conditions = [ExpenseClaim.employee_name == employee_name]
if employee is not None:
employee_identifiers = {
str(employee.name or "").strip(),
str(employee.email or "").strip(),
str(employee.employee_no or "").strip(),
}
employee_identifiers.discard("")
claim_identity_conditions = [
ExpenseClaim.employee_id == employee.id,
ExpenseClaim.employee_name.in_(list(employee_identifiers)),
]
stmt = select(ExpenseClaim).where(or_(*claim_identity_conditions), ExpenseClaim.occurred_at >= since)
recent_claims = list(self.db.scalars(stmt).all())
if recent_claims:
duplicate_count = sum(
1
for item in recent_claims
if abs(float(item.amount) - current_amount) < 0.01
)
if duplicate_count:
briefs.append(
UserAgentReviewRiskBrief(
title="金额重复预警",
level="warning",
content=(
f"近 90 天发现 {duplicate_count} 笔金额相同的报销记录,"
"提交前建议核对是否为重复报销或拆分不当。"
),
detail=(
"系统将当前金额与近 90 天历史报销金额进行比对。金额完全一致不一定违规,"
"但在交通、餐饮、办公采购等场景中可能提示重复票据或拆分报销。"
),
suggestion="核对历史单据与当前票据是否对应同一业务;如不是重复,请在事由中说明差异。",
)
)
warning_count = sum(len(item.warnings) for item in document_cards)
if warning_count:
briefs.append(
UserAgentReviewRiskBrief(
title="票据识别提醒",
level="warning",
content=f"当前共有 {warning_count} 条票据识别提示,建议逐张确认 OCR 识别字段。",
detail="票据 OCR 识别存在字段缺失、置信度偏低或类型判断不稳定时,会生成该提醒。",
suggestion="打开票据明细逐张核对日期、金额、商户和票据类型,必要时更正后再提交。",
)
)
if len(claim_groups) > 1:
briefs.append(
UserAgentReviewRiskBrief(
title="建议拆单",
level="warning",
content=f"系统检测到 {len(claim_groups)} 类费用场景,建议拆成多张报销单后再提交。",
detail="同一批附件中包含多类费用场景时,混在一张报销单里会影响规则匹配、附件核验和审批归口。",
suggestion="按费用场景拆成多张报销单,分别确认金额、事由和附件归属。",
)
)
return self._filter_deprecated_review_risk_briefs(briefs)
@staticmethod
def _resolve_submission_blocked_risk_level(reason: str) -> str:
normalized = re.sub(r"\s+", "", str(reason or ""))
amount_keywords = ("金额", "超标", "费用", "价款", "票面金额", "单价", "合计")
return "high" if any(keyword in normalized for keyword in amount_keywords) else "warning"
@staticmethod
def _filter_deprecated_review_risk_briefs(
briefs: list[UserAgentReviewRiskBrief],
) -> list[UserAgentReviewRiskBrief]:
filtered: list[UserAgentReviewRiskBrief] = []
for brief in briefs:
title = str(brief.title or "").strip()
if any(keyword in title for keyword in DEPRECATED_REVIEW_RISK_TITLE_KEYWORDS):
continue
filtered.append(brief)
return filtered

View File

@@ -0,0 +1,673 @@
from __future__ import annotations
import json
import re
from datetime import UTC, datetime, timedelta
from decimal import Decimal, InvalidOperation
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.schemas.agent_asset import AgentAssetListItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.schemas.user_agent import (
UserAgentCitation,
UserAgentDraftPayload,
UserAgentExpenseQueryRecord,
UserAgentQueryPayload,
UserAgentQueryStatusGroup,
UserAgentReviewAction,
UserAgentReviewClaimGroup,
UserAgentReviewDocumentCard,
UserAgentReviewDocumentField,
UserAgentReviewEditField,
UserAgentReviewPayload,
UserAgentReviewRiskBrief,
UserAgentReviewSlotCard,
UserAgentRequest,
UserAgentSuggestedAction,
)
from app.services.agent_assets import AgentAssetService
from app.services.expense_claims import ExpenseClaimService
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
from app.services.user_agent_constants import *
class UserAgentReviewMessageMixin:
def _build_review_confirmation_actions(
    self,
    payload: UserAgentRequest,
    *,
    can_proceed: bool,
    claim_groups: list[UserAgentReviewClaimGroup],
    draft_payload: UserAgentDraftPayload | None,
    missing_slot_keys: set[str] | None = None,
) -> list[UserAgentReviewAction]:
    """Build the confirmation buttons offered for the current review.

    Three layouts are produced, checked in this order:

    1. An association decision is pending: "link to existing draft" plus
       "create a separate claim".
    2. The ``expense_type`` slot is missing and the request carries no
       ``review_action``: a single primary "save draft" button.
    3. Otherwise a primary button ("next step" when ``can_proceed``, else
       "save draft"); when proceeding is possible a secondary "save draft"
       button is placed before it.
    """
    unresolved_keys = set(missing_slot_keys or set())

    if self._is_review_association_choice_pending(payload):
        candidate_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip()
        if candidate_no:
            link_label = f"关联到草稿 {candidate_no}"
            link_description = f"把本次上传票据并入现有草稿 {candidate_no}"
        else:
            link_label = "关联到现有草稿"
            link_description = "把本次上传票据并入现有草稿。"
        link_action = UserAgentReviewAction(
            label=link_label,
            action_type="link_to_existing_draft",
            description=link_description,
            emphasis="primary",
        )
        separate_action = UserAgentReviewAction(
            label="单独建立报销单",
            action_type="create_new_claim_from_documents",
            description="基于当前上传的多张票据,新建一张独立的报销草稿。",
            emphasis="secondary",
        )
        return [link_action, separate_action]

    requested_action = str(payload.context_json.get("review_action") or "").strip()
    if "expense_type" in unresolved_keys and not requested_action:
        return [
            UserAgentReviewAction(
                label="保存为草稿",
                action_type="save_draft",
                description="先暂存当前已识别信息,稍后仍可从个人报销继续补充或提交。",
                emphasis="primary",
            ),
        ]

    group_count = len(claim_groups)
    if can_proceed:
        primary_label, primary_type = "继续下一步", "next_step"
        primary_description = "当前识别信息已满足继续处理条件,确认后进入下一步。"
        if group_count > 1:
            primary_description = f"系统建议拆分为 {group_count} 张报销单,确认后继续下一步。"
    else:
        primary_label, primary_type = "保存为草稿", "save_draft"
        primary_description = "暂存当前识别结果,后续可以继续补充或修改。"
        if draft_payload is not None and draft_payload.claim_no:
            primary_description = f"保存后会生成草稿 {draft_payload.claim_no},后续仍可继续补充。"

    primary_action = UserAgentReviewAction(
        label=primary_label,
        action_type=primary_type,
        description=primary_description,
        emphasis="primary",
    )
    if not can_proceed:
        return [primary_action]

    # When the user may proceed, still offer "save draft" as the secondary choice.
    secondary_action = UserAgentReviewAction(
        label="保存为草稿",
        action_type="save_draft",
        description="先暂存当前已识别信息,稍后仍可从个人报销继续补充或提交。",
        emphasis="secondary",
    )
    return [secondary_action, primary_action]
def _build_review_intent_summary(
self,
payload: UserAgentRequest,
*,
slot_cards: list[UserAgentReviewSlotCard],
claim_groups: list[UserAgentReviewClaimGroup],
) -> str:
slots = {item.key: item for item in slot_cards}
expense_type = slots.get("expense_type")
amount = slots.get("amount")
time_range = slots.get("time_range")
location = slots.get("location")
customer = slots.get("customer_name")
summary = "我先根据您当前提供的信息整理出一笔报销:"
if expense_type and expense_type.value:
summary = f"识别到您希望报销一笔“{expense_type.value}”费用:"
details: list[str] = []
if customer and customer.value:
details.append(f"客户:{customer.value}")
if time_range and time_range.value:
details.append(f"时间:{time_range.value}")
if location and location.value:
details.append(f"地点:{location.value}")
if amount and amount.value:
details.append(f"金额:{amount.value}")
reason = slots.get("reason")
if reason and reason.value:
details.append(f"事由:{reason.value}")
if details:
return "\n\n".join([summary, "基础信息识别结果:", "\n".join(details)])
return summary
def _build_review_body_answer(
self,
payload: UserAgentRequest,
*,
review_payload: UserAgentReviewPayload | None,
draft_payload: UserAgentDraftPayload | None,
) -> str | None:
if review_payload is None:
return None
if payload.ontology.scenario != "expense":
return None
if payload.ontology.intent not in {"draft", "operate"}:
return None
if payload.tool_payload.get("draft_limit_reached"):
return (
str(payload.tool_payload.get("message") or "").strip()
or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。"
)
review_action = str(payload.context_json.get("review_action") or "").strip()
if payload.tool_payload.get("preview_only") and not review_action:
return review_payload.body_message or self._build_review_intent_summary(
payload,
slot_cards=review_payload.slot_cards,
claim_groups=review_payload.claim_groups,
)
if payload.tool_payload.get("duplicate_attachment_blocked") or payload.tool_payload.get("duplicate_invoice_blocked"):
return (
str(payload.tool_payload.get("message") or "").strip()
or "检测到本次上传票据与当前单据已有票据重复,请重新上传不同的票据后再归集。"
)
if review_action == "save_draft":
if draft_payload is not None and draft_payload.claim_no:
return (
f"已按您当前确认的信息保存为草稿 {draft_payload.claim_no}"
"后续您可以继续补充缺失项,或修改识别结果后再继续提交。"
)
return "已按您当前确认的信息保存为草稿。后续您可以继续补充缺失项,或修改识别结果后再继续提交。"
if review_action == "link_to_existing_draft":
document_count = self._resolve_review_document_count(payload)
followup_copy = self._build_review_action_followup_copy(review_payload)
if draft_payload is not None and draft_payload.claim_no:
return (
f"已将本次上传的 {document_count} 张票据关联到草稿 {draft_payload.claim_no}"
f"{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}"
)
return f"已将本次上传的票据关联到现有草稿。{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}"
if review_action == "create_new_claim_from_documents":
document_count = self._resolve_review_document_count(payload)
followup_copy = self._build_review_action_followup_copy(review_payload)
if draft_payload is not None and draft_payload.claim_no:
return (
f"已按当前上传的 {document_count} 张票据新建报销草稿 {draft_payload.claim_no}"
f"{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}"
)
return f"已按当前上传票据新建报销草稿。{followup_copy or '您可以继续补充识别字段,确认无误后再提交审批。'}"
if review_action == "next_step":
if draft_payload is not None and draft_payload.status == "submitted":
stage_text = draft_payload.approval_stage or "审批中"
return f"报销单 {draft_payload.claim_no or ''} 已提交,当前节点为 {stage_text}".strip()
if payload.tool_payload.get("submission_blocked"):
reasons = self._resolve_submission_blocked_reasons(payload)
if reasons:
reason_lines = "\n".join(
f"{index}. {reason}" for index, reason in enumerate(reasons, start=1)
)
return (
"AI预审暂未通过所以还没有提交到审批人。\n"
f"{reason_lines}\n"
"请先处理以上项目;处理完成后再点继续下一步。"
)
return str(payload.tool_payload.get("message") or "").strip() or "当前报销单暂时还不能提交审批。"
return (
f"{self._build_review_intent_summary(payload, slot_cards=review_payload.slot_cards, claim_groups=review_payload.claim_groups)}\n\n"
"当前关键信息已基本齐全,您确认无误后可以继续下一步。"
)
return review_payload.body_message or None
def _build_review_body_message(
self,
payload: UserAgentRequest,
*,
slot_cards: list[UserAgentReviewSlotCard],
risk_briefs: list[UserAgentReviewRiskBrief],
can_proceed: bool,
document_cards: list[UserAgentReviewDocumentCard],
travel_receipt_state: dict[str, Any] | None = None,
) -> str:
"""Compose the main body text shown with a review payload.

The first matching rule decides the text, in this order:
pending association choice, submission blocked by pre-review reasons,
travel-receipt guidance, missing expense type, and finally the generic
summary made of intent summary + calculation copy + guidance copy.
"""
# 1) The user must first choose between linking to an existing draft and creating a new claim.
if self._is_review_association_choice_pending(payload):
claim_no = str(payload.tool_payload.get("association_candidate_claim_no") or "").strip()
# Prefer the number of recognised document cards; fall back to the helper when there are none.
document_count = len(document_cards) or self._resolve_review_document_count(payload)
if claim_no:
return (
f"已识别出本次上传的 {document_count} 张票据。"
f"系统检测到你已有草稿 {claim_no},请选择关联到该草稿,或单独建立一张新的报销单。"
)
return (
f"已识别出本次上传的 {document_count} 张票据。"
"系统检测到你已有可用草稿,请先选择关联到现有草稿,或单独建立一张新的报销单。"
)
# 2) Pre-review rejected the submission: list the de-duplicated reasons.
blocked_reasons = self._resolve_submission_blocked_reasons(payload)
if blocked_reasons:
# dict.fromkeys removes repeated reasons while keeping first-seen order.
reason_text = "".join(dict.fromkeys(reason.strip("。;;") for reason in blocked_reasons if reason))
return (
f"AI预审未通过{reason_text}"
"请先根据风险提示补充原因、调整金额或更换附件,整改后再继续提交。"
)
# 3) Travel-receipt guidance replaces the generic text whenever the helper returns a non-empty message.
travel_message = self._build_travel_receipt_guidance_message(
payload,
travel_receipt_state=travel_receipt_state or {},
can_proceed=can_proceed,
)
if travel_message:
return travel_message
missing_labels = self._resolve_review_missing_slot_labels(slot_cards)
if travel_receipt_state:
missing_labels.extend(
str(item)
for item in travel_receipt_state.get("required_missing_labels", [])
if str(item).strip()
)
# NOTE(review): indentation is lost in this view, so it is unclear whether this
# de-duplication runs only when travel_receipt_state is set or unconditionally — confirm.
missing_labels = list(dict.fromkeys(missing_labels))
# 4) Without an expense type, ask the user to pick one before anything else.
expense_type_slot = next((item for item in slot_cards if item.key == "expense_type"), None)
if expense_type_slot is not None and not str(expense_type_slot.value or "").strip():
return (
f"{self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[])}\n\n"
"我已经先保留了当前识别出的时间、地点和事由,但还不能确定这张单据应该走哪类报销流程。"
"请先点击“选择报销类型”,在差旅费、交通费、住宿费等选项中选定;"
"选定后,后续上传的票据都会作为这张单据的补充继续核对,不会重新改判报销类型。"
)
# 5) Generic path: a throwaway review payload is built only to feed _build_review_guidance_copy.
review_payload = UserAgentReviewPayload(
intent_summary="",
body_message="",
scenario=payload.ontology.scenario,
intent=payload.ontology.intent,
can_proceed=can_proceed,
missing_slots=missing_labels,
risk_briefs=risk_briefs,
slot_cards=slot_cards,
document_cards=[],
claim_groups=[],
confirmation_actions=[],
edit_fields=[],
)
# Empty sections are dropped so the blank-line separators never double up.
return "\n\n".join(
item
for item in [
self._build_review_intent_summary(payload, slot_cards=slot_cards, claim_groups=[]),
self._build_review_standard_calculation_copy(payload, slot_cards),
self._build_review_guidance_copy(review_payload, mention_save_draft=not can_proceed),
]
if item
)
def _build_review_standard_calculation_copy(
self,
payload: UserAgentRequest,
slot_cards: list[UserAgentReviewSlotCard],
) -> str:
slots = {item.key: item for item in slot_cards}
expense_type = str(slots.get("expense_type").value if slots.get("expense_type") else "").strip()
if "差旅" in expense_type:
return self._build_review_travel_calculation_table(payload, slots)
if "交通" in expense_type:
return (
"报销测算参考:交通费通常以实际票据金额为基础,结合出行地点、业务事由和票据合规性复核;"
"如果它属于差旅行程的一部分,后续也会并入差旅费测算。"
)
if "住宿" in expense_type:
return (
"报销测算参考:住宿费通常按“实际住宿金额”和“目的地住宿标准 × 住宿天数”取合规口径;"
"补齐酒店票据后再核对是否超标。"
)
return (
"报销测算参考:先以用户填写金额或票据识别金额为基础,"
"再结合费用类型、发生地点、业务事由和规则中心限额进行复核。"
)
def _build_review_travel_calculation_table(
    self,
    payload: UserAgentRequest,
    slots: dict[str, UserAgentReviewSlotCard],
) -> str:
    """Render the travel reimbursement estimate as a Markdown table.

    Destination, days and ticket amount come from the slot cards; the grade
    comes from the resolved employee profile. Three outputs are possible:

    * destination or grade unknown: an informational table marking the
      missing values as pending;
    * the calculator raises: an informational table stating the standard
      could not be matched (the error is deliberately swallowed so the
      review message can still be produced);
    * otherwise: a table with ticket, lodging standard, allowance and the
      reference total.
    """
    destination = self._resolve_slot_text(slots, "location")
    days = self._resolve_review_travel_days(payload, slots)
    ticket_amount = self._resolve_slot_money(slots, "amount")
    employee = self._resolve_employee_profile(payload)
    grade = self._resolve_review_employee_grade(payload, employee=employee)
    money = self._format_decimal_money

    def _info_table(destination_cell: Any, destination_note: str, grade_cell: Any, grade_note: str) -> str:
        # Shared layout of the two fallback tables; only these four cells differ.
        rows = [
            "报销测算参考:",
            "",
            "| 项目 | 当前信息 | 测算说明 |",
            "| --- | --- | --- |",
            f"| 出差地点 | {destination_cell} | {destination_note} |",
            f"| 出差天数 | {days} 天 | 来自业务发生时间或用户描述 |",
            f"| 职级 | {grade_cell} | {grade_note} |",
            f"| 交通票据 | {money(ticket_amount)} 元 | 上传票据后会按真实金额重新复核 |",
        ]
        return "\n".join(rows)

    if not destination or not grade:
        return _info_table(
            destination or "待确认",
            "用于匹配城市住宿标准和补贴区域",
            grade or "待确认",
            "补齐后才能匹配住宿标准和补贴档位",
        )

    context = payload.context_json
    role_codes = [
        code
        for code in (str(raw_code).strip() for raw_code in list(context.get("role_codes") or []))
        if code
    ]
    current_user = CurrentUserContext(
        username=str(payload.user_id or context.get("name") or "anonymous").strip() or "anonymous",
        name=str(context.get("name") or payload.user_id or "anonymous").strip() or "anonymous",
        role_codes=role_codes,
        is_admin=bool(context.get("is_admin")),
        department_name=str(context.get("department_name") or context.get("department") or "").strip(),
    )

    try:
        request = TravelReimbursementCalculatorRequest(days=days, location=destination, grade=grade)
        calculation = TravelReimbursementCalculatorService(self.db).calculate(request, current_user)
    except Exception:
        # Best effort: any calculator failure degrades to the informational table.
        return _info_table(
            destination,
            "暂时未能匹配规则中心地点",
            grade,
            "暂时无法自动匹配差旅标准",
        )

    hotel_amount = self._coerce_decimal_money(calculation.hotel_amount)
    allowance_amount = self._coerce_decimal_money(calculation.allowance_amount)
    total_amount = (ticket_amount + hotel_amount + allowance_amount).quantize(Decimal("0.01"))

    if ticket_amount <= Decimal("0.00"):
        ticket_basis = "当前未上传交通票据,先按 0.00 元占位"
    else:
        ticket_basis = "已识别或填写的交通票据金额"

    overview = (
        f"职级 {calculation.grade},目的地 {destination},匹配城市 {calculation.matched_city}"
        "补齐交通、酒店等票据后,我会按真实票据金额和规则中心标准重新复核。"
    )
    rows = [
        "报销测算参考:",
        "",
        overview,
        "",
        "| 项目 | 测算口径 | 金额 |",
        "| --- | --- | ---: |",
        f"| 交通票据 | {ticket_basis} | {money(ticket_amount)} 元 |",
        f"| 住宿标准 | {money(calculation.hotel_rate)} 元/天 × {calculation.days} 天 | {money(calculation.hotel_amount)} 元 |",
        f"| 出差补贴 | {money(calculation.total_allowance_rate)} 元/天 × {calculation.days} 天 | {money(calculation.allowance_amount)} 元 |",
        f"| 参考合计 | 交通票据 + 住宿标准 + 出差补贴 | {money(total_amount)} 元 |",
    ]
    return "\n".join(rows)
@staticmethod
def _resolve_slot_text(slots: dict[str, UserAgentReviewSlotCard], key: str) -> str:
item = slots.get(key)
return str(getattr(item, "value", "") or getattr(item, "raw_value", "") or "").strip()
def _resolve_review_travel_days(
self,
payload: UserAgentRequest,
slots: dict[str, UserAgentReviewSlotCard],
) -> int:
text = " ".join(
[
str(payload.message or ""),
str(payload.context_json.get("user_input_text") or ""),
self._resolve_slot_text(slots, "reason"),
self._resolve_slot_text(slots, "time_range"),
]
)
explicit_match = re.search(r"(?<!\d)(\d{1,2})\s*天", text)
if explicit_match:
return max(1, int(explicit_match.group(1)))
dates = self._extract_dates_from_text(self._resolve_slot_text(slots, "time_range"))
if len(dates) >= 2:
return max(1, (max(dates).date() - min(dates).date()).days)
return 1
def _resolve_slot_money(
self,
slots: dict[str, UserAgentReviewSlotCard],
key: str,
) -> Decimal:
text = self._resolve_slot_text(slots, key).replace(",", "")
match = re.search(r"([0-9]+(?:\.[0-9]{1,2})?)", text)
if not match:
return Decimal("0.00")
return self._coerce_decimal_money(match.group(1))
@staticmethod
def _build_review_action_followup_copy(review_payload: UserAgentReviewPayload) -> str:
missing_slots = [str(item).strip() for item in review_payload.missing_slots if str(item).strip()]
receipt_briefs = [
item
for item in review_payload.risk_briefs
if "差旅票据待补充" in str(item.title or "")
]
if missing_slots:
return f"当前仍有 {''.join(missing_slots)},暂时只能保存为草稿,补齐后再继续下一步。"
if receipt_briefs:
return "当前必需票据已具备;如还有市内交通、打车、地铁或停车等乘车票据,可以继续上传,也可以继续下一步或保存草稿。"
if review_payload.can_proceed:
return "当前信息已较完整,您可以继续下一步,也可以先保存为草稿。"
return ""
def _build_travel_receipt_guidance_message(
self,
payload: UserAgentRequest,
*,
travel_receipt_state: dict[str, Any],
can_proceed: bool,
) -> str:
review_action = str(payload.context_json.get("review_action") or "").strip()
if review_action or not travel_receipt_state.get("has_long_distance_ticket"):
return ""
employee = self._resolve_employee_profile(payload)
user_name = (
str(employee.name).strip()
if employee is not None and employee.name
else str(payload.context_json.get("name") or payload.user_id or "同事").strip()
)
destination = str(travel_receipt_state.get("destination") or "待确认").strip()
days = max(1, int(travel_receipt_state.get("days") or 1))
ticket_type_label = str(travel_receipt_state.get("ticket_type_label") or "交通").strip()
ticket_amount = self._coerce_decimal_money(travel_receipt_state.get("ticket_amount"))
required_labels = [
str(item).strip()
for item in travel_receipt_state.get("required_missing_labels", [])
if str(item).strip()
]
optional_labels = [
str(item).strip()
for item in travel_receipt_state.get("optional_missing_labels", [])
if str(item).strip()
]
provide_items: list[str] = []
if required_labels:
provide_items.append("1. 酒店住宿发票/住宿清单(必须,当前待上传)")
if optional_labels:
provide_items.append(f"{len(provide_items) + 1}. 市内交通/乘车票据(非必须,如打车、地铁、停车等)")
sections = [
f"您好,{user_name}。我先按票据信息做一次差旅预检。",
"\n".join(
[
"已识别信息:",
f"1. 出差地点:{destination}",
f"2. 预计天数:{days}",
f"3. 票据类型:{ticket_type_label}",
f"4. 票据金额:{self._format_decimal_money(ticket_amount)}",
]
),
]
if provide_items:
sections.append("还需补充:\n" + "\n".join(provide_items))
else:
sections.append("票据完整性:当前核心票据已较完整,无需继续上传票据。")
if required_labels:
sections.append(
"处理建议:酒店票据仍缺失,暂时不能继续下一步。"
"您可以先保存为草稿,补齐后再提交。"
)
elif can_proceed and optional_labels:
sections.append(
"处理建议:必需票据已具备。"
"如暂时没有乘车票据,也可以继续下一步,或先保存为草稿。"
)
elif can_proceed:
sections.append(
"处理建议:当前信息已较完整,确认无误后可以继续下一步;"
"暂时不提交时,也可以先保存为草稿。"
)
estimate_copy = self._build_travel_receipt_estimate_copy(
payload,
travel_receipt_state=travel_receipt_state,
)
if estimate_copy:
sections.append(estimate_copy)
return "\n\n".join(section for section in sections if section)
def _build_travel_receipt_estimate_copy(
    self,
    payload: UserAgentRequest,
    *,
    travel_receipt_state: dict[str, Any],
) -> str:
    """Render the numbered travel-cost estimate for the receipt pre-check.

    Reads destination, days, ticket type and ticket amount from
    ``travel_receipt_state`` and the grade from the employee profile. When
    destination or grade is unknown, or when the calculator raises, a
    shorter list explaining why no estimate is available is returned
    instead (calculator errors are deliberately swallowed).
    """
    destination = str(travel_receipt_state.get("destination") or "").strip()
    days = max(1, int(travel_receipt_state.get("days") or 1))
    ticket_type_label = str(travel_receipt_state.get("ticket_type_label") or "交通").strip()
    ticket_amount = self._coerce_decimal_money(travel_receipt_state.get("ticket_amount"))
    employee = self._resolve_employee_profile(payload)
    grade = self._resolve_review_employee_grade(payload, employee=employee)
    money = self._format_decimal_money

    if not destination or not grade:
        pending_lines = [
            "差旅费测算:",
            f"1. 职级:{grade or '待确认'}",
            f"2. 目的地:{destination or '出差地点待确认'}",
            f"3. 已提交{ticket_type_label}{money(ticket_amount)}",
            "4. 住宿和补贴金额:需补齐职级或地点后再核算。",
        ]
        return "\n".join(pending_lines)

    context = payload.context_json
    role_codes = [
        code
        for code in (str(raw_code).strip() for raw_code in list(context.get("role_codes") or []))
        if code
    ]
    current_user = CurrentUserContext(
        username=str(payload.user_id or context.get("name") or "anonymous").strip() or "anonymous",
        name=str(context.get("name") or payload.user_id or "anonymous").strip() or "anonymous",
        role_codes=role_codes,
        is_admin=bool(context.get("is_admin")),
        department_name=str(context.get("department_name") or context.get("department") or "").strip(),
    )

    try:
        request = TravelReimbursementCalculatorRequest(days=days, location=destination, grade=grade)
        calculation = TravelReimbursementCalculatorService(self.db).calculate(request, current_user)
    except Exception:
        # Best effort: fall back to a notice instead of failing the whole reply.
        failed_lines = [
            "差旅费测算:",
            f"1. 职级:{grade}",
            f"2. 目的地:{destination}",
            f"3. 已提交{ticket_type_label}{money(ticket_amount)}",
            "4. 住宿和补贴标准:暂时无法自动测算,请以规则中心最新差旅标准为准。",
        ]
        return "\n".join(failed_lines)

    hotel_amount = self._coerce_decimal_money(calculation.hotel_amount)
    allowance_amount = self._coerce_decimal_money(calculation.allowance_amount)
    total_amount = (ticket_amount + hotel_amount + allowance_amount).quantize(Decimal("0.01"))

    estimate_lines = [
        "差旅费测算:",
        f"1. 职级:{calculation.grade}",
        f"2. 目的地:{calculation.matched_city or destination}",
        f"3. 已提交{ticket_type_label}{money(ticket_amount)}",
        f"4. 住宿标准:{money(calculation.hotel_rate)} 元/天 × {calculation.days}",
        f"5. 出差补贴:{money(calculation.total_allowance_rate)} 元/天 × {calculation.days}",
        f"6. 参考合计:{money(total_amount)}",
    ]
    return "\n".join(estimate_lines)
@staticmethod
def _coerce_decimal_money(value: Any) -> Decimal:
try:
return Decimal(str(value or "0")).quantize(Decimal("0.01"))
except (InvalidOperation, ValueError):
return Decimal("0.00")
@staticmethod
def _format_decimal_money(value: Any) -> str:
return f"{UserAgentReviewMessageMixin._coerce_decimal_money(value):.2f}"
@staticmethod
def _resolve_review_missing_slot_labels(
slot_cards: list[UserAgentReviewSlotCard],
) -> list[str]:
return [item.label for item in slot_cards if item.status == "missing"]
@staticmethod
def _build_review_guidance_copy(
review_payload: UserAgentReviewPayload,
*,
mention_save_draft: bool,
) -> str:
reminder_count = len(review_payload.risk_briefs)
if review_payload.can_proceed:
if reminder_count:
return (
f"当前关键信息已基本齐全,但还有 {reminder_count} 条提醒。"
"请核查对话中的文字说明,确认无误后继续下一步。"
)
return "当前关键信息已基本齐全,您确认无误后可以继续下一步。"
return ""
@staticmethod
def _can_proceed_review(
payload: UserAgentRequest,
*,
missing_slot_keys: list[str],
claim_groups: list[UserAgentReviewClaimGroup],
) -> bool:
if payload.ontology.ambiguity:
return False
if missing_slot_keys:
return False
if not claim_groups:
return False
return True

View File

@@ -0,0 +1,465 @@
from __future__ import annotations
import json
import re
from datetime import UTC, datetime, timedelta
from decimal import Decimal, InvalidOperation
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.schemas.agent_asset import AgentAssetListItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.schemas.user_agent import (
UserAgentCitation,
UserAgentDraftPayload,
UserAgentExpenseQueryRecord,
UserAgentQueryPayload,
UserAgentQueryStatusGroup,
UserAgentReviewAction,
UserAgentReviewClaimGroup,
UserAgentReviewDocumentCard,
UserAgentReviewDocumentField,
UserAgentReviewEditField,
UserAgentReviewPayload,
UserAgentReviewRiskBrief,
UserAgentReviewSlotCard,
UserAgentRequest,
UserAgentSuggestedAction,
)
from app.services.agent_assets import AgentAssetService
from app.services.expense_claims import ExpenseClaimService
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
from app.services.user_agent_constants import *
class UserAgentReviewProfileMixin:
def _build_review_edit_fields(
    self,
    payload: UserAgentRequest,
    *,
    draft_payload: UserAgentDraftPayload | None,
    slot_cards: list[UserAgentReviewSlotCard],
) -> list[UserAgentReviewEditField]:
    """Build the editable form fields pre-filled from the recognised slots.

    Fields are returned in a fixed order grouped as ``basic``, ``business``
    and ``attachments``. Time, location and amount prefer the slot's
    normalized value; the other slot-backed fields use the displayed value.
    The reporter falls back to the name in the request context, and the
    manager comes from the resolved employee profile.
    """
    cards_by_key = {card.key: card for card in slot_cards}

    def _shown(slot_key: str) -> Any:
        # Displayed value of a slot, or "" when the slot is absent.
        card = cards_by_key.get(slot_key)
        return card.value if card else ""

    def _normalized_first(slot_key: str) -> Any:
        # Normalized value when present, otherwise the displayed value.
        card = cards_by_key.get(slot_key)
        if not card:
            return ""
        return card.normalized_value if card.normalized_value else card.value

    employee = self._resolve_employee_profile(payload)
    reporter_card = cards_by_key.get("reporter_name")
    if reporter_card:
        reporter_name = reporter_card.value
    else:
        reporter_name = str(payload.context_json.get("name") or "").strip()
    manager_name = self._resolve_manager_name(employee)
    reason = _shown("reason")
    attachments = "".join(self._resolve_attachment_names(payload))

    if draft_payload is not None and draft_payload.claim_no:
        claim_no_text = str(draft_payload.claim_no)
    else:
        claim_no_text = str("待生成")

    basic_fields = [
        UserAgentReviewEditField(
            key="claim_no",
            label="报销单据编号",
            value=claim_no_text,
            placeholder="保存草稿后自动生成",
            required=False,
            group="basic",
        ),
        UserAgentReviewEditField(
            key="expense_type",
            label="报销类型",
            value=_shown("expense_type"),
            placeholder="例如:业务招待费 / 差旅费",
            group="basic",
        ),
        UserAgentReviewEditField(
            key="occurred_date",
            label="业务发生时间",
            value=_normalized_first("time_range"),
            placeholder="例如2026-05-11",
            group="basic",
        ),
        UserAgentReviewEditField(
            key="reporter_name",
            label="报销人",
            value=reporter_name,
            placeholder="请输入报销人姓名",
            group="basic",
        ),
        UserAgentReviewEditField(
            key="manager_name",
            label="直属上司姓名",
            value=manager_name,
            placeholder="请输入直属上司姓名",
            required=False,
            group="basic",
        ),
    ]
    business_fields = [
        UserAgentReviewEditField(
            key="customer_name",
            label="客户名称",
            value=_shown("customer_name"),
            placeholder="请输入客户名称",
            group="business",
        ),
        UserAgentReviewEditField(
            key="business_location",
            label="业务地点",
            value=_normalized_first("location"),
            placeholder="例如:北京 / 客户现场",
            required=False,
            group="business",
        ),
        UserAgentReviewEditField(
            key="merchant_name",
            label="酒店/商户",
            value=_shown("merchant_name"),
            placeholder="请输入酒店或商户名称",
            required=False,
            group="business",
        ),
        UserAgentReviewEditField(
            key="amount",
            label="金额",
            value=_normalized_first("amount"),
            placeholder="例如200.00元",
            group="business",
        ),
        UserAgentReviewEditField(
            key="participants",
            label="参与人员",
            value=_shown("participants"),
            placeholder="例如:客户 2 人,我方 1 人",
            group="business",
        ),
        UserAgentReviewEditField(
            key="reason",
            label="事由",
            value=reason,
            placeholder="请输入报销事由",
            field_type="textarea",
            group="business",
        ),
    ]
    attachment_fields = [
        UserAgentReviewEditField(
            key="attachment_names",
            label="附件清单",
            value=attachments,
            placeholder="例如:发票.jpg、行程单.png",
            required=False,
            field_type="textarea",
            group="attachments",
        ),
    ]
    return [*basic_fields, *business_fields, *attachment_fields]
def _resolve_employee_profile(self, payload: UserAgentRequest) -> Employee | None:
    """Look up the employee the request refers to, or ``None``.

    Candidate identifiers are the context name, the request user id and the
    ``employee_name`` entity value. An employee matches when any candidate
    equals the employee's name, employee number or e-mail. The organization
    unit and manager relations are loaded with the employee. Only one row is
    fetched and no ordering is applied, so with several matches the row
    returned is whichever the database yields first.
    """
    raw_candidates = [
        str(payload.context_json.get("name") or "").strip(),
        str(payload.user_id or "").strip(),
        self._collect_entity_values(payload).get("employee_name", ""),
    ]
    # De-duplicate while keeping order, then drop empty candidates.
    lookup_values = [value for value in dict.fromkeys(raw_candidates) if value]
    if not lookup_values:
        return None

    identity_match = or_(
        Employee.name.in_(lookup_values),
        Employee.employee_no.in_(lookup_values),
        Employee.email.in_(lookup_values),
    )
    query = (
        select(Employee)
        .options(selectinload(Employee.organization_unit), selectinload(Employee.manager))
        .where(identity_match)
        .limit(1)
    )
    return self.db.scalar(query)
@staticmethod
def _resolve_manager_name(employee: Employee | None) -> str:
if employee is None:
return ""
if employee.manager is not None and employee.manager.name:
return employee.manager.name
if employee.organization_unit is not None and employee.organization_unit.manager_name:
return employee.organization_unit.manager_name
return ""
@staticmethod
def _extract_message_reason(message: str) -> str:
for line in str(message or "").splitlines():
cleaned = line.strip()
if not cleaned:
continue
if cleaned.startswith(("附件名称:", "OCR摘要", "关联单号:")):
continue
return cleaned[:300]
return ""
@staticmethod
def _looks_like_system_generated_reason_message(message: str) -> bool:
cleaned = str(message or "").strip()
if not cleaned:
return False
compact = re.sub(r"\s+", "", cleaned)
return compact.startswith(SYSTEM_GENERATED_REASON_PREFIXES)
def _resolve_reason_source_text(self, payload: UserAgentRequest) -> str:
explicit_text = payload.context_json.get("user_input_text")
if isinstance(explicit_text, str):
return explicit_text.strip()
if self._looks_like_system_generated_reason_message(payload.message):
return ""
return str(payload.message or "").strip()
@classmethod
def _resolve_reason_text(cls, message: str) -> str:
    """Derive the reimbursement reason from a free-text message.

    The first meaningful line of ``message`` is taken, a leading time
    expression is stripped, and the result is returned unless:

    * it is empty, or equals a generic prompt listed in
      ``GENERIC_EXPENSE_PROMPTS`` (whitespace ignored): ``""`` is returned;
    * it starts with an instruction phrase such as "帮我生成" or "我要报销":
      only the text after the first matching separator is returned, capped
      at 300 characters, or ``""`` when no separator is followed by text.

    Separators are tried in tuple order, not by position in the text.
    """
    reason = cls._strip_leading_time_from_reason(cls._extract_message_reason(message))
    if not reason:
        return ""

    compact = re.sub(r"\s+", "", reason)
    if compact in GENERIC_EXPENSE_PROMPTS:
        return ""

    instruction_prefixes = (
        "帮我生成",
        "请帮我生成",
        "生成",
        "起草",
        "创建",
        "发起",
        "准备",
        "帮我报销",
        "我要报销",
        "我想报销",
    )
    if not compact.startswith(instruction_prefixes):
        return reason

    # The separator list used to contain empty strings. "" is contained in
    # every string and str.split("") raises ValueError("empty separator"),
    # so every instruction-style message crashed here. Each ASCII mark is
    # now paired with its fullwidth counterpart, plus the ideographic stop.
    separators = (",", ",", "。", ";", ";", ":", ":")
    for separator in separators:
        _, matched, trailing = reason.partition(separator)
        trailing = trailing.strip()
        if matched and trailing:
            return trailing[:300]
    return ""
@staticmethod
def _strip_leading_time_from_reason(value: str) -> str:
    """Remove a leading time expression from a reason string.

    Each pattern in ``LEADING_REASON_TIME_PATTERNS`` is applied to the
    stripped input in turn; the result of the first pattern that changes the
    text is returned. When no pattern changes anything the stripped input is
    returned unchanged.
    """
    original_text = str(value or "").strip()
    rewritten_candidates = (
        pattern.sub("", original_text).strip() for pattern in LEADING_REASON_TIME_PATTERNS
    )
    return next(
        (candidate for candidate in rewritten_candidates if candidate != original_text),
        original_text,
    )
@staticmethod
def _should_skip_model_answer(
payload: UserAgentRequest,
review_payload: UserAgentReviewPayload | None,
) -> bool:
if payload.ontology.scenario == "expense" and payload.ontology.intent in {"query", "compare"}:
return True
if review_payload is None:
return False
return payload.ontology.scenario == "expense" and (
payload.ontology.intent == "draft"
or int(payload.context_json.get("attachment_count") or 0) > 0
)
def _build_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]:
knowledge_citations = self._build_knowledge_citations(payload)
if payload.ontology.scenario == "knowledge":
return knowledge_citations[:3]
rule_citations = self._build_rule_asset_citations(payload)
if knowledge_citations:
return (knowledge_citations + rule_citations)[:3]
return rule_citations
@staticmethod
def _build_knowledge_citations(payload: UserAgentRequest) -> list[UserAgentCitation]:
citations: list[UserAgentCitation] = []
for item in list(payload.tool_payload.get("hits") or [])[:3]:
if not isinstance(item, dict):
continue
title = str(item.get("title") or item.get("document_name") or "").strip()
code = str(item.get("code") or item.get("candidate_id") or "").strip()
if not title or not code:
continue
citations.append(
UserAgentCitation(
source_type="knowledge",
code=code,
title=title,
version=str(item.get("version") or "").strip() or None,
updated_at=str(item.get("updated_at") or "").strip() or None,
excerpt=(
str(item.get("excerpt") or "").strip()
or str(item.get("content") or "").strip()
or None
),
)
)
return citations
def _build_rule_asset_citations(self, payload: UserAgentRequest) -> list[UserAgentCitation]:
    """Cite up to two active rule assets ranked for the request.

    Active rule assets of the scenario's domain are listed, ranked with
    ``_rank_rule_assets``, and the top two are loaded in full. Assets whose
    detail cannot be loaded are skipped. The excerpt is derived from the
    current version content.
    """
    scenario_domain = self._resolve_domain(payload.ontology.scenario)
    candidates = self.asset_service.list_assets(
        asset_type=AgentAssetType.RULE.value,
        status=AgentAssetStatus.ACTIVE.value,
        domain=scenario_domain,
    )
    top_ranked = self._rank_rule_assets(candidates, payload)[:2]

    citations: list[UserAgentCitation] = []
    for ranked_asset in top_ranked:
        asset_detail = self.asset_service.get_asset(ranked_asset.id)
        if asset_detail is not None:
            content_excerpt = self._extract_excerpt(str(asset_detail.current_version_content or ""))
            citations.append(
                UserAgentCitation(
                    source_type="rule",
                    code=asset_detail.code,
                    title=asset_detail.name,
                    version=asset_detail.current_version,
                    updated_at=asset_detail.updated_at.date().isoformat(),
                    excerpt=content_excerpt,
                )
            )
    return citations
@staticmethod
def _resolve_risk_flags(payload: UserAgentRequest) -> list[str]:
tool_flags = payload.tool_payload.get("risk_flags")
if isinstance(tool_flags, list) and tool_flags:
return [str(item) for item in tool_flags]
return [str(item) for item in payload.ontology.risk_flags]
@staticmethod
def _resolve_subject(payload: UserAgentRequest) -> str:
named_entities = [
item.value
for item in payload.ontology.entities
if item.type in {"employee", "customer", "vendor", "project"}
]
if named_entities:
return f"{''.join(named_entities)} 相关数据"
return f"{SCENARIO_LABELS.get(payload.ontology.scenario, '当前')}场景数据"
@staticmethod
def _is_generic_expense_prompt(payload: UserAgentRequest) -> bool:
if payload.ontology.scenario != "expense":
return False
normalized_message = re.sub(r"\s+", "", payload.message)
return normalized_message in GENERIC_EXPENSE_PROMPTS
@staticmethod
def _is_implicit_expense_draft_request(payload: UserAgentRequest) -> bool:
if payload.ontology.scenario != "expense" or payload.ontology.intent != "draft":
return False
compact_message = re.sub(r"\s+", "", payload.message)
if any(keyword in compact_message for keyword in EXPLICIT_DRAFT_KEYWORDS):
return False
return True
@staticmethod
def _resolve_attachment_names(payload: UserAgentRequest) -> list[str]:
names = payload.context_json.get("attachment_names")
if not isinstance(names, list):
return []
return [str(name) for name in names if str(name).strip()]
@staticmethod
def _resolve_attachment_count(payload: UserAgentRequest) -> int:
    """Return how many attachments accompany the request.

    The number of resolved attachment names wins when there are any;
    otherwise ``context_json["attachment_count"]`` is parsed as an integer.
    Unparseable values count as zero and negative values are clamped to zero.
    """
    attachment_names = UserAgentReviewProfileMixin._resolve_attachment_names(payload)
    if attachment_names:
        return len(attachment_names)
    try:
        declared_count = int(payload.context_json.get("attachment_count") or 0)
    except (TypeError, ValueError):
        return 0
    return max(0, declared_count)
@staticmethod
def _resolve_ocr_documents(payload: UserAgentRequest) -> list[dict[str, object]]:
documents = payload.context_json.get("ocr_documents")
if not isinstance(documents, list):
return []
overrides = payload.context_json.get("review_document_form_values")
override_map: dict[tuple[int, str], dict[str, object]] = {}
if isinstance(overrides, list):
for item in overrides:
if not isinstance(item, dict):
continue
filename = str(item.get("filename") or "").strip()
index = int(item.get("index") or 0)
if not filename and index <= 0:
continue
override_map[(index, filename)] = item
normalized: list[dict[str, object]] = []
for index, item in enumerate(documents[:8], start=1):
if not isinstance(item, dict):
continue
normalized_item = dict(item)
override = override_map.get((index, str(normalized_item.get("filename") or "").strip()))
if override is None:
override = override_map.get((index, ""))
if override is not None:
summary = str(override.get("summary") or "").strip()
scene_label = str(override.get("scene_label") or "").strip()
fields = override.get("fields")
if summary:
normalized_item["summary"] = summary
if scene_label:
normalized_item["scene_label"] = scene_label
if isinstance(fields, list):
normalized_item["document_fields"] = [
{
"key": str(field.get("key") or field.get("label") or "").strip(),
"label": str(field.get("label") or "").strip(),
"value": str(field.get("value") or "").strip(),
}
for field in fields
if isinstance(field, dict)
and str(field.get("label") or "").strip()
and str(field.get("value") or "").strip()
]
normalized.append(normalized_item)
return normalized
@staticmethod
def _is_review_association_choice_pending(payload: UserAgentRequest) -> bool:
return bool(payload.tool_payload.get("pending_association_decision"))
def _resolve_review_document_count(self, payload: UserAgentRequest) -> int:
return max(
len(self._resolve_ocr_documents(payload)),
self._resolve_attachment_count(payload),
)

View File

@@ -0,0 +1,706 @@
from __future__ import annotations
import json
import re
from datetime import UTC, datetime, timedelta
from decimal import Decimal, InvalidOperation
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.schemas.agent_asset import AgentAssetListItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.schemas.user_agent import (
UserAgentCitation,
UserAgentDraftPayload,
UserAgentExpenseQueryRecord,
UserAgentQueryPayload,
UserAgentQueryStatusGroup,
UserAgentReviewAction,
UserAgentReviewClaimGroup,
UserAgentReviewDocumentCard,
UserAgentReviewDocumentField,
UserAgentReviewEditField,
UserAgentReviewPayload,
UserAgentReviewRiskBrief,
UserAgentReviewSlotCard,
UserAgentRequest,
UserAgentSuggestedAction,
)
from app.services.agent_assets import AgentAssetService
from app.services.expense_claims import ExpenseClaimService
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
from app.services.user_agent_constants import *
class UserAgentReviewSlotMixin:
@staticmethod
def _resolve_conversation_history(payload: UserAgentRequest) -> list[dict[str, object]]:
history = payload.context_json.get("conversation_history")
if not isinstance(history, list):
return []
normalized: list[dict[str, object]] = []
for item in history[-8:]:
if not isinstance(item, dict):
continue
role = str(item.get("role") or "").strip()
content = str(item.get("content") or "").strip()
if not role or not content:
continue
normalized.append({"role": role, "content": content})
return normalized
@staticmethod
def _resolve_domain(scenario: str) -> str | None:
if scenario == "expense":
return "expense"
if scenario == "accounts_receivable":
return "ar"
if scenario == "accounts_payable":
return "ap"
return None
@staticmethod
def _rank_rule_assets(
items: list[AgentAssetListItem],
payload: UserAgentRequest,
) -> list[AgentAssetListItem]:
def score(item: AgentAssetListItem) -> tuple[int, str]:
tags = {str(value) for value in item.scenario_json or []}
weight = 0
if payload.ontology.scenario in tags:
weight += 3
if payload.ontology.intent in tags:
weight += 2
for risk_flag in payload.ontology.risk_flags:
if risk_flag in tags:
weight += 4
return weight, item.code
ranked = sorted(items, key=score, reverse=True)
return [item for item in ranked if score(item)[0] > 0]
@staticmethod
def _extract_excerpt(content: str) -> str:
lines = [line.strip() for line in str(content).splitlines() if line.strip()]
cleaned: list[str] = []
for line in lines:
normalized = re.sub(r"^[#>\-\*\d\.\s`]+", "", line).strip()
if normalized:
cleaned.append(normalized)
if len(cleaned) >= 2:
break
return "".join(cleaned[:2])
def _collect_entity_values(self, payload: UserAgentRequest) -> dict[str, str]:
values = {
"employee_name": "",
"customer": "",
"participants": "",
"amount": "",
"expense_type": "",
"expense_type_code": "",
}
participants: list[str] = []
for item in payload.ontology.entities:
if item.type == "employee" and not values["employee_name"]:
values["employee_name"] = item.value
elif item.type == "customer" and not values["customer"]:
values["customer"] = item.value
elif item.type == "amount" and item.role != "threshold" and not values["amount"]:
normalized_amount = str(item.normalized_value or "").strip()
values["amount"] = f"{normalized_amount}" if normalized_amount else item.value
elif item.type == "expense_type" and not values["expense_type_code"]:
values["expense_type_code"] = item.normalized_value
values["expense_type"] = EXPENSE_TYPE_LABELS.get(
item.normalized_value,
item.value,
)
elif item.type in {"participant", "person"} and item.value.strip():
participants.append(item.value.strip())
if participants:
values["participants"] = "".join(dict.fromkeys(participants))
return values
def _format_time_range(self, payload: UserAgentRequest) -> str:
time_range = payload.ontology.time_range
if time_range.start_date and time_range.end_date:
if time_range.start_date == time_range.end_date:
return time_range.start_date
normalized = f"{time_range.start_date}{time_range.end_date}"
return normalized
if time_range.raw:
return time_range.raw
return ""
def _resolve_location_value(self, payload: UserAgentRequest) -> str:
review_form_values = self._resolve_review_form_values(payload)
for key in ("business_location", "location"):
value = str(review_form_values.get(key) or "").strip()
if value:
return value
if str(payload.context_json.get("entry_source") or "").strip() == "detail":
request_context = payload.context_json.get("request_context")
if isinstance(request_context, dict):
for key in ("city", "location"):
value = str(request_context.get(key) or "").strip()
if value:
return value
labeled_match = re.search(r"(?:业务地点|发生地点|地点)[:]\s*(?P<value>[^\n]+)", payload.message)
if labeled_match:
return labeled_match.group("value").strip()
city_match = re.search(
r"去(?P<city>[\u4e00-\u9fa5]{2,8}?)(?:出差|拜访|参会|见客户|客户现场|支撑|支持|部署|实施|处理|协助)",
payload.message,
)
if city_match:
return city_match.group("city").strip()
if "客户现场" in payload.message.replace(" ", ""):
return "客户现场"
return ""
@staticmethod
def _resolve_review_form_values(payload: UserAgentRequest) -> dict[str, str]:
values = payload.context_json.get("review_form_values")
if not isinstance(values, dict):
return {}
normalized: dict[str, str] = {}
for key, value in values.items():
cleaned_key = str(key or "").strip()
if not cleaned_key:
continue
normalized[cleaned_key] = str(value or "").strip()
return normalized
@staticmethod
def _build_slot_value(
*,
value: str = "",
raw_value: str = "",
normalized_value: str = "",
source: str = "system",
confidence: float = 0.0,
evidence: str = "",
) -> dict[str, str | float]:
return {
"value": str(value or "").strip(),
"raw_value": str(raw_value or "").strip(),
"normalized_value": str(normalized_value or "").strip(),
"source": str(source or "system").strip() or "system",
"confidence": float(confidence),
"evidence": str(evidence or "").strip(),
}
def _build_time_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
edited_value = str(
review_form_values.get("time_range")
or review_form_values.get("business_time")
or review_form_values.get("occurred_date")
or ""
).strip()
if edited_value:
raw_value = str(review_form_values.get("time_range_raw") or edited_value).strip()
return self._build_slot_value(
value=edited_value,
raw_value=raw_value,
normalized_value=edited_value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
time_range = payload.ontology.time_range
if time_range.start_date and time_range.end_date:
normalized_value = (
time_range.start_date
if time_range.start_date == time_range.end_date
else f"{time_range.start_date}{time_range.end_date}"
)
raw_value = str(time_range.raw or "").strip()
return self._build_slot_value(
value=normalized_value,
raw_value=raw_value,
normalized_value=normalized_value,
source="user_text",
confidence=0.92,
evidence="系统已根据当前日期将相对时间换算为标准日期。",
)
return self._build_slot_value()
def _build_location_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
for key in ("business_location", "location"):
value = str(review_form_values.get(key) or "").strip()
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
if str(payload.context_json.get("entry_source") or "").strip() == "detail":
request_context = payload.context_json.get("request_context")
if isinstance(request_context, dict):
for key in ("city", "location"):
value = str(request_context.get(key) or "").strip()
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="detail_context",
confidence=0.68,
evidence="来源于当前关联单据,仅作为辅助上下文,需要用户再次核对。",
)
value = self._resolve_location_value(payload)
if value:
evidence = "用户在文本中明确描述了业务地点。"
if value == "客户现场":
evidence = "用户明确提到“客户现场”,但未提供具体城市或地址。"
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_text",
confidence=0.82,
evidence=evidence,
)
return self._build_slot_value()
def _build_customer_slot(
self,
payload: UserAgentRequest,
*,
entity_map: dict[str, str],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
value = str(review_form_values.get("customer_name") or "").strip()
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
value = entity_map.get("customer", "")
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_text",
confidence=0.88,
evidence="用户在原始描述中直接提到了客户对象。",
)
return self._build_slot_value()
def _build_participants_slot(
self,
payload: UserAgentRequest,
*,
entity_map: dict[str, str],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
value = str(review_form_values.get("participants") or "").strip()
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
value = entity_map.get("participants", "")
if value:
return self._build_slot_value(
value=value,
normalized_value=value,
source="user_text",
confidence=0.8,
evidence="用户在当前描述中补充了参与人员。",
)
return self._build_slot_value()
def _build_reason_slot(
self,
payload: UserAgentRequest,
*,
claim_groups: list[UserAgentReviewClaimGroup],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
edited_value = str(review_form_values.get("reason") or "").strip()
if edited_value:
return self._build_slot_value(
value=edited_value,
raw_value=edited_value,
normalized_value=edited_value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
inferred_reason = self._infer_reason_from_claim_groups(
claim_groups=claim_groups,
)
reason_value = self._resolve_reason_text(self._resolve_reason_source_text(payload))
if inferred_reason:
return self._build_slot_value(
value=inferred_reason,
raw_value=reason_value or inferred_reason,
normalized_value=inferred_reason,
source="ocr",
confidence=0.82,
evidence=(
"系统已根据票据识别结果预置场景类型;原始描述仍保留为补充说明。"
if reason_value
else "系统已根据票据识别场景补全通用事由,若需更具体说明可继续修改。"
),
)
if reason_value:
return self._build_slot_value(
value=reason_value,
raw_value=reason_value,
normalized_value=reason_value,
source="user_text",
confidence=0.76,
evidence="系统从用户原始描述中提取了本次费用事由,建议继续核对。",
)
return self._build_slot_value()
def _build_amount_slot(
self,
payload: UserAgentRequest,
*,
entity_map: dict[str, str],
ocr_documents: list[dict[str, object]],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
edited_amount = str(review_form_values.get("amount") or "").strip()
if edited_amount:
normalized = self._normalize_amount_text(edited_amount)
return self._build_slot_value(
value=normalized,
raw_value=edited_amount,
normalized_value=normalized,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
amount_value = entity_map.get("amount", "")
if amount_value:
normalized = self._normalize_amount_text(amount_value)
return self._build_slot_value(
value=normalized,
raw_value=amount_value,
normalized_value=normalized,
source="user_text",
confidence=0.92,
evidence="用户在原始描述中直接给出了金额。",
)
ocr_total_amount = self._sum_ocr_amounts(ocr_documents)
if ocr_total_amount > 0:
normalized = f"{ocr_total_amount:.2f}"
return self._build_slot_value(
value=normalized,
normalized_value=normalized,
source="ocr",
confidence=0.76,
evidence="金额来自 OCR 汇总结果,仍建议用户核对票据原文。",
)
return self._build_slot_value()
def _build_expense_type_slot(
self,
payload: UserAgentRequest,
*,
entity_map: dict[str, str],
ocr_documents: list[dict[str, object]],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
edited_value = str(review_form_values.get("expense_type") or review_form_values.get("reimbursement_type") or "").strip()
if edited_value:
normalized_code, normalized_label = self._normalize_expense_type_input(edited_value)
return self._build_slot_value(
value=normalized_label,
raw_value=edited_value,
normalized_value=normalized_code,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
expense_type_code = entity_map.get("expense_type_code", "")
expense_type_value = EXPENSE_TYPE_LABELS.get(expense_type_code, entity_map.get("expense_type", ""))
if expense_type_value:
return self._build_slot_value(
value=expense_type_value,
raw_value=expense_type_value,
normalized_value=expense_type_code,
source="user_text",
confidence=0.9,
evidence="系统根据用户描述中的业务场景判断费用类型。",
)
inferred_label = self._infer_expense_type_from_documents(payload, ocr_documents) if ocr_documents else ""
if inferred_label:
normalized_code, normalized_label = self._normalize_expense_type_input(inferred_label)
return self._build_slot_value(
value=normalized_label,
raw_value=inferred_label,
normalized_value=normalized_code,
source="ocr",
confidence=0.74,
evidence="系统根据票据内容推断费用类型,仍建议用户确认。",
)
return self._build_slot_value()
def _build_merchant_slot(
self,
payload: UserAgentRequest,
*,
ocr_documents: list[dict[str, object]],
) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
edited_value = str(review_form_values.get("merchant_name") or "").strip()
if edited_value:
return self._build_slot_value(
value=edited_value,
normalized_value=edited_value,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
merchant_value = ""
for document in ocr_documents:
if not self._is_hotel_document_item(document):
continue
merchant_value = self._extract_document_merchant_name(document)
if merchant_value:
break
if merchant_value:
return self._build_slot_value(
value=merchant_value,
normalized_value=merchant_value,
source="ocr",
confidence=0.72,
evidence="商户名称来自 OCR 票据识别结果,仍建议用户核对。",
)
return self._build_slot_value()
def _build_attachment_slot(self, payload: UserAgentRequest) -> dict[str, str | float]:
review_form_values = self._resolve_review_form_values(payload)
attachment_names = str(review_form_values.get("attachment_names") or "").strip()
if attachment_names:
return self._build_slot_value(
value=attachment_names,
normalized_value=attachment_names,
source="user_form",
confidence=1.0,
evidence="来源于用户修改后的结构化表单。",
)
count = self._resolve_attachment_count(payload)
if count > 0:
names = self._resolve_attachment_names(payload)
value = "".join(names) if names else f"{count} 份附件"
return self._build_slot_value(
value=value,
raw_value=value,
normalized_value=str(count),
source="upload",
confidence=1.0,
evidence="系统已接收到用户上传的附件。",
)
return self._build_slot_value()
@staticmethod
def _normalize_amount_text(value: str) -> str:
cleaned = str(value or "").strip()
if not cleaned:
return ""
for alias, canonical in sorted(AMOUNT_UNIT_ALIASES.items(), key=lambda item: len(item[0]), reverse=True):
cleaned = cleaned.replace(alias, canonical)
match = AMOUNT_TEXT_PATTERN.search(cleaned)
if not match:
return cleaned
number = float(match.group(1))
return f"{number:.2f}"
@staticmethod
def _normalize_expense_type_input(value: str) -> tuple[str, str]:
compact = str(value or "").replace(" ", "")
if "招待" in compact or ("客户" in compact and any(keyword in compact for keyword in ("吃饭", "用餐", "宴请", "请客"))):
return "entertainment", "业务招待费"
if any(keyword in compact for keyword in ("差旅", "出差", "机票", "行程")):
return "travel", "差旅费"
if any(keyword in compact for keyword in ("住宿", "酒店", "宾馆")):
return "hotel", "住宿费"
if any(keyword in compact for keyword in ("交通", "打车", "网约车", "出租车", "乘车", "用车", "叫车", "车费", "车资", "的士", "停车")):
return "transport", "交通费"
if any(keyword in compact for keyword in ("餐费", "用餐", "午餐", "晚餐", "早餐", "伙食")):
return "meal", "餐费"
if "会务" in compact:
return "meeting", "会务费"
if any(keyword in compact for keyword in ("办公费", "办公用品", "文具", "耗材", "办公耗材", "打印纸", "办公设备", "键盘", "鼠标", "白板")):
return "office", "办公费"
if any(keyword in compact for keyword in ("培训费", "培训", "讲师费", "课时费", "课程费")):
return "training", "培训费"
if any(keyword in compact for keyword in ("通讯费", "话费", "流量费", "宽带费")):
return "communication", "通讯费"
if any(keyword in compact for keyword in ("福利费", "团建", "慰问", "节日福利", "体检费")):
return "welfare", "福利费"
return "other", str(value or "").strip() or "其他费用"
def _resolve_required_review_keys(
self,
payload: UserAgentRequest,
*,
primary_expense_type: str,
claim_groups: list[UserAgentReviewClaimGroup],
) -> set[str]:
required = {"expense_type", "time_range", "amount", "reason", "attachments"}
scene_codes = {
str(item.group_code or "").strip()
for item in claim_groups
if str(item.group_code or "").strip()
}
if primary_expense_type:
scene_codes.add(primary_expense_type)
for scene_code in scene_codes:
required.update(SCENE_REQUIRED_SLOT_KEYS.get(scene_code, set()))
compact_message = re.sub(r"\s+", "", self._resolve_reason_source_text(payload) or payload.message)
if "entertainment" in scene_codes or (
"客户" in compact_message and any(keyword in compact_message for keyword in ("招待", "吃饭", "用餐", "宴请", "请客"))
):
required.update({"customer_name", "participants"})
return required
@staticmethod
def _infer_reason_from_claim_groups(
*,
claim_groups: list[UserAgentReviewClaimGroup],
) -> str:
if len(claim_groups) == 1:
document_indexes = list(claim_groups[0].document_indexes or [])
if not document_indexes:
return ""
expense_type = str(claim_groups[0].expense_type or "").strip()
group_code = str(claim_groups[0].group_code or "").strip()
if expense_type:
return INFERRED_REASON_LABELS.get(expense_type, "") or str(claim_groups[0].scene_label or "").strip()
if group_code:
return INFERRED_REASON_LABELS.get(group_code, "") or str(claim_groups[0].scene_label or "").strip()
return ""
@staticmethod
def _resolve_review_missing_slot_keys(
payload: UserAgentRequest,
*,
slot_cards: list[UserAgentReviewSlotCard],
) -> list[str]:
required_keys = {item.key for item in slot_cards if item.required}
slot_map = {item.key: item for item in slot_cards}
missing_keys = {
item.key
for item in slot_cards
if item.required and (item.status == "missing" or not str(item.value).strip())
}
for key in payload.ontology.missing_slots:
normalized_key = str(key or "").strip()
if (
normalized_key
and normalized_key in required_keys
and (
normalized_key not in slot_map
or slot_map[normalized_key].status == "missing"
or not str(slot_map[normalized_key].value).strip()
)
):
missing_keys.add(normalized_key)
ordered_keys: list[str] = []
for item in slot_cards:
if item.required and item.key in missing_keys and item.key not in ordered_keys:
ordered_keys.append(item.key)
return ordered_keys
def _make_slot_card(
    self,
    *,
    key: str,
    value: str,
    raw_value: str,
    normalized_value: str,
    source: str,
    confidence: float,
    evidence: str,
    required: bool = True,
) -> UserAgentReviewSlotCard:
    """Turn one resolved slot into the card shown on the review screen.

    A required slot with a blank value is marked ``"missing"`` and gets a hint
    asking for it. Otherwise values from the user's text or form are
    ``"identified"`` (and confirmed) while everything else is ``"inferred"``;
    values from the detail context or OCR carry a re-check hint.
    """
    label = SLOT_LABELS.get(key, key)
    is_missing = required and not str(value).strip()
    user_supplied = source in {"user_text", "user_form"}
    # Unknown sources are displayed with the "system" label.
    source_key = source if source in SOURCE_LABELS else "system"

    if is_missing:
        status = "missing"
        hint = f"建议补充 {label}"
    else:
        status = "identified" if user_supplied else "inferred"
        if source in {"detail_context", "ocr"}:
            hint = "该字段来自系统辅助上下文,建议你再核对一次。"
        else:
            hint = ""

    return UserAgentReviewSlotCard(
        key=key,
        label=label,
        value=str(value or "").strip(),
        raw_value=str(raw_value or "").strip(),
        normalized_value=str(normalized_value or "").strip(),
        source=source,
        source_label=SOURCE_LABELS.get(source_key, "系统判断"),
        confidence=confidence,
        required=required,
        confirmed=not is_missing and user_supplied,
        status=status,
        hint=hint,
        evidence=evidence,
    )

View File

@@ -0,0 +1,360 @@
from __future__ import annotations
import json
import re
from datetime import UTC, datetime, timedelta
from decimal import Decimal, InvalidOperation
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.schemas.agent_asset import AgentAssetListItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.schemas.user_agent import (
UserAgentCitation,
UserAgentDraftPayload,
UserAgentExpenseQueryRecord,
UserAgentQueryPayload,
UserAgentQueryStatusGroup,
UserAgentReviewAction,
UserAgentReviewClaimGroup,
UserAgentReviewDocumentCard,
UserAgentReviewDocumentField,
UserAgentReviewEditField,
UserAgentReviewPayload,
UserAgentReviewRiskBrief,
UserAgentReviewSlotCard,
UserAgentRequest,
UserAgentSuggestedAction,
)
from app.services.agent_assets import AgentAssetService
from app.services.expense_claims import ExpenseClaimService
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
from app.services.user_agent_constants import *
# Mixin that produces travel-policy pre-check risk briefs for the review payload.
# It relies on attributes and helpers supplied by the composing class
# (``self.db`` and the ``self._...`` helpers called below).
#
# NOTE(review): leading indentation was lost in the reviewed source, so every
# line below appears at column 0. The nesting level of several ``continue``
# statements therefore cannot be confirmed from this view; the comments flag
# those spots instead of guessing.
class UserAgentReviewTravelPolicyMixin:
# Build the list of risk briefs for the uploaded document cards.
#
# Parameters:
#   payload        - the current user-agent request.
#   document_cards - recognized document cards to check one by one.
#   claim_groups   - claim groups, used only to decide whether this is a
#                    travel review context.
# Returns a list of UserAgentReviewRiskBrief. When any amount measurement
# line was collected, an "info" summary brief is inserted at position 0.
def _build_travel_policy_precheck_briefs(
self,
payload: UserAgentRequest,
*,
document_cards: list[UserAgentReviewDocumentCard],
claim_groups: list[UserAgentReviewClaimGroup],
) -> list[UserAgentReviewRiskBrief]:
# Nothing to check without documents or outside a travel review context.
if not document_cards or not self._is_travel_review_context(payload, document_cards, claim_groups):
return []
# Load the rule catalog; without a travel policy there is nothing to apply.
rule_catalog = ExpenseRuleRuntimeService(self.db).load_catalog()
policy = rule_catalog.travel_policy
if policy is None:
return []
# Resolve the employee's grade and the policy band it maps to.
employee = self._resolve_employee_profile(payload)
grade = self._resolve_review_employee_grade(payload, employee=employee)
grade_band = ExpenseClaimService._resolve_travel_policy_band(grade)
# Display label for the band; falls back to the raw grade or a generic label.
band_label = policy.band_labels.get(grade_band or "", grade or "当前职级")
declared_city = self._resolve_declared_travel_city(payload, policy)
reason_corpus = self._build_review_reason_corpus(payload)
# True when the reason text contains one of the policy's exception keywords.
has_exception_note = self._text_contains_any(reason_corpus, policy.standard_exception_keywords)
# Prefer the dedicated "standard" rule name/version when the policy has them.
standard_rule_name = str(getattr(policy, "standard_rule_name", "") or policy.rule_name)
standard_rule_version = str(getattr(policy, "standard_rule_version", "") or policy.rule_version)
briefs: list[UserAgentReviewRiskBrief] = []
amount_measurement_lines: list[str] = []
seen_keys: set[str] = set()
# Append a brief only the first time its key is seen.
def append_once(key: str, brief: UserAgentReviewRiskBrief) -> None:
if key in seen_keys:
return
seen_keys.add(key)
briefs.append(brief)
# Per-card checks: hotel, travel meal allowance, scene amount limit,
# transport class, generic travel meal, uncertain travel document type.
for card in document_cards:
document_type = str(card.document_type or "").strip().lower()
suggested_type = str(card.suggested_expense_type or "").strip().lower()
card_text = self._build_review_document_card_text(card)
document_type_label = resolve_document_type_label(document_type)
amount = self._extract_amount_decimal_from_card(card)
# --- Hotel cards: compare the per-night amount with the hotel cap. ---
if self._is_review_hotel_card(card):
# City comes from the card text, falling back to the declared city.
hotel_city = self._extract_policy_city_from_text(card_text, policy) or declared_city
# Cities not listed in the policy default to "tier_3".
city_tier = policy.city_tiers.get(hotel_city, "tier_3")
city_tier_label = self._format_travel_city_tier(city_tier)
# No amount recognized: record the gap and ask the user to fill it in.
if amount is None:
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法完成住宿差标测算。"
)
append_once(
f"hotel-amount-missing-{card.index}",
UserAgentReviewRiskBrief(
title="住宿金额待补充",
level="warning",
content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算的住宿金额。",
detail=(
f"依据《{standard_rule_name}》({standard_rule_version}),住宿票据需要按员工职级、城市级别和每晚金额进行差标核算。"
"当前票据缺少金额,系统无法判断是否超出差旅标准。"
),
suggestion="请在票据识别结果中补充或更正住宿金额,再继续核对报销单。",
),
)
continue
# No grade band: the hotel standard cannot be matched.
if grade_band is None:
amount_measurement_lines.append(
f"{card.filename}:识别住宿金额 {amount:.2f} 元,但缺少员工职级,无法匹配住宿标准。"
)
append_once(
f"hotel-grade-missing-{card.index}",
UserAgentReviewRiskBrief(
title="职级信息待确认",
level="warning",
content=f"{card.filename} 已识别住宿金额 {amount:.2f} 元,但当前员工职级缺失,无法匹配住宿标准。",
detail=(
f"依据《{standard_rule_name}》({standard_rule_version}),住宿标准按职级档位和城市级别配置。"
"当前未能识别员工职级,因此无法完成创建前差标核算。"
),
suggestion="请确认员工档案或页面上下文中的职级信息,再重新进行差旅规则预检。",
),
)
continue
cap = self._resolve_review_hotel_cap(
policy,
grade_band=grade_band,
city=hotel_city,
city_tier=city_tier,
)
# A non-positive cap is skipped without producing a brief.
if cap <= Decimal("0.00"):
continue
night_count = self._extract_review_hotel_night_count(card)
# Divide by at least one night to avoid division by zero.
nightly_amount = (amount / Decimal(max(night_count, 1))).quantize(Decimal("0.01"))
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元,"
f"{night_count} 晚折算 {nightly_amount:.2f} 元/晚;"
f"适用标准为 {band_label}{city_tier_label} {cap:.2f} 元/晚,"
f"{'超出标准' if nightly_amount > cap else '测算通过'}"
)
# Within the cap: nothing more to report for this card.
if nightly_amount <= cap:
continue
basis = (
f"依据《{standard_rule_name}》({standard_rule_version}{band_label}{city_tier_label}"
f"住宿标准为 {cap:.2f} 元/晚;{card.filename} 识别为{document_type_label}"
f"金额 {amount:.2f} 元,按 {night_count} 晚折算约 {nightly_amount:.2f} 元/晚。"
)
# Over the cap: title and detail depend on whether an exception note exists.
append_once(
f"hotel-over-limit-{card.index}",
UserAgentReviewRiskBrief(
title="住宿超标待说明" if not has_exception_note else "住宿超标提醒",
level="high",
content=(
f"{card.filename} 住宿金额约 {nightly_amount:.2f} 元/晚,"
f"超过 {band_label} {city_tier_label}标准 {cap:.2f} 元/晚。"
),
detail=(
basis
+ (
"当前未识别到超标说明,创建单据前需要先补充原因。"
if not has_exception_note
else "当前已识别到例外说明,后续仍需审批人重点复核。"
)
),
suggestion="补充超标说明、协议酒店满房/会议高峰等原因,或调整住宿金额后再继续。",
),
)
continue
# --- Meal receipts: compare against the travel meal allowance, if any. ---
if document_type == "meal_receipt":
allowance = self._resolve_review_travel_allowance_standard(
policy,
declared_city=declared_city,
card_text=card_text,
)
if allowance is not None:
# The allowance is unpacked as (region label, standard amount).
region_label, standard_amount = allowance
if amount is None:
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法按{region_label}伙食补助标准测算。"
)
append_once(
f"travel-meal-amount-missing-{card.index}",
UserAgentReviewRiskBrief(
title="差旅餐饮金额待补充",
level="high",
content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算金额。",
detail=(
f"依据《{standard_rule_name}》({standard_rule_version}),差旅餐饮票据优先按出差补助标准中的伙食补助进行测算。"
f"当前匹配区域为{region_label},但票据缺少金额,系统无法判断是否超出补助标准。"
),
suggestion="请在票据识别结果中补充或更正餐饮金额,再继续创建报销单。",
),
)
continue
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;"
f"适用《{standard_rule_name}{region_label}伙食补助标准 {standard_amount:.2f} 元/天,"
f"{'超出标准' if amount > standard_amount else '测算通过'}"
)
if amount > standard_amount:
append_once(
f"travel-meal-allowance-over-limit-{card.index}",
UserAgentReviewRiskBrief(
title="差旅餐饮金额超出伙食补助标准",
level="high",
content=(
f"{card.filename} 识别金额 {amount:.2f} 元,"
f"超过{region_label}伙食补助标准 {standard_amount:.2f} 元/天。"
),
detail=(
f"依据《{standard_rule_name}》({standard_rule_version})的出差补助标准,"
f"{region_label}伙食补助为 {standard_amount:.2f} 元/天;"
f"当前票据类型识别为{document_type_label},识别金额 {amount:.2f} 元。"
"首轮上传阶段按单张票据先行测算,后续可结合出差天数和实际餐补口径复核。"
),
suggestion="如该票据属于差旅餐补,请调整金额或补充超标/拆分说明;如属于业务招待或普通餐费,请改为对应费用类型后再提交。",
),
)
# NOTE(review): nesting of this ``continue`` is not recoverable from the
# flattened source (inside the over-limit branch vs. the allowance branch) —
# confirm against the repository.
continue
# --- Scene amount limit: check the card against its scene policy limit. ---
scene_code = self._resolve_review_amount_scene_code(card, payload)
scene_policy = rule_catalog.get_scene_policy(scene_code)
scene_limit = self._resolve_review_scene_amount_limit(scene_policy)
if scene_policy is not None and scene_limit is not None:
# Label of the measured metric; falls back to the scene label, then "金额".
metric_label = str(getattr(scene_limit, "metric_label", "") or scene_policy.label or "金额").strip()
standard_amount = self._resolve_scene_standard_amount(scene_limit)
if amount is None:
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},但未识别到可核算金额,无法按{metric_label}测算。"
)
append_once(
f"{scene_code}-amount-missing-{card.index}",
UserAgentReviewRiskBrief(
title=f"{scene_policy.label}金额待补充",
level="warning",
content=f"{card.filename} 已识别为{document_type_label},但未识别到可核算金额。",
detail=(
f"依据《{scene_policy.rule_name}》({scene_policy.rule_version}"
f"{scene_policy.label}需要按{metric_label}进行金额审核。当前票据缺少金额,系统无法判断是否合规。"
),
suggestion="请在票据识别结果中补充或更正金额,再继续核对报销单。",
),
)
continue
if standard_amount is not None:
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;"
f"适用《{scene_policy.rule_name}{metric_label}标准 {standard_amount:.2f} 元,"
f"{'超出标准' if amount > standard_amount else '测算通过'}"
)
amount_risk = self._evaluate_review_scene_amount(
amount=amount,
limit_config=scene_limit,
reason_text=reason_corpus,
)
if amount_risk is not None:
# The evaluation result is unpacked as (severity, threshold).
severity, threshold = amount_risk
append_once(
f"{scene_code}-amount-over-limit-{card.index}",
UserAgentReviewRiskBrief(
title=f"{scene_policy.label}金额超标待说明",
level="high" if severity == "high" else "warning",
content=(
f"{card.filename} 识别金额 {amount:.2f} 元,"
f"超过{metric_label}标准 {threshold:.2f} 元。"
),
detail=(
f"依据《{scene_policy.rule_name}》({scene_policy.rule_version}"
f"{scene_policy.label}{metric_label}审核,当前票据类型识别为{document_type_label}"
f"识别金额 {amount:.2f} 元,标准阈值 {threshold:.2f} 元。"
),
suggestion="请补充超标原因或拆分到更准确的费用类型;如属于例外场景,请在事由中写明业务背景。",
),
)
# NOTE(review): nesting of this ``continue`` is not recoverable from the
# flattened source — confirm against the repository.
continue
# --- Transport class: compare the detected class level with the band limit. ---
transport_class = self._detect_review_transport_class(card, policy)
if transport_class and grade_band is not None:
# The detection result is unpacked as (transport kind, class label, class level).
transport_kind, class_label, class_level = transport_class
allowed_level = policy.transport_limits.get(grade_band, {}).get(transport_kind)
if allowed_level is not None and class_level > allowed_level:
append_once(
f"transport-class-over-limit-{card.index}-{class_label}",
UserAgentReviewRiskBrief(
title="交通舱位超标待说明" if not has_exception_note else "交通舱位超标提醒",
level="warning",
content=f"{card.filename} 识别为 {class_label}{band_label} 当前默认不可报销该舱位/席别。",
detail=(
f"依据《{standard_rule_name}》({standard_rule_version}{band_label} 的交通席别标准"
f"未覆盖 {class_label};票据类型识别为{document_type_label}"
+ (
"当前未识别到例外说明,创建单据前需要补充原因。"
if not has_exception_note
else "当前已识别到例外说明,后续仍需审批人重点复核。"
)
),
suggestion="补充无直达、临时改签、行程变更等例外说明,或更换为符合标准的票据。",
),
)
# NOTE(review): nesting of this ``continue`` is not recoverable from the
# flattened source — confirm against the repository.
continue
# --- Generic travel meal receipt: ask the user to confirm the category. ---
if document_type == "meal_receipt" and self._is_travel_review_context(payload, document_cards, claim_groups):
if amount is not None:
amount_measurement_lines.append(
f"{card.filename}:识别为{document_type_label},金额 {amount:.2f} 元;需确认按餐补、餐费或业务招待口径归口。"
)
append_once(
f"travel-meal-card-{card.index}",
UserAgentReviewRiskBrief(
title="差旅餐饮票据待归口",
level="warning",
content=f"{card.filename} 已识别为餐饮票据,当前差旅报销单需要确认是否允许并入差旅费用。",
detail=(
f"依据《{standard_rule_name}》({standard_rule_version})的差旅票据预检口径,系统优先核算交通、住宿等差旅核心票据。"
"餐饮票据可能需要按餐费或业务招待场景拆分,并补充同行人员或客户信息。"
),
suggestion="如属于差旅餐补,请补充制度允许口径;如属于招待或普通餐费,建议拆成对应费用类型单据。",
),
)
continue
# --- Travel-type suggestion with a generic document type: ask to confirm. ---
if suggested_type in {"travel", "hotel", "transport"} and document_type in {"other", "travel_ticket"}:
append_once(
f"travel-type-uncertain-{card.index}",
UserAgentReviewRiskBrief(
title="差旅票据类型待确认",
level="warning",
content=f"{card.filename} 归入差旅场景,但票据类型仍需确认。",
detail=(
f"依据《{standard_rule_name}》({standard_rule_version}),差旅预检需要先明确票据是机票、火车票、住宿票据、打车票等,"
"再匹配对应的金额或舱位规则。当前类型识别不够稳定。"
),
suggestion="请在附件识别结果中更正票据类型,或重新上传更清晰的附件后再继续。",
),
)
# Put the measurement summary first; duplicate lines are removed while
# keeping their first-seen order.
if amount_measurement_lines:
briefs.insert(
0,
UserAgentReviewRiskBrief(
title="附件金额测算结果",
level="info",
content="系统已根据首轮上传附件识别金额,并匹配当前可执行的报销标准进行测算。",
detail="".join(dict.fromkeys(amount_measurement_lines)),
suggestion="如测算结果超标,请补充超标说明、调整金额或更正票据类型后再继续。",
),
)
return briefs

View File

@@ -0,0 +1,625 @@
from __future__ import annotations
import json
import re
from datetime import UTC, datetime, timedelta
from decimal import Decimal, InvalidOperation
from typing import Any
from sqlalchemy import or_, select
from sqlalchemy.orm import selectinload
from app.api.deps import CurrentUserContext
from app.core.agent_enums import AgentAssetStatus, AgentAssetType
from app.models.employee import Employee
from app.models.financial_record import ExpenseClaim
from app.schemas.agent_asset import AgentAssetListItem
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
from app.schemas.user_agent import (
UserAgentCitation,
UserAgentDraftPayload,
UserAgentExpenseQueryRecord,
UserAgentQueryPayload,
UserAgentQueryStatusGroup,
UserAgentReviewAction,
UserAgentReviewClaimGroup,
UserAgentReviewDocumentCard,
UserAgentReviewDocumentField,
UserAgentReviewEditField,
UserAgentReviewPayload,
UserAgentReviewRiskBrief,
UserAgentReviewSlotCard,
UserAgentRequest,
UserAgentSuggestedAction,
)
from app.services.agent_assets import AgentAssetService
from app.services.expense_claims import ExpenseClaimService
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy, resolve_document_type_label
from app.services.risk_ontology_bridge import resolve_rule_codes_for_risk_check
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
from app.services.user_agent_constants import *
class UserAgentReviewTravelReceiptMixin:
def _is_travel_review_context(
    self,
    payload: UserAgentRequest,
    document_cards: list[UserAgentReviewDocumentCard],
    claim_groups: list[UserAgentReviewClaimGroup],
) -> bool:
    """Decide whether the current review should follow the travel flow.

    The answer is True as soon as one of these signals is present, checked
    in this order: the entity expense type code, a claim group, a document
    card's suggested expense type, or a travel keyword in the free text
    (message, context input text, context expense type, form expense type).
    """
    travel_types = {"travel", "hotel", "transport"}
    travel_keywords = ("差旅", "出差", "机票", "火车", "高铁", "酒店", "住宿")

    entity_type = self._collect_entity_values(payload).get("expense_type_code", "")
    form_values = self._resolve_review_form_values(payload)
    form_type = str(form_values.get("expense_type") or "").strip()
    # The free text is assembled up front, before any signal is tested.
    free_text = " ".join(
        (
            str(payload.message or ""),
            str(payload.context_json.get("user_input_text") or ""),
            str(payload.context_json.get("expense_type") or ""),
            form_type,
        )
    )

    if entity_type in travel_types:
        return True
    for group in claim_groups:
        if group.group_code == "travel" or group.expense_type in travel_types:
            return True
    for card in document_cards:
        if card.suggested_expense_type in travel_types:
            return True
    for keyword in travel_keywords:
        if keyword in free_text:
            return True
    return False
def _build_travel_receipt_state(
self,
payload: UserAgentRequest,
*,
document_cards: list[UserAgentReviewDocumentCard],
claim_groups: list[UserAgentReviewClaimGroup],
) -> dict[str, Any]:
empty_state: dict[str, Any] = {
"is_travel_context": False,
"has_long_distance_ticket": False,
"ticket_type_label": "",
"ticket_amount": Decimal("0.00"),
"destination": "",
"days": 1,
"has_hotel_invoice": False,
"has_local_transport": False,
"required_missing_labels": [],
"optional_missing_labels": [],
"blocks_next_step": False,
}
if not document_cards or not self._is_travel_review_context(payload, document_cards, claim_groups):
return empty_state
long_distance_cards = [card for card in document_cards if self._is_long_distance_travel_card(card)]
if not long_distance_cards:
return {
**empty_state,
"is_travel_context": True,
}
has_hotel_invoice = any(self._is_review_hotel_card(card) for card in document_cards)
has_local_transport = any(self._is_local_transport_receipt_card(card) for card in document_cards)
required_missing_labels = [] if has_hotel_invoice else ["酒店的报销票据待上传(必须)"]
optional_missing_labels = [] if has_local_transport else ["市内交通/乘车票据可继续上传(非必须)"]
ticket_amount = sum(
(self._extract_amount_decimal_from_card(card) or Decimal("0.00"))
for card in long_distance_cards
).quantize(Decimal("0.01"))
return {
**empty_state,
"is_travel_context": True,
"has_long_distance_ticket": True,
"ticket_type_label": self._resolve_travel_ticket_type_label(long_distance_cards),
"ticket_amount": ticket_amount,
"destination": self._resolve_travel_receipt_destination(payload, long_distance_cards),
"days": self._resolve_travel_receipt_days(payload, long_distance_cards),
"has_hotel_invoice": has_hotel_invoice,
"has_local_transport": has_local_transport,
"required_missing_labels": required_missing_labels,
"optional_missing_labels": optional_missing_labels,
"blocks_next_step": bool(required_missing_labels),
}
@staticmethod
def _is_long_distance_travel_card(card: UserAgentReviewDocumentCard) -> bool:
document_type = str(card.document_type or "").strip().lower()
return document_type in {"train_ticket", "flight_itinerary"}
@staticmethod
def _is_local_transport_receipt_card(card: UserAgentReviewDocumentCard) -> bool:
document_type = str(card.document_type or "").strip().lower()
suggested_type = str(card.suggested_expense_type or "").strip().lower()
return document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"} or (
suggested_type == "transport" and document_type not in {"train_ticket", "flight_itinerary"}
)
@staticmethod
def _resolve_travel_ticket_type_label(cards: list[UserAgentReviewDocumentCard]) -> str:
labels: list[str] = []
for card in cards:
document_type = str(card.document_type or "").strip().lower()
if document_type == "train_ticket" and "火车" not in labels:
labels.append("火车")
if document_type == "flight_itinerary" and "飞机" not in labels:
labels.append("飞机")
return "/".join(labels) if labels else "交通"
def _resolve_travel_receipt_destination(
    self,
    payload: UserAgentRequest,
    long_distance_cards: list[UserAgentReviewDocumentCard],
) -> str:
    """Work out the trip destination from the long-distance tickets.

    For each card, fields labelled "行程" or "路线" are tried first; if none
    yields a destination, the card's flattened text is searched with
    ``TRAVEL_ROUTE_PATTERN``.  When no card helps, the location resolved from
    the payload is used.  Every result is normalised before being returned;
    "" means nothing was found.
    """
    route_labels = ("行程", "路线")
    for card in long_distance_cards:
        for field in card.fields:
            if str(field.label or "").strip() in route_labels:
                found = self._extract_travel_destination_from_route(field.value)
                if found:
                    return self._normalize_travel_destination(found)
        hit = TRAVEL_ROUTE_PATTERN.search(self._build_review_document_card_text(card))
        if hit is not None:
            return self._normalize_travel_destination(hit.group(2))

    fallback = self._resolve_location_value(payload)
    return self._normalize_travel_destination(fallback) if fallback else ""
@staticmethod
def _extract_travel_destination_from_route(value: str) -> str:
    """Return the destination segment of a route string such as "武汉-上海".

    ``TRAVEL_ROUTE_PATTERN`` is tried first and its second group is used when
    it matches.  Otherwise the text is split on common route separators and
    the last segment is returned, provided there are at least two segments.
    Returns "" when no destination can be determined.
    """
    route_text = str(value or "").strip()
    if not route_text:
        return ""
    route_match = TRAVEL_ROUTE_PATTERN.search(route_text)
    if route_match:
        return route_match.group(2).strip()
    # The alternation must not contain an empty branch: an empty alternative
    # matches the empty string, so re.split would cut between every character
    # and the "destination" would be the last single character of the text.
    # NOTE(review): the full-width tilde restores what looks like a separator
    # lost to an encoding problem — confirm against the intended list.
    parts = [
        item.strip()
        for item in re.split(r"\s*(?:至|到|→|->|-|—|~|~)\s*", route_text)
        if item.strip()
    ]
    return parts[-1] if len(parts) >= 2 else ""
def _normalize_travel_destination(self, value: str) -> str:
candidate = re.sub(
r"(?:火车站|高铁站|动车站|车站|站|机场|航站楼)$",
"",
str(value or "").strip(),
)
if not candidate:
return ""
try:
policy = ExpenseRuleRuntimeService(self.db).load_catalog().travel_policy
except Exception:
policy = None
if policy is not None:
policy_city = self._extract_policy_city_from_text(candidate, policy)
if policy_city:
return policy_city
return candidate
def _resolve_travel_receipt_days(
    self,
    payload: UserAgentRequest,
    long_distance_cards: list[UserAgentReviewDocumentCard],
) -> int:
    """Estimate the trip length in days (inclusive, never below 1).

    Dates found in the long-distance cards win: the span between the
    earliest and latest date is used.  Without any card date the payload's
    ontology time range is used when both ends parse; otherwise 1.
    """
    found: list[datetime] = []
    for card in long_distance_cards:
        found.extend(self._extract_dates_from_text(self._build_review_document_card_text(card)))
    if found:
        span = max(found).date() - min(found).date()
        return max(1, span.days + 1)

    begin = self._parse_date_text(payload.ontology.time_range.start_date or "")
    finish = self._parse_date_text(payload.ontology.time_range.end_date or "")
    if not (begin and finish):
        return 1
    return max(1, (finish.date() - begin.date()).days + 1)
@staticmethod
def _extract_dates_from_text(text: str) -> list[datetime]:
    """Return every date in ``text`` that ``DATE_TEXT_PATTERN`` finds and that parses.

    Group 1 of each match is handed to ``_parse_date_text``; unparseable
    matches are dropped and the original order is kept.
    """
    parse = UserAgentReviewTravelReceiptMixin._parse_date_text
    candidates = (parse(hit.group(1)) for hit in DATE_TEXT_PATTERN.finditer(str(text or "")))
    return [moment for moment in candidates if moment is not None]
@staticmethod
def _parse_date_text(value: str) -> datetime | None:
raw_value = str(value or "").strip()
if not raw_value:
return None
normalized = (
raw_value.replace("", "-")
.replace("", "-")
.replace("/", "-")
.replace("", "")
.strip()
)
parts = [part for part in normalized.split("-") if part]
if len(parts) != 3:
return None
try:
year, month, day = (int(part) for part in parts)
return datetime(year, month, day)
except ValueError:
return None
def _build_travel_receipt_briefs(
self,
travel_receipt_state: dict[str, Any],
) -> list[UserAgentReviewRiskBrief]:
if not travel_receipt_state.get("has_long_distance_ticket"):
return []
required_labels = [
str(item).strip()
for item in travel_receipt_state.get("required_missing_labels", [])
if str(item).strip()
]
optional_labels = [
str(item).strip()
for item in travel_receipt_state.get("optional_missing_labels", [])
if str(item).strip()
]
if not required_labels and not optional_labels:
return []
content_parts = [*required_labels, *optional_labels]
required_text = "".join(required_labels)
optional_text = "".join(optional_labels)
return [
UserAgentReviewRiskBrief(
title="差旅票据待补充",
level="warning" if required_labels else "info",
content="".join(content_parts),
detail=(
"系统已识别到长途交通票据,会按差旅报销口径核对住宿、交通等票据完整性。"
+ (f"当前必须补充:{required_text}" if required_text else "")
+ (f"当前还可以补充:{optional_text}" if optional_text else "")
),
suggestion=(
"请先补充酒店住宿发票或住宿清单;在补齐前只能保存为草稿。"
if required_labels
else "如还有市内交通、打车、地铁或停车等乘车票据,可以继续上传;没有也可以进入下一步或保存草稿。"
),
)
]
def _resolve_review_travel_allowance_standard(
self,
policy: RuntimeTravelPolicy,
*,
declared_city: str,
card_text: str,
) -> tuple[str, Decimal] | None:
meal_limits = getattr(policy, "allowance_limits", {}).get("meal", {})
if not meal_limits:
return None
region_label = self._resolve_review_travel_allowance_region(
" ".join([declared_city or "", card_text or ""])
)
amount = meal_limits.get(region_label)
if amount is None and region_label != "其他地区":
amount = meal_limits.get("其他地区")
region_label = "其他地区"
if amount is None:
return None
return region_label, Decimal(amount).quantize(Decimal("0.01"))
@staticmethod
def _resolve_review_travel_allowance_region(text: str) -> str:
normalized = re.sub(r"\s+", "", str(text or ""))
if not normalized:
return "其他地区"
if any(keyword in normalized for keyword in ("境外", "国外", "海外")):
return "国外"
if any(keyword in normalized for keyword in ("香港", "澳门", "台湾", "港澳台")):
return "港澳台"
if "乌鲁木齐" in normalized:
return "新疆-乌鲁木齐"
if "新疆" in normalized:
return "新疆-其他"
if any(keyword in normalized for keyword in ("西藏", "拉萨")):
return "西藏"
if any(keyword in normalized for keyword in ("北京", "上海", "天津", "重庆", "深圳", "珠海", "汕头", "厦门")):
return "直辖市/特区"
return "其他地区"
def _resolve_review_amount_scene_code(
self,
card: UserAgentReviewDocumentCard,
payload: UserAgentRequest,
) -> str:
document_type = str(card.document_type or "").strip().lower()
suggested_type = str(card.suggested_expense_type or "").strip().lower()
if document_type in {"taxi_receipt", "parking_toll_receipt", "transport_receipt"}:
return "transport"
if document_type == "meal_receipt":
entity_values = self._collect_entity_values(payload)
if suggested_type == "entertainment" or entity_values.get("expense_type_code") == "entertainment":
return "entertainment"
return "meal"
if document_type == "hotel_invoice" or suggested_type == "hotel":
return "hotel"
if suggested_type in {
"travel",
"transport",
"meal",
"entertainment",
"office",
"meeting",
"training",
"communication",
"welfare",
"other",
}:
return suggested_type
return self._collect_entity_values(payload).get("expense_type_code") or "other"
@staticmethod
def _resolve_review_scene_amount_limit(scene_policy: Any | None) -> Any | None:
if scene_policy is None:
return None
return getattr(scene_policy, "item_amount_limit", None) or getattr(scene_policy, "claim_amount_limit", None)
@staticmethod
def _resolve_scene_standard_amount(limit_config: Any | None) -> Decimal | None:
if limit_config is None:
return None
warn_amount = getattr(limit_config, "warn_amount", None)
block_amount = getattr(limit_config, "block_amount", None)
amount = warn_amount if warn_amount is not None else block_amount
if amount is None:
return None
try:
return Decimal(amount).quantize(Decimal("0.01"))
except (InvalidOperation, ValueError):
return None
@staticmethod
def _evaluate_review_scene_amount(
*,
amount: Decimal,
limit_config: Any,
reason_text: str,
) -> tuple[str, Decimal] | None:
block_amount = getattr(limit_config, "block_amount", None)
warn_amount = getattr(limit_config, "warn_amount", None)
exception_keywords = list(getattr(limit_config, "exception_keywords", []) or [])
has_exception = UserAgentReviewTravelReceiptMixin._text_contains_any(reason_text, exception_keywords)
if block_amount is not None and amount > Decimal(block_amount):
return ("high", Decimal(block_amount).quantize(Decimal("0.01")))
if warn_amount is not None and amount > Decimal(warn_amount):
return ("high", Decimal(warn_amount).quantize(Decimal("0.01")))
return None
def _resolve_review_employee_grade(self, payload: UserAgentRequest, *, employee: Employee | None) -> str:
if employee is not None and employee.grade:
return str(employee.grade).strip()
review_form_values = self._resolve_review_form_values(payload)
for source in (
review_form_values,
payload.context_json,
payload.tool_payload,
):
for key in ("employee_grade", "grade", "user_grade", "position_grade"):
value = str(source.get(key) or "").strip() if isinstance(source, dict) else ""
if value:
return value
return ""
def _build_review_reason_corpus(self, payload: UserAgentRequest) -> str:
    """Collect the user's stated reasons and locations into one newline-joined text.

    Sources are the message, the context's user input text, and the form's
    reason / business reason / location / business location values.  Blank
    entries are skipped and each entry is trimmed.
    """
    form_values = self._resolve_review_form_values(payload)
    raw_items = [
        payload.message,
        payload.context_json.get("user_input_text"),
        form_values.get("reason"),
        form_values.get("business_reason"),
        form_values.get("location"),
        form_values.get("business_location"),
    ]
    trimmed = (str(item or "").strip() for item in raw_items)
    return "\n".join(piece for piece in trimmed if piece)
def _resolve_declared_travel_city(self, payload: UserAgentRequest, policy: RuntimeTravelPolicy) -> str:
    """Return the first policy city mentioned by the user, or "" when none is.

    Candidates are checked in order: the form's business location, the
    form's location, the location resolved from the payload, and finally
    the raw message.
    """
    form_values = self._resolve_review_form_values(payload)
    candidates = [
        str(form_values.get("business_location") or ""),
        str(form_values.get("location") or ""),
        self._resolve_location_value(payload),
        str(payload.message or ""),
    ]
    matches = (self._extract_policy_city_from_text(text, policy) for text in candidates)
    return next((city for city in matches if city), "")
@staticmethod
def _build_review_document_card_text(card: UserAgentReviewDocumentCard) -> str:
field_text = " ".join(f"{field.label}:{field.value}" for field in card.fields)
return " ".join(
[
str(card.filename or ""),
str(card.document_type or ""),
str(card.scene_label or ""),
str(card.summary or ""),
field_text,
]
).strip()
@staticmethod
def _is_review_hotel_card(card: UserAgentReviewDocumentCard) -> bool:
document_type = str(card.document_type or "").strip().lower()
suggested_type = str(card.suggested_expense_type or "").strip().lower()
scene_label = str(card.scene_label or "").strip()
return document_type == "hotel_invoice" or suggested_type == "hotel" or "住宿" in scene_label
@staticmethod
def _extract_amount_decimal_from_card(card: UserAgentReviewDocumentCard) -> Decimal | None:
for field in card.fields:
if field.label != "金额":
continue
normalized = str(field.value or "").replace("", "").replace("", "").replace("¥", "").replace(",", "").strip()
try:
amount = Decimal(normalized).quantize(Decimal("0.01"))
except (InvalidOperation, ValueError):
continue
if amount > Decimal("0.00"):
return amount
return None
@staticmethod
def _extract_review_hotel_night_count(card: UserAgentReviewDocumentCard) -> int:
    """Read the number of hotel nights from the card's summary and fields.

    ``TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN`` is searched in the combined text
    and its first group is read as an integer.  The result is never below 1;
    1 is also returned when nothing matches or the group is not an integer.
    """
    summary = card.summary or ''
    field_blob = ' '.join(f'{field.label}:{field.value}' for field in card.fields)
    found = TRAVEL_REVIEW_HOTEL_NIGHT_PATTERN.search(f"{summary} {field_blob}")
    if found is None:
        return 1
    try:
        nights = int(found.group(1))
    except (TypeError, ValueError):
        return 1
    return max(1, nights)
@staticmethod
def _extract_policy_city_from_text(text: str, policy: RuntimeTravelPolicy) -> str:
normalized = str(text or "").strip()
if not normalized:
return ""
city_names = set(policy.city_tiers.keys())
city_names.update(getattr(policy, "hotel_city_limits", {}).keys())
for city in sorted(city_names, key=lambda item: len(item), reverse=True):
if city in normalized:
return city
return ""
@staticmethod
def _format_travel_city_tier(city_tier: str) -> str:
return {
"tier_1": "一线城市",
"tier_2": "重点城市",
"tier_3": "其他城市",
}.get(str(city_tier or "").strip(), "当前城市")
@staticmethod
def _resolve_review_hotel_cap(
policy: RuntimeTravelPolicy,
*,
grade_band: str,
city: str,
city_tier: str,
) -> Decimal:
normalized_city = str(city or "").strip()
if normalized_city and getattr(policy, "hotel_city_limits", None):
city_limits = policy.hotel_city_limits.get(normalized_city, {})
city_cap = city_limits.get(grade_band)
if city_cap is not None:
return Decimal(city_cap).quantize(Decimal("0.01"))
return Decimal(policy.hotel_limits.get(grade_band, {}).get(city_tier, Decimal("0.00"))).quantize(
Decimal("0.01")
)
def _detect_review_transport_class(
    self,
    card: UserAgentReviewDocumentCard,
    policy: RuntimeTravelPolicy,
) -> tuple[str, str, int] | None:
    """Detect the flight cabin or train seat class mentioned on a card.

    Returns ``(transport_kind, class_label, class_level)`` for the first
    configured class keyword found in the card's whitespace-free text.
    Flight classes are considered when the card is a flight itinerary or
    mentions flight words, then train classes under the analogous condition.
    None is returned when the text is empty or no keyword matches.
    """
    document_type = str(card.document_type or "").strip().lower()
    text = re.sub(r"\s+", "", self._build_review_document_card_text(card))
    if not text:
        return None

    # (kind, matching document type, hint words, policy attribute holding the class configs)
    specs = (
        ("flight", "flight_itinerary", ("机票", "航班", "登机牌"), "flight_classes"),
        ("train", "train_ticket", ("火车", "高铁", "动车", "铁路"), "train_classes"),
    )
    for transport_kind, expected_type, hints, attr_name in specs:
        if document_type != expected_type and not any(hint in text for hint in hints):
            continue
        for config in getattr(policy, attr_name):
            label = str(config.keyword or "").strip()
            if label and label in text:
                return transport_kind, label, int(config.level)
    return None
@staticmethod
def _text_contains_any(text: str, keywords: list[str] | tuple[str, ...]) -> bool:
compact = re.sub(r"\s+", "", str(text or ""))
return bool(compact) and any(str(keyword or "").strip() and str(keyword).strip() in compact for keyword in keywords)
@staticmethod
def _resolve_submission_blocked_reasons(payload: UserAgentRequest) -> list[str]:
raw_reasons = payload.tool_payload.get("submission_blocked_reasons")
submission_blocked = bool(payload.tool_payload.get("submission_blocked"))
if raw_reasons is None and submission_blocked:
raw_reasons = payload.tool_payload.get("missing_fields")
if raw_reasons is None and not submission_blocked:
return []
reasons: list[str] = []
if isinstance(raw_reasons, list):
reasons.extend(str(item or "").strip() for item in raw_reasons)
elif isinstance(raw_reasons, str):
reasons.extend(
item.strip()
for item in re.split(r"[;\n]+", raw_reasons)
if item.strip()
)
if not reasons and submission_blocked:
message = str(payload.tool_payload.get("message") or "").strip()
for prefix in (
"提交前请先补全信息:",
"AI预审暂未通过原因如下",
"AI预审未通过原因如下",
"AI预审暂未通过",
"AI预审未通过",
):
if message.startswith(prefix):
message = message[len(prefix):].strip()
break
if message:
reasons.extend(
item.strip()
for item in re.split(r"[;\n]+", message)
if item.strip() and not item.strip().startswith("AI预审暂未通过")
)
return list(dict.fromkeys(reason for reason in reasons if reason))

View File

@@ -0,0 +1,82 @@
{
"file_name": "酒店2.jpg",
"storage_key": "5544b2a0-a6f5-4ef8-b5b6-c1ac1b03772f/07085673-a7df-4622-abb7-12f6552c780d/酒店2.jpg",
"media_type": "image/jpeg",
"size_bytes": 156877,
"uploaded_at": "2026-05-21T14:19:49.450265+00:00",
"previewable": true,
"preview_kind": "image",
"preview_storage_key": "5544b2a0-a6f5-4ef8-b5b6-c1ac1b03772f/07085673-a7df-4622-abb7-12f6552c780d/酒店2.preview.jpg",
"preview_media_type": "image/jpeg",
"preview_file_name": "酒店2.preview.jpg",
"analysis": {
"severity": "pass",
"label": "AI提示符合条件",
"headline": "AI提示附件符合基础校验条件",
"summary": "已识别到票据类型和关键字段,且符合当前费用场景的附件要求。",
"points": [
"票据类型:已识别为酒店住宿票据。",
"附件类型要求:当前费用项目为住宿票,已识别为酒店住宿票据。",
"金额字段:已识别到与当前明细接近的金额 2400.00 元。"
],
"suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。"
},
"document_info": {
"document_type": "hotel_invoice",
"document_type_label": "酒店住宿票据",
"scene_code": "hotel",
"scene_label": "住宿票据",
"fields": [
{
"key": "amount",
"label": "金额",
"value": "2400元"
},
{
"key": "date",
"label": "日期",
"value": "2026-02-23"
},
{
"key": "merchant_name",
"label": "商户",
"value": "上海喜来登酒店"
},
{
"key": "invoice_number",
"label": "票据号码",
"value": "SH-SAMPLE-20260223-003"
}
]
},
"requirement_check": {
"matches": true,
"current_expense_type": "hotel_ticket",
"current_expense_type_label": "住宿票",
"allowed_scene_labels": [],
"allowed_document_type_labels": [],
"recognized_scene_code": "hotel",
"recognized_scene_label": "住宿票据",
"recognized_document_type": "hotel_invoice",
"recognized_document_type_label": "酒店住宿票据",
"mismatch_severity": "high",
"rule_code": "rule.expense.scene_submission_standard",
"rule_name": "报销场景提交与附件标准",
"message": "当前费用项目为住宿票,已识别为酒店住宿票据。"
},
"ocr_status": "recognized",
"ocr_error": "",
"ocr_text": "上海喜来登酒店(样例)\n住宿消费明细单\n单号SH-SAMPLE-20260223-003\n出单期2026年2月23\n宾客姓名\n曹笑竹\n房间类型豪华床房\n入住日期\n2026年2月20日\n住晚数 3晚\n离店期 2026年223日\n付款式 现/信卡/其他\n日期\n项目\n计费说明\n单价\n数量\n金额\n2026年2月20日\n至\n住宿费\n豪华大床房\n¥800/晚\n3\n¥2400\n2026年2月22日\n额写贰仟肆佰元整\n合计¥2400\n温馨提示如您对以上账单有任何疑问请在离店后7天内与酒店联系感谢您的理解与支持。\n酒店联系式上海喜来登酒店\n地址上海市浦东新区银城中路88号 电话021-12345678\n样例票据|仅供系统测试|无效凭证",
"ocr_summary": "上海喜来登酒店样例住宿消费明细单单号SH-SAMPLE-20260223-003",
"ocr_avg_score": 0.9784442763775587,
"ocr_line_count": 32,
"ocr_classification_source": "rule",
"ocr_classification_confidence": 0.84,
"ocr_classification_evidence": [
"住宿",
"入住",
"离店",
"酒店"
],
"ocr_warnings": []
}

View File

@@ -0,0 +1,87 @@
{
"file_name": "2月23_上海-武汉.pdf",
"storage_key": "5544b2a0-a6f5-4ef8-b5b6-c1ac1b03772f/ac0a7cc8-7152-41e3-bcce-bd358459a5a8/2月23_上海-武汉.pdf",
"media_type": "application/pdf",
"size_bytes": 24940,
"uploaded_at": "2026-05-21T14:03:40.109269+00:00",
"previewable": true,
"preview_kind": "image",
"preview_storage_key": "5544b2a0-a6f5-4ef8-b5b6-c1ac1b03772f/ac0a7cc8-7152-41e3-bcce-bd358459a5a8/2月23_上海-武汉.preview.png",
"preview_media_type": "image/png",
"preview_file_name": "2月23_上海-武汉.preview.png",
"analysis": {
"severity": "pass",
"label": "AI提示符合条件",
"headline": "AI提示附件符合基础校验条件",
"summary": "已识别到票据类型和关键字段,且符合当前费用场景的附件要求。",
"points": [
"票据类型:已识别为火车/高铁票。",
"附件类型要求:当前费用项目为火车票,已识别为火车/高铁票。",
"金额字段:已识别到与当前明细接近的金额 354.00 元。"
],
"suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。"
},
"document_info": {
"document_type": "train_ticket",
"document_type_label": "火车/高铁票",
"scene_code": "travel",
"scene_label": "差旅票据",
"fields": [
{
"key": "amount",
"label": "金额",
"value": "354元"
},
{
"key": "date",
"label": "列车出发时间",
"value": "2026-02-23 13:54"
},
{
"key": "merchant_name",
"label": "商户",
"value": "中国铁路"
},
{
"key": "invoice_number",
"label": "票据号码",
"value": "26319166100006175398"
},
{
"key": "route",
"label": "行程",
"value": "上海-武汉"
}
]
},
"requirement_check": {
"matches": true,
"current_expense_type": "train_ticket",
"current_expense_type_label": "火车票",
"allowed_scene_labels": [],
"allowed_document_type_labels": [],
"recognized_scene_code": "travel",
"recognized_scene_label": "差旅票据",
"recognized_document_type": "train_ticket",
"recognized_document_type_label": "火车/高铁票",
"mismatch_severity": "high",
"rule_code": "rule.expense.scene_submission_standard",
"rule_name": "报销场景提交与附件标准",
"message": "当前费用项目为火车票,已识别为火车/高铁票。"
},
"ocr_status": "recognized",
"ocr_error": "",
"ocr_text": "电子发票\n铁路电子客票\n州\n国家税务总局\n发票号码26319166100006175398\n开票日期:2026年05月18日\n上海市税务局\n上海虹桥站\n武汉站\nG456\nShanghaihongqiao\nWuhan\n2026年02月23日\n13:54开\n12车08B号\n二等座\n票价¥354.00\n4201061987****1615\n曹笑竹\n电子客票号6610061086021394837402026\n购买方名称:曹笑竹\n统一社会信用代码\n买票请到12306发货请到95306\n中国铁路祝您旅途愉快",
"ocr_summary": "电子发票;(铁路电子客票);州",
"ocr_avg_score": 0.9620026834309101,
"ocr_line_count": 24,
"ocr_classification_source": "rule",
"ocr_classification_confidence": 0.88,
"ocr_classification_evidence": [
"铁路电子客票",
"电子客票",
"铁路",
"二等座"
],
"ocr_warnings": []
}

View File

@@ -0,0 +1,87 @@
{
"file_name": "2月20_武汉-上海.pdf",
"storage_key": "5544b2a0-a6f5-4ef8-b5b6-c1ac1b03772f/b4143190-f375-4f6b-8836-23eee534c99e/2月20_武汉-上海.pdf",
"media_type": "application/pdf",
"size_bytes": 24995,
"uploaded_at": "2026-05-21T14:03:02.982421+00:00",
"previewable": true,
"preview_kind": "image",
"preview_storage_key": "5544b2a0-a6f5-4ef8-b5b6-c1ac1b03772f/b4143190-f375-4f6b-8836-23eee534c99e/2月20_武汉-上海.preview.png",
"preview_media_type": "image/png",
"preview_file_name": "2月20_武汉-上海.preview.png",
"analysis": {
"severity": "pass",
"label": "AI提示符合条件",
"headline": "AI提示附件符合基础校验条件",
"summary": "已识别到票据类型和关键字段,且符合当前费用场景的附件要求。",
"points": [
"票据类型:已识别为火车/高铁票。",
"附件类型要求:当前费用项目为火车票,已识别为火车/高铁票。",
"金额字段:已识别到与当前明细接近的金额 354.00 元。"
],
"suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。"
},
"document_info": {
"document_type": "train_ticket",
"document_type_label": "火车/高铁票",
"scene_code": "travel",
"scene_label": "差旅票据",
"fields": [
{
"key": "amount",
"label": "金额",
"value": "354元"
},
{
"key": "date",
"label": "列车出发时间",
"value": "2026-02-20 07:55"
},
{
"key": "merchant_name",
"label": "商户",
"value": "中国铁路"
},
{
"key": "invoice_number",
"label": "票据号码",
"value": "26429165800002785705"
},
{
"key": "route",
"label": "行程",
"value": "武汉-上海"
}
]
},
"requirement_check": {
"matches": true,
"current_expense_type": "train_ticket",
"current_expense_type_label": "火车票",
"allowed_scene_labels": [],
"allowed_document_type_labels": [],
"recognized_scene_code": "travel",
"recognized_scene_label": "差旅票据",
"recognized_document_type": "train_ticket",
"recognized_document_type_label": "火车/高铁票",
"mismatch_severity": "high",
"rule_code": "rule.expense.scene_submission_standard",
"rule_name": "报销场景提交与附件标准",
"message": "当前费用项目为火车票,已识别为火车/高铁票。"
},
"ocr_status": "recognized",
"ocr_error": "",
"ocr_text": "电子发票\n铁路电子客票)\n州\n国家税务总局\n发票号码26429165800002785705\n湖北省税务局\n开票日期:2026年05月18日\n武汉站\n上海虹桥站\nG458\nWuhan\nShanghaihongqiao\n2026年02月20日\n07:55开\n06车01B号\n二等座\n票价¥354.00\n4201061987****1615\n曹笑竹\n电子客票号6580061086021391007342026\n购买方名称:曹笑竹\n统一社会信用代码\n买票请到12306发货请到95306\n中国铁路祝您旅途愉快",
"ocr_summary": "电子发票;(铁路电子客票);州",
"ocr_avg_score": 0.9580968717734019,
"ocr_line_count": 24,
"ocr_classification_source": "rule",
"ocr_classification_confidence": 0.88,
"ocr_classification_evidence": [
"铁路电子客票",
"电子客票",
"铁路",
"二等座"
],
"ocr_warnings": []
}

View File

@@ -0,0 +1,88 @@
{
"file_name": "2月20_武汉-上海.pdf",
"storage_key": "b00cb2a5-0af3-4a49-9f7a-1f79d0ab873a/ab4d8fae-f59d-460d-94a8-eaf644c83591/2月20_武汉-上海.pdf",
"media_type": "application/pdf",
"size_bytes": 24995,
"uploaded_at": "2026-05-22T00:38:09.743522+00:00",
"previewable": true,
"preview_kind": "image",
"preview_storage_key": "b00cb2a5-0af3-4a49-9f7a-1f79d0ab873a/ab4d8fae-f59d-460d-94a8-eaf644c83591/2月20_武汉-上海.preview.png",
"preview_media_type": "image/png",
"preview_file_name": "2月20_武汉-上海.preview.png",
"analysis": {
"severity": "pass",
"label": "AI提示符合条件",
"headline": "AI提示附件符合基础校验条件",
"summary": "已识别到票据类型和关键字段,且符合当前费用场景的附件要求。",
"points": [
"票据类型:已识别为火车/高铁票。",
"附件类型要求:当前费用项目为火车票,已识别为火车/高铁票。",
"金额字段:已识别到与当前明细接近的金额 354.00 元。"
],
"rule_basis": [],
"suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。"
},
"document_info": {
"document_type": "train_ticket",
"document_type_label": "火车/高铁票",
"scene_code": "travel",
"scene_label": "差旅票据",
"fields": [
{
"key": "amount",
"label": "金额",
"value": "354元"
},
{
"key": "date",
"label": "列车出发时间",
"value": "2026-02-20 07:55"
},
{
"key": "merchant_name",
"label": "商户",
"value": "中国铁路"
},
{
"key": "invoice_number",
"label": "票据号码",
"value": "26429165800002785705"
},
{
"key": "route",
"label": "行程",
"value": "武汉-上海"
}
]
},
"requirement_check": {
"matches": true,
"current_expense_type": "train_ticket",
"current_expense_type_label": "火车票",
"allowed_scene_labels": [],
"allowed_document_type_labels": [],
"recognized_scene_code": "travel",
"recognized_scene_label": "差旅票据",
"recognized_document_type": "train_ticket",
"recognized_document_type_label": "火车/高铁票",
"mismatch_severity": "high",
"rule_code": "rule.expense.scene_submission_standard",
"rule_name": "报销场景提交与附件标准",
"message": "当前费用项目为火车票,已识别为火车/高铁票。"
},
"ocr_status": "recognized",
"ocr_error": "",
"ocr_text": "电子发票\n铁路电子客票)\n州\n国家税务总局\n发票号码26429165800002785705\n湖北省税务局\n开票日期:2026年05月18日\n武汉站\n上海虹桥站\nG458\nWuhan\nShanghaihongqiao\n2026年02月20日\n07:55开\n06车01B号\n二等座\n票价¥354.00\n4201061987****1615\n曹笑竹\n电子客票号6580061086021391007342026\n购买方名称:曹笑竹\n统一社会信用代码\n买票请到12306发货请到95306\n中国铁路祝您旅途愉快",
"ocr_summary": "电子发票;(铁路电子客票);州",
"ocr_avg_score": 0.9580968717734019,
"ocr_line_count": 24,
"ocr_classification_source": "rule",
"ocr_classification_confidence": 0.88,
"ocr_classification_evidence": [
"铁路电子客票",
"电子客票",
"铁路",
"二等座"
],
"ocr_warnings": []
}

View File

@@ -0,0 +1,88 @@
{
"file_name": "2月23_上海-武汉.pdf",
"storage_key": "b00cb2a5-0af3-4a49-9f7a-1f79d0ab873a/b2edd3f3-9efc-44ab-bd3b-60a42f204a60/2月23_上海-武汉.pdf",
"media_type": "application/pdf",
"size_bytes": 24940,
"uploaded_at": "2026-05-22T00:38:30.927361+00:00",
"previewable": true,
"preview_kind": "image",
"preview_storage_key": "b00cb2a5-0af3-4a49-9f7a-1f79d0ab873a/b2edd3f3-9efc-44ab-bd3b-60a42f204a60/2月23_上海-武汉.preview.png",
"preview_media_type": "image/png",
"preview_file_name": "2月23_上海-武汉.preview.png",
"analysis": {
"severity": "pass",
"label": "AI提示符合条件",
"headline": "AI提示附件符合基础校验条件",
"summary": "已识别到票据类型和关键字段,且符合当前费用场景的附件要求。",
"points": [
"票据类型:已识别为火车/高铁票。",
"附件类型要求:当前费用项目为火车票,已识别为火车/高铁票。",
"金额字段:已识别到与当前明细接近的金额 354.00 元。"
],
"rule_basis": [],
"suggestion": "建议继续核对报销分类、费用说明和业务场景是否一致。"
},
"document_info": {
"document_type": "train_ticket",
"document_type_label": "火车/高铁票",
"scene_code": "travel",
"scene_label": "差旅票据",
"fields": [
{
"key": "amount",
"label": "金额",
"value": "354元"
},
{
"key": "date",
"label": "列车出发时间",
"value": "2026-02-23 13:54"
},
{
"key": "merchant_name",
"label": "商户",
"value": "中国铁路"
},
{
"key": "invoice_number",
"label": "票据号码",
"value": "26319166100006175398"
},
{
"key": "route",
"label": "行程",
"value": "上海-武汉"
}
]
},
"requirement_check": {
"matches": true,
"current_expense_type": "train_ticket",
"current_expense_type_label": "火车票",
"allowed_scene_labels": [],
"allowed_document_type_labels": [],
"recognized_scene_code": "travel",
"recognized_scene_label": "差旅票据",
"recognized_document_type": "train_ticket",
"recognized_document_type_label": "火车/高铁票",
"mismatch_severity": "high",
"rule_code": "rule.expense.scene_submission_standard",
"rule_name": "报销场景提交与附件标准",
"message": "当前费用项目为火车票,已识别为火车/高铁票。"
},
"ocr_status": "recognized",
"ocr_error": "",
"ocr_text": "电子发票\n铁路电子客票\n州\n国家税务总局\n发票号码26319166100006175398\n开票日期:2026年05月18日\n上海市税务局\n上海虹桥站\n武汉站\nG456\nShanghaihongqiao\nWuhan\n2026年02月23日\n13:54开\n12车08B号\n二等座\n票价¥354.00\n4201061987****1615\n曹笑竹\n电子客票号6610061086021394837402026\n购买方名称:曹笑竹\n统一社会信用代码\n买票请到12306发货请到95306\n中国铁路祝您旅途愉快",
"ocr_summary": "电子发票;(铁路电子客票);州",
"ocr_avg_score": 0.9620026834309101,
"ocr_line_count": 24,
"ocr_classification_source": "rule",
"ocr_classification_confidence": 0.88,
"ocr_classification_evidence": [
"铁路电子客票",
"电子客票",
"铁路",
"二等座"
],
"ocr_warnings": []
}

View File

@@ -35,13 +35,13 @@
"updated_at": "2026-05-17T13:00:09.485818+00:00",
"uploaded_by": "admin",
"version_number": 1,
"ingest_status": 4,
"ingest_status_updated_at": "2026-05-20T16:00:02.515903+00:00",
"ingest_completed_at": "",
"ingest_document_name": "",
"ingest_document_updated_at": "",
"ingest_document_sha256": "",
"ingest_agent_run_id": "run_3a0b0ecb941b4c8e"
"ingest_status": 3,
"ingest_status_updated_at": "2026-05-21T15:56:58.286585+00:00",
"ingest_completed_at": "2026-05-21T15:56:58.286585+00:00",
"ingest_document_name": "无单需求文档0506.docx",
"ingest_document_updated_at": "2026-05-17T13:00:09.485818+00:00",
"ingest_document_sha256": "00985ec85a8163be9c9ffc5eb522df18ed52d4b131ceed12102c2d75e4df85a9",
"ingest_agent_run_id": "run_9f4f60cf545c470f"
}
]
}

View File

@@ -26,8 +26,7 @@
}
},
"a8f8465df08e455ebe133351721d49f8": {
"status": "failed",
"error_msg": "Embedding func: Worker execution timeout after 60s",
"status": "processed",
"chunks_count": 6,
"chunks_list": [
"chunk-07de6ea74f60535b689f977295770273",
@@ -40,12 +39,29 @@
"content_summary": "# 产品需求文档\n## 文档信息\n| 项目 | 内容 |\n|------|------|\n| 项目名称 |\n无单报销\n|\n| 版本 | V1.0 |\n| 日期 | 2026-05-06 |\n| 状态 | 正式版 |\n---\n## 1. 项目概述\n### 1.1 项目背景\n面向\n大型企业\n从业务人员视角出发解决现有ERP使用体验不佳的问题。\n在ERP的发展历程中“单据化”曾是财务合规的一大进步它确保了每笔支出都有据可查。但不可否认传统的人工填单确实\n也制造了很多\n“枷锁”。在AI时代解...",
"content_length": 9088,
"created_at": "2026-05-19T15:59:57.283110+00:00",
"updated_at": "2026-05-19T16:00:57.323299+00:00",
"updated_at": "2026-05-21T15:56:58.097242+00:00",
"file_path": "/app/server/storage/knowledge/报销制度/a8f8465df08e455ebe133351721d49f8__无单需求文档0506.docx",
"track_id": "insert_20260519_155957_88c49850",
"metadata": {
"processing_start_time": 1779206397,
"processing_end_time": 1779206457
"processing_start_time": 1779378923,
"processing_end_time": 1779379018
}
},
"dup-de90fa8775923ae9a1669c8e24d60529": {
"status": "failed",
"content_summary": "[DUPLICATE] Original document: a8f8465df08e455ebe133351721d49f8",
"content_length": 9088,
"chunks_count": 0,
"chunks_list": [],
"created_at": "2026-05-21T15:55:23.540372+00:00",
"updated_at": "2026-05-21T15:55:23.540380+00:00",
"file_path": "/app/server/storage/knowledge/报销制度/a8f8465df08e455ebe133351721d49f8__无单需求文档0506.docx",
"track_id": "insert_20260521_155523_1e232e61",
"error_msg": "Content already exists. Original doc_id: a8f8465df08e455ebe133351721d49f8, Status: failed",
"metadata": {
"is_duplicate": true,
"original_doc_id": "a8f8465df08e455ebe133351721d49f8",
"original_track_id": "insert_20260519_155957_88c49850"
}
}
}

Some files were not shown because too many files have changed in this diff Show More