refactor(server): user_agent/steward/ocr 等服务重构并适配关联任务
- user_agent 拆分 application/locations/knowledge/response/review 四个子模块,接入申请位置语义与关联草稿分支
- steward planner/runtime/slot/plan_builder 决策链路重构,travel_reimbursement_calculator/orchestrator_expense_query 适配
- ocr/document_preview/document_intelligence/receipt_folder 复用预览与资产缓存,expense_claim_draft_flow/application_handoff 适配
- pyproject.toml 新增依赖,paddleocr bootstrap 脚本与 server_start.sh 调整
- 更新差旅/交通/通信等财务规则表,同步 document_intelligence/ocr/receipt_folder/user_agent 等测试
This commit is contained in:
@@ -19,6 +19,7 @@ dependencies = [
|
||||
"python-dotenv>=1.0.1,<2.0.0",
|
||||
"email-validator>=2.2.0,<3.0.0",
|
||||
"python-multipart>=0.0.20,<1.0.0",
|
||||
"jieba>=0.42.1,<0.43.0",
|
||||
"openpyxl>=3.1.5,<4.0.0",
|
||||
"lightrag-hku>=1.4.16,<1.5.0",
|
||||
"qdrant-client>=1.18.0,<2.0.0",
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -14,7 +14,7 @@ if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
|
||||
fi
|
||||
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 poppler-utils poppler-data
|
||||
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 poppler-utils poppler-data mupdf-tools
|
||||
|
||||
rm -rf "${OCR_VENV_DIR}"
|
||||
"${PYTHON_BIN}" -m venv "${OCR_VENV_DIR}"
|
||||
|
||||
@@ -13,7 +13,7 @@ if ! command -v "${PYTHON_BIN}" >/dev/null 2>&1; then
|
||||
fi
|
||||
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 poppler-utils poppler-data
|
||||
apt-get install -y --no-install-recommends libgl1 libglib2.0-0 poppler-utils poppler-data mupdf-tools
|
||||
|
||||
"${PYTHON_BIN}" -m venv "${OCR_VENV_DIR}"
|
||||
"${OCR_VENV_DIR}/bin/pip" install --upgrade pip
|
||||
|
||||
@@ -272,7 +272,7 @@ run_bootstrap_python() {
|
||||
}
|
||||
|
||||
dependencies_ready() {
|
||||
"$PYTHON_BIN" -c "import alembic, dotenv, email_validator, fastapi, jwt, lightrag, multipart, openpyxl, psycopg, pydantic_settings, qdrant_client, sqlalchemy, uvicorn" >/dev/null 2>&1
|
||||
"$PYTHON_BIN" -c "import alembic, dotenv, email_validator, fastapi, jieba, jwt, lightrag, multipart, openpyxl, psycopg, pydantic_settings, qdrant_client, sqlalchemy, uvicorn" >/dev/null 2>&1
|
||||
}
|
||||
|
||||
pip_ready() {
|
||||
|
||||
@@ -562,7 +562,7 @@ def _extract_document_fields(text: str, document_type: str = "") -> list[Documen
|
||||
if date_value:
|
||||
append_field("date", "日期", date_value)
|
||||
|
||||
merchant = _extract_merchant(text)
|
||||
merchant = "中国铁路" if normalized_type == "train_ticket" else _extract_merchant(text)
|
||||
if merchant:
|
||||
append_field("merchant_name", "商户", merchant)
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
import base64
|
||||
import binascii
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
@@ -11,9 +12,19 @@ from pathlib import Path
|
||||
|
||||
|
||||
class DocumentPreviewAssets:
|
||||
PDF_RENDERER_ID = "pdftoppm-png-r160-poppler-data"
|
||||
PDF_RENDERER_ID = "pdf-raster-cjk-safe-v3"
|
||||
PDF_PREVIEW_MEDIA_TYPE = "image/png"
|
||||
PDF_PREVIEW_SUFFIX = ".png"
|
||||
PDF_UNUSABLE_PREVIEW_ERRORS = (
|
||||
"Missing language pack",
|
||||
"Unknown font tag",
|
||||
"No font in show",
|
||||
)
|
||||
POPPLER_DATA_DIR_CANDIDATES = (
|
||||
"/usr/share/poppler",
|
||||
"/usr/local/share/poppler",
|
||||
"/opt/homebrew/share/poppler",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def decode_data_url(payload: str) -> tuple[str, bytes] | None:
|
||||
@@ -64,30 +75,117 @@ class DocumentPreviewAssets:
|
||||
) -> Path:
|
||||
preview_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with tempfile.TemporaryDirectory(prefix=".pdf-preview-", dir=str(preview_path.parent)) as temp_dir:
|
||||
prefix = Path(temp_dir) / "page"
|
||||
pages = cls.render_pdf_pages(
|
||||
pdf_path=pdf_path,
|
||||
output_dir=Path(temp_dir),
|
||||
timeout_seconds=timeout_seconds,
|
||||
)
|
||||
shutil.copyfile(pages[0], preview_path)
|
||||
return preview_path
|
||||
|
||||
@classmethod
def render_pdf_pages(
    cls,
    *,
    pdf_path: Path,
    output_dir: Path,
    timeout_seconds: int | float,
    resolution: int = 160,
) -> list[Path]:
    """Rasterize a PDF into ``page-*.png`` files, trying each renderer in turn.

    The candidates come from ``_pdf_render_commands`` and are attempted in
    that order. The first renderer that exits with status 0, prints none of
    the ``PDF_UNUSABLE_PREVIEW_ERRORS`` markers and leaves at least one
    ``page-*.png`` in ``output_dir`` wins.

    A renderer is skipped, and the reason recorded, when its executable is
    not on ``PATH``, it cannot be started, it exceeds ``timeout_seconds``,
    it exits non-zero, its output contains an "unusable" marker, or it
    writes no pages. Timeouts and start-up errors are handled here so that
    one hanging or broken tool does not prevent the remaining fallbacks
    from being tried.

    Args:
        pdf_path: PDF file to render.
        output_dir: Directory that receives the PNG pages; created if missing.
        timeout_seconds: Time limit applied to each renderer invocation
            separately.
        resolution: Resolution value passed to the renderer command line.

    Returns:
        The generated page files, ordered by ``_extract_pdf_page_sort_key``.

    Raises:
        RuntimeError: If no renderer produced usable pages. The message
            joins the last three recorded failures with ``;``.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    errors: list[str] = []
    render_env = cls._pdf_render_env()  # identical for every renderer
    for renderer_name, command in cls._pdf_render_commands(
        pdf_path=pdf_path,
        output_dir=output_dir,
        resolution=resolution,
    ):
        executable = shutil.which(renderer_name)
        if not executable:
            errors.append(f"{renderer_name}: executable not found")
            continue

        # Drop pages left behind by a previous, rejected renderer so they
        # cannot be mistaken for this renderer's output.
        cls._clear_rendered_pdf_pages(output_dir)
        command[0] = executable
        try:
            completed = subprocess.run(
                command,
                capture_output=True,
                text=True,
                # Renderer diagnostics may not be valid text in the current
                # encoding; never let decoding abort the fallback chain.
                errors="replace",
                timeout=timeout_seconds,
                check=False,
                env=render_env,
            )
        except subprocess.TimeoutExpired:
            errors.append(f"{renderer_name}: timed out after {timeout_seconds}s")
            continue
        except OSError as exc:
            errors.append(f"{renderer_name}: {exc}")
            continue

        detail = (completed.stderr or completed.stdout or "").strip()
        if completed.returncode != 0:
            errors.append(f"{renderer_name}: {detail or 'renderer returned non-zero status'}")
            continue
        if cls.render_output_indicates_unusable_pdf_preview(detail):
            errors.append(f"{renderer_name}: {detail or 'renderer produced unusable output'}")
            continue

        pages = sorted(output_dir.glob("page-*.png"), key=cls._extract_pdf_page_sort_key)
        if pages:
            return pages
        errors.append(f"{renderer_name}: renderer did not generate PNG pages")

    # Nothing usable was produced: leave no partial output behind.
    cls._clear_rendered_pdf_pages(output_dir)
    detail = ";".join(errors[-3:])
    raise RuntimeError(detail or "no PDF renderer generated usable PNG pages")
|
||||
|
||||
@classmethod
|
||||
def render_output_indicates_unusable_pdf_preview(cls, output: str) -> bool:
|
||||
return any(token in str(output or "") for token in cls.PDF_UNUSABLE_PREVIEW_ERRORS)
|
||||
|
||||
@classmethod
|
||||
def _pdf_render_commands(
|
||||
cls,
|
||||
*,
|
||||
pdf_path: Path,
|
||||
output_dir: Path,
|
||||
resolution: int,
|
||||
) -> list[tuple[str, list[str]]]:
|
||||
prefix = output_dir / "page"
|
||||
page_pattern = output_dir / "page-%d.png"
|
||||
return [
|
||||
(
|
||||
"pdftoppm",
|
||||
["pdftoppm", "-png", "-r", str(resolution), str(pdf_path), str(prefix)],
|
||||
),
|
||||
(
|
||||
"mutool",
|
||||
["mutool", "draw", "-r", str(resolution), "-o", str(page_pattern), str(pdf_path)],
|
||||
),
|
||||
(
|
||||
"gs",
|
||||
[
|
||||
"gs",
|
||||
"-dSAFER",
|
||||
"-dBATCH",
|
||||
"-dNOPAUSE",
|
||||
"-sDEVICE=png16m",
|
||||
f"-r{resolution}",
|
||||
f"-sOutputFile={page_pattern}",
|
||||
str(pdf_path),
|
||||
],
|
||||
),
|
||||
(
|
||||
"pdftocairo",
|
||||
["pdftocairo", "-png", "-r", str(resolution), str(pdf_path), str(prefix)],
|
||||
),
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def _pdf_render_env(cls) -> dict[str, str]:
|
||||
env = os.environ.copy()
|
||||
for candidate in cls.POPPLER_DATA_DIR_CANDIDATES:
|
||||
if (Path(candidate) / "cMap").exists():
|
||||
env.setdefault("POPPLER_DATADIR", candidate)
|
||||
break
|
||||
return env
|
||||
|
||||
@staticmethod
|
||||
def _clear_rendered_pdf_pages(output_dir: Path) -> None:
|
||||
for page in output_dir.glob("page-*.png"):
|
||||
page.unlink(missing_ok=True)
|
||||
|
||||
@staticmethod
|
||||
def _extract_pdf_page_sort_key(path: Path) -> tuple[int, str]:
|
||||
|
||||
@@ -9,7 +9,10 @@ from sqlalchemy import or_, select
|
||||
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.services.expense_claim_risk_stage import with_risk_business_stage
|
||||
from app.services.expense_claim_workflow_constants import APPLICATION_ARCHIVE_STAGE
|
||||
from app.services.expense_claim_workflow_constants import (
|
||||
APPLICATION_ARCHIVE_STAGE,
|
||||
APPLICATION_LINK_STATUS_STAGE,
|
||||
)
|
||||
|
||||
|
||||
APPLICATION_REIMBURSEMENT_TYPE_MAP = {
|
||||
@@ -248,3 +251,151 @@ class ExpenseClaimApplicationHandoffMixin:
|
||||
)
|
||||
|
||||
return archived_applications
|
||||
|
||||
@staticmethod
|
||||
def _reference_matches_deleted_reimbursement(
|
||||
flag: dict[str, Any],
|
||||
*,
|
||||
reimbursement_claim_id: str,
|
||||
reimbursement_claim_no: str,
|
||||
) -> bool:
|
||||
reference_ids = {
|
||||
str(flag.get(key) or "").strip()
|
||||
for key in (
|
||||
"generated_draft_claim_id",
|
||||
"generatedDraftClaimId",
|
||||
"reimbursement_claim_id",
|
||||
"reimbursementClaimId",
|
||||
)
|
||||
}
|
||||
reference_nos = {
|
||||
str(flag.get(key) or "").strip().upper()
|
||||
for key in (
|
||||
"generated_draft_claim_no",
|
||||
"generatedDraftClaimNo",
|
||||
"reimbursement_claim_no",
|
||||
"reimbursementClaimNo",
|
||||
)
|
||||
}
|
||||
return (
|
||||
bool(reimbursement_claim_id and reimbursement_claim_id in reference_ids)
|
||||
or bool(reimbursement_claim_no and reimbursement_claim_no.upper() in reference_nos)
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def _remove_deleted_reimbursement_link_references(
|
||||
cls,
|
||||
risk_flags: list[Any],
|
||||
*,
|
||||
reimbursement_claim_id: str,
|
||||
reimbursement_claim_no: str,
|
||||
) -> tuple[list[Any], bool]:
|
||||
next_flags: list[Any] = []
|
||||
changed = False
|
||||
stale_link_keys = {
|
||||
"generated_draft_claim_id",
|
||||
"generatedDraftClaimId",
|
||||
"generated_draft_claim_no",
|
||||
"generatedDraftClaimNo",
|
||||
"reimbursement_claim_id",
|
||||
"reimbursementClaimId",
|
||||
"reimbursement_claim_no",
|
||||
"reimbursementClaimNo",
|
||||
"handoff_event_type",
|
||||
"handoffEventType",
|
||||
"handoff_message",
|
||||
"handoffMessage",
|
||||
}
|
||||
for flag in list(risk_flags or []):
|
||||
if not isinstance(flag, dict):
|
||||
next_flags.append(flag)
|
||||
continue
|
||||
if not cls._reference_matches_deleted_reimbursement(
|
||||
flag,
|
||||
reimbursement_claim_id=reimbursement_claim_id,
|
||||
reimbursement_claim_no=reimbursement_claim_no,
|
||||
):
|
||||
next_flags.append(flag)
|
||||
continue
|
||||
|
||||
next_flag = dict(flag)
|
||||
for key in stale_link_keys:
|
||||
if key in next_flag:
|
||||
next_flag.pop(key, None)
|
||||
changed = True
|
||||
next_flags.append(next_flag)
|
||||
return next_flags, changed
|
||||
|
||||
def _sync_linked_applications_after_reimbursement_deleted(
    self,
    *,
    reimbursement_claim: ExpenseClaim,
    operator: str,
    current_user: Any,
) -> list[dict[str, str]]:
    """Return linked application claims to the "awaiting link" stage after a delete.

    For every claim yielded by ``_find_linked_application_claims`` this:

    * removes link/handoff keys that point at the deleted reimbursement
      from the application's risk flags,
    * appends a system-trace flag describing the deletion,
    * sets ``status`` to ``"approved"`` and ``approval_stage`` to
      ``APPLICATION_LINK_STATUS_STAGE``,
    * writes an audit-log entry with before/after snapshots.

    No commit is issued in this method.

    Args:
        reimbursement_claim: The reimbursement claim being deleted.
        operator: Display name recorded in the trace flag and audit log.
        current_user: Object whose ``username`` and ``role_codes``
            attributes (when present) are copied into the trace flag.

    Returns:
        One summary dict per updated application (claim id, claim number,
        next approval stage).
        NOTE(review): ``application_claim_id`` is passed through without
        ``str()`` conversion; confirm the id column is a string.
    """
    deleted_claim_id = str(reimbursement_claim.id or "").strip()
    deleted_claim_no = str(reimbursement_claim.claim_no or "").strip()
    results: list[dict[str, str]] = []

    for application in self._find_linked_application_claims(reimbursement_claim):
        status_before = str(application.status or "").strip()
        stage_before = str(application.approval_stage or "").strip()
        # Snapshot before any mutation so the audit entry records the real
        # "before" state.
        snapshot_before = self._serialize_claim(application)

        remaining_flags, removed_link_references = self._remove_deleted_reimbursement_link_references(
            list(application.risk_flags_json or []),
            reimbursement_claim_id=deleted_claim_id,
            reimbursement_claim_no=deleted_claim_no,
        )

        trace_payload = {
            "source": "application_link_sync",
            "event_type": "expense_application_reimbursement_deleted",
            "sync_event_id": str(uuid.uuid4()),
            "severity": "info",
            "actionability": "system_trace",
            "label": "关联报销单已删除",
            "message": (
                f"关联报销单 {deleted_claim_no or deleted_claim_id} 已删除,"
                "申请单已回到待关联状态。"
            ),
            "operator": operator,
            "operator_username": getattr(current_user, "username", ""),
            "operator_role_codes": [
                str(item).strip().lower()
                for item in getattr(current_user, "role_codes", [])
                if str(item).strip()
            ],
            "application_claim_id": application.id,
            "application_claim_no": application.claim_no,
            "deleted_reimbursement_claim_id": deleted_claim_id,
            "deleted_reimbursement_claim_no": deleted_claim_no,
            "previous_status": status_before,
            "previous_approval_stage": stage_before,
            "next_status": "approved",
            "next_approval_stage": APPLICATION_LINK_STATUS_STAGE,
            "removed_link_references": removed_link_references,
            "created_at": datetime.now(UTC).isoformat(),
        }
        sync_flag = with_risk_business_stage(trace_payload, "expense_application")

        application.status = "approved"
        application.approval_stage = APPLICATION_LINK_STATUS_STAGE
        application.risk_flags_json = [*remaining_flags, sync_flag]

        summary = {
            "application_claim_id": application.id,
            "application_claim_no": str(application.claim_no or "").strip(),
            "next_approval_stage": APPLICATION_LINK_STATUS_STAGE,
        }
        results.append(summary)

        self.audit_service.log_action(
            actor=operator,
            action="expense_application.unlink_deleted_reimbursement",
            resource_type="expense_claim",
            resource_id=application.id,
            before_json=snapshot_before,
            after_json=self._serialize_claim(application),
        )

    return results
|
||||
|
||||
@@ -714,6 +714,17 @@ class ExpenseClaimAttachmentOperationsMixin:
|
||||
timeout_seconds=OcrService(self.db).settings.ocr_timeout_seconds,
|
||||
)
|
||||
except Exception:
|
||||
metadata.update(
|
||||
{
|
||||
"previewable": True,
|
||||
"preview_kind": "pdf",
|
||||
"preview_storage_key": self._attachment_storage.to_storage_key(file_path),
|
||||
"preview_media_type": "application/pdf",
|
||||
"preview_file_name": file_path.name,
|
||||
"preview_rendered_with": "",
|
||||
}
|
||||
)
|
||||
self._attachment_storage.write_meta(file_path, metadata)
|
||||
return metadata
|
||||
|
||||
metadata.update(
|
||||
|
||||
@@ -827,8 +827,8 @@ class ExpenseClaimDraftFlowMixin(ExpenseClaimApplicationLinkMixin, ExpenseClaimD
|
||||
document_count = max(len(context_documents), len(attachment_names), self._resolve_attachment_count(context_json))
|
||||
return {
|
||||
"message": (
|
||||
f"检测到你已有草稿 {association_candidate.claim_no},"
|
||||
f"当前新上传了 {document_count} 张票据,请先选择关联到现有草稿,或单独建立新的报销单。"
|
||||
f"检测到您已有草稿 {association_candidate.claim_no},"
|
||||
f"当前新上传了 {document_count} 张票据,请先选择关联到现有草稿,或单独新建一张报销单。"
|
||||
),
|
||||
"draft_only": False,
|
||||
"status": "pending_association_decision",
|
||||
@@ -859,7 +859,7 @@ class ExpenseClaimDraftFlowMixin(ExpenseClaimApplicationLinkMixin, ExpenseClaimD
|
||||
if existing_draft_count >= MAX_DRAFT_CLAIMS_PER_USER:
|
||||
return {
|
||||
"message": (
|
||||
f"你当前已保存 {MAX_DRAFT_CLAIMS_PER_USER} 个草稿,请先完成已保存的草稿,"
|
||||
f"您当前已保存 {MAX_DRAFT_CLAIMS_PER_USER} 个草稿,请先完成已保存的草稿,"
|
||||
"才能再次新建草稿。"
|
||||
),
|
||||
"draft_limit_reached": True,
|
||||
|
||||
@@ -688,6 +688,13 @@ class ExpenseClaimItemActionMixin:
|
||||
|
||||
before_json = self._serialize_claim(claim)
|
||||
resource_id = claim.id
|
||||
operator = self._access_policy.resolve_current_user_display_name(current_user)
|
||||
if not self._is_expense_application_claim(claim):
|
||||
self._sync_linked_applications_after_reimbursement_deleted(
|
||||
reimbursement_claim=claim,
|
||||
operator=operator,
|
||||
current_user=current_user,
|
||||
)
|
||||
|
||||
self._release_budget_for_delete(claim, current_user)
|
||||
self._delete_claim_analysis_records(resource_id)
|
||||
@@ -1008,4 +1015,3 @@ class ExpenseClaimService(ExpenseClaimStandardAdjustmentMixin, ExpenseClaimItemA
|
||||
)
|
||||
|
||||
return claim
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from threading import Lock
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -16,11 +17,23 @@ from app.schemas.notification_state import (
|
||||
|
||||
|
||||
class NotificationStateService:
|
||||
_storage_ready_bind_ids: set[int] = set()
|
||||
_storage_ready_lock = Lock()
|
||||
|
||||
def __init__(self, db: Session) -> None:
|
||||
self.db = db
|
||||
|
||||
def ensure_storage_ready(self) -> None:
    """Create the ``NotificationState`` table once per database bind.

    Binds already handled are remembered in the class-level
    ``_storage_ready_bind_ids`` set, keyed by ``id(bind)``. The membership
    check is done once without the lock and again while holding
    ``_storage_storage_ready_lock``-style serialization via
    ``_storage_ready_lock`` (double-checked locking), so ``create_all``
    runs at most once per remembered bind.

    NOTE(review): ``id()`` values may be reused after an object is
    garbage-collected; a new bind that happens to receive a recycled id
    would be treated as already prepared. Confirm binds live for the whole
    process lifetime.
    """
    bind = self.db.get_bind()
    bind_marker = id(bind)
    prepared = self._storage_ready_bind_ids
    if bind_marker not in prepared:
        with self._storage_ready_lock:
            # Re-check under the lock: another thread may have finished the
            # work between the first check and lock acquisition.
            if bind_marker not in prepared:
                Base.metadata.create_all(bind=bind, tables=[NotificationState.__table__])
                prepared.add(bind_marker)
|
||||
|
||||
def list_states(self, current_user: CurrentUserContext) -> NotificationStateListRead:
|
||||
self.ensure_storage_ready()
|
||||
|
||||
@@ -16,11 +16,13 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from app.core.config import SERVER_DIR, get_settings
|
||||
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeFieldRead, OcrRecognizeLineRead
|
||||
from app.services.document_preview import DocumentPreviewAssets
|
||||
from app.services.document_intelligence import DocumentIntelligenceService
|
||||
|
||||
WORKER_JSON_PREFIX = "__OCR_JSON__="
|
||||
SUPPORTED_SUFFIXES = {".png", ".jpg", ".jpeg", ".webp", ".bmp", ".pdf"}
|
||||
OCR_RESULT_CACHE_LIMIT = 32
|
||||
OCR_RESULT_CACHE_PIPELINE_VERSION = f"pdf-image-ocr:{DocumentPreviewAssets.PDF_RENDERER_ID}:no-pdf-direct-v2"
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
@@ -142,16 +144,6 @@ class OcrService:
|
||||
cleanup_paths=cleanup_paths,
|
||||
text_layer=text_layer,
|
||||
)
|
||||
if self._has_usable_pdf_text_layer(text_layer):
|
||||
document = self._build_text_layer_document(
|
||||
filename=normalized_name,
|
||||
media_type=resolved_media_type,
|
||||
text_layer=text_layer,
|
||||
pdf_inputs=pdf_inputs,
|
||||
)
|
||||
documents.append(document)
|
||||
self._write_cached_document(cache_key, document)
|
||||
continue
|
||||
prepared_inputs.extend(pdf_inputs)
|
||||
for item in pdf_inputs:
|
||||
cache_keys_by_source.setdefault(item.source_key, cache_key)
|
||||
@@ -257,6 +249,7 @@ class OcrService:
|
||||
digest = hashlib.sha256(content).hexdigest()
|
||||
return "|".join(
|
||||
[
|
||||
OCR_RESULT_CACHE_PIPELINE_VERSION,
|
||||
self.settings.ocr_language,
|
||||
self.settings.ocr_device,
|
||||
self.settings.ocr_text_detection_model,
|
||||
@@ -406,11 +399,15 @@ class OcrService:
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
cleanup_paths.append(output_dir)
|
||||
|
||||
image_paths = self._convert_pdf_to_images(pdf_path=pdf_path, output_dir=output_dir)
|
||||
image_paths, preview_usable = self._convert_pdf_to_images(pdf_path=pdf_path, output_dir=output_dir)
|
||||
if not image_paths:
|
||||
raise RuntimeError("PDF 转图片后未生成可识别页面。")
|
||||
|
||||
preview_data_url = self._build_preview_data_url(image_paths[0], media_type="image/png")
|
||||
preview_data_url = (
|
||||
self._build_preview_data_url(image_paths[0], media_type="image/png")
|
||||
if preview_usable
|
||||
else ""
|
||||
)
|
||||
source_key = uuid4().hex
|
||||
descriptors: list[PreparedOcrInput] = []
|
||||
for page_index, image_path in enumerate(image_paths):
|
||||
@@ -421,7 +418,7 @@ class OcrService:
|
||||
filename=filename,
|
||||
media_type=media_type,
|
||||
page_index=page_index,
|
||||
preview_kind="image" if page_index == 0 else "",
|
||||
preview_kind="image" if page_index == 0 and preview_data_url else "",
|
||||
preview_data_url=preview_data_url if page_index == 0 else "",
|
||||
text_layer=text_layer if page_index == 0 else "",
|
||||
)
|
||||
@@ -450,27 +447,17 @@ class OcrService:
|
||||
|
||||
return self._normalize_extracted_text(completed.stdout)
|
||||
|
||||
def _convert_pdf_to_images(self, *, pdf_path: Path, output_dir: Path) -> tuple[list[Path], bool]:
    """Render ``pdf_path`` to page images through ``DocumentPreviewAssets``.

    Returns:
        ``(pages, preview_usable)``. The second element is always ``True``
        on success in this implementation.

    Raises:
        RuntimeError: When ``render_pdf_pages`` raises ``RuntimeError``; the
            original error is chained and its text appended to the message.
    """
    try:
        rendered_pages = DocumentPreviewAssets.render_pdf_pages(
            pdf_path=pdf_path,
            output_dir=output_dir,
            timeout_seconds=self.settings.ocr_timeout_seconds,
        )
    except RuntimeError as render_error:
        raise RuntimeError(f"PDF 转图片失败:{render_error}") from render_error
    else:
        return rendered_pages, True
|
||||
|
||||
@staticmethod
|
||||
def _extract_pdf_page_sort_key(path: Path) -> tuple[int, str]:
|
||||
@@ -595,30 +582,6 @@ class OcrService:
|
||||
|
||||
return documents
|
||||
|
||||
def _build_text_layer_document(
|
||||
self,
|
||||
*,
|
||||
filename: str,
|
||||
media_type: str,
|
||||
text_layer: str,
|
||||
pdf_inputs: list[PreparedOcrInput],
|
||||
) -> OcrRecognizeDocumentRead:
|
||||
first_input = pdf_inputs[0] if pdf_inputs else None
|
||||
aggregated = AggregatedOcrDocument(
|
||||
filename=filename,
|
||||
media_type=media_type,
|
||||
source_key=first_input.source_key if first_input is not None else uuid4().hex,
|
||||
page_count=max(1, len(pdf_inputs)),
|
||||
preview_kind=str(first_input.preview_kind if first_input is not None else ""),
|
||||
preview_data_url=str(first_input.preview_data_url if first_input is not None else ""),
|
||||
)
|
||||
aggregated.text_layer_fragments.append(text_layer)
|
||||
return self._finalize_document(aggregated)
|
||||
|
||||
@classmethod
|
||||
def _has_usable_pdf_text_layer(cls, text_layer: str) -> bool:
|
||||
return cls._meaningful_char_count(text_layer) >= 8
|
||||
|
||||
@staticmethod
|
||||
def _collect_descriptor_text_layer(descriptors: list[PreparedOcrInput]) -> str:
|
||||
for descriptor in descriptors:
|
||||
@@ -685,13 +648,6 @@ class OcrService:
|
||||
summary = self._summarize_text(full_text)
|
||||
preview_kind = aggregated.preview_kind
|
||||
preview_data_url = aggregated.preview_data_url
|
||||
if (
|
||||
used_text_layer
|
||||
and aggregated.media_type == "application/pdf"
|
||||
and self._placeholder_ratio(ocr_text) >= 0.12
|
||||
):
|
||||
preview_kind = ""
|
||||
preview_data_url = ""
|
||||
insight = self.document_intelligence_service.build_document_insight(
|
||||
filename=aggregated.filename,
|
||||
summary=summary,
|
||||
|
||||
@@ -214,7 +214,7 @@ class OntologyValidationMixin:
|
||||
labels = [self._display_slot_label(item) for item in missing_slots[:4]]
|
||||
if not labels:
|
||||
return "请补充更多上下文后再继续。"
|
||||
return f"请补充{'、'.join(labels)},我再继续帮你解析和处理。"
|
||||
return f"请补充{'、'.join(labels)},我会继续帮您解析和处理。"
|
||||
|
||||
@staticmethod
|
||||
def _compute_confidence(
|
||||
|
||||
@@ -570,7 +570,7 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
scoped_to_current_user = True
|
||||
else:
|
||||
conditions.append(ExpenseClaim.id == "__no_visible_claim__")
|
||||
scope_label = "你的报销单"
|
||||
scope_label = "您的报销单"
|
||||
scoped_to_current_user = True
|
||||
elif explicit_employee_names:
|
||||
conditions.append(ExpenseClaim.employee_name.in_(explicit_employee_names))
|
||||
@@ -586,7 +586,7 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
scoped_to_current_user = True
|
||||
else:
|
||||
conditions.append(ExpenseClaim.id == "__no_visible_claim__")
|
||||
scope_label = "你的报销单"
|
||||
scope_label = "您的报销单"
|
||||
scoped_to_current_user = True
|
||||
else:
|
||||
scope_label = "全部报销单"
|
||||
@@ -703,7 +703,7 @@ class OrchestratorDatabaseQueryBuilder:
|
||||
|
||||
subject_name = (employee.name if employee is not None else "") or normalized_user_id
|
||||
if subject_name:
|
||||
return conditions, "你的报销单"
|
||||
return conditions, "您的报销单"
|
||||
return conditions, "当前用户的报销单"
|
||||
|
||||
def _employee_name_is_unique(self, employee: Employee) -> bool:
|
||||
|
||||
@@ -37,10 +37,18 @@ TRAIN_ROUTE_PATTERN = re.compile(
|
||||
r"([\u4e00-\u9fa5]{2,12})站?\s*(?:至|到|→|->|—|–|-)\s*"
|
||||
r"([\u4e00-\u9fa5]{2,12})站?"
|
||||
)
|
||||
TRAIN_ROUTE_WITH_NO_PATTERN = re.compile(
|
||||
r"([\u4e00-\u9fa5]{2,12})站?\s+[GCDZKTLYS]\d{1,5}\s+"
|
||||
r"([\u4e00-\u9fa5]{2,12})站?",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
TRAIN_NO_PATTERN = re.compile(r"(?:车次|列车号)\s*[::]?\s*([GCDZKTLYS]\d{1,5})", re.IGNORECASE)
|
||||
TRAIN_STANDALONE_NO_PATTERN = re.compile(r"(?<![A-Z0-9])([GCDZKTLYS]\d{1,5})(?![A-Z0-9])", re.IGNORECASE)
|
||||
TRAIN_PASSENGER_PATTERN = re.compile(r"(?:乘车人|旅客姓名|姓名)\s*[::]?\s*([\u4e00-\u9fa5·]{2,20})")
|
||||
TRAIN_ID_PATTERN = re.compile(r"(?:有效身份证件号码|身份证件号码|证件号码|身份证号)\s*[::]?\s*([0-9Xx*]{6,24})")
|
||||
TRAIN_PASSENGER_PATTERN = re.compile(r"(?:乘车人|乘客|旅客姓名|姓名)\s*[::]?\s*([\u4e00-\u9fa5·]{2,20})")
|
||||
TRAIN_PURCHASER_NAME_PATTERN = re.compile(
|
||||
r"购买方名称\s*[::]?\s*([·\u4e00-\u9fa5]{2,20}?)(?=\s*(?:统一社会信用代码|纳税人识别号|$))"
|
||||
)
|
||||
TRAIN_ID_PATTERN = re.compile(r"(?:有效身份证件号码|身份证件号码|证件号码|身份证号|证件号)\s*[::]?\s*([0-9Xx*]{6,24})")
|
||||
TRAIN_ID_FALLBACK_PATTERN = re.compile(r"(?<![0-9A-Za-z])([0-9]{6,17}[0-9Xx*]{2,8})(?![0-9A-Za-z])")
|
||||
TRAIN_ETICKET_PATTERN = re.compile(r"(?:电子客票号|客票号)\s*[::]?\s*([A-Z0-9]{6,32})", re.IGNORECASE)
|
||||
TRAIN_SEAT_CLASS_PATTERN = re.compile(r"(商务座|特等座|一等座|二等座|一等卧|二等卧|软卧|硬卧|软座|硬座|无座)")
|
||||
@@ -50,6 +58,28 @@ TRAIN_COMBINED_SEAT_PATTERN = re.compile(r"([0-9]{1,2})车\s*([0-9]{1,3}[A-F])
|
||||
TRAIN_LOOSE_SEAT_PATTERN = re.compile(r"(?<!\d)([0-9]{1,2})\s+([0-9]{1,3}[A-F])(?![A-Za-z0-9])", re.IGNORECASE)
|
||||
TRAIN_FARE_PATTERN = re.compile(r"(?:票价|金额)\s*[::¥¥\s]*([0-9]+(?:[.,][0-9]{1,2})?)")
|
||||
TRAIN_LOOSE_FARE_PATTERN = re.compile(r"(?<!\d)([0-9]{1,6}\.\d{1,2})(?!\d)")
|
||||
TRAIN_STATION_FIELD_KEYS = {"departure_station", "arrival_station"}
|
||||
TRAIN_STATION_FIELD_LABELS = {"出发地点", "到达地点"}
|
||||
TRAIN_INVALID_STATION_TOKENS = (
|
||||
"座",
|
||||
"席",
|
||||
"扫码",
|
||||
"无效",
|
||||
"票价",
|
||||
"金额",
|
||||
"车厢",
|
||||
"座位",
|
||||
"乘客",
|
||||
"证件",
|
||||
"身份证",
|
||||
"订单",
|
||||
"单据",
|
||||
"日期",
|
||||
"渠道",
|
||||
"官方",
|
||||
"演示",
|
||||
"不可报销",
|
||||
)
|
||||
|
||||
|
||||
class ReceiptFolderStorageMixin:
|
||||
@@ -119,6 +149,29 @@ class ReceiptFolderStorageMixin:
|
||||
"preview_media_type": preview_media_type,
|
||||
"preview_rendered_with": DocumentPreviewAssets.renderer_id_for_source(media_type),
|
||||
}
|
||||
if str(media_type or "").strip() == "application/pdf":
|
||||
preview_path = receipt_dir / f"preview{DocumentPreviewAssets.PDF_PREVIEW_SUFFIX}"
|
||||
try:
|
||||
DocumentPreviewAssets.render_pdf_first_page(
|
||||
pdf_path=source_path,
|
||||
preview_path=preview_path,
|
||||
timeout_seconds=get_settings().ocr_timeout_seconds,
|
||||
)
|
||||
except Exception:
|
||||
return {
|
||||
"previewable": True,
|
||||
"preview_kind": "pdf",
|
||||
"preview_file_name": source_path.name,
|
||||
"preview_media_type": media_type,
|
||||
"preview_rendered_with": "",
|
||||
}
|
||||
return {
|
||||
"previewable": True,
|
||||
"preview_kind": "image",
|
||||
"preview_file_name": preview_path.name,
|
||||
"preview_media_type": DocumentPreviewAssets.PDF_PREVIEW_MEDIA_TYPE,
|
||||
"preview_rendered_with": DocumentPreviewAssets.PDF_RENDERER_ID,
|
||||
}
|
||||
if self._is_previewable(media_type):
|
||||
return {
|
||||
"previewable": True,
|
||||
@@ -172,6 +225,16 @@ class ReceiptFolderStorageMixin:
|
||||
timeout_seconds=get_settings().ocr_timeout_seconds,
|
||||
)
|
||||
except Exception:
|
||||
meta.update(
|
||||
{
|
||||
"previewable": True,
|
||||
"preview_kind": "pdf",
|
||||
"preview_file_name": source_path.name,
|
||||
"preview_media_type": "application/pdf",
|
||||
"preview_rendered_with": "",
|
||||
}
|
||||
)
|
||||
self._write_meta(receipt_dir, meta)
|
||||
return meta
|
||||
|
||||
meta.update(
|
||||
@@ -543,6 +606,10 @@ class ReceiptFolderTrainTicketMixin:
|
||||
value = str(field.get("value") or "").strip()
|
||||
if not label or not value:
|
||||
continue
|
||||
if key == "merchant_name" or label == "商户":
|
||||
continue
|
||||
if not cls._should_keep_train_ticket_field(key=key, label=label, value=value):
|
||||
continue
|
||||
if key == "trip_no" and label == "车次/航班":
|
||||
label = "车次"
|
||||
if key == "route" and label == "行程":
|
||||
@@ -559,6 +626,8 @@ class ReceiptFolderTrainTicketMixin:
|
||||
return
|
||||
normalized.append({"key": key, "label": label, "value": cleaned})
|
||||
|
||||
add_field("merchant_name", "商户", "中国铁路")
|
||||
|
||||
invoice_date = cls._extract_train_invoice_date(text)
|
||||
add_field("invoice_date", "开票日期", invoice_date)
|
||||
|
||||
@@ -690,6 +759,13 @@ class ReceiptFolderTrainTicketMixin:
|
||||
@classmethod
|
||||
def _extract_train_route_points(cls, text: str) -> tuple[str, str]:
|
||||
raw_text = str(text or "")
|
||||
split_line_match = TRAIN_ROUTE_WITH_NO_PATTERN.search(raw_text)
|
||||
if split_line_match:
|
||||
departure = cls._clean_train_station(split_line_match.group(1))
|
||||
arrival = cls._clean_train_station(split_line_match.group(2))
|
||||
if cls._is_valid_train_station_value(departure) and cls._is_valid_train_station_value(arrival) and departure != arrival:
|
||||
return departure, arrival
|
||||
|
||||
station_candidates: list[str] = []
|
||||
for line in raw_text.replace("\r", "\n").splitlines():
|
||||
candidate = cls._clean_train_station(line)
|
||||
@@ -697,7 +773,7 @@ class ReceiptFolderTrainTicketMixin:
|
||||
continue
|
||||
if not str(line or "").strip().endswith("站"):
|
||||
continue
|
||||
if any(token in candidate for token in ("发票", "客票", "铁路", "票价", "日期")):
|
||||
if not cls._is_valid_train_station_value(candidate):
|
||||
continue
|
||||
station_candidates.append(candidate)
|
||||
if len(station_candidates) >= 2:
|
||||
@@ -707,7 +783,7 @@ class ReceiptFolderTrainTicketMixin:
|
||||
if match:
|
||||
departure = cls._clean_train_station(match.group(1))
|
||||
arrival = cls._clean_train_station(match.group(2))
|
||||
if departure and arrival and departure != arrival:
|
||||
if cls._is_valid_train_station_value(departure) and cls._is_valid_train_station_value(arrival) and departure != arrival:
|
||||
return departure, arrival
|
||||
return "", ""
|
||||
|
||||
@@ -717,6 +793,25 @@ class ReceiptFolderTrainTicketMixin:
|
||||
cleaned = re.sub(r"(?:火车站|高铁站|站)$", "", cleaned)
|
||||
return cleaned.strip()
|
||||
|
||||
@classmethod
def _should_keep_train_ticket_field(cls, *, key: str, label: str, value: str) -> bool:
    """Decide whether an extracted train-ticket field is worth keeping.

    Station fields (by key or label) must pass
    ``_is_valid_train_station_value``; passenger fields must still have
    content after ``_clean_train_passenger_candidate``. Every other field
    is kept.
    """
    is_station_field = key in TRAIN_STATION_FIELD_KEYS or label in TRAIN_STATION_FIELD_LABELS
    if is_station_field:
        return cls._is_valid_train_station_value(value)

    is_passenger_field = key == "passenger_name" or label == "乘车人"
    if is_passenger_field:
        cleaned_name = cls._clean_train_passenger_candidate(value)
        return bool(cleaned_name)

    return True
|
||||
|
||||
@classmethod
def _is_valid_train_station_value(cls, value: str) -> bool:
    """Check that ``value`` looks like a station name.

    After ``_clean_train_station`` the text must be 2 to 12 characters
    long, contain none of ``TRAIN_INVALID_STATION_TOKENS`` and contain no
    ASCII letter or digit.
    """
    station = cls._clean_train_station(value)
    if len(station) < 2 or len(station) > 12:
        return False
    has_noise_token = any(token in station for token in TRAIN_INVALID_STATION_TOKENS)
    has_ascii_alnum = re.search(r"[A-Za-z0-9]", station) is not None
    return not (has_noise_token or has_ascii_alnum)
|
||||
|
||||
@staticmethod
|
||||
def _extract_first(pattern: re.Pattern[str], text: str) -> str:
|
||||
match = pattern.search(str(text or ""))
|
||||
@@ -724,14 +819,19 @@ class ReceiptFolderTrainTicketMixin:
|
||||
|
||||
@classmethod
|
||||
def _extract_train_passenger_name(cls, text: str, *, id_number: str = "") -> str:
|
||||
labeled = cls._extract_first(TRAIN_PASSENGER_PATTERN, text)
|
||||
lines = [line.strip() for line in str(text or "").replace("\r", "\n").splitlines() if line.strip()]
|
||||
for line in lines:
|
||||
labeled = cls._clean_train_passenger_candidate(cls._extract_first(TRAIN_PASSENGER_PATTERN, line))
|
||||
if labeled:
|
||||
return labeled
|
||||
|
||||
lines = [line.strip() for line in str(text or "").replace("\r", "\n").splitlines() if line.strip()]
|
||||
if id_number:
|
||||
for index, line in enumerate(lines):
|
||||
if id_number and id_number not in line:
|
||||
if id_number not in line:
|
||||
continue
|
||||
candidate = cls._clean_train_passenger_candidate(line.replace(id_number, " "))
|
||||
if candidate:
|
||||
return candidate
|
||||
for offset in (1, -1, 2):
|
||||
target_index = index + offset
|
||||
if target_index < 0 or target_index >= len(lines):
|
||||
@@ -740,8 +840,9 @@ class ReceiptFolderTrainTicketMixin:
|
||||
if candidate:
|
||||
return candidate
|
||||
for line in lines:
|
||||
if "购买方名称" in line:
|
||||
candidate = cls._clean_train_passenger_candidate(line.split(":", 1)[-1].split(":", 1)[-1])
|
||||
purchase_match = TRAIN_PURCHASER_NAME_PATTERN.search(line)
|
||||
if purchase_match:
|
||||
candidate = cls._clean_train_passenger_candidate(purchase_match.group(1))
|
||||
if candidate:
|
||||
return candidate
|
||||
return ""
|
||||
@@ -764,6 +865,16 @@ class ReceiptFolderTrainTicketMixin:
|
||||
"开票",
|
||||
"日期",
|
||||
"车厢",
|
||||
"席别",
|
||||
"二等座",
|
||||
"一等座",
|
||||
"商务座",
|
||||
"特等座",
|
||||
"软座",
|
||||
"硬座",
|
||||
"无座",
|
||||
"软卧",
|
||||
"硬卧",
|
||||
"座位",
|
||||
"票价",
|
||||
"金额",
|
||||
@@ -771,6 +882,14 @@ class ReceiptFolderTrainTicketMixin:
|
||||
"出发",
|
||||
"到达",
|
||||
"车次",
|
||||
"公司",
|
||||
"信用代码",
|
||||
"纳税人",
|
||||
"扫码",
|
||||
"无效",
|
||||
"二维码",
|
||||
"座席",
|
||||
"证件",
|
||||
)
|
||||
):
|
||||
return ""
|
||||
|
||||
@@ -266,7 +266,7 @@ class StewardModelPlanBuilder:
|
||||
event_id="intent_agent_function_call",
|
||||
stage="llm_function_call",
|
||||
title="识别财务事项",
|
||||
content="我识别到这句话包含出差事项,但还需要确认你要进入申请流程还是报销流程。",
|
||||
content="我识别到这句话包含出差事项,但还需要确认您是要进入申请流程还是报销流程。",
|
||||
)
|
||||
]
|
||||
raw_events = payload.get("thinking_events")
|
||||
@@ -292,7 +292,7 @@ class StewardModelPlanBuilder:
|
||||
event_id="intent_agent_pending_flow",
|
||||
stage="flow_confirmation",
|
||||
title="等待确认流程方向",
|
||||
content=f"当前输入“{request.message}”缺少明确动作词,需要先由你选择补办出差申请或发起费用报销。",
|
||||
content=f"当前输入“{request.message}”缺少明确的动作词,需要先由您选择是补办出差申请,还是发起费用报销。",
|
||||
)
|
||||
)
|
||||
return events
|
||||
@@ -302,7 +302,7 @@ class StewardModelPlanBuilder:
|
||||
candidate_labels = [item.label for item in pending_flow_confirmation.candidate_flows if item.label]
|
||||
if len(candidate_labels) >= 2:
|
||||
return (
|
||||
f"我识别到这是一次财务事项,但还不能确定你要做的是"
|
||||
f"我识别到这是一次财务事项,但还不能确定您要做的是"
|
||||
f"**{candidate_labels[0]}**还是**{candidate_labels[1]}**。请先选择一个方向。"
|
||||
)
|
||||
return "我识别到这是一次财务事项,但还需要先确认具体流程方向。"
|
||||
|
||||
@@ -335,7 +335,7 @@ class StewardPlannerFallbackMixin:
|
||||
flow_id="travel_application",
|
||||
label="先发起出差申请",
|
||||
confidence=0.86,
|
||||
reason="已先查询你名下可关联的差旅申请单,暂未查到可关联单据,因此应先申请单据。",
|
||||
reason="已先查询您名下可关联的差旅申请单,暂未查到可关联单据,因此应先申请单据。",
|
||||
ontology_fields=application_fields,
|
||||
missing_fields=self._resolve_missing_fields("expense_application", application_fields),
|
||||
)
|
||||
@@ -345,7 +345,7 @@ class StewardPlannerFallbackMixin:
|
||||
if gate.get("checked"):
|
||||
candidate_count = int(gate.get("candidate_count") or 0)
|
||||
reimbursement_label = "关联已有申请单并发起报销"
|
||||
reimbursement_reason = f"已先查到 {candidate_count} 个可关联申请单,选择后会先请你关联具体单据。"
|
||||
reimbursement_reason = f"已先查到 {candidate_count} 个可关联申请单,选择后会先请您关联具体单据。"
|
||||
return [
|
||||
StewardCandidateFlow(
|
||||
flow_id="travel_application",
|
||||
@@ -390,10 +390,10 @@ class StewardPlannerFallbackMixin:
|
||||
@staticmethod
|
||||
def _build_pending_flow_reason(gate: dict[str, Any]) -> str:
|
||||
if gate.get("checked") and int(gate.get("candidate_count") or 0) <= 0:
|
||||
return "我已经先查询你名下可关联的差旅申请单,未查到可关联单据,所以当前应先申请单据。"
|
||||
return "我已先查询您名下可关联的差旅申请单,未查到可关联单据,所以当前应先申请单据。"
|
||||
if gate.get("checked"):
|
||||
candidate_count = int(gate.get("candidate_count") or 0)
|
||||
return f"我已经先查询你名下的差旅申请单,查到 {candidate_count} 个可关联申请单,需要你确认是否关联单据后发起报销。"
|
||||
return f"我已先查询您名下的差旅申请单,查到 {candidate_count} 个可关联申请单,需要您确认是否关联单据后发起报销。"
|
||||
return "当前话术描述了出差事项,但没有明确说明要补办申请还是发起报销。"
|
||||
|
||||
@staticmethod
|
||||
@@ -404,10 +404,10 @@ class StewardPlannerFallbackMixin:
|
||||
candidate_count = int(gate.get("candidate_count") or 0)
|
||||
return (
|
||||
f"我已先查询可关联申请单,查到 {candidate_count} 个可关联申请单;"
|
||||
"你可以选择关联已有申请单发起报销,或改为补办新的出差申请。"
|
||||
"您可以选择关联已有申请单发起报销,也可以改为补办新的出差申请。"
|
||||
)
|
||||
return (
|
||||
"我识别到这是一次出差事项,但还不能确定你要做的是"
|
||||
"我识别到这是一次出差事项,但还不能确定您要做的是"
|
||||
"**补办出差申请**还是**发起费用报销**。请先选择一个方向。"
|
||||
)
|
||||
|
||||
|
||||
@@ -90,7 +90,7 @@ class StewardRuntimeDecisionAgent:
|
||||
next_action="continue_selected_flow",
|
||||
target_task_id=selected_flow_id,
|
||||
response_text=self._build_selected_flow_response_text(selected_flow_id),
|
||||
rationale="已按你选择的候选流程继续处理。",
|
||||
rationale="已按您选择的候选流程继续处理。",
|
||||
steward_state=next_state,
|
||||
model_call_traces=traces,
|
||||
)
|
||||
@@ -268,7 +268,7 @@ class StewardRuntimeDecisionAgent:
|
||||
next_action="submit_current_application",
|
||||
target_message_id=str(pending_application.get("message_id") or ""),
|
||||
target_task_id=str(pending_application.get("task_id") or ""),
|
||||
rationale="模型运行时决策暂不可用,我先按当前待提交申请单上下文处理你的确认。",
|
||||
rationale="模型运行时决策暂不可用,我先按当前待提交申请单的上下文处理您的确认。",
|
||||
model_call_traces=traces,
|
||||
)
|
||||
if confirmation_text and pending_steward_action:
|
||||
@@ -295,7 +295,7 @@ class StewardRuntimeDecisionAgent:
|
||||
target_task_id=str(current_task.get("task_id") or ""),
|
||||
field_key=field_key,
|
||||
field_value=request.user_message,
|
||||
rationale="模型运行时决策暂不可用,我先把你的补充写入当前小财管家流程字段。",
|
||||
rationale="模型运行时决策暂不可用,我先把您的补充写入当前小财管家流程字段。",
|
||||
model_call_traces=traces,
|
||||
)
|
||||
if field_key:
|
||||
|
||||
@@ -275,7 +275,7 @@ class StewardSlotDecisionAgent:
|
||||
missing_fields=missing_fields,
|
||||
question=self._build_fallback_question(field),
|
||||
options=self._sanitize_options([], [field]),
|
||||
rationale="模型字段决策暂不可用,我先按上游意图识别给出的本体缺口向你确认。",
|
||||
rationale="模型字段决策暂不可用,我先按上游意图识别给出的本体缺口向您确认。",
|
||||
model_call_traces=traces,
|
||||
)
|
||||
return StewardSlotDecisionResponse(
|
||||
@@ -285,7 +285,7 @@ class StewardSlotDecisionAgent:
|
||||
missing_fields=[],
|
||||
question="",
|
||||
options=[],
|
||||
rationale="当前任务没有上游标记的关键字段缺口,可以先生成核对结果供你确认。",
|
||||
rationale="当前任务没有上游标记的关键字段缺口,可以先生成核对结果供您确认。",
|
||||
model_call_traces=traces,
|
||||
)
|
||||
|
||||
@@ -293,7 +293,7 @@ class StewardSlotDecisionAgent:
|
||||
def _build_fallback_question(field: str) -> str:
|
||||
label = FIELD_CATALOG.get(field, {}).get("label") or field
|
||||
if field == "transport_mode":
|
||||
return "请问你这次打算怎么出行?可以选择火车、飞机或轮船。"
|
||||
return "请问您这次打算怎么出行?可以选择火车、飞机或轮船。"
|
||||
return f"当前还缺少{label},请先补充后我再继续处理。"
|
||||
|
||||
@staticmethod
|
||||
|
||||
@@ -15,14 +15,16 @@ from app.schemas.reimbursement import (
|
||||
TravelReimbursementCalculatorResponse,
|
||||
)
|
||||
from app.services.agent_assets import AgentAssetService
|
||||
from app.services.application_location_semantics import validate_application_location_text
|
||||
from app.services.expense_claims import ExpenseClaimService
|
||||
from app.services.expense_rule_runtime import RuntimeTravelPolicy, ExpenseRuleRuntimeService
|
||||
from app.services.expense_rule_runtime import ExpenseRuleRuntimeService, RuntimeTravelPolicy
|
||||
from app.services.travel_policy_grades import travel_policy_grade_key_candidates
|
||||
from app.services.travel_reimbursement_regions import (
|
||||
AMBIGUOUS_PROVINCE_CITY_NAMES,
|
||||
OTHER_REGION_LOCATION_KEYWORDS,
|
||||
OTHER_REGION_PROVINCE_KEYWORDS,
|
||||
)
|
||||
from app.services.user_agent_application_locations import normalize_application_location
|
||||
|
||||
|
||||
class TravelReimbursementCalculatorService:
|
||||
@@ -35,9 +37,13 @@ class TravelReimbursementCalculatorService:
|
||||
current_user: CurrentUserContext,
|
||||
) -> TravelReimbursementCalculatorResponse:
|
||||
days = max(1, int(payload.days))
|
||||
location = str(payload.location or "").strip()
|
||||
if not location:
|
||||
raw_location = str(payload.location or "").strip()
|
||||
if not raw_location:
|
||||
raise ValueError("请先填写出差地点。")
|
||||
location = normalize_application_location(raw_location) or raw_location
|
||||
location_error = validate_application_location_text(location)
|
||||
if location_error:
|
||||
raise ValueError(f"{location_error}请填写真实出差地点后再计算。")
|
||||
|
||||
policy = self._load_travel_policy()
|
||||
grade = self._resolve_grade(payload.grade, current_user)
|
||||
|
||||
@@ -8,20 +8,25 @@ from sqlalchemy import or_, select
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from app.models.financial_record import ExpenseClaim
|
||||
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
|
||||
from app.schemas.user_agent import (
|
||||
UserAgentDraftPayload,
|
||||
UserAgentRequest,
|
||||
UserAgentResponse,
|
||||
UserAgentSuggestedAction,
|
||||
)
|
||||
from app.schemas.reimbursement import TravelReimbursementCalculatorRequest
|
||||
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
|
||||
from app.services.expense_claim_risk_stage import with_risk_business_stage
|
||||
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
|
||||
from app.services.application_location_semantics import (
|
||||
strip_route_location_prefix_with_jieba,
|
||||
validate_application_location_text,
|
||||
)
|
||||
from app.services.application_system_estimate import apply_application_system_estimate_to_facts
|
||||
from app.services.document_numbering import (
|
||||
build_document_number,
|
||||
generate_unique_expense_claim_no,
|
||||
)
|
||||
from app.services.expense_claim_access_policy import ExpenseClaimAccessPolicy
|
||||
from app.services.expense_claim_risk_stage import with_risk_business_stage
|
||||
from app.services.travel_reimbursement_calculator import TravelReimbursementCalculatorService
|
||||
from app.services.user_agent_application_dates import (
|
||||
expand_application_time_with_days,
|
||||
resolve_application_date_range,
|
||||
@@ -33,7 +38,6 @@ from app.services.user_agent_application_summary import (
|
||||
build_application_summary_table,
|
||||
resolve_application_time_label,
|
||||
)
|
||||
from app.services.application_system_estimate import apply_application_system_estimate_to_facts
|
||||
|
||||
APPLICATION_CONTEXT_VALUES = {
|
||||
"application",
|
||||
@@ -182,6 +186,17 @@ class UserAgentApplicationSlotMixin:
|
||||
if not str(facts.get(field) or "").strip()
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _resolve_application_validation_issues(facts: dict[str, str]) -> list[dict[str, str]]:
|
||||
issues: list[dict[str, str]] = []
|
||||
location_error = validate_application_location_text(facts.get("location", ""))
|
||||
if location_error:
|
||||
issues.append({
|
||||
"field": "location",
|
||||
"message": location_error,
|
||||
})
|
||||
return issues
|
||||
|
||||
def _resolve_application_missing_fields(self, facts: dict[str, str]) -> list[str]:
|
||||
return [
|
||||
*self._resolve_application_missing_base_fields(facts),
|
||||
@@ -391,6 +406,10 @@ class UserAgentApplicationSlotMixin:
|
||||
if re.fullmatch(r"(?:去|到|前往)?[\u4e00-\u9fa5]{1,8}出差(?P<days>\d+|[一二两三四五六七八九十]{1,3})?天?", text):
|
||||
return ""
|
||||
|
||||
tokenized = strip_route_location_prefix_with_jieba(text)
|
||||
if tokenized != text:
|
||||
text = tokenized
|
||||
else:
|
||||
text = re.sub(r"^.*?(?:出差|前往|去|到|赴)[\u4e00-\u9fa5]{1,8}(?:出差)?(?P<days>\d+|[一二两三四五六七八九十]{1,3})?天?[,,\s]*", "", text)
|
||||
text = re.sub(r"^(?:出差|申请|费用申请|业务|本次|去|到|前往)\s*", "", text)
|
||||
text = text.strip(" ::,,。;;")
|
||||
@@ -537,8 +556,16 @@ class UserAgentApplicationSlotMixin:
|
||||
step: str,
|
||||
facts: dict[str, str],
|
||||
) -> list[UserAgentSuggestedAction]:
|
||||
if step == "ask_missing":
|
||||
missing_fields = self._resolve_application_missing_fields(facts)
|
||||
if step in {"ask_missing", "ask_invalid"}:
|
||||
missing_fields = (
|
||||
self._resolve_application_missing_fields(facts)
|
||||
if step == "ask_missing"
|
||||
else [
|
||||
issue.get("field", "")
|
||||
for issue in self._resolve_application_validation_issues(facts)
|
||||
if issue.get("field")
|
||||
]
|
||||
)
|
||||
return [
|
||||
UserAgentSuggestedAction(
|
||||
label="一次性补充申请信息",
|
||||
@@ -1209,7 +1236,22 @@ class UserAgentApplicationMixin(UserAgentApplicationSlotMixin, UserAgentApplicat
|
||||
"我已按「费用申请 / 事前审批」来处理这条内容。",
|
||||
"已识别信息:\n" + recognized_table,
|
||||
f"当前还需要补充:{missing_text}。",
|
||||
"请一次性补齐上述字段,我会继续生成申请核对结果并让你确认是否提交。",
|
||||
"请一次性补齐上述字段,我会继续生成申请核对结果,并请您确认是否提交。",
|
||||
]
|
||||
)
|
||||
|
||||
if step == "ask_invalid":
|
||||
issue_messages = [
|
||||
item["message"]
|
||||
for item in self._resolve_application_validation_issues(facts)
|
||||
if str(item.get("message") or "").strip()
|
||||
]
|
||||
return "\n\n".join(
|
||||
[
|
||||
"我已识别到申请信息里有需要先修正的字段。",
|
||||
"已识别信息:\n" + recognized_table,
|
||||
*issue_messages,
|
||||
"请把地点改为真实出差地点,业务事项放在事由中;修正后我再帮您提交申请。",
|
||||
]
|
||||
)
|
||||
|
||||
@@ -1473,7 +1515,7 @@ class UserAgentApplicationMixin(UserAgentApplicationSlotMixin, UserAgentApplicat
|
||||
pick("applicationType", "application_type")
|
||||
),
|
||||
"time": pick("time", "timeRange", "time_range"),
|
||||
"location": pick("location"),
|
||||
"location": normalize_application_location(pick("location")),
|
||||
"reason": reason,
|
||||
"days": pick("days"),
|
||||
"transport_mode": pick("transportMode", "transport_mode"),
|
||||
@@ -1507,6 +1549,8 @@ class UserAgentApplicationMixin(UserAgentApplicationSlotMixin, UserAgentApplicat
|
||||
payload: UserAgentRequest,
|
||||
facts: dict[str, str],
|
||||
) -> str:
|
||||
if self._resolve_application_validation_issues(facts):
|
||||
return "ask_invalid"
|
||||
if self._is_application_save_draft_action(payload):
|
||||
return "draft"
|
||||
if self._resolve_application_missing_base_fields(facts):
|
||||
@@ -1516,4 +1560,3 @@ class UserAgentApplicationMixin(UserAgentApplicationSlotMixin, UserAgentApplicat
|
||||
if self._is_application_submit_confirmation(payload):
|
||||
return "submitted"
|
||||
return "preview"
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
|
||||
DIRECT_MUNICIPALITY_DISPLAY = {
|
||||
"北京": "北京市",
|
||||
"北京市": "北京市",
|
||||
@@ -79,7 +78,7 @@ CITY_TO_PROVINCE = {
|
||||
}
|
||||
|
||||
LOCATION_NOISE_PATTERN = re.compile(
|
||||
r"(?:出差|驻场|现场|支撑|支持|部署|上线|实施|拜访|验收|会议|采购|培训|协助|处理|办理|参加|进行).*$"
|
||||
r"(?:出差|驻场|现场|支撑|支持|辅助|部署|上线|实施|拜访|验收|会议|采购|培训|协助|处理|办理|参加|进行).*$"
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -716,7 +716,7 @@ class UserAgentKnowledgeMixin(UserAgentKnowledgeHelpersMixin):
|
||||
self._append_markdown_section(
|
||||
answer_lines,
|
||||
"说明",
|
||||
["- 请补充费用类型、适用地区、职级或具体业务场景,我再继续帮你缩小范围。"],
|
||||
["- 请补充费用类型、适用地区、职级或具体业务场景,我会继续帮您缩小范围。"],
|
||||
)
|
||||
return "\n".join(answer_lines).strip()
|
||||
|
||||
@@ -729,7 +729,7 @@ class UserAgentKnowledgeMixin(UserAgentKnowledgeHelpersMixin):
|
||||
self._append_markdown_section(
|
||||
answer_lines,
|
||||
"说明",
|
||||
["- 以上只使用当前命中的知识库证据;没有在证据中出现的适用条件或金额,我不会替你默认补齐。"],
|
||||
["- 以上只使用当前命中的知识库证据;没有在证据中出现的适用条件或金额,我不会替您默认补齐。"],
|
||||
)
|
||||
return "\n".join(answer_lines).strip()
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ class UserAgentResponseMixin:
|
||||
if payload.ontology.intent == "draft":
|
||||
tool_message = str(payload.tool_payload.get("message") or "").strip()
|
||||
if payload.tool_payload.get("draft_limit_reached"):
|
||||
return tool_message or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。"
|
||||
return tool_message or "您当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。"
|
||||
if tool_message and (
|
||||
str(payload.tool_payload.get("claim_id") or "").strip()
|
||||
or str(payload.tool_payload.get("claim_no") or "").strip()
|
||||
@@ -88,12 +88,12 @@ class UserAgentResponseMixin:
|
||||
elif attachment_names:
|
||||
attachment_hint = (
|
||||
f" 我已带入 {len(attachment_names)} 份附件名称,但目前还不能直接读取附件内容,"
|
||||
"仍需要你补充关键信息。"
|
||||
"还需要您补充关键信息。"
|
||||
)
|
||||
|
||||
return (
|
||||
"可以帮你发起报销。请补充费用类型、发生时间、金额、事由和相关对象,"
|
||||
"或者直接上传票据附件,我再继续帮你判断能否报、缺什么材料,并整理待核对信息。"
|
||||
"可以帮您发起报销。请补充费用类型、发生时间、金额、事由和相关对象,"
|
||||
"或者直接上传票据附件,我会继续帮您判断能否报销、还缺哪些材料,并整理出待核对信息。"
|
||||
f"{attachment_hint}"
|
||||
)
|
||||
|
||||
@@ -122,8 +122,8 @@ class UserAgentResponseMixin:
|
||||
|
||||
return (
|
||||
f"已识别到一笔{time_text}的{expense_type}支出{amount_hint}。"
|
||||
"如果要继续整理报销核对信息,还需要补充客户单位、参与人员、费用明细和票据附件。"
|
||||
"你也可以继续上传发票或图片,我会把这些信息带入后续对话。"
|
||||
"如果需要继续整理报销核对信息,还需要补充客户单位、参与人员、费用明细和票据附件。"
|
||||
"您也可以继续上传发票或图片,我会把这些信息带入后续对话。"
|
||||
)
|
||||
|
||||
|
||||
@@ -347,7 +347,7 @@ class UserAgentResponseMixin:
|
||||
query_payload = self._build_query_payload(payload)
|
||||
scope_label = str(data.get("scope_label") or subject).strip() or subject
|
||||
if query_payload is None:
|
||||
return f"当前没有查到{scope_label}。你可以补充时间范围、单号或状态继续筛选。"
|
||||
return f"当前没有查到{scope_label}。您可以补充时间范围、单号或状态继续筛选。"
|
||||
|
||||
window_prefix = (
|
||||
f"{query_payload.window_start_date} 至 {query_payload.window_end_date}"
|
||||
@@ -367,10 +367,10 @@ class UserAgentResponseMixin:
|
||||
f"另有 {query_payload.older_record_count} 笔超过 {query_payload.window_days} 日的单据,"
|
||||
"请前往个人报销中心查看。"
|
||||
)
|
||||
return f"{window_prefix}没有查到{query_payload.scope_label}。你可以补充时间范围、单号或状态继续筛选。"
|
||||
return f"{window_prefix}没有查到{query_payload.scope_label}。您可以补充时间范围、单号或状态继续筛选。"
|
||||
|
||||
answer_parts = [
|
||||
f"已按你的筛选条件查询{query_payload.scope_label}。",
|
||||
f"已按您的筛选条件查询{query_payload.scope_label}。",
|
||||
f"下面先列出最近 {query_payload.preview_count} 条记录,点击任一单据即可查看详情。",
|
||||
f"本次共命中 {query_payload.record_count} 笔,金额合计 {query_payload.total_amount:.2f} 元。",
|
||||
]
|
||||
|
||||
@@ -68,8 +68,8 @@ class UserAgentReviewCoreMixin:
|
||||
if has_time:
|
||||
context_hint += ",并看到了业务发生时间"
|
||||
return (
|
||||
f"{context_hint}。但你还没有明确这笔单据属于哪类报销。"
|
||||
"请先在下面选择报销场景,我会按你选择的场景再继续识别时间、地点、事由、金额和所需票据,"
|
||||
f"{context_hint}。但您还没有明确这笔单据属于哪类报销。"
|
||||
"请先在下面选择报销场景,我会按您选择的场景继续识别时间、地点、事由、金额和所需票据,"
|
||||
"避免系统先入为主把项目支持、部署等描述误判成差旅。"
|
||||
)
|
||||
|
||||
|
||||
@@ -164,7 +164,7 @@ class UserAgentReviewMessageMixin:
|
||||
if payload.tool_payload.get("draft_limit_reached"):
|
||||
return (
|
||||
str(payload.tool_payload.get("message") or "").strip()
|
||||
or "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。"
|
||||
or "您当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。"
|
||||
)
|
||||
|
||||
review_action = str(payload.context_json.get("review_action") or "").strip()
|
||||
@@ -254,11 +254,11 @@ class UserAgentReviewMessageMixin:
|
||||
if claim_no:
|
||||
return (
|
||||
f"已识别出本次上传的 {document_count} 张票据。"
|
||||
f"系统检测到你已有草稿 {claim_no},请选择关联到该草稿,或单独建立一张新的报销单。"
|
||||
f"系统检测到您已有草稿 {claim_no},请选择关联到该草稿,或单独新建一张报销单。"
|
||||
)
|
||||
return (
|
||||
f"已识别出本次上传的 {document_count} 张票据。"
|
||||
"系统检测到你已有可用草稿,请先选择关联到现有草稿,或单独建立一张新的报销单。"
|
||||
"系统检测到您已有可用草稿,请先选择关联到现有草稿,或单独新建一张报销单。"
|
||||
)
|
||||
|
||||
blocked_reasons = self._resolve_submission_blocked_reasons(payload)
|
||||
|
||||
@@ -668,7 +668,7 @@ class UserAgentReviewSlotMixin:
|
||||
status="missing" if is_missing else "identified" if source in {"user_text", "user_form"} else "inferred",
|
||||
hint=f"建议补充 {SLOT_LABELS.get(key, key)}。"
|
||||
if is_missing and required
|
||||
else ("该字段来自系统辅助上下文,建议你再核对一次。" if source in {"detail_context", "ocr"} else ""),
|
||||
else ("该字段来自系统辅助上下文,建议您再核对一次。" if source in {"detail_context", "ocr"} else ""),
|
||||
evidence=evidence,
|
||||
)
|
||||
|
||||
|
||||
@@ -888,6 +888,34 @@ def test_travel_reimbursement_calculator_rejects_unrecognized_location() -> None
|
||||
)
|
||||
|
||||
|
||||
def test_travel_reimbursement_calculator_normalizes_location_mixed_with_business_content() -> None:
|
||||
with build_session() as db:
|
||||
db.add(
|
||||
Employee(
|
||||
employee_no="E9004",
|
||||
name="混合地点员工",
|
||||
email="mixed-location@example.com",
|
||||
position="产品经理",
|
||||
grade="P4",
|
||||
)
|
||||
)
|
||||
db.commit()
|
||||
|
||||
result = TravelReimbursementCalculatorService(db).calculate(
|
||||
TravelReimbursementCalculatorRequest(days=4, location="上海辅助国网仿生产服务器"),
|
||||
CurrentUserContext(
|
||||
username="mixed-location@example.com",
|
||||
name="混合地点员工",
|
||||
role_codes=[],
|
||||
is_admin=False,
|
||||
),
|
||||
)
|
||||
|
||||
assert result.location == "上海市"
|
||||
assert result.matched_city == "上海"
|
||||
assert result.hotel_amount > 0
|
||||
|
||||
|
||||
def test_agent_run_service_lists_seeded_trace_data() -> None:
|
||||
with build_session() as db:
|
||||
service = AgentRunService(db)
|
||||
|
||||
@@ -84,6 +84,33 @@ def test_document_intelligence_prefers_train_ticket_for_railway_e_ticket_invoice
|
||||
assert any(field.label == "金额" and field.value == "354元" for field in insight.fields)
|
||||
|
||||
|
||||
def test_document_intelligence_train_ticket_uses_railway_merchant_not_invoice_title() -> None:
|
||||
insight = build_document_insight(
|
||||
filename="2月20_武汉-上海.pdf",
|
||||
summary="电子发票(铁路电子客票);发票监;统一 制",
|
||||
text=(
|
||||
"电子发票(铁路电子客票)\n"
|
||||
"发票号码:26429165800002785705 湖北\n"
|
||||
"开票日期:2026年05月18日\n"
|
||||
"武汉站 G458 上海虹桥站\n"
|
||||
"Wuhan Shanghaihongqiao\n"
|
||||
"2026年02月20日 07:55开 06车01B号 二等座\n"
|
||||
"票价:¥354.00\n"
|
||||
"4201061987****1615 曹笑竹\n"
|
||||
"电子客票号:6580061086021391007342026\n"
|
||||
"购买方名称:曹笑竹 统一社会信用代码:\n"
|
||||
"买票请到12306 发货请到95306\n"
|
||||
"中国铁路祝您旅途愉快"
|
||||
),
|
||||
)
|
||||
|
||||
assert insight.document_type == "train_ticket"
|
||||
fields = {field.label: field.value for field in insight.fields}
|
||||
assert fields["商户"] == "中国铁路"
|
||||
assert fields["金额"] == "354元"
|
||||
assert fields["列车出发时间"] == "2026-02-20 07:55"
|
||||
|
||||
|
||||
def test_document_intelligence_recovers_train_ticket_from_english_station_ocr_text() -> None:
|
||||
insight = build_document_insight(
|
||||
filename="2月20_武汉-上海.pdf",
|
||||
|
||||
@@ -28,6 +28,7 @@ from app.schemas.reimbursement import (
|
||||
)
|
||||
from app.services.agent_conversations import AgentConversationService
|
||||
from app.services.budget import BudgetService
|
||||
from app.services.document_preview import DocumentPreviewAssets
|
||||
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
|
||||
from app.services.expense_claim_budget_flow import ExpenseClaimBudgetFlowMixin
|
||||
from app.services.expense_claim_workflow_constants import (
|
||||
@@ -3314,6 +3315,68 @@ def test_attachment_preview_resolves_legacy_filename_in_claim_item_directory(mon
|
||||
assert filename == "legacy-ticket.pdf"
|
||||
|
||||
|
||||
def test_attachment_pdf_preview_falls_back_to_source_when_render_fonts_missing(monkeypatch, tmp_path) -> None:
|
||||
current_user = CurrentUserContext(
|
||||
username="emp-1",
|
||||
name="张三",
|
||||
role_codes=[],
|
||||
is_admin=False,
|
||||
)
|
||||
|
||||
monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path)
|
||||
|
||||
with build_session() as db:
|
||||
claim = build_claim(expense_type="train", location="上海")
|
||||
db.add(claim)
|
||||
db.commit()
|
||||
|
||||
attachment_dir = tmp_path / claim.id / claim.items[0].id
|
||||
attachment_dir.mkdir(parents=True)
|
||||
file_path = attachment_dir / "2月20_武汉-上海.pdf"
|
||||
preview_path = attachment_dir / "2月20_武汉-上海.preview.png"
|
||||
file_path.write_bytes(b"%PDF-1.7 fake")
|
||||
preview_path.write_bytes(b"broken-preview")
|
||||
claim.items[0].invoice_id = f"{claim.id}/{claim.items[0].id}/{file_path.name}"
|
||||
db.commit()
|
||||
|
||||
storage = ExpenseClaimAttachmentStorage()
|
||||
storage.write_meta(
|
||||
file_path,
|
||||
{
|
||||
"file_name": file_path.name,
|
||||
"storage_key": storage.to_storage_key(file_path),
|
||||
"media_type": "application/pdf",
|
||||
"previewable": True,
|
||||
"preview_kind": "image",
|
||||
"preview_storage_key": storage.to_storage_key(preview_path),
|
||||
"preview_media_type": "image/png",
|
||||
"preview_file_name": preview_path.name,
|
||||
"preview_rendered_with": "pdftoppm-png-r160-poppler-data",
|
||||
},
|
||||
)
|
||||
|
||||
def fake_render_pdf_first_page(*, pdf_path, preview_path, timeout_seconds):
|
||||
raise RuntimeError("Missing language pack for 'Adobe-GB1' mapping")
|
||||
|
||||
monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", fake_render_pdf_first_page)
|
||||
|
||||
resolved_path, media_type, filename = ExpenseClaimService(db).get_claim_item_attachment_preview_content(
|
||||
claim_id=claim.id,
|
||||
item_id=claim.items[0].id,
|
||||
current_user=current_user,
|
||||
)
|
||||
|
||||
assert resolved_path == file_path
|
||||
assert media_type == "application/pdf"
|
||||
assert filename == file_path.name
|
||||
refreshed_meta = storage.read_meta(file_path)
|
||||
assert refreshed_meta["preview_kind"] == "pdf"
|
||||
assert refreshed_meta["preview_storage_key"] == storage.to_storage_key(file_path)
|
||||
assert refreshed_meta["preview_media_type"] == "application/pdf"
|
||||
assert refreshed_meta["preview_file_name"] == file_path.name
|
||||
assert refreshed_meta["preview_rendered_with"] == ""
|
||||
|
||||
|
||||
def test_submit_claim_runs_ai_review_and_routes_to_direct_manager() -> None:
|
||||
current_user = CurrentUserContext(
|
||||
username="emp-submit@example.com",
|
||||
@@ -5199,6 +5262,103 @@ def test_admin_delete_claim_unlinks_receipt_folder_items(monkeypatch, tmp_path)
|
||||
get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_admin_delete_linked_reimbursement_resets_application_link_status() -> None:
|
||||
admin_user = CurrentUserContext(
|
||||
username="superadmin",
|
||||
name="系统管理员",
|
||||
role_codes=["admin"],
|
||||
is_admin=True,
|
||||
)
|
||||
|
||||
with build_session() as db:
|
||||
application_claim = ExpenseClaim(
|
||||
id="application-delete-linked-reimbursement",
|
||||
claim_no="APP-DEL-LINKED-APPLICATION",
|
||||
employee_name="张三",
|
||||
department_name="交付部",
|
||||
project_code="PRJ-A",
|
||||
expense_type="travel_application",
|
||||
reason="支撑国网仿生产环境部署",
|
||||
location="上海",
|
||||
amount=Decimal("3000.00"),
|
||||
currency="CNY",
|
||||
invoice_count=0,
|
||||
occurred_at=datetime(2026, 6, 21, 22, 30, tzinfo=UTC),
|
||||
submitted_at=datetime(2026, 6, 21, 22, 35, tzinfo=UTC),
|
||||
status="approved",
|
||||
approval_stage=APPLICATION_LINK_STATUS_STAGE,
|
||||
risk_flags_json=[
|
||||
{
|
||||
"source": "manual_approval",
|
||||
"event_type": "expense_application_approval",
|
||||
"operator": "向万红",
|
||||
"previous_approval_stage": DIRECT_MANAGER_APPROVAL_STAGE,
|
||||
"next_status": "approved",
|
||||
"next_approval_stage": APPLICATION_LINK_STATUS_STAGE,
|
||||
"generated_draft_claim_id": "reimbursement-delete-linked-application",
|
||||
"generated_draft_claim_no": "RDELETE01",
|
||||
"created_at": "2026-06-21T22:45:00+00:00",
|
||||
}
|
||||
],
|
||||
)
|
||||
reimbursement_claim = ExpenseClaim(
|
||||
id="reimbursement-delete-linked-application",
|
||||
claim_no="RDELETE01",
|
||||
employee_name="张三",
|
||||
department_name="交付部",
|
||||
project_code="PRJ-A",
|
||||
expense_type="travel",
|
||||
reason="支撑国网仿生产环境部署报销",
|
||||
location="上海",
|
||||
amount=Decimal("3000.00"),
|
||||
currency="CNY",
|
||||
invoice_count=1,
|
||||
occurred_at=datetime(2026, 6, 21, 22, 30, tzinfo=UTC),
|
||||
submitted_at=None,
|
||||
status="draft",
|
||||
approval_stage="待提交",
|
||||
risk_flags_json=[
|
||||
{
|
||||
"source": "application_handoff",
|
||||
"event_type": "expense_application_to_reimbursement_draft",
|
||||
"application_claim_id": application_claim.id,
|
||||
"application_claim_no": application_claim.claim_no,
|
||||
}
|
||||
],
|
||||
)
|
||||
db.add_all([application_claim, reimbursement_claim])
|
||||
db.commit()
|
||||
|
||||
deleted = ExpenseClaimService(db).delete_claim(reimbursement_claim.id, admin_user)
|
||||
|
||||
assert deleted is not None
|
||||
assert deleted.claim_no == "RDELETE01"
|
||||
assert db.get(ExpenseClaim, reimbursement_claim.id) is None
|
||||
db.refresh(application_claim)
|
||||
assert application_claim.status == "approved"
|
||||
assert application_claim.approval_stage == APPLICATION_LINK_STATUS_STAGE
|
||||
|
||||
approval_flag = next(
|
||||
flag
|
||||
for flag in application_claim.risk_flags_json
|
||||
if isinstance(flag, dict) and flag.get("event_type") == "expense_application_approval"
|
||||
)
|
||||
assert "generated_draft_claim_id" not in approval_flag
|
||||
assert "generated_draft_claim_no" not in approval_flag
|
||||
|
||||
sync_flag = next(
|
||||
flag
|
||||
for flag in application_claim.risk_flags_json
|
||||
if isinstance(flag, dict) and flag.get("event_type") == "expense_application_reimbursement_deleted"
|
||||
)
|
||||
assert sync_flag["source"] == "application_link_sync"
|
||||
assert sync_flag["severity"] == "info"
|
||||
assert sync_flag["actionability"] == "system_trace"
|
||||
assert sync_flag["deleted_reimbursement_claim_id"] == "reimbursement-delete-linked-application"
|
||||
assert sync_flag["deleted_reimbursement_claim_no"] == "RDELETE01"
|
||||
assert sync_flag["next_approval_stage"] == APPLICATION_LINK_STATUS_STAGE
|
||||
|
||||
|
||||
def test_direct_manager_can_return_subordinate_claim_to_pending_submission() -> None:
|
||||
current_user = CurrentUserContext(
|
||||
username="manager-return@example.com",
|
||||
|
||||
@@ -85,6 +85,31 @@ def test_notification_state_service_persists_user_scoped_read_and_hidden_state()
|
||||
assert other_saved.states[0].hidden_at is None
|
||||
|
||||
|
||||
def test_notification_state_storage_ready_runs_once_per_database_bind(monkeypatch) -> None:
|
||||
with build_session() as db:
|
||||
service = NotificationStateService(db)
|
||||
user = CurrentUserContext(username="alice", name="Alice", role_codes=[], is_admin=False)
|
||||
calls: list[object] = []
|
||||
original_create_all = Base.metadata.create_all
|
||||
|
||||
def track_create_all(*args, **kwargs):
|
||||
calls.append(kwargs.get("bind"))
|
||||
return original_create_all(*args, **kwargs)
|
||||
|
||||
monkeypatch.setattr(Base.metadata, "create_all", track_create_all)
|
||||
|
||||
service.list_states(user)
|
||||
service.list_states(user)
|
||||
service.patch_states(
|
||||
NotificationStateBatchPatch(
|
||||
states=[NotificationStatePatch(notification_id="workbench:todo:EXP-002", read=True)]
|
||||
),
|
||||
user,
|
||||
)
|
||||
|
||||
assert len(calls) == 1
|
||||
|
||||
|
||||
def test_notification_state_endpoint_reads_and_updates_current_user_state() -> None:
|
||||
client = build_client()
|
||||
headers = {"x-auth-username": "alice", "x-auth-name": "Alice"}
|
||||
|
||||
@@ -5,19 +5,23 @@ import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from app.core.config import get_settings
|
||||
from app.services import document_preview
|
||||
from app.services.ocr import OcrService
|
||||
|
||||
|
||||
def test_ocr_runtime_installers_include_poppler_cjk_data() -> None:
|
||||
def test_ocr_runtime_installers_include_cjk_safe_pdf_rendering_tools() -> None:
|
||||
repo_root = Path(__file__).resolve().parents[2]
|
||||
dependency_sources = [
|
||||
repo_root / "docker-compose.yml",
|
||||
repo_root / "docker-compose.full.yml",
|
||||
repo_root / "server" / "scripts" / "bootstrap_paddleocr_mobile.sh",
|
||||
repo_root / "server" / "scripts" / "bootstrap_paddleocr_gpu.sh",
|
||||
]
|
||||
|
||||
for path in dependency_sources:
|
||||
assert "poppler-data" in path.read_text(encoding="utf-8")
|
||||
content = path.read_text(encoding="utf-8")
|
||||
assert "poppler-data" in content
|
||||
assert "mupdf-tools" in content
|
||||
|
||||
|
||||
def test_ocr_service_uses_worker_runtime_and_keeps_unsupported_files_as_warnings(
|
||||
@@ -163,6 +167,7 @@ def test_ocr_service_passes_configured_device_to_worker(
|
||||
text: bool,
|
||||
timeout: int,
|
||||
check: bool,
|
||||
env: dict[str, str] | None = None,
|
||||
) -> subprocess.CompletedProcess[str]:
|
||||
captured_commands.append(command)
|
||||
return subprocess.CompletedProcess(
|
||||
@@ -194,12 +199,12 @@ def test_ocr_service_converts_pdf_to_images_and_returns_image_preview(
|
||||
monkeypatch,
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
def fake_convert_pdf_to_images(self, *, pdf_path: Path, output_dir: Path) -> list[Path]:
|
||||
def fake_convert_pdf_to_images(self, *, pdf_path: Path, output_dir: Path) -> tuple[list[Path], bool]:
|
||||
first = output_dir / "page-1.png"
|
||||
second = output_dir / "page-2.png"
|
||||
first.write_bytes(b"fake-page-1")
|
||||
second.write_bytes(b"fake-page-2")
|
||||
return [first, second]
|
||||
return [first, second], True
|
||||
|
||||
def fake_invoke_worker(
|
||||
self,
|
||||
@@ -281,26 +286,143 @@ def test_ocr_service_converts_pdf_to_images_and_returns_image_preview(
|
||||
assert recognized.lines[1].page_index == 1
|
||||
|
||||
|
||||
def test_ocr_service_uses_pdf_text_layer_without_worker_runtime(
|
||||
def test_ocr_service_rejects_pdf_ocr_when_rendered_image_fonts_are_broken(
|
||||
monkeypatch,
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
def fake_convert_pdf_to_images(self, *, pdf_path: Path, output_dir: Path) -> list[Path]:
|
||||
page = output_dir / "page-1.png"
|
||||
page.write_bytes(b"fake-rendered-page")
|
||||
return [page]
|
||||
def fake_convert_pdf_to_images(self, *, pdf_path: Path, output_dir: Path) -> tuple[list[Path], bool]:
|
||||
raise RuntimeError("PDF 转图片失败:检测到中文字体映射缺失,未生成可 OCR 的图片。")
|
||||
|
||||
def fail_resolve_python(self) -> str:
|
||||
raise AssertionError("PDF 文本层可用时不应强制解析 OCR worker。")
|
||||
|
||||
def fail_invoke_worker(self, **kwargs) -> dict:
|
||||
raise AssertionError("PDF 文本层可用时不应调用 OCR worker。")
|
||||
def fake_invoke_worker(
|
||||
self,
|
||||
*,
|
||||
python_bin: str,
|
||||
worker_path: str,
|
||||
input_paths: list[Path],
|
||||
) -> dict:
|
||||
raise AssertionError("PDF 转图片已确认丢中文时,不应继续调用 OCR worker。")
|
||||
|
||||
monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
|
||||
monkeypatch.setattr(OcrService, "_resolve_python_bin", fail_resolve_python)
|
||||
monkeypatch.setattr(OcrService, "_resolve_python_bin", lambda self: "python")
|
||||
monkeypatch.setattr(OcrService, "_resolve_worker_path", lambda self: "worker.py")
|
||||
monkeypatch.setattr(OcrService, "_convert_pdf_to_images", fake_convert_pdf_to_images)
|
||||
monkeypatch.setattr(OcrService, "_invoke_worker", fail_invoke_worker)
|
||||
monkeypatch.setattr(OcrService, "_invoke_worker", fake_invoke_worker)
|
||||
get_settings.cache_clear()
|
||||
try:
|
||||
result = OcrService().recognize_files(
|
||||
[
|
||||
("2月20_武汉-上海.pdf", b"%PDF-1.7 fake", "application/pdf"),
|
||||
]
|
||||
)
|
||||
finally:
|
||||
get_settings.cache_clear()
|
||||
|
||||
failed = result.documents[0]
|
||||
assert failed.line_count == 0
|
||||
assert failed.preview_kind == ""
|
||||
assert failed.preview_data_url == ""
|
||||
assert failed.warnings == ["PDF 转图片失败:检测到中文字体映射缺失,未生成可 OCR 的图片。"]
|
||||
|
||||
|
||||
def test_ocr_pdf_conversion_tries_next_renderer_when_poppler_font_mapping_fails(
    monkeypatch,
    tmp_path: Path,
) -> None:
    """A pdftoppm run whose stderr reports a missing CJK mapping should be followed by mutool."""
    output_dir = tmp_path / "pages"
    output_dir.mkdir()
    invoked_tools: list[str] = []

    def fake_run(
        command: list[str],
        *,
        capture_output: bool,
        text: bool,
        timeout: int,
        check: bool,
    ) -> subprocess.CompletedProcess[str]:
        # Both renderers "succeed" (returncode 0); only pdftoppm reports the
        # missing language pack on stderr.
        tool = Path(command[0]).name
        invoked_tools.append(tool)
        if tool == "pdftoppm":
            payload = b"broken-preview"
            stderr = "Syntax Error: Missing language pack for 'Adobe-GB1' mapping"
        else:
            payload = b"rendered-with-chinese"
            stderr = ""
        (output_dir / "page-1.png").write_bytes(payload)
        return subprocess.CompletedProcess(
            args=command,
            returncode=0,
            stdout="",
            stderr=stderr,
        )

    def fake_which(name):
        # Pretend only the two PDF renderers are installed.
        if name in {"pdftoppm", "mutool"}:
            return f"/usr/bin/{name}"
        return None

    monkeypatch.setattr(document_preview.shutil, "which", fake_which)
    monkeypatch.setattr(subprocess, "run", fake_run)

    pages, preview_usable = OcrService()._convert_pdf_to_images(
        pdf_path=tmp_path / "ticket.pdf",
        output_dir=output_dir,
    )

    assert pages == [output_dir / "page-1.png"]
    assert preview_usable is True
    assert invoked_tools == ["pdftoppm", "mutool"]
|
||||
|
||||
|
||||
def test_ocr_service_invokes_worker_even_when_pdf_text_layer_is_usable(
|
||||
monkeypatch,
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
calls = {"worker": 0}
|
||||
|
||||
def fake_convert_pdf_to_images(self, *, pdf_path: Path, output_dir: Path) -> tuple[list[Path], bool]:
|
||||
page = output_dir / "page-1.png"
|
||||
page.write_bytes(b"fake-rendered-page")
|
||||
return [page], True
|
||||
|
||||
def fake_invoke_worker(
|
||||
self,
|
||||
*,
|
||||
python_bin: str,
|
||||
worker_path: str,
|
||||
input_paths: list[Path],
|
||||
) -> dict:
|
||||
calls["worker"] += 1
|
||||
return {
|
||||
"engine": "paddleocr_mobile",
|
||||
"model": "PP-OCRv5_mobile",
|
||||
"documents": [
|
||||
{
|
||||
"input_path": str(input_paths[0]),
|
||||
"engine": "paddleocr_mobile",
|
||||
"model": "PP-OCRv5_mobile",
|
||||
"text": "电子发票(铁路电子客票) 武汉站 上海虹桥站 G458 票价 ¥354.00",
|
||||
"summary": "铁路电子客票",
|
||||
"avg_score": 0.95,
|
||||
"line_count": 1,
|
||||
"page_count": 1,
|
||||
"warnings": [],
|
||||
"lines": [
|
||||
{
|
||||
"text": "电子发票(铁路电子客票) 武汉站 上海虹桥站 G458 票价 ¥354.00",
|
||||
"score": 0.95,
|
||||
"box": [[1, 2], [10, 2], [10, 8], [1, 8]],
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
|
||||
monkeypatch.setattr(OcrService, "_resolve_python_bin", lambda self: "python")
|
||||
monkeypatch.setattr(OcrService, "_resolve_worker_path", lambda self: "worker.py")
|
||||
monkeypatch.setattr(OcrService, "_convert_pdf_to_images", fake_convert_pdf_to_images)
|
||||
monkeypatch.setattr(OcrService, "_invoke_worker", fake_invoke_worker)
|
||||
monkeypatch.setattr(
|
||||
OcrService,
|
||||
"_extract_pdf_text_layer",
|
||||
@@ -326,9 +448,9 @@ def test_ocr_service_uses_pdf_text_layer_without_worker_runtime(
|
||||
|
||||
recognized = result.documents[0]
|
||||
assert result.success_count == 1
|
||||
assert calls["worker"] == 1
|
||||
assert recognized.document_type == "train_ticket"
|
||||
assert "电子发票(铁路电子客票)" in recognized.text
|
||||
assert "电子客票号:6580061086021391007342026" in recognized.text
|
||||
assert any(field.label == "金额" and field.value == "354元" for field in recognized.document_fields)
|
||||
assert recognized.preview_kind == "image"
|
||||
assert recognized.preview_data_url.startswith("data:image/png;base64,")
|
||||
@@ -392,14 +514,22 @@ def test_ocr_service_reuses_cached_document_for_same_content(
|
||||
assert second.documents[0].summary == first.documents[0].summary
|
||||
|
||||
|
||||
def test_ocr_cache_key_includes_pdf_render_pipeline_version() -> None:
    """The OCR cache key should contain the PDF render pipeline markers."""
    key = OcrService()._build_cache_key(b"same-pdf-content")

    expected_fragments = (
        "pdf-image-ocr:",
        document_preview.DocumentPreviewAssets.PDF_RENDERER_ID,
        "no-pdf-direct",
    )
    for fragment in expected_fragments:
        assert fragment in key
|
||||
|
||||
|
||||
def test_ocr_service_prefers_pdf_text_layer_when_rendered_ocr_is_placeholder_heavy(
|
||||
monkeypatch,
|
||||
tmp_path: Path,
|
||||
) -> None:
|
||||
def fake_convert_pdf_to_images(self, *, pdf_path: Path, output_dir: Path) -> list[Path]:
|
||||
def fake_convert_pdf_to_images(self, *, pdf_path: Path, output_dir: Path) -> tuple[list[Path], bool]:
|
||||
page = output_dir / "page-1.png"
|
||||
page.write_bytes(b"fake-page")
|
||||
return [page]
|
||||
return [page], True
|
||||
|
||||
def fake_invoke_worker(
|
||||
self,
|
||||
|
||||
@@ -4,7 +4,7 @@ import base64
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from app.core.config import get_settings
|
||||
from app.schemas.ocr import OcrRecognizeDocumentRead
|
||||
from app.schemas.ocr import OcrRecognizeDocumentRead, OcrRecognizeFieldRead
|
||||
from app.services.document_preview import DocumentPreviewAssets
|
||||
from app.services.receipt_folder import ReceiptFolderService
|
||||
|
||||
@@ -72,6 +72,55 @@ def test_receipt_folder_train_ticket_uses_invoice_date_and_enriches_fields(monke
|
||||
get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_receipt_folder_pdf_save_eagerly_renders_image_preview(monkeypatch, tmp_path) -> None:
    """Saving a PDF receipt should produce and record an image preview immediately."""
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        current_user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )

        def stub_render(*, pdf_path, preview_path, timeout_seconds):
            # Stand-in renderer: writes fixed bytes to the requested preview location.
            preview_path.write_bytes(b"rendered-preview")
            return preview_path

        monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", stub_render)

        pdf_name = "2月20_武汉-上海.pdf"
        service = ReceiptFolderService()
        recognized = OcrRecognizeDocumentRead(
            filename=pdf_name,
            media_type="application/pdf",
            text="铁路电子客票 武汉 上海虹桥 354.00",
            summary="铁路电子客票,武汉至上海虹桥。",
        )
        receipt = service.save_receipt(
            filename=pdf_name,
            content=b"%PDF-1.4 fake",
            media_type="application/pdf",
            current_user=current_user,
            document=recognized,
        )

        receipt_dir = next(service.root.glob("pytest/*"))
        preview_path = receipt_dir / "preview.png"
        meta = service._read_meta(receipt_dir)

        assert receipt.preview_kind == "image"
        assert preview_path.read_bytes() == b"rendered-preview"
        expected_meta = {
            "preview_file_name": "preview.png",
            "preview_media_type": "image/png",
            "preview_rendered_with": DocumentPreviewAssets.PDF_RENDERER_ID,
        }
        for meta_key, expected_value in expected_meta.items():
            assert meta[meta_key] == expected_value

        resolved_path, media_type, file_name = service.resolve_preview(receipt.id, current_user)
        assert resolved_path == preview_path
        assert media_type == "image/png"
        assert file_name == "preview.png"
    finally:
        # Always drop the cached settings so the patched storage root does not leak.
        get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_receipt_folder_pdf_preview_regenerates_stale_cached_image(monkeypatch, tmp_path) -> None:
|
||||
monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
|
||||
get_settings.cache_clear()
|
||||
@@ -123,6 +172,213 @@ def test_receipt_folder_pdf_preview_regenerates_stale_cached_image(monkeypatch,
|
||||
get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_receipt_folder_pdf_preview_falls_back_to_source_when_render_fonts_missing(
    monkeypatch,
    tmp_path,
) -> None:
    """If re-rendering a preview raises, the preview should resolve to the source PDF."""
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        current_user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        encoded_preview = base64.b64encode(b"broken-preview").decode("ascii")
        preview_data_url = f"data:image/png;base64,{encoded_preview}"
        pdf_name = "2月20_武汉-上海.pdf"
        service = ReceiptFolderService()
        recognized = OcrRecognizeDocumentRead(
            filename=pdf_name,
            media_type="application/pdf",
            preview_kind="image",
            preview_data_url=preview_data_url,
        )
        receipt = service.save_receipt(
            filename=pdf_name,
            content=b"%PDF-1.7 fake",
            media_type="application/pdf",
            current_user=current_user,
            document=recognized,
        )

        receipt_dir = next(service.root.glob("pytest/*"))
        # Overwrite the recorded renderer id with a different value before resolving.
        stored_meta = service._read_meta(receipt_dir)
        stored_meta["preview_rendered_with"] = "pdftoppm-png-r160-poppler-data"
        service._write_meta(receipt_dir, stored_meta)

        def failing_render(*, pdf_path, preview_path, timeout_seconds):
            raise RuntimeError("Missing language pack for 'Adobe-GB1' mapping")

        monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", failing_render)

        resolved_path, media_type, file_name = service.resolve_preview(receipt.id, current_user)

        assert resolved_path == receipt_dir / "2月20_武汉-上海.pdf"
        assert media_type == "application/pdf"
        assert file_name == "2月20_武汉-上海.pdf"

        refreshed_meta = service._read_meta(receipt_dir)
        expected_meta = {
            "preview_kind": "pdf",
            "preview_file_name": "2月20_武汉-上海.pdf",
            "preview_media_type": "application/pdf",
            "preview_rendered_with": "",
        }
        for meta_key, expected_value in expected_meta.items():
            assert refreshed_meta[meta_key] == expected_value
    finally:
        # Always drop the cached settings so the patched storage root does not leak.
        get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_receipt_folder_train_ticket_extracts_passenger_from_id_line_and_purchase_name(
    monkeypatch,
    tmp_path,
) -> None:
    """Saved train-ticket receipts should expose passenger, station and seat fields."""
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        current_user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        service = ReceiptFolderService()
        pdf_name = "2月20_武汉-上海.pdf"
        ocr_text = (
            "电子发票(铁路电子客票)\n"
            "发票号码:26429165800002785705 湖北\n"
            "开票日期:2026年05月18日\n"
            "武汉站 G458 上海虹桥站\n"
            "Wuhan Shanghaihongqiao\n"
            "2026年02月20日 07:55开 06车01B号 二等座\n"
            "票价:¥354.00\n"
            "4201061987****1615 曹笑竹\n"
            "电子客票号:6580061086021391007342026\n"
            "购买方名称:曹笑竹 统一社会信用代码:\n"
            "买票请到12306 发货请到95306\n"
            "中国铁路祝您旅途愉快"
        )
        # Fields as handed over by recognition; note the truncated merchant value.
        recognized_fields = [
            OcrRecognizeFieldRead(key="merchant_name", label="商户", value="电子发票(铁路"),
            OcrRecognizeFieldRead(key="amount", label="金额", value="354元"),
            OcrRecognizeFieldRead(key="date", label="列车出发时间", value="2026-02-20 07:55"),
            OcrRecognizeFieldRead(key="trip_no", label="车次", value="G458"),
            OcrRecognizeFieldRead(key="route", label="行程", value="武汉-上海"),
        ]
        recognized = OcrRecognizeDocumentRead(
            filename=pdf_name,
            media_type="application/pdf",
            text=ocr_text,
            summary="电子发票(铁路电子客票);发票监;统一 制",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            document_fields=recognized_fields,
        )
        receipt = service.save_receipt(
            filename=pdf_name,
            content=b"%PDF-1.4 fake",
            media_type="application/pdf",
            current_user=current_user,
            document=recognized,
        )

        assert receipt.merchant_name == "中国铁路"

        detail = service.get_receipt(receipt.id, current_user)
        fields = {field.label: field.value for field in detail.fields}
        expected_fields = {
            "商户": "中国铁路",
            "乘车人": "曹笑竹",
            "出发地点": "武汉",
            "到达地点": "上海虹桥",
            "身份证号": "4201061987****1615",
            "电子客票号": "6580061086021391007342026",
            "开票日期": "2026-05-18",
            "列车出发时间": "2026-02-20 07:55",
            "车厢": "06车",
            "座位号": "01B",
        }
        for label, expected_value in expected_fields.items():
            assert fields[label] == expected_value
    finally:
        # Always drop the cached settings so the patched storage root does not leak.
        get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_receipt_folder_train_ticket_repairs_invalid_generated_fields_from_ocr_text(
    monkeypatch,
    tmp_path,
) -> None:
    """Wrong station and passenger fields should be replaced by values found in the OCR text."""
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        current_user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        service = ReceiptFolderService()
        image_name = "2月21日_上海-深圳.png"
        ocr_text = (
            "行程单示意\n"
            "出票渠道:示例平台\n"
            "非官方车票\n"
            "不可报销\n"
            "仅供演示\n"
            "创建日期:2026年02月15日\n"
            "订单号:DEMO202602210001\n"
            "单据编号:DEMO-IT-000001\n"
            "上海虹桥\n"
            "G999\n"
            "深圳北\n"
            "站\n"
            "站\n"
            "Shanghaihongqiao\n"
            "Shenzhenbei\n"
            "2026年02月21日\n"
            "08:30出发\n"
            "全程约7小时30分\n"
            "15:00到达\n"
            "DEMO\n"
            "乘客:示例旅客\n"
            "车厢:05车\n"
            "席别:二等座\n"
            "-\n"
            "扫码无效\n"
            "证件号:310101199001010000\n"
            "座位:08A\n"
            "票价:¥438.00\n"
            "仅为演示"
        )
        # The last three fields deliberately carry wrong values taken from
        # unrelated OCR lines (seat class / QR notice).
        recognized_fields = [
            OcrRecognizeFieldRead(key="amount", label="金额", value="438元"),
            OcrRecognizeFieldRead(key="date", label="列车出发时间", value="2026-02-21 08:30"),
            OcrRecognizeFieldRead(key="invoice_number", label="票据号码", value="DEMO202602210001"),
            OcrRecognizeFieldRead(key="trip_no", label="车次", value="G999"),
            OcrRecognizeFieldRead(key="route", label="行程", value="上海-深圳"),
            OcrRecognizeFieldRead(key="departure_station", label="出发地点", value="二等座"),
            OcrRecognizeFieldRead(key="arrival_station", label="到达地点", value="扫码无效"),
            OcrRecognizeFieldRead(key="passenger_name", label="乘车人", value="席别二等座"),
        ]
        recognized = OcrRecognizeDocumentRead(
            filename=image_name,
            media_type="image/png",
            text=ocr_text,
            summary="行程单示意;出票渠道:示例平台;非官方车票",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            document_fields=recognized_fields,
        )
        receipt = service.save_receipt(
            filename=image_name,
            content=b"fake image",
            media_type="image/png",
            current_user=current_user,
            document=recognized,
        )

        detail = service.get_receipt(receipt.id, current_user)
        fields = {field.label: field.value for field in detail.fields}
        expected_fields = {
            "出发地点": "上海虹桥",
            "到达地点": "深圳北",
            "乘车人": "示例旅客",
            "身份证号": "310101199001010000",
            "席别": "二等座",
            "车厢": "05车",
            "座位号": "08A",
            "票价": "438.00元",
        }
        for label, expected_value in expected_fields.items():
            assert fields[label] == expected_value
    finally:
        # Always drop the cached settings so the patched storage root does not leak.
        get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_receipt_folder_delete_removes_duplicate_marker(monkeypatch, tmp_path) -> None:
|
||||
monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
|
||||
get_settings.cache_clear()
|
||||
|
||||
@@ -15,6 +15,7 @@ from app.models.financial_record import ExpenseClaim
|
||||
from app.schemas.ontology import OntologyParseRequest
|
||||
from app.schemas.user_agent import UserAgentCitation, UserAgentRequest, UserAgentReviewRiskBrief
|
||||
from app.services.agent_assets import AgentAssetService
|
||||
from app.services.application_location_semantics import resolve_jieba_tokens
|
||||
from app.services.ontology import SemanticOntologyService
|
||||
from app.services.user_agent import UserAgentService
|
||||
from app.services.user_agent_documents import UserAgentDocumentService
|
||||
@@ -763,6 +764,67 @@ def test_user_agent_application_submit_blocks_overlapping_travel_dates() -> None
|
||||
assert response.draft_payload is None
|
||||
|
||||
|
||||
def test_user_agent_application_submit_normalizes_location_mixed_with_business_content() -> None:
    """A preview location that also contains business wording should be stored as the city only."""
    session_factory = build_session_factory()
    # NOTE(review): the source view lost its indentation; the assertions are
    # assumed to run while the session is still open - confirm.
    with session_factory() as db:
        preview_fields = {
            "applicationType": "差旅费用申请",
            "time": "2026-02-20 至 2026-02-23",
            "location": "上海辅助国网仿生产服务器",
            "reason": "辅助国网仿生产服务器部署",
            "days": "4天",
            "transportMode": "火车",
            "amount": "2120元",
        }
        overrides = {
            "manager_name": "向万红",
            "application_preview": {"fields": preview_fields},
        }
        response = build_application_user_agent_response(
            db,
            "确认提交",
            context_overrides=overrides,
        )

        claim = application_claim_query(db).one()
        assert claim.location == "上海市"
        assert claim.reason == "辅助国网仿生产服务器部署"
        assert "申请单据已生成" in response.answer
        assert response.draft_payload is not None
|
||||
|
||||
|
||||
def test_user_agent_application_submit_splits_location_and_reason_from_raw_sentence() -> None:
    """A single history sentence should yield separate location and reason on the claim."""
    session_factory = build_session_factory()
    # NOTE(review): the source view lost its indentation; the assertions are
    # assumed to run while the session is still open - confirm.
    with session_factory() as db:
        prior_turns = [
            {
                "role": "user",
                "content": "2026-02-20 至 2026-02-23,去上海辅助国网仿生产服务器部署,火车",
            }
        ]
        overrides = {
            "manager_name": "向万红",
            "grade": "P5",
            "department_name": "技术部",
        }
        response = build_application_user_agent_response(
            db,
            "确认提交",
            history=prior_turns,
            context_overrides=overrides,
        )

        claim = application_claim_query(db).one()
        assert claim.location == "上海市"
        assert claim.reason == "辅助国网仿生产服务器部署"
        assert "申请单据已生成" in response.answer
|
||||
|
||||
|
||||
def test_application_sentence_jieba_tokenizer_recognizes_location_boundary() -> None:
    """The tokenizer should split the place name off the front of the sentence."""
    tagged_tokens = resolve_jieba_tokens("上海辅助国网仿生产服务器部署")
    surface_forms = [surface for surface, _tag in tagged_tokens]

    assert ("上海", "ns") in tagged_tokens
    assert surface_forms == ["上海", "辅助", "国网", "仿生产", "服务器", "部署"]
|
||||
|
||||
|
||||
def test_user_agent_application_maps_preview_travel_type_label() -> None:
|
||||
session_factory = build_session_factory()
|
||||
with session_factory() as db:
|
||||
@@ -2155,7 +2217,7 @@ def test_user_agent_returns_draft_limit_message_when_save_is_blocked() -> None:
|
||||
context_json={"review_action": "save_draft"},
|
||||
tool_payload={
|
||||
"draft_limit_reached": True,
|
||||
"message": "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。",
|
||||
"message": "您当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。",
|
||||
"status": "blocked",
|
||||
},
|
||||
)
|
||||
@@ -2163,7 +2225,7 @@ def test_user_agent_returns_draft_limit_message_when_save_is_blocked() -> None:
|
||||
|
||||
assert (
|
||||
response.answer
|
||||
== "你当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。"
|
||||
== "您当前已保存 3 个草稿,请先完成已保存的草稿,才能再次新建草稿。"
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user