Files
X-Financial/server/tests/test_receipt_folder_service.py
caoxiaozhu 0264a4b5b4 refactor(server): user_agent/steward/ocr 等服务重构并适配关联任务
- user_agent 拆分 application/locations/knowledge/response/review 四个子模块,接入申请位置语义与关联草稿分支
- steward planner/runtime/slot/plan_builder 决策链路重构,travel_reimbursement_calculator/orchestrator_expense_query 适配
- ocr/document_preview/document_intelligence/receipt_folder 复用预览与资产缓存,expense_claim_draft_flow/application_handoff 适配
- pyproject.toml 新增依赖,paddleocr bootstrap 脚本与 server_start.sh 调整
- 更新差旅/交通/通信等财务规则表,同步 document_intelligence/ocr/receipt_folder/user_agent 等测试
2026-06-24 10:42:24 +08:00

541 lines
22 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import base64
from app.api.deps import CurrentUserContext
from app.core.config import get_settings
from app.schemas.ocr import OcrRecognizeDocumentRead, OcrRecognizeFieldRead
from app.services.document_preview import DocumentPreviewAssets
from app.services.receipt_folder import ReceiptFolderService
def test_receipt_folder_train_ticket_uses_invoice_date_and_enriches_fields(monkeypatch, tmp_path) -> None:
    """Save a railway e-ticket PDF and verify its summary and detail fields.

    The OCR text contains an invoice date (2026-02-18) and a separate
    departure time (2026-02-20 08:30). The saved receipt is expected to report
    the invoice date as its document date, and the receipt detail is expected
    to expose the ticket fields asserted below.
    """
    # Point storage at a per-test directory. The settings cache is cleared
    # before and after, presumably so the patched value is (re)read.
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        folder = ReceiptFolderService()
        pdf_name = "2月23_上海-武汉.pdf"
        pdf_media_type = "application/pdf"
        ocr_text = (
            "电子发票(铁路电子客票)\n"
            "发票号码:26319166100006175398\n"
            "电子客票号:E1234567890123\n"
            "开票日期:2026-02-18\n"
            "上海虹桥站\n"
            "武汉站\n"
            "G456\n"
            "二等座\n"
            "06车01B号\n"
            "2026-02-20 08:30开\n"
            "票价:¥354.00\n"
            "1101011990****1234\n"
            "张三"
        )
        ocr_document = OcrRecognizeDocumentRead(
            filename=pdf_name,
            media_type=pdf_media_type,
            text=ocr_text,
            summary="铁路电子客票,上海虹桥至武汉,票价 354 元。",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
        )
        saved = folder.save_receipt(
            filename=pdf_name,
            content=b"%PDF-1.4 fake",
            media_type=pdf_media_type,
            current_user=user,
            document=ocr_document,
        )

        # Summary-level values on the saved receipt.
        assert saved.document_date == "2026-02-18"
        assert saved.merchant_name == "中国铁路"
        assert saved.amount == "354.00元"

        # Detail-level values, looked up by their display label.
        detail = folder.get_receipt(saved.id, user)
        value_by_label = {}
        for entry in detail.fields:
            value_by_label[entry.label] = entry.value
        expected_values = {
            "开票日期": "2026-02-18",
            "乘车人": "张三",
            "出发地点": "上海虹桥",
            "到达地点": "武汉",
            "车次": "G456",
            "电子客票号": "E1234567890123",
            "身份证号": "1101011990****1234",
            "席别": "二等座",
            "车厢": "06车",
            "座位号": "01B",
            "列车出发时间": "2026-02-20 08:30",
        }
        for label, expected in expected_values.items():
            assert value_by_label[label] == expected
    finally:
        get_settings.cache_clear()
def test_receipt_folder_pdf_save_eagerly_renders_image_preview(monkeypatch, tmp_path) -> None:
    """Saving a PDF receipt should immediately produce a PNG preview.

    ``DocumentPreviewAssets.render_pdf_first_page`` is replaced with a fake
    that writes fixed bytes, so the test can check that ``save_receipt`` wrote
    ``preview.png``, recorded the preview metadata, and that
    ``resolve_preview`` returns that same image.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        current_user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )

        def fake_render_pdf_first_page(*, pdf_path, preview_path, timeout_seconds):
            # Stand-in renderer: write recognizable bytes to the target path.
            preview_path.write_bytes(b"rendered-preview")
            return preview_path

        monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", fake_render_pdf_first_page)
        service = ReceiptFolderService()
        receipt = service.save_receipt(
            filename="2月20_武汉-上海.pdf",
            content=b"%PDF-1.4 fake",
            media_type="application/pdf",
            current_user=current_user,
            document=OcrRecognizeDocumentRead(
                filename="2月20_武汉-上海.pdf",
                media_type="application/pdf",
                text="铁路电子客票 武汉 上海虹桥 354.00",
                summary="铁路电子客票,武汉至上海虹桥。",
            ),
        )
        # Address the receipt directory by id (same layout the delete test
        # relies on) instead of taking the first glob hit, which would pick an
        # arbitrary entry if the user directory ever holds anything else.
        receipt_dir = service.root / "pytest" / receipt.id
        assert receipt_dir.exists()
        preview_path = receipt_dir / "preview.png"
        meta = service._read_meta(receipt_dir)
        assert receipt.preview_kind == "image"
        assert preview_path.read_bytes() == b"rendered-preview"
        assert meta["preview_file_name"] == "preview.png"
        assert meta["preview_media_type"] == "image/png"
        assert meta["preview_rendered_with"] == DocumentPreviewAssets.PDF_RENDERER_ID
        resolved_path, media_type, file_name = service.resolve_preview(receipt.id, current_user)
        assert resolved_path == preview_path
        assert media_type == "image/png"
        assert file_name == "preview.png"
    finally:
        get_settings.cache_clear()
def test_receipt_folder_pdf_preview_regenerates_stale_cached_image(monkeypatch, tmp_path) -> None:
    """A cached preview without a renderer marker should be re-rendered.

    The receipt is saved with a client-supplied preview image. The test then
    removes ``preview_rendered_with`` from the stored metadata, installs a fake
    renderer, and expects ``resolve_preview`` to overwrite ``preview.png`` and
    record the current renderer id.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        current_user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        stale_preview = b"stale-preview"
        preview_data_url = f"data:image/png;base64,{base64.b64encode(stale_preview).decode('ascii')}"
        service = ReceiptFolderService()
        receipt = service.save_receipt(
            filename="2月20_武汉-上海.pdf",
            content=b"%PDF-1.4 fake",
            media_type="application/pdf",
            current_user=current_user,
            document=OcrRecognizeDocumentRead(
                filename="2月20_武汉-上海.pdf",
                media_type="application/pdf",
                preview_kind="image",
                preview_data_url=preview_data_url,
            ),
        )
        # Address the receipt directory by id (same layout the delete test
        # relies on) instead of taking the first glob hit, which would pick an
        # arbitrary entry if the user directory ever holds anything else.
        receipt_dir = service.root / "pytest" / receipt.id
        assert receipt_dir.exists()
        preview_path = receipt_dir / "preview.png"
        assert preview_path.read_bytes() == stale_preview

        # Drop the renderer marker so the stored preview looks stale.
        stale_meta = service._read_meta(receipt_dir)
        stale_meta.pop("preview_rendered_with", None)
        service._write_meta(receipt_dir, stale_meta)

        def fake_render_pdf_first_page(*, pdf_path, preview_path, timeout_seconds):
            # Stand-in renderer: write recognizable bytes to the target path.
            preview_path.write_bytes(b"refreshed-preview")
            return preview_path

        monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", fake_render_pdf_first_page)
        resolved_path, media_type, file_name = service.resolve_preview(receipt.id, current_user)
        assert resolved_path == preview_path
        assert media_type == "image/png"
        assert file_name == "preview.png"
        assert preview_path.read_bytes() == b"refreshed-preview"
        meta = service._read_meta(receipt_dir)
        assert meta["preview_rendered_with"] == DocumentPreviewAssets.PDF_RENDERER_ID
    finally:
        get_settings.cache_clear()
def test_receipt_folder_pdf_preview_falls_back_to_source_when_render_fonts_missing(
    monkeypatch,
    tmp_path,
) -> None:
    """When re-rendering fails, the preview should fall back to the source PDF.

    The stored metadata is tagged with a renderer id written by hand, and the
    fake renderer raises ``RuntimeError``. ``resolve_preview`` is expected to
    return the original PDF and rewrite the preview metadata to describe it,
    with an empty ``preview_rendered_with``.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        current_user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        stale_preview = b"broken-preview"
        preview_data_url = f"data:image/png;base64,{base64.b64encode(stale_preview).decode('ascii')}"
        service = ReceiptFolderService()
        receipt = service.save_receipt(
            filename="2月20_武汉-上海.pdf",
            content=b"%PDF-1.7 fake",
            media_type="application/pdf",
            current_user=current_user,
            document=OcrRecognizeDocumentRead(
                filename="2月20_武汉-上海.pdf",
                media_type="application/pdf",
                preview_kind="image",
                preview_data_url=preview_data_url,
            ),
        )
        # Address the receipt directory by id (same layout the delete test
        # relies on) instead of taking the first glob hit, which would pick an
        # arbitrary entry if the user directory ever holds anything else.
        receipt_dir = service.root / "pytest" / receipt.id
        assert receipt_dir.exists()

        # Tag the cached preview with a hand-written renderer id.
        # NOTE(review): presumably this differs from PDF_RENDERER_ID and so
        # triggers a re-render - confirm against DocumentPreviewAssets.
        meta = service._read_meta(receipt_dir)
        meta["preview_rendered_with"] = "pdftoppm-png-r160-poppler-data"
        service._write_meta(receipt_dir, meta)

        def fake_render_pdf_first_page(*, pdf_path, preview_path, timeout_seconds):
            # Stand-in renderer that always fails.
            raise RuntimeError("Missing language pack for 'Adobe-GB1' mapping")

        monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", fake_render_pdf_first_page)
        resolved_path, media_type, file_name = service.resolve_preview(receipt.id, current_user)
        assert resolved_path == receipt_dir / "2月20_武汉-上海.pdf"
        assert media_type == "application/pdf"
        assert file_name == "2月20_武汉-上海.pdf"
        refreshed_meta = service._read_meta(receipt_dir)
        assert refreshed_meta["preview_kind"] == "pdf"
        assert refreshed_meta["preview_file_name"] == "2月20_武汉-上海.pdf"
        assert refreshed_meta["preview_media_type"] == "application/pdf"
        assert refreshed_meta["preview_rendered_with"] == ""
    finally:
        get_settings.cache_clear()
def test_receipt_folder_train_ticket_extracts_passenger_from_id_line_and_purchase_name(
    monkeypatch,
    tmp_path,
) -> None:
    """Verify the train-ticket detail fields for a single-line-layout OCR text.

    The OCR text puts the passenger name on the same line as the masked ID
    number and again after the purchaser label. The supplied OCR fields carry
    a truncated merchant value. The saved receipt is expected to report
    "中国铁路" as merchant and expose the passenger, stations, dates and seat
    values asserted below.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        folder = ReceiptFolderService()
        pdf_name = "2月20_武汉-上海.pdf"
        pdf_media_type = "application/pdf"
        ocr_text = (
            "电子发票(铁路电子客票)\n"
            "发票号码:26429165800002785705 湖北\n"
            "开票日期:2026年05月18日\n"
            "武汉站 G458 上海虹桥站\n"
            "Wuhan Shanghaihongqiao\n"
            "2026年02月20日 07:55开 06车01B号 二等座\n"
            "票价:¥354.00\n"
            "4201061987****1615 曹笑竹\n"
            "电子客票号:6580061086021391007342026\n"
            "购买方名称:曹笑竹 统一社会信用代码:\n"
            "买票请到12306 发货请到95306\n"
            "中国铁路祝您旅途愉快"
        )
        # Fields as delivered with the OCR document: (key, label, value).
        supplied_fields = [
            OcrRecognizeFieldRead(key=key, label=label, value=value)
            for key, label, value in (
                ("merchant_name", "商户", "电子发票(铁路"),
                ("amount", "金额", "354元"),
                ("date", "列车出发时间", "2026-02-20 07:55"),
                ("trip_no", "车次", "G458"),
                ("route", "行程", "武汉-上海"),
            )
        ]
        ocr_document = OcrRecognizeDocumentRead(
            filename=pdf_name,
            media_type=pdf_media_type,
            text=ocr_text,
            summary="电子发票(铁路电子客票);发票监;统一 制",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            document_fields=supplied_fields,
        )
        saved = folder.save_receipt(
            filename=pdf_name,
            content=b"%PDF-1.4 fake",
            media_type=pdf_media_type,
            current_user=user,
            document=ocr_document,
        )
        assert saved.merchant_name == "中国铁路"

        detail = folder.get_receipt(saved.id, user)
        value_by_label = {}
        for entry in detail.fields:
            value_by_label[entry.label] = entry.value
        expected_values = {
            "商户": "中国铁路",
            "乘车人": "曹笑竹",
            "出发地点": "武汉",
            "到达地点": "上海虹桥",
            "身份证号": "4201061987****1615",
            "电子客票号": "6580061086021391007342026",
            "开票日期": "2026-05-18",
            "列车出发时间": "2026-02-20 07:55",
            "车厢": "06车",
            "座位号": "01B",
        }
        for label, expected in expected_values.items():
            assert value_by_label[label] == expected
    finally:
        get_settings.cache_clear()
def test_receipt_folder_train_ticket_repairs_invalid_generated_fields_from_ocr_text(
    monkeypatch,
    tmp_path,
) -> None:
    """Verify that wrong OCR-supplied fields are replaced by values from the text.

    The supplied fields give "二等座" as departure, "扫码无效" as arrival and
    "席别二等座" as passenger. The receipt detail is expected to show the
    stations, passenger, ID number, seat and fare that appear in the OCR text.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        folder = ReceiptFolderService()
        image_name = "2月21日_上海-深圳.png"
        image_media_type = "image/png"
        ocr_text = (
            "行程单示意\n"
            "出票渠道:示例平台\n"
            "非官方车票\n"
            "不可报销\n"
            "仅供演示\n"
            "创建日期2026年02月15日\n"
            "订单号DEMO202602210001\n"
            "单据编号DEMO-IT-000001\n"
            "上海虹桥\n"
            "G999\n"
            "深圳北\n"
            "\n"
            "\n"
            "Shanghaihongqiao\n"
            "Shenzhenbei\n"
            "2026年02月21日\n"
            "08:30出发\n"
            "全程约7小时30分\n"
            "15:00到达\n"
            "DEMO\n"
            "乘客:示例旅客\n"
            "车厢05车\n"
            "席别:二等座\n"
            "-\n"
            "扫码无效\n"
            "证件号310101199001010000\n"
            "座位08A\n"
            "票价¥438.00\n"
            "仅为演示"
        )
        # Fields as delivered with the OCR document: (key, label, value).
        # The last three deliberately hold values that are not stations/names.
        supplied_fields = [
            OcrRecognizeFieldRead(key=key, label=label, value=value)
            for key, label, value in (
                ("amount", "金额", "438元"),
                ("date", "列车出发时间", "2026-02-21 08:30"),
                ("invoice_number", "票据号码", "DEMO202602210001"),
                ("trip_no", "车次", "G999"),
                ("route", "行程", "上海-深圳"),
                ("departure_station", "出发地点", "二等座"),
                ("arrival_station", "到达地点", "扫码无效"),
                ("passenger_name", "乘车人", "席别二等座"),
            )
        ]
        ocr_document = OcrRecognizeDocumentRead(
            filename=image_name,
            media_type=image_media_type,
            text=ocr_text,
            summary="行程单示意;出票渠道:示例平台;非官方车票",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            document_fields=supplied_fields,
        )
        saved = folder.save_receipt(
            filename=image_name,
            content=b"fake image",
            media_type=image_media_type,
            current_user=user,
            document=ocr_document,
        )

        detail = folder.get_receipt(saved.id, user)
        value_by_label = {}
        for entry in detail.fields:
            value_by_label[entry.label] = entry.value
        expected_values = {
            "出发地点": "上海虹桥",
            "到达地点": "深圳北",
            "乘车人": "示例旅客",
            "身份证号": "310101199001010000",
            "席别": "二等座",
            "车厢": "05车",
            "座位号": "08A",
            "票价": "438.00元",
        }
        for label, expected in expected_values.items():
            assert value_by_label[label] == expected
    finally:
        get_settings.cache_clear()
def test_receipt_folder_delete_removes_duplicate_marker(monkeypatch, tmp_path) -> None:
    """Deleting a receipt should also stop it from being reported as a duplicate.

    After saving, ``find_duplicate_receipt`` with the same filename and content
    is expected to return the saved receipt. After ``delete_receipt`` the
    receipt directory is expected to be gone and the same lookup to return
    ``None``.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        folder = ReceiptFolderService()
        pdf_name = "same-receipt.pdf"
        pdf_media_type = "application/pdf"
        payload = b"%PDF-1.4 same receipt"
        ocr_document = OcrRecognizeDocumentRead(
            filename=pdf_name,
            media_type=pdf_media_type,
            text="same receipt amount 354",
            document_type="other",
            document_type_label="其他单据",
            scene_code="other",
            scene_label="其他票据",
        )
        saved = folder.save_receipt(
            filename=pdf_name,
            content=payload,
            media_type=pdf_media_type,
            current_user=user,
            document=ocr_document,
        )

        def lookup_duplicate():
            # Same filename/content/user lookup used before and after deletion.
            return folder.find_duplicate_receipt(
                filename=pdf_name,
                content=payload,
                current_user=user,
            )

        saved_dir = folder.root / "pytest" / saved.id
        assert saved_dir.exists()

        found = lookup_duplicate()
        assert found is not None
        assert found.id == saved.id

        folder.delete_receipt(receipt_id=saved.id, current_user=user)
        assert not saved_dir.exists()
        assert lookup_duplicate() is None
    finally:
        get_settings.cache_clear()
def test_receipt_folder_recovers_train_ticket_detail_from_other_english_ocr(monkeypatch, tmp_path) -> None:
    """A document classified as "other" should be saved as a train ticket.

    The OCR text has no Chinese labels, only values and romanized station
    names, and the document arrives typed as "other". The saved receipt is
    expected to be reclassified as a train ticket in the travel scene, with the
    detail fields asserted below and no passenger field.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        folder = ReceiptFolderService()
        pdf_name = "2月20_武汉-上海.pdf"
        pdf_media_type = "application/pdf"
        ocr_text = (
            ":26429165800002785705\n"
            ":2026 05 18\n"
            "G458\n"
            "Wuhan\n"
            "Shanghaihongqiao\n"
            "2026 02 20 07:55\n"
            "06 01B\n"
            ": 354.00\n"
            "4201061987****1615\n"
            ":6580061086021391007342026\n"
            "12306 95306"
        )
        ocr_document = OcrRecognizeDocumentRead(
            filename=pdf_name,
            media_type=pdf_media_type,
            text=ocr_text,
            summary="Wuhan Shanghaihongqiao G458 354.00",
            document_type="other",
            document_type_label="其他单据",
            scene_code="other",
            scene_label="其他票据",
        )
        saved = folder.save_receipt(
            filename=pdf_name,
            content=b"%PDF-1.4 fake",
            media_type=pdf_media_type,
            current_user=user,
            document=ocr_document,
        )

        # Summary-level values, including the corrected classification.
        assert saved.document_type == "train_ticket"
        assert saved.document_type_label == "火车/高铁票"
        assert saved.scene_code == "travel"
        assert saved.amount == "354.00元"
        assert saved.document_date == "2026-02-20"
        assert saved.merchant_name == "中国铁路"

        detail = folder.get_receipt(saved.id, user)
        value_by_label = {}
        for entry in detail.fields:
            value_by_label[entry.label] = entry.value
        expected_values = {
            "行程": "武汉-上海",
            "车次": "G458",
            "列车出发时间": "2026-02-20 07:55",
            "票价": "354.00元",
            "身份证号": "4201061987****1615",
            "车厢": "06车",
            "座位号": "01B",
        }
        for label, expected in expected_values.items():
            assert value_by_label[label] == expected
        # The OCR text contains no passenger name, so no such field is expected.
        assert "乘车人" not in value_by_label
    finally:
        get_settings.cache_clear()
def test_receipt_folder_unlink_receipts_for_claim_marks_linked_receipts_unlinked(monkeypatch, tmp_path) -> None:
    """Unlinking by claim id should reset the link state of a linked receipt.

    A receipt saved with claim/item link arguments is expected to read back as
    "linked". ``unlink_receipts_for_claim("claim-1")`` is expected to return 1,
    after which the receipt reads back as "unlinked" with empty claim id and
    number and ``linked_at`` set to ``None``.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        folder = ReceiptFolderService()
        pdf_name = "linked-receipt.pdf"
        pdf_media_type = "application/pdf"
        ocr_document = OcrRecognizeDocumentRead(
            filename=pdf_name,
            media_type=pdf_media_type,
            text="invoice number 123 amount 100",
            document_type="vat_invoice",
            document_type_label="invoice",
            scene_code="other",
            scene_label="receipt",
        )
        saved = folder.save_receipt(
            filename=pdf_name,
            content=b"%PDF-1.4 linked",
            media_type=pdf_media_type,
            current_user=user,
            linked_claim_id="claim-1",
            linked_claim_no="RE-001",
            linked_item_id="item-1",
            document=ocr_document,
        )

        # State right after saving with link arguments.
        before = folder.get_receipt(saved.id, user)
        assert before.status == "linked"
        assert before.linked_claim_id == "claim-1"
        assert before.linked_claim_no == "RE-001"

        unlinked_count = folder.unlink_receipts_for_claim("claim-1")
        assert unlinked_count == 1

        # State after unlinking the claim.
        after = folder.get_receipt(saved.id, user)
        assert after.status == "unlinked"
        assert after.linked_claim_id == ""
        assert after.linked_claim_no == ""
        assert after.linked_at is None
    finally:
        get_settings.cache_clear()