- 新增 system_cache 模块与 POST /settings/cache/clear,管理员可一键清理 OCR 结果/运行时配置/模型失败冷却/知识库索引/地点语义等进程内缓存 - 各服务暴露 clear_*_cache 方法(ocr/runtime_settings/runtime_chat/knowledge/application_location_semantic),SettingsCacheClearRead 汇总清理项 - OCR 转图片失败时尝试用 PDF 文本层兜底构建识别文档(有效字符≥8),并写结果缓存;OcrService 暴露 clear_result_cache - receipt_folder 车票过滤补充身份证号关键词,附件文档/操作/展示模块同步适配 - 新增 system_cache_endpoints 测试,更新 openapi_schema/ocr/receipt_folder/attachment_association_jobs 测试
657 lines
26 KiB
Python
657 lines
26 KiB
Python
from __future__ import annotations
|
||
|
||
import base64
|
||
|
||
from app.api.deps import CurrentUserContext
|
||
from app.core.config import get_settings
|
||
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeFieldRead
|
||
from app.services.document_preview import DocumentPreviewAssets
|
||
from app.services.receipt_folder import ReceiptFolderService
|
||
|
||
|
||
def test_receipt_folder_train_ticket_uses_invoice_date_and_enriches_fields(monkeypatch, tmp_path) -> None:
    """Check that a saved railway e-ticket reports the invoice date and enriched fields.

    The OCR text carries both an invoice date (2026-02-18) and a departure
    time (2026-02-20 08:30); the receipt's ``document_date`` is expected to be
    the invoice date, while the departure time shows up as its own field.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    # Drop cached settings after changing the environment (presumably so the
    # temporary STORAGE_ROOT_DIR is re-read) - restored in the finally block.
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)
        folder = ReceiptFolderService()

        ocr_text = (
            "电子发票(铁路电子客票)\n"
            "发票号码:26319166100006175398\n"
            "电子客票号:E1234567890123\n"
            "开票日期:2026-02-18\n"
            "上海虹桥站\n"
            "武汉站\n"
            "G456\n"
            "二等座\n"
            "06车01B号\n"
            "2026-02-20 08:30开\n"
            "票价:¥354.00\n"
            "1101011990****1234\n"
            "张三"
        )
        ocr_document = OcrRecognizeDocumentRead(
            filename="2月23_上海-武汉.pdf",
            media_type="application/pdf",
            text=ocr_text,
            summary="铁路电子客票,上海虹桥至武汉,票价 354 元。",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
        )
        saved = folder.save_receipt(
            filename="2月23_上海-武汉.pdf",
            content=b"%PDF-1.4 fake",
            media_type="application/pdf",
            current_user=user,
            document=ocr_document,
        )

        # Summary-level values on the returned receipt.
        assert saved.document_date == "2026-02-18"
        assert saved.merchant_name == "中国铁路"
        assert saved.amount == "354.00元"

        # Field-level values on the stored detail, checked in a fixed order.
        stored = folder.get_receipt(saved.id, user)
        by_label = {entry.label: entry.value for entry in stored.fields}
        expected_fields = {
            "开票日期": "2026-02-18",
            "乘车人": "张三",
            "出发地点": "上海虹桥",
            "到达地点": "武汉",
            "车次": "G456",
            "电子客票号": "E1234567890123",
            "身份证号": "1101011990****1234",
            "席别": "二等座",
            "车厢": "06车",
            "座位号": "01B",
            "列车出发时间": "2026-02-20 08:30",
        }
        for label, value in expected_fields.items():
            assert by_label[label] == value
    finally:
        get_settings.cache_clear()
|
||
|
||
|
||
def test_receipt_folder_pdf_save_eagerly_renders_image_preview(monkeypatch, tmp_path) -> None:
    """Check that saving a PDF receipt writes an image preview right away.

    ``DocumentPreviewAssets.render_pdf_first_page`` is replaced by a stub that
    writes fixed bytes, so the test can verify the preview file, the preview
    metadata and the path returned by ``resolve_preview``.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)

        def _stub_render(*, pdf_path, preview_path, timeout_seconds):
            # Stand-in renderer: emit a recognisable payload instead of rasterising.
            preview_path.write_bytes(b"rendered-preview")
            return preview_path

        monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", _stub_render)

        folder = ReceiptFolderService()
        ocr_document = OcrRecognizeDocumentRead(
            filename="2月20_武汉-上海.pdf",
            media_type="application/pdf",
            text="铁路电子客票 武汉 上海虹桥 354.00",
            summary="铁路电子客票,武汉至上海虹桥。",
        )
        saved = folder.save_receipt(
            filename="2月20_武汉-上海.pdf",
            content=b"%PDF-1.4 fake",
            media_type="application/pdf",
            current_user=user,
            document=ocr_document,
        )

        # Only one receipt was saved, so the first match under the user's folder is ours.
        stored_dir = next(folder.root.glob("pytest/*"))
        expected_preview = stored_dir / "preview.png"
        stored_meta = folder._read_meta(stored_dir)

        assert saved.preview_kind == "image"
        assert expected_preview.read_bytes() == b"rendered-preview"
        assert stored_meta["preview_file_name"] == "preview.png"
        assert stored_meta["preview_media_type"] == "image/png"
        assert stored_meta["preview_rendered_with"] == DocumentPreviewAssets.PDF_RENDERER_ID

        served_path, served_type, served_name = folder.resolve_preview(saved.id, user)
        assert served_path == expected_preview
        assert served_type == "image/png"
        assert served_name == "preview.png"
    finally:
        get_settings.cache_clear()
|
||
|
||
|
||
def test_receipt_folder_persist_enriches_pdf_ocr_document_with_image_preview(monkeypatch, tmp_path) -> None:
    """Check that ``persist_ocr_batch`` annotates a PDF document with receipt preview info.

    After persisting a single-document batch, the returned document must carry
    a receipt id, a preview URL ending in that receipt's preview route, and an
    ``image`` preview kind.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)

        def _stub_render(*, pdf_path, preview_path, timeout_seconds):
            # Stand-in renderer: write fixed bytes rather than rendering the PDF.
            preview_path.write_bytes(b"rendered-preview")
            return preview_path

        monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", _stub_render)

        folder = ReceiptFolderService()
        uploads = [("2月23_上海-武汉.pdf", b"%PDF-1.4 fake", "application/pdf")]
        ocr_batch = OcrRecognizeBatchRead(
            total_file_count=1,
            success_count=1,
            documents=[
                OcrRecognizeDocumentRead(
                    filename="2月23_上海-武汉.pdf",
                    media_type="application/pdf",
                    text="铁路电子客票 上海虹桥 武汉 G456 354.00",
                    summary="铁路电子客票,上海虹桥至武汉。",
                    document_type="train_ticket",
                    document_type_label="火车/高铁票",
                    scene_code="travel",
                    scene_label="差旅票据",
                ),
            ],
        )
        persisted = folder.persist_ocr_batch(files=uploads, result=ocr_batch, current_user=user)

        enriched = persisted.documents[0]
        assert enriched.receipt_id
        assert enriched.receipt_preview_url.endswith(f"/receipt-folder/{enriched.receipt_id}/preview")
        assert enriched.preview_kind == "image"
    finally:
        get_settings.cache_clear()
|
||
|
||
|
||
def test_receipt_folder_pdf_preview_regenerates_stale_cached_image(monkeypatch, tmp_path) -> None:
    """Check that a preview lacking a renderer marker is re-rendered on resolve.

    The receipt is saved with an inline (data URL) preview, then
    ``preview_rendered_with`` is removed from its metadata. Resolving the
    preview afterwards must overwrite the file through the (stubbed) renderer
    and record the current renderer id.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)
        stale_bytes = b"stale-preview"
        stale_b64 = base64.b64encode(stale_bytes).decode('ascii')
        inline_preview = f"data:image/png;base64,{stale_b64}"

        folder = ReceiptFolderService()
        ocr_document = OcrRecognizeDocumentRead(
            filename="2月20_武汉-上海.pdf",
            media_type="application/pdf",
            preview_kind="image",
            preview_data_url=inline_preview,
        )
        saved = folder.save_receipt(
            filename="2月20_武汉-上海.pdf",
            content=b"%PDF-1.4 fake",
            media_type="application/pdf",
            current_user=user,
            document=ocr_document,
        )

        stored_dir = next(folder.root.glob("pytest/*"))
        cached_preview = stored_dir / "preview.png"
        assert cached_preview.read_bytes() == stale_bytes

        # Strip the renderer marker from the metadata to simulate a stale cache entry.
        meta_without_marker = folder._read_meta(stored_dir)
        meta_without_marker.pop("preview_rendered_with", None)
        folder._write_meta(stored_dir, meta_without_marker)

        def _stub_render(*, pdf_path, preview_path, timeout_seconds):
            # Stand-in renderer: a distinct payload proves the file was rewritten.
            preview_path.write_bytes(b"refreshed-preview")
            return preview_path

        monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", _stub_render)

        served_path, served_type, served_name = folder.resolve_preview(saved.id, user)

        assert served_path == cached_preview
        assert served_type == "image/png"
        assert served_name == "preview.png"
        assert cached_preview.read_bytes() == b"refreshed-preview"

        refreshed_meta = folder._read_meta(stored_dir)
        assert refreshed_meta["preview_rendered_with"] == DocumentPreviewAssets.PDF_RENDERER_ID
    finally:
        get_settings.cache_clear()
|
||
|
||
|
||
def test_receipt_folder_pdf_preview_falls_back_to_source_when_render_fonts_missing(
    monkeypatch,
    tmp_path,
) -> None:
    """Check that a failing re-render makes the preview fall back to the source PDF.

    The stored metadata is given a different renderer id, and the stubbed
    renderer raises a missing-language-pack error. ``resolve_preview`` must
    then serve the original PDF and the metadata must describe a ``pdf``
    preview with an empty renderer marker.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)
        broken_bytes = b"broken-preview"
        broken_b64 = base64.b64encode(broken_bytes).decode('ascii')
        inline_preview = f"data:image/png;base64,{broken_b64}"

        folder = ReceiptFolderService()
        ocr_document = OcrRecognizeDocumentRead(
            filename="2月20_武汉-上海.pdf",
            media_type="application/pdf",
            preview_kind="image",
            preview_data_url=inline_preview,
        )
        saved = folder.save_receipt(
            filename="2月20_武汉-上海.pdf",
            content=b"%PDF-1.7 fake",
            media_type="application/pdf",
            current_user=user,
            document=ocr_document,
        )

        stored_dir = next(folder.root.glob("pytest/*"))
        # Record a renderer id that differs from the one asserted elsewhere in
        # this module, before resolving the preview.
        outdated_meta = folder._read_meta(stored_dir)
        outdated_meta["preview_rendered_with"] = "pdftoppm-png-r160-poppler-data"
        folder._write_meta(stored_dir, outdated_meta)

        def _failing_render(*, pdf_path, preview_path, timeout_seconds):
            # Stand-in renderer that always fails with a font/language-pack error.
            raise RuntimeError("Missing language pack for 'Adobe-GB1' mapping")

        monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", _failing_render)

        served_path, served_type, served_name = folder.resolve_preview(saved.id, user)

        assert served_path == stored_dir / "2月20_武汉-上海.pdf"
        assert served_type == "application/pdf"
        assert served_name == "2月20_武汉-上海.pdf"

        fallback_meta = folder._read_meta(stored_dir)
        assert fallback_meta["preview_kind"] == "pdf"
        assert fallback_meta["preview_file_name"] == "2月20_武汉-上海.pdf"
        assert fallback_meta["preview_media_type"] == "application/pdf"
        assert fallback_meta["preview_rendered_with"] == ""
    finally:
        get_settings.cache_clear()
|
||
|
||
|
||
def test_receipt_folder_train_ticket_extracts_passenger_from_id_line_and_purchase_name(
    monkeypatch,
    tmp_path,
) -> None:
    """Check passenger and ticket details extracted from a realistic e-ticket OCR text.

    The passenger name appears both after the masked ID number and in the
    purchaser line. The OCR-supplied merchant field is a truncated title, and
    the saved receipt is expected to report "中国铁路" instead.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)
        folder = ReceiptFolderService()

        ocr_text = (
            "电子发票(铁路电子客票)\n"
            "发票号码:26429165800002785705 湖北\n"
            "开票日期:2026年05月18日\n"
            "武汉站 G458 上海虹桥站\n"
            "Wuhan Shanghaihongqiao\n"
            "2026年02月20日 07:55开 06车01B号 二等座\n"
            "票价:¥354.00\n"
            "4201061987****1615 曹笑竹\n"
            "电子客票号:6580061086021391007342026\n"
            "购买方名称:曹笑竹 统一社会信用代码:\n"
            "买票请到12306 发货请到95306\n"
            "中国铁路祝您旅途愉快"
        )
        # Fields as an upstream recogniser might have produced them.
        recognised_fields = [
            OcrRecognizeFieldRead(key="merchant_name", label="商户", value="电子发票(铁路"),
            OcrRecognizeFieldRead(key="amount", label="金额", value="354元"),
            OcrRecognizeFieldRead(key="date", label="列车出发时间", value="2026-02-20 07:55"),
            OcrRecognizeFieldRead(key="trip_no", label="车次", value="G458"),
            OcrRecognizeFieldRead(key="route", label="行程", value="武汉-上海"),
        ]
        ocr_document = OcrRecognizeDocumentRead(
            filename="2月20_武汉-上海.pdf",
            media_type="application/pdf",
            text=ocr_text,
            summary="电子发票(铁路电子客票);发票监;统一 制",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            document_fields=recognised_fields,
        )
        saved = folder.save_receipt(
            filename="2月20_武汉-上海.pdf",
            content=b"%PDF-1.4 fake",
            media_type="application/pdf",
            current_user=user,
            document=ocr_document,
        )

        assert saved.merchant_name == "中国铁路"

        stored = folder.get_receipt(saved.id, user)
        by_label = {entry.label: entry.value for entry in stored.fields}
        expected_fields = {
            "商户": "中国铁路",
            "乘车人": "曹笑竹",
            "出发地点": "武汉",
            "到达地点": "上海虹桥",
            "身份证号": "4201061987****1615",
            "电子客票号": "6580061086021391007342026",
            "开票日期": "2026-05-18",
            "列车出发时间": "2026-02-20 07:55",
            "车厢": "06车",
            "座位号": "01B",
        }
        for label, value in expected_fields.items():
            assert by_label[label] == value
    finally:
        get_settings.cache_clear()
|
||
|
||
|
||
def test_receipt_folder_train_ticket_repairs_invalid_generated_fields_from_ocr_text(
    monkeypatch,
    tmp_path,
) -> None:
    """Check that implausible pre-filled ticket fields are replaced using the OCR text.

    The incoming document carries wrong values for departure, arrival and
    passenger (e.g. a seat class as the departure station); the stored detail
    is expected to contain the values found in the raw OCR text instead.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)
        folder = ReceiptFolderService()

        ocr_text = (
            "行程单示意\n"
            "出票渠道:示例平台\n"
            "非官方车票\n"
            "不可报销\n"
            "仅供演示\n"
            "创建日期:2026年02月15日\n"
            "订单号:DEMO202602210001\n"
            "单据编号:DEMO-IT-000001\n"
            "上海虹桥\n"
            "G999\n"
            "深圳北\n"
            "站\n"
            "站\n"
            "Shanghaihongqiao\n"
            "Shenzhenbei\n"
            "2026年02月21日\n"
            "08:30出发\n"
            "全程约7小时30分\n"
            "15:00到达\n"
            "DEMO\n"
            "乘客:示例旅客\n"
            "车厢:05车\n"
            "席别:二等座\n"
            "-\n"
            "扫码无效\n"
            "证件号:310101199001010000\n"
            "座位:08A\n"
            "票价:¥438.00\n"
            "仅为演示"
        )
        # The last three entries are deliberately wrong and should be repaired.
        recognised_fields = [
            OcrRecognizeFieldRead(key="amount", label="金额", value="438元"),
            OcrRecognizeFieldRead(key="date", label="列车出发时间", value="2026-02-21 08:30"),
            OcrRecognizeFieldRead(key="invoice_number", label="票据号码", value="DEMO202602210001"),
            OcrRecognizeFieldRead(key="trip_no", label="车次", value="G999"),
            OcrRecognizeFieldRead(key="route", label="行程", value="上海-深圳"),
            OcrRecognizeFieldRead(key="departure_station", label="出发地点", value="二等座"),
            OcrRecognizeFieldRead(key="arrival_station", label="到达地点", value="扫码无效"),
            OcrRecognizeFieldRead(key="passenger_name", label="乘车人", value="席别二等座"),
        ]
        ocr_document = OcrRecognizeDocumentRead(
            filename="2月21日_上海-深圳.png",
            media_type="image/png",
            text=ocr_text,
            summary="行程单示意;出票渠道:示例平台;非官方车票",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            document_fields=recognised_fields,
        )
        saved = folder.save_receipt(
            filename="2月21日_上海-深圳.png",
            content=b"fake image",
            media_type="image/png",
            current_user=user,
            document=ocr_document,
        )

        stored = folder.get_receipt(saved.id, user)
        by_label = {entry.label: entry.value for entry in stored.fields}
        expected_fields = {
            "出发地点": "上海虹桥",
            "到达地点": "深圳北",
            "乘车人": "示例旅客",
            "身份证号": "310101199001010000",
            "席别": "二等座",
            "车厢": "05车",
            "座位号": "08A",
            "票价": "438.00元",
        }
        for label, value in expected_fields.items():
            assert by_label[label] == value
    finally:
        get_settings.cache_clear()
|
||
|
||
|
||
def test_receipt_folder_delete_removes_duplicate_marker(monkeypatch, tmp_path) -> None:
    """Check that deleting a receipt also stops it from being reported as a duplicate.

    Before deletion, looking up the same filename and content finds the saved
    receipt; after deletion, the receipt directory is gone and the same lookup
    returns ``None``.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)
        folder = ReceiptFolderService()
        payload = b"%PDF-1.4 same receipt"

        ocr_document = OcrRecognizeDocumentRead(
            filename="same-receipt.pdf",
            media_type="application/pdf",
            text="same receipt amount 354",
            document_type="other",
            document_type_label="其他单据",
            scene_code="other",
            scene_label="其他票据",
        )
        saved = folder.save_receipt(
            filename="same-receipt.pdf",
            content=payload,
            media_type="application/pdf",
            current_user=user,
            document=ocr_document,
        )
        stored_dir = folder.root / "pytest" / saved.id

        assert stored_dir.exists()
        match_before = folder.find_duplicate_receipt(
            filename="same-receipt.pdf",
            content=payload,
            current_user=user,
        )
        assert match_before is not None
        assert match_before.id == saved.id

        folder.delete_receipt(receipt_id=saved.id, current_user=user)

        assert not stored_dir.exists()
        match_after = folder.find_duplicate_receipt(
            filename="same-receipt.pdf",
            content=payload,
            current_user=user,
        )
        assert match_after is None
    finally:
        get_settings.cache_clear()
|
||
|
||
|
||
def test_receipt_folder_duplicate_uses_newer_ocr_when_existing_meta_is_weaker(monkeypatch, tmp_path) -> None:
    """Check that re-uploading identical content upgrades a weakly recognised receipt.

    The first save stores an "other" document with only a warning. Persisting
    a batch with the same bytes but a richer train-ticket result must reuse
    the existing receipt id, return the richer classification with a
    duplicate-upload warning, and update the stored receipt accordingly.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)
        folder = ReceiptFolderService()
        payload = b"%PDF-1.7 same train ticket"

        # First upload: recognition produced no text, only a render warning.
        weak_document = OcrRecognizeDocumentRead(
            filename="2月20_武汉-上海.pdf",
            media_type="application/pdf",
            document_type="other",
            document_type_label="其他单据",
            scene_code="other",
            scene_label="其他票据",
            warnings=["PDF 转图片失败:Missing language pack for Adobe-GB1"],
        )
        first_saved = folder.save_receipt(
            filename="2月20_武汉-上海.pdf",
            content=payload,
            media_type="application/pdf",
            current_user=user,
            document=weak_document,
        )

        # Second upload of the same bytes with a fuller recognition result.
        strong_document = OcrRecognizeDocumentRead(
            filename="2月20_武汉-上海.pdf",
            media_type="application/pdf",
            text="G458 Wuhan Shanghaihongqiao 2026 02 20 07:55 票价: 354.00 12306",
            summary="Wuhan Shanghaihongqiao G458 354.00",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            document_fields=[
                OcrRecognizeFieldRead(key="amount", label="金额", value="354元"),
                OcrRecognizeFieldRead(key="trip_no", label="车次/航班", value="G458"),
                OcrRecognizeFieldRead(key="route", label="行程", value="武汉-上海"),
            ],
        )
        persisted = folder.persist_ocr_batch(
            files=[("2月20_武汉-上海.pdf", payload, "application/pdf")],
            result=OcrRecognizeBatchRead(
                total_file_count=1,
                success_count=1,
                documents=[strong_document],
            ),
            current_user=user,
        )

        returned = persisted.documents[0]
        assert returned.receipt_id == first_saved.id
        assert returned.document_type == "train_ticket"
        assert returned.document_type_label == "火车/高铁票"
        assert any(entry.label == "金额" and entry.value == "354元" for entry in returned.document_fields)
        assert any("重复上传" in note for note in returned.warnings)

        upgraded = folder.get_receipt(first_saved.id, user)
        assert upgraded.document_type == "train_ticket"
        assert upgraded.document_type_label == "火车/高铁票"
        upgraded_by_label = {entry.label: entry.value for entry in upgraded.fields}
        assert upgraded_by_label["金额"] == "354元"
    finally:
        get_settings.cache_clear()
|
||
|
||
|
||
def test_receipt_folder_recovers_train_ticket_detail_from_other_english_ocr(monkeypatch, tmp_path) -> None:
    """Check that a document typed "other" is reclassified as a train ticket.

    The OCR text has lost its Chinese labels (only colons, digits and pinyin
    station names remain). The saved receipt is still expected to come back as
    a train ticket with route, trip number, departure time, price, ID number,
    carriage and seat filled in, and without a passenger field.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)
        folder = ReceiptFolderService()

        ocr_text = (
            ":26429165800002785705\n"
            ":2026 05 18\n"
            "G458\n"
            "Wuhan\n"
            "Shanghaihongqiao\n"
            "2026 02 20 07:55\n"
            "06 01B\n"
            ": 354.00\n"
            "4201061987****1615\n"
            ":6580061086021391007342026\n"
            "12306 95306"
        )
        ocr_document = OcrRecognizeDocumentRead(
            filename="2月20_武汉-上海.pdf",
            media_type="application/pdf",
            text=ocr_text,
            summary="Wuhan Shanghaihongqiao G458 354.00",
            document_type="other",
            document_type_label="其他单据",
            scene_code="other",
            scene_label="其他票据",
        )
        saved = folder.save_receipt(
            filename="2月20_武汉-上海.pdf",
            content=b"%PDF-1.4 fake",
            media_type="application/pdf",
            current_user=user,
            document=ocr_document,
        )

        # Classification and summary values on the returned receipt.
        assert saved.document_type == "train_ticket"
        assert saved.document_type_label == "火车/高铁票"
        assert saved.scene_code == "travel"
        assert saved.amount == "354.00元"
        assert saved.document_date == "2026-02-20"
        assert saved.merchant_name == "中国铁路"

        stored = folder.get_receipt(saved.id, user)
        by_label = {entry.label: entry.value for entry in stored.fields}
        expected_fields = {
            "行程": "武汉-上海",
            "车次": "G458",
            "列车出发时间": "2026-02-20 07:55",
            "票价": "354.00元",
            "身份证号": "4201061987****1615",
            "车厢": "06车",
            "座位号": "01B",
        }
        for label, value in expected_fields.items():
            assert by_label[label] == value
        # The text contains no passenger name, so that field must be absent.
        assert "乘车人" not in by_label
    finally:
        get_settings.cache_clear()
|
||
|
||
|
||
def test_receipt_folder_unlink_receipts_for_claim_marks_linked_receipts_unlinked(monkeypatch, tmp_path) -> None:
    """Check that unlinking by claim id resets a linked receipt's claim metadata.

    A receipt saved with claim linkage reports status ``linked``; after
    ``unlink_receipts_for_claim("claim-1")`` returns 1, the same receipt
    reports status ``unlinked`` with empty claim id/number and no link time.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        user = CurrentUserContext(username="pytest", name="Py Test", role_codes=[], is_admin=False)
        folder = ReceiptFolderService()

        ocr_document = OcrRecognizeDocumentRead(
            filename="linked-receipt.pdf",
            media_type="application/pdf",
            text="invoice number 123 amount 100",
            document_type="vat_invoice",
            document_type_label="invoice",
            scene_code="other",
            scene_label="receipt",
        )
        saved = folder.save_receipt(
            filename="linked-receipt.pdf",
            content=b"%PDF-1.4 linked",
            media_type="application/pdf",
            current_user=user,
            linked_claim_id="claim-1",
            linked_claim_no="RE-001",
            linked_item_id="item-1",
            document=ocr_document,
        )

        before = folder.get_receipt(saved.id, user)
        assert before.status == "linked"
        assert before.linked_claim_id == "claim-1"
        assert before.linked_claim_no == "RE-001"

        # Exactly one receipt references the claim, so one should be unlinked.
        unlinked_count = folder.unlink_receipts_for_claim("claim-1")
        assert unlinked_count == 1

        after = folder.get_receipt(saved.id, user)
        assert after.status == "unlinked"
        assert after.linked_claim_id == ""
        assert after.linked_claim_no == ""
        assert after.linked_at is None
    finally:
        get_settings.cache_clear()
|