feat(server): 系统缓存清理接口与 OCR 文本层兜底增强
- 新增 system_cache 模块与 POST /settings/cache/clear,管理员可一键清理 OCR 结果/运行时配置/模型失败冷却/知识库索引/地点语义等进程内缓存
- 各服务暴露 clear_*_cache 方法(ocr/runtime_settings/runtime_chat/knowledge/application_location_semantic),SettingsCacheClearRead 汇总清理项
- OCR 转图片失败时尝试用 PDF 文本层兜底构建识别文档(有效字符≥8),并写结果缓存;OcrService 暴露 clear_result_cache
- receipt_folder 车票过滤补充身份证号关键词,附件文档/操作/展示模块同步适配
- 新增 system_cache_endpoints 测试,更新 openapi_schema/ocr/receipt_folder/attachment_association_jobs 测试
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from collections.abc import Generator
|
||||
from datetime import UTC, date, datetime
|
||||
from decimal import Decimal
|
||||
@@ -16,6 +17,7 @@ from app.models.employee import Employee
|
||||
from app.models.financial_record import ExpenseClaim, ExpenseClaimItem
|
||||
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeFieldRead
|
||||
from app.services.attachment_association_jobs import clear_attachment_association_jobs_for_tests
|
||||
from app.services.expense_claims import ExpenseClaimService
|
||||
from app.services.expense_claim_attachment_storage import ExpenseClaimAttachmentStorage
|
||||
from app.services.ocr import OcrService
|
||||
from app.services.receipt_folder import ReceiptFolderService
|
||||
@@ -149,6 +151,13 @@ def fake_ocr_recognize(
|
||||
)
|
||||
|
||||
|
||||
def fake_ocr_recognize_without_preview(
    self,
    files: list[tuple[str, bytes, str | None]],
) -> OcrRecognizeBatchRead:
    """Stand-in for ``OcrService.recognize_files`` that delegates to ``fake_ocr_recognize``.

    NOTE(review): the name implies the returned documents carry no preview
    data; that depends on what ``fake_ocr_recognize`` produces -- confirm there.
    """
    batch = fake_ocr_recognize(self, files)
    return batch
|
||||
|
||||
|
||||
def test_attachment_association_job_links_receipts_after_conversation_exit(monkeypatch, tmp_path) -> None:
|
||||
monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
|
||||
get_settings.cache_clear()
|
||||
@@ -233,6 +242,233 @@ def test_attachment_association_job_links_receipts_after_conversation_exit(monke
|
||||
get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_attachment_association_keeps_receipt_folder_preview_and_fields_after_cache_clear(
    monkeypatch,
    tmp_path,
) -> None:
    """Attachment association must reuse the receipt's stored preview and fields.

    A receipt is saved with an image preview and structured fields, the OCR
    result cache is cleared, and an association job is run through the API.
    The claim item's attachment metadata and preview content are then expected
    to expose the same image preview and field values.
    """
    preview_bytes = b"receipt-folder-preview-png"
    encoded_preview = base64.b64encode(preview_bytes).decode("ascii")
    preview_data_url = f"data:image/png;base64,{encoded_preview}"

    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    clear_attachment_association_jobs_for_tests()
    monkeypatch.setattr(OcrService, "recognize_files", fake_ocr_recognize_without_preview)
    monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path / "attachments")
    try:
        client, session_factory = build_client(monkeypatch)
        current_user = CurrentUserContext(
            username="zhangsan@example.com",
            name="张三",
            role_codes=["user"],
            is_admin=False,
            employee_no="E10001",
        )
        with session_factory() as db:
            seed_travel_claim(db)

        # OCR document stored alongside the receipt; it is the only source of
        # the preview image and fields once the OCR cache has been cleared.
        ticket_document = OcrRecognizeDocumentRead(
            filename="2月20 武汉-上海.pdf",
            media_type="application/pdf",
            text="电子发票(铁路电子客票) 武汉站 G458 上海虹桥站 2026年02月20日 07:55开 二等座 票价 354.00",
            summary="铁路电子客票,武汉-上海,票价 354 元。",
            avg_score=0.96,
            line_count=1,
            page_count=1,
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            preview_kind="image",
            preview_data_url=preview_data_url,
            document_fields=[
                OcrRecognizeFieldRead(key="date", label="列车出发时间", value="2026-02-20 07:55"),
                OcrRecognizeFieldRead(key="route", label="行程", value="武汉-上海"),
                OcrRecognizeFieldRead(key="amount", label="金额", value="354元"),
            ],
        )
        receipt = ReceiptFolderService().save_receipt(
            filename="2月20 武汉-上海.pdf",
            content=b"%PDF-1.7 fake-ticket",
            media_type="application/pdf",
            current_user=current_user,
            document=ticket_document,
        )
        OcrService.clear_result_cache()

        auth_headers = {
            "x-auth-username": "zhangsan@example.com",
            "x-auth-name": "Zhang San",
            "x-auth-employee-no": "E10001",
            "x-auth-role-codes": "user",
        }
        jobs_url = "/api/v1/reimbursements/attachment-association-jobs"
        create_response = client.post(
            jobs_url,
            headers=auth_headers,
            json={
                "receipt_ids": [receipt.id],
                "prompt": "请帮我处理已上传的附件。",
                "conversation_id": "inline-test",
            },
        )
        assert create_response.status_code == 202
        job_id = create_response.json()["job_id"]

        status_response = client.get(f"{jobs_url}/{job_id}", headers=auth_headers)
        assert status_response.status_code == 200
        assert status_response.json()["status"] == "succeeded"

        with session_factory() as db:
            claim_query = (
                select(ExpenseClaim)
                .options(selectinload(ExpenseClaim.items))
                .where(ExpenseClaim.id == "claim-bg-association")
            )
            claim = db.scalar(claim_query)
            assert claim is not None
            attached_item = next(item for item in claim.items if item.invoice_id)

            metadata = ExpenseClaimService(db).get_claim_item_attachment_meta(
                claim_id=claim.id,
                item_id=attached_item.id,
                current_user=current_user,
            )
            assert metadata is not None
            assert metadata["preview_kind"] == "image"
            document_info = metadata["document_info"]
            assert document_info["document_type"] == "train_ticket"
            assert document_info["document_type_label"] == "火车/高铁票"
            actual_pairs = {(field["label"], field["value"]) for field in document_info["fields"]}
            expected_pairs = {
                ("列车出发时间", "2026-02-20 07:55"),
                ("行程", "武汉-上海"),
                ("金额", "354元"),
            }
            assert actual_pairs >= expected_pairs

            preview_path, media_type, filename = ExpenseClaimService(db).get_claim_item_attachment_preview_content(
                claim_id=claim.id,
                item_id=attached_item.id,
                current_user=current_user,
            )
            assert media_type == "image/png"
            assert filename.endswith(".png")
            assert preview_path.read_bytes() == preview_bytes
    finally:
        clear_attachment_association_jobs_for_tests()
        get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_attachment_meta_repairs_existing_pdf_fallback_from_source_receipt(
    monkeypatch,
    tmp_path,
) -> None:
    """Stale PDF-fallback attachment metadata is repaired from its source receipt.

    The attachment is written with "other"/PDF metadata that points at a
    receipt (``source_receipt_id``) holding a train-ticket OCR document with an
    image preview. Reading the metadata and preview through
    ``ExpenseClaimService`` is expected to return the receipt's richer data.
    """
    preview_bytes = b"legacy-repaired-preview-png"
    encoded_preview = base64.b64encode(preview_bytes).decode("ascii")
    preview_data_url = f"data:image/png;base64,{encoded_preview}"
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    monkeypatch.setattr(ExpenseClaimAttachmentStorage, "root", lambda self: tmp_path / "attachments")
    try:
        current_user = CurrentUserContext(
            username="zhangsan@example.com",
            name="张三",
            role_codes=["user"],
            is_admin=False,
            employee_no="E10001",
        )
        # Only the session factory is needed; the HTTP client is released at once.
        client, session_factory = build_client(monkeypatch)
        client.close()

        with session_factory() as db:
            claim = seed_travel_claim(db)
            item = claim.items[0]
            ticket_document = OcrRecognizeDocumentRead(
                filename="2月20 武汉-上海.pdf",
                media_type="application/pdf",
                text="电子发票(铁路电子客票) 武汉站 G458 上海虹桥站 2026年02月20日 07:55开 二等座 票价 354.00",
                summary="铁路电子客票,武汉-上海,票价 354 元。",
                avg_score=0.96,
                line_count=1,
                page_count=1,
                document_type="train_ticket",
                document_type_label="火车/高铁票",
                scene_code="travel",
                scene_label="差旅票据",
                preview_kind="image",
                preview_data_url=preview_data_url,
                document_fields=[
                    OcrRecognizeFieldRead(key="date", label="列车出发时间", value="2026-02-20 07:55"),
                    OcrRecognizeFieldRead(key="route", label="行程", value="武汉-上海"),
                    OcrRecognizeFieldRead(key="amount", label="金额", value="354元"),
                ],
            )
            receipt = ReceiptFolderService().save_receipt(
                filename="2月20 武汉-上海.pdf",
                content=b"%PDF-1.7 fake-ticket",
                media_type="application/pdf",
                current_user=current_user,
                document=ticket_document,
            )

            attachment_dir = tmp_path / "attachments" / claim.id / item.id
            attachment_dir.mkdir(parents=True)
            file_path = attachment_dir / "2月20_武汉-上海.pdf"
            file_path.write_bytes(b"%PDF-1.7 persisted-but-bad-meta")
            storage = ExpenseClaimAttachmentStorage()
            # presumably to_storage_key is a pure path mapping, so one call serves all uses
            storage_key = storage.to_storage_key(file_path)
            item.invoice_id = storage_key
            # Deliberately weak metadata: PDF preview, "other" type, no fields.
            stale_meta = {
                "file_name": file_path.name,
                "storage_key": storage_key,
                "media_type": "application/pdf",
                "size_bytes": file_path.stat().st_size,
                "previewable": True,
                "preview_kind": "pdf",
                "preview_storage_key": storage_key,
                "preview_media_type": "application/pdf",
                "preview_file_name": file_path.name,
                "document_info": {
                    "document_type": "other",
                    "document_type_label": "其他单据",
                    "scene_code": "other",
                    "scene_label": "其他票据",
                    "fields": [],
                },
                "source_receipt_id": receipt.id,
            }
            storage.write_meta(file_path, stale_meta)
            db.commit()

            service = ExpenseClaimService(db)
            metadata = service.get_claim_item_attachment_meta(
                claim_id=claim.id,
                item_id=item.id,
                current_user=current_user,
            )
            assert metadata is not None
            assert metadata["preview_kind"] == "image"
            document_info = metadata["document_info"]
            assert document_info["document_type"] == "train_ticket"
            assert document_info["document_type_label"] == "火车/高铁票"
            actual_pairs = {(field["label"], field["value"]) for field in document_info["fields"]}
            expected_pairs = {
                ("列车出发时间", "2026-02-20 07:55"),
                ("行程", "武汉-上海"),
                ("金额", "354元"),
            }
            assert actual_pairs >= expected_pairs

            preview_path, media_type, filename = service.get_claim_item_attachment_preview_content(
                claim_id=claim.id,
                item_id=item.id,
                current_user=current_user,
            )
            assert media_type == "image/png"
            assert filename.endswith(".png")
            assert preview_path.read_bytes() == preview_bytes
    finally:
        get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_attachment_association_job_fails_without_editable_claim(monkeypatch, tmp_path) -> None:
|
||||
monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
|
||||
get_settings.cache_clear()
|
||||
|
||||
@@ -308,6 +308,7 @@ def test_ocr_service_rejects_pdf_ocr_when_rendered_image_fonts_are_broken(
|
||||
monkeypatch.setattr(OcrService, "_convert_pdf_to_images", fake_convert_pdf_to_images)
|
||||
monkeypatch.setattr(OcrService, "_invoke_worker", fake_invoke_worker)
|
||||
get_settings.cache_clear()
|
||||
OcrService._result_cache.clear()
|
||||
try:
|
||||
result = OcrService().recognize_files(
|
||||
[
|
||||
@@ -315,6 +316,7 @@ def test_ocr_service_rejects_pdf_ocr_when_rendered_image_fonts_are_broken(
|
||||
]
|
||||
)
|
||||
finally:
|
||||
OcrService._result_cache.clear()
|
||||
get_settings.cache_clear()
|
||||
|
||||
failed = result.documents[0]
|
||||
@@ -324,6 +326,63 @@ def test_ocr_service_rejects_pdf_ocr_when_rendered_image_fonts_are_broken(
|
||||
assert failed.warnings == ["PDF 转图片失败:检测到中文字体映射缺失,未生成可 OCR 的图片。"]
|
||||
|
||||
|
||||
def test_ocr_service_uses_pdf_text_layer_when_rendering_fails(
    monkeypatch,
    tmp_path: Path,
) -> None:
    """When PDF rendering raises, recognition falls back to the PDF text layer.

    PDF-to-image conversion is patched to raise and the OCR worker is patched
    to fail the test if invoked; the patched text layer must be enough to yield
    a recognised train ticket with no preview and two warnings.
    """
    text_layer = "\n".join(
        [
            "G458",
            "Wuhan Shanghaihongqiao",
            "2026 02 20 07:55",
            "票价: 354.00",
            "12306 95306",
        ]
    )

    def fake_convert_pdf_to_images(self, *, pdf_path: Path, output_dir: Path) -> tuple[list[Path], bool]:
        raise RuntimeError("PDF 转图片失败:Missing language pack for Adobe-GB1")

    def fake_invoke_worker(
        self,
        *,
        python_bin: str,
        worker_path: str,
        input_paths: list[Path],
    ) -> dict:
        raise AssertionError("PDF 转图失败但文本层可用时,不应调用 OCR worker。")

    def fake_extract_pdf_text_layer(self, pdf_path):
        return text_layer

    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    patches = {
        "_resolve_python_bin": lambda self: "python",
        "_resolve_worker_path": lambda self: "worker.py",
        "_convert_pdf_to_images": fake_convert_pdf_to_images,
        "_invoke_worker": fake_invoke_worker,
        "_extract_pdf_text_layer": fake_extract_pdf_text_layer,
    }
    for attribute, replacement in patches.items():
        monkeypatch.setattr(OcrService, attribute, replacement)
    get_settings.cache_clear()
    OcrService._result_cache.clear()
    try:
        pdf_upload = ("2月20_武汉-上海.pdf", b"%PDF-1.7 text-layer-fallback", "application/pdf")
        result = OcrService().recognize_files([pdf_upload])
    finally:
        OcrService._result_cache.clear()
        get_settings.cache_clear()

    recovered = result.documents[0]
    assert result.success_count == 1
    assert recovered.document_type == "train_ticket"
    assert recovered.document_type_label == "火车/高铁票"
    assert recovered.preview_kind == ""
    assert recovered.preview_data_url == ""
    field_pairs = {(field.label, field.value) for field in recovered.document_fields}
    assert ("金额", "354元") in field_pairs
    assert ("车次/航班", "G458") in field_pairs
    assert ("行程", "武汉-上海") in field_pairs
    render_warning, fallback_warning = recovered.warnings[0], recovered.warnings[1]
    assert "PDF 转图片失败" in render_warning
    assert "已使用 PDF 文本层" in fallback_warning
|
||||
|
||||
|
||||
def test_ocr_pdf_conversion_tries_next_renderer_when_poppler_font_mapping_fails(
|
||||
monkeypatch,
|
||||
tmp_path: Path,
|
||||
@@ -339,6 +398,7 @@ def test_ocr_pdf_conversion_tries_next_renderer_when_poppler_font_mapping_fails(
|
||||
text: bool,
|
||||
timeout: int,
|
||||
check: bool,
|
||||
env: dict[str, str] | None = None,
|
||||
) -> subprocess.CompletedProcess[str]:
|
||||
calls.append(Path(command[0]).name)
|
||||
if Path(command[0]).name == "pdftoppm":
|
||||
@@ -437,6 +497,7 @@ def test_ocr_service_invokes_worker_even_when_pdf_text_layer_is_usable(
|
||||
),
|
||||
)
|
||||
get_settings.cache_clear()
|
||||
OcrService._result_cache.clear()
|
||||
try:
|
||||
result = OcrService().recognize_files(
|
||||
[
|
||||
@@ -444,6 +505,7 @@ def test_ocr_service_invokes_worker_even_when_pdf_text_layer_is_usable(
|
||||
]
|
||||
)
|
||||
finally:
|
||||
OcrService._result_cache.clear()
|
||||
get_settings.cache_clear()
|
||||
|
||||
recognized = result.documents[0]
|
||||
|
||||
@@ -49,5 +49,8 @@ def test_openapi_schema_includes_documented_backend_routes() -> None:
|
||||
analytics_get = schema["paths"]["/api/v1/analytics/system-dashboard"]["get"]
|
||||
assert analytics_get["summary"] == "查询系统看板真实指标"
|
||||
|
||||
settings_cache_clear_post = schema["paths"]["/api/v1/settings/cache/clear"]["post"]
|
||||
assert settings_cache_clear_post["summary"] == "清理系统缓存"
|
||||
|
||||
root_get = schema["paths"]["/"]["get"]
|
||||
assert root_get["summary"] == "服务根检查"
|
||||
|
||||
@@ -4,7 +4,7 @@ import base64
|
||||
|
||||
from app.api.deps import CurrentUserContext
|
||||
from app.core.config import get_settings
|
||||
from app.schemas.ocr import OcrRecognizeDocumentRead, OcrRecognizeFieldRead
|
||||
from app.schemas.ocr import OcrRecognizeBatchRead, OcrRecognizeDocumentRead, OcrRecognizeFieldRead
|
||||
from app.services.document_preview import DocumentPreviewAssets
|
||||
from app.services.receipt_folder import ReceiptFolderService
|
||||
|
||||
@@ -121,6 +121,53 @@ def test_receipt_folder_pdf_save_eagerly_renders_image_preview(monkeypatch, tmp_
|
||||
get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_receipt_folder_persist_enriches_pdf_ocr_document_with_image_preview(monkeypatch, tmp_path) -> None:
    """Persisting a PDF OCR batch adds a receipt id, preview URL and image preview kind.

    ``DocumentPreviewAssets.render_pdf_first_page`` is replaced by a fake that
    writes placeholder bytes to the requested preview path.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        current_user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )

        def fake_render_pdf_first_page(*, pdf_path, preview_path, timeout_seconds):
            preview_path.write_bytes(b"rendered-preview")
            return preview_path

        monkeypatch.setattr(DocumentPreviewAssets, "render_pdf_first_page", fake_render_pdf_first_page)

        service = ReceiptFolderService()
        # The incoming OCR document has no preview fields set.
        ocr_document = OcrRecognizeDocumentRead(
            filename="2月23_上海-武汉.pdf",
            media_type="application/pdf",
            text="铁路电子客票 上海虹桥 武汉 G456 354.00",
            summary="铁路电子客票,上海虹桥至武汉。",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
        )
        ocr_batch = OcrRecognizeBatchRead(
            total_file_count=1,
            success_count=1,
            documents=[ocr_document],
        )
        result = service.persist_ocr_batch(
            files=[("2月23_上海-武汉.pdf", b"%PDF-1.4 fake", "application/pdf")],
            result=ocr_batch,
            current_user=current_user,
        )

        document = result.documents[0]
        assert document.receipt_id
        expected_suffix = f"/receipt-folder/{document.receipt_id}/preview"
        assert document.receipt_preview_url.endswith(expected_suffix)
        assert document.preview_kind == "image"
    finally:
        get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_receipt_folder_pdf_preview_regenerates_stale_cached_image(monkeypatch, tmp_path) -> None:
|
||||
monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
|
||||
get_settings.cache_clear()
|
||||
@@ -433,6 +480,75 @@ def test_receipt_folder_delete_removes_duplicate_marker(monkeypatch, tmp_path) -
|
||||
get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_receipt_folder_duplicate_uses_newer_ocr_when_existing_meta_is_weaker(monkeypatch, tmp_path) -> None:
    """Re-uploading identical content with better OCR upgrades the stored receipt.

    The first save records an "other" document with a render-failure warning.
    Persisting the same bytes with a train-ticket OCR result must reuse the
    receipt id, return the richer document with a duplicate-upload warning,
    and leave the stored receipt holding the richer data.
    """
    monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
    get_settings.cache_clear()
    try:
        current_user = CurrentUserContext(
            username="pytest",
            name="Py Test",
            role_codes=[],
            is_admin=False,
        )
        service = ReceiptFolderService()
        content = b"%PDF-1.7 same train ticket"
        weak_document = OcrRecognizeDocumentRead(
            filename="2月20_武汉-上海.pdf",
            media_type="application/pdf",
            document_type="other",
            document_type_label="其他单据",
            scene_code="other",
            scene_label="其他票据",
            warnings=["PDF 转图片失败:Missing language pack for Adobe-GB1"],
        )
        stale_receipt = service.save_receipt(
            filename="2月20_武汉-上海.pdf",
            content=content,
            media_type="application/pdf",
            current_user=current_user,
            document=weak_document,
        )

        richer_document = OcrRecognizeDocumentRead(
            filename="2月20_武汉-上海.pdf",
            media_type="application/pdf",
            text="G458 Wuhan Shanghaihongqiao 2026 02 20 07:55 票价: 354.00 12306",
            summary="Wuhan Shanghaihongqiao G458 354.00",
            document_type="train_ticket",
            document_type_label="火车/高铁票",
            scene_code="travel",
            scene_label="差旅票据",
            document_fields=[
                OcrRecognizeFieldRead(key="amount", label="金额", value="354元"),
                OcrRecognizeFieldRead(key="trip_no", label="车次/航班", value="G458"),
                OcrRecognizeFieldRead(key="route", label="行程", value="武汉-上海"),
            ],
        )
        result = service.persist_ocr_batch(
            files=[("2月20_武汉-上海.pdf", content, "application/pdf")],
            result=OcrRecognizeBatchRead(
                total_file_count=1,
                success_count=1,
                documents=[richer_document],
            ),
            current_user=current_user,
        )

        document = result.documents[0]
        assert document.receipt_id == stale_receipt.id
        assert document.document_type == "train_ticket"
        assert document.document_type_label == "火车/高铁票"
        assert any(field.label == "金额" and field.value == "354元" for field in document.document_fields)
        assert any("重复上传" in warning for warning in document.warnings)

        # Re-read the stored receipt to check the upgrade was persisted.
        repaired = service.get_receipt(stale_receipt.id, current_user)
        assert repaired.document_type == "train_ticket"
        assert repaired.document_type_label == "火车/高铁票"
        repaired_values = {field.label: field.value for field in repaired.fields}
        assert repaired_values["金额"] == "354元"
    finally:
        get_settings.cache_clear()
|
||||
|
||||
|
||||
def test_receipt_folder_recovers_train_ticket_detail_from_other_english_ocr(monkeypatch, tmp_path) -> None:
|
||||
monkeypatch.setenv("STORAGE_ROOT_DIR", str(tmp_path / "storage"))
|
||||
get_settings.cache_clear()
|
||||
|
||||
91
server/tests/test_system_cache_endpoints.py
Normal file
91
server/tests/test_system_cache_endpoints.py
Normal file
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Generator
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.api.deps import get_db
|
||||
from app.db.base import Base
|
||||
from app.main import create_app
|
||||
from app.schemas.ocr import OcrRecognizeDocumentRead
|
||||
from app.services.ocr import OcrService
|
||||
|
||||
|
||||
def build_client() -> TestClient:
    """Return a ``TestClient`` whose ``get_db`` dependency uses in-memory SQLite.

    ``StaticPool`` with ``check_same_thread=False`` is used so every session
    shares the one in-memory connection on which the schema was created.
    """
    memory_engine = create_engine(
        "sqlite+pysqlite:///:memory:",
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(bind=memory_engine)
    make_session = sessionmaker(bind=memory_engine, autoflush=False, autocommit=False)

    def provide_db() -> Generator[Session, None, None]:
        # Leaving the ``with`` block closes the session, including on error.
        with make_session() as db:
            yield db

    app = create_app()
    app.dependency_overrides[get_db] = provide_db
    return TestClient(app)
|
||||
|
||||
|
||||
def _seed_ocr_cache() -> None:
    """Write one placeholder document into the OCR result cache under a fixed key."""
    cached_document = OcrRecognizeDocumentRead(
        filename="receipt.pdf",
        media_type="application/pdf",
        text="旧 OCR 缓存",
        summary="旧 OCR 缓存",
    )
    OcrService._write_cached_document("pytest-cache-key", cached_document)
|
||||
|
||||
|
||||
def test_clear_settings_cache_endpoint_clears_ocr_result_cache() -> None:
    """An admin POST to the cache-clear endpoint empties the OCR result cache.

    The cache is seeded with one entry; the response must report that entry
    under ``ocr_result_cache`` and the cache must be empty afterwards.
    """
    OcrService.clear_result_cache()
    try:
        _seed_ocr_cache()
        assert len(OcrService._result_cache) == 1

        client = build_client()
        response = client.post(
            "/api/v1/settings/cache/clear",
            headers={
                "x-auth-username": "admin",
                "x-auth-name": "Admin",
                "x-auth-is-admin": "true",
            },
        )

        assert response.status_code == 200
        payload = response.json()
        assert payload["totalCleared"] >= 1
        assert {
            "cacheKey": "ocr_result_cache",
            "label": "OCR 识别结果缓存",
            "clearedCount": 1,
        } in payload["items"]
        assert len(OcrService._result_cache) == 0
    finally:
        # _result_cache is class-level state: if an assertion above fails the
        # seeded entry would otherwise leak into tests that run afterwards.
        OcrService.clear_result_cache()
|
||||
|
||||
|
||||
def test_clear_settings_cache_endpoint_requires_admin() -> None:
    """A request without the admin header gets 403 and leaves the OCR cache intact."""
    OcrService.clear_result_cache()
    try:
        _seed_ocr_cache()

        client = build_client()
        response = client.post(
            "/api/v1/settings/cache/clear",
            headers={
                "x-auth-username": "ordinary-user",
                "x-auth-name": "Ordinary User",
            },
        )

        assert response.status_code == 403
        assert len(OcrService._result_cache) == 1
    finally:
        # Cleanup must also run when an assertion fails; otherwise the seeded
        # entry stays in the class-level cache and pollutes later tests.
        OcrService.clear_result_cache()
|
||||
Reference in New Issue
Block a user