feat(server): 系统缓存清理接口与 OCR 文本层兜底增强

- 新增 system_cache 模块与 POST /settings/cache/clear,管理员可一键清理 OCR 结果/运行时配置/模型失败冷却/知识库索引/地点语义等进程内缓存
- 各服务暴露 clear_*_cache 方法(ocr/runtime_settings/runtime_chat/knowledge/application_location_semantic),SettingsCacheClearRead 汇总清理项
- OCR 转图片失败时尝试用 PDF 文本层兜底构建识别文档(有效字符≥8),并写结果缓存;OcrService 暴露 clear_result_cache
- receipt_folder 车票过滤补充身份证号关键词,附件文档/操作/展示模块同步适配
- 新增 system_cache_endpoints 测试,更新 openapi_schema/ocr/receipt_folder/attachment_association_jobs 测试
This commit is contained in:
caoxiaozhu
2026-06-24 12:35:51 +08:00
parent 50d2dc579a
commit 9a5ed0e94a
17 changed files with 932 additions and 13 deletions

View File

@@ -889,6 +889,8 @@ class ReceiptFolderTrainTicketMixin:
"无效",
"二维码",
"座席",
"身份",
"身份证号",
"证件",
)
):
@@ -993,6 +995,11 @@ class ReceiptFolderService(ReceiptFolderStorageMixin, ReceiptFolderItemMixin, Re
current_user=current_user,
)
if duplicate_receipt is not None:
duplicate_receipt = self._refresh_duplicate_receipt_from_document_if_stronger(
receipt=duplicate_receipt,
document=document,
current_user=current_user,
)
warning = "已上传过同样的单据,请不要重复上传。"
existing_warnings = [str(item) for item in list(document.warnings or []) if str(item).strip()]
enriched.append(
@@ -1061,6 +1068,7 @@ class ReceiptFolderService(ReceiptFolderStorageMixin, ReceiptFolderItemMixin, Re
if str(value).strip()
],
"document_fields": self._build_ocr_document_fields_from_meta(meta),
"preview_kind": str(meta.get("preview_kind") or document.preview_kind or ""),
}
)
@@ -1073,6 +1081,62 @@ class ReceiptFolderService(ReceiptFolderStorageMixin, ReceiptFolderItemMixin, Re
update["warnings"] = list(dict.fromkeys(warnings))
return document.model_copy(update=update)
def _refresh_duplicate_receipt_from_document_if_stronger(
    self,
    *,
    receipt: ReceiptFolderItemRead,
    document: OcrRecognizeDocumentRead,
    current_user: CurrentUserContext,
) -> ReceiptFolderItemRead:
    """Upgrade a duplicate receipt's stored OCR metadata when the new result is stronger.

    The metadata stored for ``receipt`` is compared with metadata built from
    ``document``. When ``_is_incoming_document_meta_stronger`` accepts the
    incoming metadata, the OCR-derived keys are copied over the stored ones,
    ``updated_at`` is stamped with the current UTC time, the metadata is
    written back and a freshly built item is returned.

    Args:
        receipt: The already-stored receipt that the upload duplicates.
        document: The OCR recognition result of the newly uploaded file.
        current_user: Context used to locate the receipt's metadata.

    Returns:
        The rebuilt item when the stored metadata was refreshed; otherwise
        the ``receipt`` argument unchanged (also when reading the stored
        metadata raises ``FileNotFoundError``).
    """
    # OCR-derived keys that are replaced wholesale by the incoming result.
    refreshed_keys = (
        "engine",
        "model",
        "ocr_text",
        "summary",
        "ocr_avg_score",
        "ocr_line_count",
        "page_count",
        "document_type",
        "document_type_label",
        "scene_code",
        "scene_label",
        "ocr_classification_source",
        "ocr_classification_confidence",
        "ocr_classification_evidence",
        "document_fields",
        "ocr_warnings",
    )

    try:
        stored_meta = self._read_receipt_meta(receipt.id, current_user)
    except FileNotFoundError:
        # Nothing on disk to refresh; hand back what the caller already has.
        return receipt

    fresh_meta = self._build_document_meta(document)
    if self._is_incoming_document_meta_stronger(stored_meta, fresh_meta):
        for name in refreshed_keys:
            stored_meta[name] = fresh_meta[name]
        stored_meta["updated_at"] = datetime.now(UTC).isoformat()
        owner_key = self._owner_key(current_user)
        self._write_meta(self._receipt_dir(owner_key, receipt.id), stored_meta)
        return self._build_item(stored_meta)

    return receipt
@staticmethod
def _is_incoming_document_meta_stronger(existing_meta: dict[str, Any], incoming_meta: dict[str, Any]) -> bool:
existing_type = str(existing_meta.get("document_type") or "other").strip() or "other"
incoming_type = str(incoming_meta.get("document_type") or "other").strip() or "other"
existing_fields = [field for field in list(existing_meta.get("document_fields") or []) if isinstance(field, dict)]
incoming_fields = [field for field in list(incoming_meta.get("document_fields") or []) if isinstance(field, dict)]
existing_text = str(existing_meta.get("ocr_text") or "").strip()
incoming_text = str(incoming_meta.get("ocr_text") or "").strip()
if incoming_type != "other" and existing_type == "other":
return True
if incoming_fields and not existing_fields:
return True
if incoming_text and not existing_text:
return True
return False
def _build_ocr_document_fields_from_meta(self, meta: dict[str, Any]) -> list[OcrRecognizeFieldRead]:
return [
OcrRecognizeFieldRead(