feat(server): 系统缓存清理接口与 OCR 文本层兜底增强
- 新增 system_cache 模块与 POST /settings/cache/clear,管理员可一键清理 OCR 结果/运行时配置/模型失败冷却/知识库索引/地点语义等进程内缓存
- 各服务暴露 clear_*_cache 方法(ocr/runtime_settings/runtime_chat/knowledge/application_location_semantic),SettingsCacheClearRead 汇总清理项
- OCR 转图片失败时尝试用 PDF 文本层兜底构建识别文档(有效字符≥8),并写结果缓存;OcrService 暴露 clear_result_cache
- receipt_folder 车票过滤补充身份证号关键词,附件文档/操作/展示模块同步适配
- 新增 system_cache_endpoints 测试,更新 openapi_schema/ocr/receipt_folder/attachment_association_jobs 测试
This commit is contained in:
@@ -889,6 +889,8 @@ class ReceiptFolderTrainTicketMixin:
|
||||
"无效",
|
||||
"二维码",
|
||||
"座席",
|
||||
"身份",
|
||||
"身份证号",
|
||||
"证件",
|
||||
)
|
||||
):
|
||||
@@ -993,6 +995,11 @@ class ReceiptFolderService(ReceiptFolderStorageMixin, ReceiptFolderItemMixin, Re
|
||||
current_user=current_user,
|
||||
)
|
||||
if duplicate_receipt is not None:
|
||||
duplicate_receipt = self._refresh_duplicate_receipt_from_document_if_stronger(
|
||||
receipt=duplicate_receipt,
|
||||
document=document,
|
||||
current_user=current_user,
|
||||
)
|
||||
warning = "已上传过同样的单据,请不要重复上传。"
|
||||
existing_warnings = [str(item) for item in list(document.warnings or []) if str(item).strip()]
|
||||
enriched.append(
|
||||
@@ -1061,6 +1068,7 @@ class ReceiptFolderService(ReceiptFolderStorageMixin, ReceiptFolderItemMixin, Re
|
||||
if str(value).strip()
|
||||
],
|
||||
"document_fields": self._build_ocr_document_fields_from_meta(meta),
|
||||
"preview_kind": str(meta.get("preview_kind") or document.preview_kind or ""),
|
||||
}
|
||||
)
|
||||
|
||||
@@ -1073,6 +1081,62 @@ class ReceiptFolderService(ReceiptFolderStorageMixin, ReceiptFolderItemMixin, Re
|
||||
update["warnings"] = list(dict.fromkeys(warnings))
|
||||
return document.model_copy(update=update)
|
||||
|
||||
def _refresh_duplicate_receipt_from_document_if_stronger(
    self,
    *,
    receipt: ReceiptFolderItemRead,
    document: OcrRecognizeDocumentRead,
    current_user: CurrentUserContext,
) -> ReceiptFolderItemRead:
    """Refresh a duplicate receipt's stored metadata from a stronger document.

    The stored metadata for ``receipt`` is read and compared with metadata
    built from ``document``. When ``_is_incoming_document_meta_stronger``
    reports the incoming metadata as stronger, the OCR-derived keys listed
    below are copied over the stored values, ``updated_at`` is stamped with
    ``datetime.now(UTC)``, the metadata is written back, and an item rebuilt
    from it is returned.

    Args:
        receipt: The existing receipt that was detected as a duplicate.
        document: The newly recognized document to compare against.
        current_user: User context used to read and locate the receipt data.

    Returns:
        ``receipt`` unchanged when reading its metadata raises
        ``FileNotFoundError`` or when the incoming metadata is not stronger;
        otherwise the item built from the refreshed metadata.
    """
    # Keys whose stored values are replaced by the incoming document's values.
    refreshed_keys = (
        "engine",
        "model",
        "ocr_text",
        "summary",
        "ocr_avg_score",
        "ocr_line_count",
        "page_count",
        "document_type",
        "document_type_label",
        "scene_code",
        "scene_label",
        "ocr_classification_source",
        "ocr_classification_confidence",
        "ocr_classification_evidence",
        "document_fields",
        "ocr_warnings",
    )

    try:
        stored_meta = self._read_receipt_meta(receipt.id, current_user)
    except FileNotFoundError:
        # No stored metadata to refresh; hand back the receipt as given.
        return receipt

    fresh_meta = self._build_document_meta(document)
    if self._is_incoming_document_meta_stronger(stored_meta, fresh_meta):
        for name in refreshed_keys:
            stored_meta[name] = fresh_meta[name]
        stored_meta["updated_at"] = datetime.now(UTC).isoformat()

        owner_key = self._owner_key(current_user)
        target_dir = self._receipt_dir(owner_key, receipt.id)
        self._write_meta(target_dir, stored_meta)
        return self._build_item(stored_meta)

    return receipt
|
||||
|
||||
@staticmethod
|
||||
def _is_incoming_document_meta_stronger(existing_meta: dict[str, Any], incoming_meta: dict[str, Any]) -> bool:
|
||||
existing_type = str(existing_meta.get("document_type") or "other").strip() or "other"
|
||||
incoming_type = str(incoming_meta.get("document_type") or "other").strip() or "other"
|
||||
existing_fields = [field for field in list(existing_meta.get("document_fields") or []) if isinstance(field, dict)]
|
||||
incoming_fields = [field for field in list(incoming_meta.get("document_fields") or []) if isinstance(field, dict)]
|
||||
existing_text = str(existing_meta.get("ocr_text") or "").strip()
|
||||
incoming_text = str(incoming_meta.get("ocr_text") or "").strip()
|
||||
|
||||
if incoming_type != "other" and existing_type == "other":
|
||||
return True
|
||||
if incoming_fields and not existing_fields:
|
||||
return True
|
||||
if incoming_text and not existing_text:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _build_ocr_document_fields_from_meta(self, meta: dict[str, Any]) -> list[OcrRecognizeFieldRead]:
|
||||
return [
|
||||
OcrRecognizeFieldRead(
|
||||
|
||||
Reference in New Issue
Block a user