后端新增票据夹端点、数据模型和服务模块,优化 OCR 端点 Schema 和附件操作逻辑,完善员工行为画像服务和辅助函数;前端新增票据夹视图和服务层,优化文档中心样式和侧边栏导航,完善员工画像详情弹窗和权限控制;补充单元测试。
59 lines
3.7 KiB
Python
59 lines
3.7 KiB
Python
from __future__ import annotations
|
||
|
||
from pydantic import BaseModel, Field
|
||
|
||
|
||
class OcrRecognizeLineRead(BaseModel):
    # Read schema for one recognised text line: the text itself, its
    # confidence score, its box coordinates and an optional page index.

    # Required: no default is supplied for the line text.
    text: str = Field(
        description="识别出的文本行。",
    )
    # Validated to lie within the closed interval [0.0, 1.0].
    score: float = Field(
        description="该行识别置信度。",
        default=0.0,
        ge=0.0,
        le=1.0,
    )
    # Nested list of integer coordinates; a fresh empty list per instance.
    box: list[list[int]] = Field(
        description="文本框坐标。",
        default_factory=list,
    )
    # Zero-based page number per the field description; None when absent.
    page_index: int | None = Field(
        description="页码,从 0 开始。",
        default=None,
    )
|
||
|
||
|
||
class OcrRecognizeFieldRead(BaseModel):
    # Read schema for one structured field extracted from a document:
    # a key, a display label and the (possibly empty) string value.

    # Required: structured field key.
    key: str = Field(
        description="结构化字段键。",
    )
    # Required: display name shown for the field.
    label: str = Field(
        description="结构化字段展示名。",
    )
    # Optional: falls back to an empty string when no value is given.
    value: str = Field(
        description="结构化字段值。",
        default="",
    )
|
||
|
||
|
||
class OcrRecognizeDocumentRead(BaseModel):
    # Read schema for the OCR result of a single uploaded file. Only
    # `filename` and `media_type` are required; every other field has a
    # default so a partially populated result still validates.

    # --- Source file -----------------------------------------------------
    filename: str = Field(
        description="原始文件名。",
    )
    media_type: str = Field(
        description="文件媒体类型。",
    )

    # --- Engine identification -------------------------------------------
    engine: str = Field(
        description="使用的 OCR 引擎。",
        default="paddleocr_mobile",
    )
    model: str = Field(
        description="模型族标识。",
        default="PP-OCRv5_mobile",
    )

    # --- Recognised text and aggregate statistics ------------------------
    text: str = Field(
        description="合并后的完整 OCR 文本。",
        default="",
    )
    summary: str = Field(
        description="供对话和语义层复用的简短摘要。",
        default="",
    )
    # Average confidence, validated to lie within [0.0, 1.0].
    avg_score: float = Field(
        description="平均识别置信度。",
        default=0.0,
        ge=0.0,
        le=1.0,
    )
    line_count: int = Field(
        description="文本行数。",
        default=0,
        ge=0,
    )
    # Defaults to 1 although the validator accepts 0 as well.
    page_count: int = Field(
        description="识别页数。",
        default=1,
        ge=0,
    )

    # --- Document classification -----------------------------------------
    document_type: str = Field(
        description="识别出的票据类型编码。",
        default="other",
    )
    document_type_label: str = Field(
        description="识别出的票据类型名称。",
        default="其他单据",
    )
    scene_code: str = Field(
        description="识别出的票据场景编码。",
        default="other",
    )
    scene_label: str = Field(
        description="识别出的票据场景名称。",
        default="其他票据",
    )
    # The description states this is currently always "rule"; the type
    # itself is an unconstrained string.
    classification_source: str = Field(
        description="票据类型判断来源,当前固定为 rule。",
        default="rule",
    )
    classification_confidence: float = Field(
        description="票据类型判断置信度。",
        default=0.0,
        ge=0.0,
        le=1.0,
    )
    classification_evidence: list[str] = Field(
        description="票据类型判断依据摘要。",
        default_factory=list,
    )
    document_fields: list[OcrRecognizeFieldRead] = Field(
        description="识别出的结构化票据信息。",
        default_factory=list,
    )

    # --- Preview ---------------------------------------------------------
    preview_kind: str = Field(
        description="预览类型,PDF 转图后通常为 image。",
        default="",
    )
    preview_data_url: str = Field(
        description="用于前端展示的图片预览 data URL。",
        default="",
    )

    # --- Receipt-folder linkage (empty strings when not set) -------------
    receipt_id: str = Field(
        description="票据夹中的持久化票据 ID。",
        default="",
    )
    # Described as "unlinked" / "linked"; not enforced by validation here.
    receipt_status: str = Field(
        description="票据夹关联状态,unlinked / linked。",
        default="",
    )
    receipt_preview_url: str = Field(
        description="票据夹预览接口地址。",
        default="",
    )
    receipt_source_url: str = Field(
        description="票据夹原始文件接口地址。",
        default="",
    )

    # --- Diagnostics and per-line detail ---------------------------------
    warnings: list[str] = Field(
        description="该文件的识别提示或警告。",
        default_factory=list,
    )
    lines: list[OcrRecognizeLineRead] = Field(
        description="逐行识别结果。",
        default_factory=list,
    )
|
||
|
||
|
||
class OcrRecognizeBatchRead(BaseModel):
    # Read schema for a batch OCR response: engine/model identification,
    # file counters and the list of per-file results. Every field has a
    # default, so an empty batch validates.

    engine: str = Field(
        description="使用的 OCR 引擎。",
        default="paddleocr_mobile",
    )
    model: str = Field(
        description="模型族标识。",
        default="PP-OCRv5_mobile",
    )
    # Both counters are validated as non-negative; no cross-field check
    # ties them to each other or to len(documents).
    total_file_count: int = Field(
        description="本次上传的总文件数。",
        default=0,
        ge=0,
    )
    success_count: int = Field(
        description="成功进入 OCR 的文件数。",
        default=0,
        ge=0,
    )
    # One entry per file; a fresh empty list per instance by default.
    documents: list[OcrRecognizeDocumentRead] = Field(
        description="逐文件 OCR 结果。",
        default_factory=list,
    )
|