feat(server): 新增文档智能识别服务,扩展OCR接口支持 Azure Document Intelligence
This commit is contained in:
@@ -10,6 +10,12 @@ class OcrRecognizeLineRead(BaseModel):
|
||||
page_index: int | None = Field(default=None, description="页码,从 0 开始。")
|
||||
|
||||
|
||||
# Model for one structured field: a key, a display label, and a value.
# Documented with comments rather than a class docstring so that the generated
# schema description (if any) is left exactly as it was.
class OcrRecognizeFieldRead(BaseModel):
    # Key of the structured field; no default is given, so it must be supplied.
    key: str = Field(
        description="结构化字段键。",
    )
    # Display name of the structured field; no default is given either.
    label: str = Field(
        description="结构化字段展示名。",
    )
    # Value of the structured field; falls back to an empty string.
    value: str = Field(
        default="",
        description="结构化字段值。",
    )
|
||||
|
||||
|
||||
class OcrRecognizeDocumentRead(BaseModel):
|
||||
filename: str = Field(description="原始文件名。")
|
||||
media_type: str = Field(description="文件媒体类型。")
|
||||
@@ -20,6 +26,19 @@ class OcrRecognizeDocumentRead(BaseModel):
|
||||
avg_score: float = Field(default=0.0, ge=0.0, le=1.0, description="平均识别置信度。")
|
||||
line_count: int = Field(default=0, ge=0, description="文本行数。")
|
||||
page_count: int = Field(default=1, ge=0, description="识别页数。")
|
||||
document_type: str = Field(default="other", description="识别出的票据类型编码。")
|
||||
document_type_label: str = Field(default="其他单据", description="识别出的票据类型名称。")
|
||||
scene_code: str = Field(default="other", description="识别出的票据场景编码。")
|
||||
scene_label: str = Field(default="其他票据", description="识别出的票据场景名称。")
|
||||
classification_source: str = Field(default="rule", description="票据类型判断来源,例如 rule / llm_text / llm_vision。")
|
||||
classification_confidence: float = Field(default=0.0, ge=0.0, le=1.0, description="票据类型判断置信度。")
|
||||
classification_evidence: list[str] = Field(default_factory=list, description="票据类型判断依据摘要。")
|
||||
document_fields: list[OcrRecognizeFieldRead] = Field(
|
||||
default_factory=list,
|
||||
description="识别出的结构化票据信息。",
|
||||
)
|
||||
preview_kind: str = Field(default="", description="预览类型,PDF 转图后通常为 image。")
|
||||
preview_data_url: str = Field(default="", description="用于前端展示的图片预览 data URL。")
|
||||
warnings: list[str] = Field(default_factory=list, description="该文件的识别提示或警告。")
|
||||
lines: list[OcrRecognizeLineRead] = Field(default_factory=list, description="逐行识别结果。")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user