feat: 增强 AI-Core 文档解析器

- 添加 VLM 客户端支持
- 优化解析器配置
- 添加配置示例文件
- 生成新的 gRPC protobuf 文件

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-09 15:42:35 +08:00
parent ab7131eb05
commit 5012a25f99
10 changed files with 1177 additions and 42 deletions

View File

@@ -0,0 +1,18 @@
# AI-Core 配置文件示例
# 复制此文件为 config.yaml 并填入实际配置
# VLM 配置(可选)
# 如果配置了 VLM,图片文件会自动使用 VLM 解析
vlm:
enabled: false # 是否启用 VLM
provider: "openai" # openai / anthropic / qwen
model: "gpt-4o" # 模型名称
api_key: "" # API Key
base_url: "" # 自定义 API 地址(可选)
prompt: "" # 自定义提示词(可选)
# 服务配置
server:
port: 50051
max_workers: 10
log_level: INFO

84
ai-core/parser/config.py Normal file
View File

@@ -0,0 +1,84 @@
"""
配置管理模块
"""
import os
import yaml
import logging
from typing import Optional, Dict, Any
logger = logging.getLogger(__name__)

# Built-in defaults. Treated as read-only: load_config() copies these values
# for every call instead of modifying them.
DEFAULT_CONFIG = {
    "vlm": {
        "enabled": False,
        "provider": "openai",
        "model": "gpt-4o",
        "api_key": "",
        "base_url": "",
        "prompt": ""
    },
    "server": {
        "port": 50051,
        "max_workers": 10,
        "log_level": "INFO"
    }
}


def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
    """Build the effective configuration.

    Values are layered in increasing priority:

    1. ``DEFAULT_CONFIG``
    2. the YAML file at ``config_path`` (only sections that exist in
       ``DEFAULT_CONFIG`` are merged)
    3. the environment variables ``VLM_API_KEY``, ``VLM_PROVIDER`` and
       ``VLM_MODEL``; a non-empty ``VLM_API_KEY`` also enables the VLM

    Args:
        config_path: Path of the YAML file. When ``None``, ``config.yaml`` in
            the parent directory of this module's directory is used.

    Returns:
        A new dict on every call; ``DEFAULT_CONFIG`` is never modified. The
        VLM is reported as disabled when it is enabled without an API key.
    """
    if config_path is None:
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        config_path = os.path.join(base_dir, "config.yaml")

    # Every section is a flat dict, so a per-section copy is enough to keep
    # the module-level defaults untouched between calls.
    config = {section: dict(values) for section, values in DEFAULT_CONFIG.items()}

    if os.path.exists(config_path):
        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                file_config = yaml.safe_load(f)
        except Exception as e:
            # Best effort: an unreadable or malformed file falls back to the
            # defaults rather than stopping the service.
            logger.warning(f"Failed to load config: {e}")
        else:
            if isinstance(file_config, dict):
                for key, section in file_config.items():
                    if key not in config:
                        continue
                    if isinstance(section, dict):
                        config[key].update(section)
                    elif section is not None:
                        logger.warning(f"Ignoring config section '{key}': expected a mapping")
            elif file_config is not None:
                logger.warning("Failed to load config: top level of the file is not a mapping")
            logger.info(f"Loaded config from {config_path}")

    # Environment variables are applied last so that they really override
    # whatever the file contains.
    vlm = config["vlm"]
    vlm_api_key = os.environ.get("VLM_API_KEY", "")
    if vlm_api_key:
        vlm["api_key"] = vlm_api_key
        vlm["enabled"] = True
        logger.info("VLM enabled via environment variable")

    vlm_provider = os.environ.get("VLM_PROVIDER", "")
    if vlm_provider:
        vlm["provider"] = vlm_provider

    vlm_model = os.environ.get("VLM_MODEL", "")
    if vlm_model:
        vlm["model"] = vlm_model

    # An enabled VLM without credentials cannot work; report it as disabled.
    if vlm.get("enabled") and not vlm.get("api_key"):
        logger.warning("VLM enabled but API key is empty, disabling VLM")
        vlm["enabled"] = False

    return config
def get_vlm_config() -> Optional[Dict[str, Any]]:
    """Return the "vlm" section of the loaded configuration, or None.

    The section is returned only when it is marked enabled and carries a
    non-empty API key; the configuration is reloaded on every call.
    """
    vlm_section = load_config().get("vlm", {})
    if not vlm_section.get("enabled"):
        return None
    if not vlm_section.get("api_key"):
        return None
    return vlm_section
def get_server_config() -> Dict[str, Any]:
    """Return the "server" section of the loaded configuration.

    Falls back to the "server" entry of DEFAULT_CONFIG when the loaded
    configuration has no such section.
    """
    settings = load_config()
    if "server" in settings:
        return settings["server"]
    return DEFAULT_CONFIG["server"]

View File

@@ -1,32 +1,61 @@
import logging import logging
import os import os
import tempfile import tempfile
from typing import Optional from typing import Optional, Dict, Any
from markitdown import MarkItDown from markitdown import MarkItDown
from .vlm_client import VLMClient
from .config import get_vlm_config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class Parser: class Parser:
"""基于 MarkItDown 的统一文档解析器 """基于 MarkItDown + VLM 的统一文档解析器
支持格式PDF、DOCX、DOC、PPTX、PPT、XLSX、XLS、CSV、图片、网页、Markdown 等 支持格式PDF、DOCX、DOC、PPTX、PPT、XLSX、XLS、CSV、图片、网页、Markdown 等
VLM 解析:
- 方式一启动时配置config.yaml 或环境变量)
- 方式二gRPC 请求时传入 VLM 配置(优先级更高)
""" """
def __init__(self): def __init__(self):
self.markitdown = MarkItDown() self.markitdown = MarkItDown()
logger.info("Parser initialized with MarkItDown") self.vlm_client: Optional[VLMClient] = None
def parse(self, file_path: str, file_type: Optional[str] = None) -> dict: # 尝试加载配置的 VLM
vlm_config = get_vlm_config()
if vlm_config:
self.vlm_client = VLMClient(vlm_config)
logger.info(f"VLM enabled: provider={vlm_config.get('provider')}, model={vlm_config.get('model')}")
else:
logger.info("VLM not configured, using MarkItDown only")
def set_vlm_config(self, config: Dict[str, Any]) -> None:
    """Replace this parser's VLM client according to *config*.

    A client is created only when *config* is non-empty, marked enabled and
    carries an API key; in every other case the current client is dropped.
    """
    usable = bool(config) and bool(config.get("enabled")) and bool(config.get("api_key"))
    if not usable:
        self.vlm_client = None
        logger.info("VLM disabled")
        return

    self.vlm_client = VLMClient(config)
    logger.info(f"VLM enabled: provider={config.get('provider')}, model={config.get('model')}")
def parse(self, file_path: str, file_type: Optional[str] = None, vlm_config: Optional[Dict[str, Any]] = None) -> dict:
"""解析文档为 Markdown """解析文档为 Markdown
Args: Args:
file_path: 文件路径或 URL file_path: 文件路径或 URL
file_type: 文件类型可选MarkItDown 会自动检测) file_type: 文件类型可选MarkItDown 会自动检测)
vlm_config: VLM 配置(可选,优先级高于全局配置)
Returns: Returns:
dict: 包含 markdown 内容和元数据 dict: 包含 markdown 内容和元数据
""" """
# 如果有 VLM 配置,覆盖全局配置
if vlm_config:
self.set_vlm_config(vlm_config)
try: try:
logger.info(f"Parsing file: {file_path}") logger.info(f"Parsing file: {file_path}")
@@ -49,20 +78,31 @@ class Parser:
"error": str(e) "error": str(e)
} }
def parse_bytes(self, content: bytes, file_name: str, file_type: Optional[str] = None) -> dict: def parse_bytes(self, content: bytes, file_name: str, file_type: Optional[str] = None, vlm_config: Optional[Dict[str, Any]] = None) -> dict:
"""解析字节内容为 Markdown """解析字节内容为 Markdown
Args: Args:
content: 文件字节内容 content: 文件字节内容
file_name: 文件名 file_name: 文件名
file_type: 文件类型(可选) file_type: 文件类型(可选)
vlm_config: VLM 配置(可选,优先级高于全局配置)
Returns: Returns:
dict: 包含 markdown 内容和元数据 dict: 包含 markdown 内容和元数据
""" """
# 如果有 VLM 配置,覆盖全局配置
if vlm_config:
self.set_vlm_config(vlm_config)
try: try:
logger.info(f"Parsing bytes: {file_name}, size: {len(content)} bytes") logger.info(f"Parsing bytes: {file_name}, size: {len(content)} bytes")
# 检查是否应该使用 VLM根据文件名自动判断
if self._should_use_vlm(file_name):
logger.info("Using VLM for parsing")
return self._parse_with_vlm(content, file_name)
# 否则使用 MarkItDown
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file_name)[1] or '') as temp_file: with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file_name)[1] or '') as temp_file:
temp_file.write(content) temp_file.write(content)
temp_path = temp_file.name temp_path = temp_file.name
@@ -89,6 +129,65 @@ class Parser:
"error": str(e) "error": str(e)
} }
def _should_use_vlm(self, file_name: str) -> bool:
"""判断是否应该使用 VLM"""
if not self.vlm_client:
return False
# 图片文件使用 VLM
image_exts = ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff']
ext = os.path.splitext(file_name)[1].lower()
return ext in image_exts
def _parse_with_vlm(self, content: bytes, file_name: str) -> dict:
"""使用 VLM 解析"""
if not self.vlm_client:
return {
"success": False,
"content": "",
"content_length": 0,
"error": "VLM not configured"
}
# 确定 MIME 类型
ext = os.path.splitext(file_name)[1].lower()
mime_types = {
'.jpg': 'image/jpeg',
'.jpeg': 'image/jpeg',
'.png': 'image/png',
'.gif': 'image/gif',
'.bmp': 'image/bmp',
'.webp': 'image/webp',
'.tiff': 'image/tiff',
}
mime_type = mime_types.get(ext, 'image/png')
try:
result = self.vlm_client.analyze_image(content, mime_type)
if result.get("success"):
return {
"success": True,
"content": result["content"],
"content_length": len(result["content"]),
"metadata": {"vlm_used": True}
}
else:
return {
"success": False,
"content": "",
"content_length": 0,
"error": result.get("error", "VLM parsing failed")
}
except Exception as e:
logger.error(f"VLM parsing error: {e}")
return {
"success": False,
"content": "",
"content_length": 0,
"error": str(e)
}
if __name__ == "__main__": if __name__ == "__main__":
parser = Parser() parser = Parser()

View File

@@ -0,0 +1,209 @@
"""
VLM 客户端 - 用于调用 VLM 模型进行文档理解
"""
import logging
import base64
import requests
from typing import Optional, Dict, Any
logger = logging.getLogger(__name__)


class VLMClient:
    """HTTP client that asks a vision-language model (VLM) to transcribe an image.

    The provider is chosen by ``config["provider"]``: ``openai``,
    ``anthropic`` or ``qwen``. Every public call returns a dict that always
    has "success" and "content"; successful calls add "usage" and failed
    calls add "error". Request and response errors are caught and reported
    through that dict instead of being raised.
    """

    # Seconds to wait for a provider response.
    REQUEST_TIMEOUT = 120
    # Output token limit sent to the providers that receive one.
    MAX_TOKENS = 4096

    def __init__(self, config: Dict[str, Any]):
        """Store the client settings.

        Args:
            config: VLM settings with the optional keys ``provider``,
                ``model``, ``api_key``, ``base_url`` and ``prompt``. An empty
                ``prompt`` selects the built-in default prompt.
        """
        self.config = config
        self.provider = config.get("provider", "openai")
        self.model = config.get("model", "gpt-4o")
        self.api_key = config.get("api_key", "")
        self.base_url = config.get("base_url", "")
        self.prompt = config.get("prompt", "") or self._default_prompt()
        logger.info(f"VLMClient initialized: provider={self.provider}, model={self.model}")

    def _default_prompt(self) -> str:
        """Return the built-in prompt asking for a Markdown transcription."""
        return "\n".join((
            "请分析这张图片中的文档内容,并将其转换为 Markdown 格式。",
            "要求:",
            "1. 保持原文的格式和结构",
            "2. 表格用 Markdown 表格格式",
            "3. 标题用 # ## ### 标记",
            "4. 代码块用 ``` 标记",
            "5. 尽量保留原文的所有信息",
        ))

    @staticmethod
    def _failure(message: str) -> Dict[str, Any]:
        """Build the result dict used for every failed call."""
        return {
            "success": False,
            "content": "",
            "error": message
        }

    def _endpoint(self, default_base: str, path: str) -> str:
        """Join the configured (or default) base URL with *path*.

        Trailing slashes on the base are dropped so that a configured
        ``base_url`` such as ``https://host/v1/`` does not yield ``//``.
        """
        return (self.base_url or default_base).rstrip("/") + path

    def analyze_image(self, image_data: bytes, mime_type: str = "image/png") -> Dict[str, Any]:
        """Send an image to the configured provider.

        Args:
            image_data: Raw bytes of the image.
            mime_type: MIME type of the image.

        Returns:
            The result dict described on the class. Empty ``image_data`` and
            unknown providers are reported as failures without any request
            being made.
        """
        handlers = {
            "openai": self._call_openai,
            "anthropic": self._call_anthropic,
            "qwen": self._call_qwen,
        }
        handler = handlers.get(self.provider)
        if handler is None:
            return self._failure(f"Unsupported provider: {self.provider}")
        if not image_data:
            return self._failure("Empty image data")
        return handler(image_data, mime_type)

    def _call_chat_completions(
        self,
        image_data: bytes,
        mime_type: str,
        default_base: str,
        label: str,
        max_tokens: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Call a ``/chat/completions`` endpoint with the image as a data URL.

        Shared by the OpenAI and Qwen providers, which send the same payload.

        Args:
            image_data: Raw bytes of the image.
            mime_type: MIME type of the image.
            default_base: Base URL used when no ``base_url`` is configured.
            label: Provider name used in the error log message.
            max_tokens: Added to the payload as ``max_tokens`` when not None.
        """
        try:
            url = self._endpoint(default_base, "/chat/completions")
            image_base64 = base64.b64encode(image_data).decode("utf-8")
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }
            payload = {
                "model": self.model,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": self.prompt},
                            {
                                "type": "image_url",
                                "image_url": {"url": f"data:{mime_type};base64,{image_base64}"}
                            }
                        ]
                    }
                ]
            }
            if max_tokens is not None:
                payload["max_tokens"] = max_tokens

            response = requests.post(url, headers=headers, json=payload, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            result = response.json()
            return {
                "success": True,
                "content": result["choices"][0]["message"]["content"],
                "usage": result.get("usage", {})
            }
        except Exception as e:
            logger.error(f"{label} API error: {e}")
            return self._failure(str(e))

    def _call_openai(self, image_data: bytes, mime_type: str) -> Dict[str, Any]:
        """Call the OpenAI chat-completions API (default https://api.openai.com/v1)."""
        return self._call_chat_completions(
            image_data,
            mime_type,
            "https://api.openai.com/v1",
            "OpenAI",
            max_tokens=self.MAX_TOKENS,
        )

    def _call_anthropic(self, image_data: bytes, mime_type: str) -> Dict[str, Any]:
        """Call the Anthropic messages API (default https://api.anthropic.com/v1)."""
        try:
            url = self._endpoint("https://api.anthropic.com/v1", "/messages")
            image_base64 = base64.b64encode(image_data).decode("utf-8")
            headers = {
                "x-api-key": self.api_key,
                "anthropic-version": "2023-06-01",
                "Content-Type": "application/json"
            }
            payload = {
                "model": self.model,
                "max_tokens": self.MAX_TOKENS,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": self.prompt},
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": mime_type,
                                    "data": image_base64
                                }
                            }
                        ]
                    }
                ]
            }

            response = requests.post(url, headers=headers, json=payload, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            result = response.json()

            # The reply is a list of content blocks; collect every text block
            # instead of assuming the first block is the text.
            text_parts = [
                block["text"]
                for block in result["content"]
                if block.get("type", "text") == "text" and "text" in block
            ]
            if not text_parts:
                raise ValueError("Anthropic response contained no text block")

            return {
                "success": True,
                "content": "".join(text_parts),
                "usage": result.get("usage", {})
            }
        except Exception as e:
            logger.error(f"Anthropic API error: {e}")
            return self._failure(str(e))

    def _call_qwen(self, image_data: bytes, mime_type: str) -> Dict[str, Any]:
        """Call the Qwen VL API through its chat-completions compatible endpoint."""
        # No max_tokens is sent for Qwen; the provider default applies.
        return self._call_chat_completions(
            image_data,
            mime_type,
            "https://dashscope.aliyuncs.com/compatible-mode/v1",
            "Qwen",
        )

View File

@@ -16,6 +16,18 @@ message ParseRequest {
string file_type = 3; string file_type = 3;
string parser_engine = 4; string parser_engine = 4;
map<string, string> engine_overrides = 5; map<string, string> engine_overrides = 5;
// VLM 配置(可选)
VLMConfig vlm_config = 6;
}
message VLMConfig {
bool enabled = 1; // 是否启用 VLM
string provider = 2; // VLM 提供商: openai, anthropic, local 等
string model = 3; // 模型名称
string api_key = 4; // API Key
string base_url = 5; // 自定义 API 地址
string prompt = 6; // 自定义提示词
} }
message ParseResponse { message ParseResponse {

View File

@@ -24,7 +24,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x64ocument_parser.proto\x12\tdocparser\"\xdd\x01\n\x0cParseRequest\x12\x10\n\x08\x66ile_url\x18\x01 \x01(\t\x12\x11\n\tfile_name\x18\x02 \x01(\t\x12\x11\n\tfile_type\x18\x03 \x01(\t\x12\x15\n\rparser_engine\x18\x04 \x01(\t\x12\x46\n\x10\x65ngine_overrides\x18\x05 \x03(\x0b\x32,.docparser.ParseRequest.EngineOverridesEntry\x1a\x36\n\x14\x45ngineOverridesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x84\x01\n\rParseResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\x12\x0f\n\x07message\x18\x03 \x01(\t\x12\x16\n\x0e\x63ontent_length\x18\x04 \x01(\x05\x12\x11\n\tfile_type\x18\x05 \x01(\t\x12\x15\n\rparser_engine\x18\x06 \x01(\t\"\x07\n\x05\x45mpty\"\xca\x01\n\x18SupportedFormatsResponse\x12\x12\n\nfile_types\x18\x01 \x03(\t\x12]\n\x16\x66ile_type_descriptions\x18\x02 \x03(\x0b\x32=.docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry\x1a;\n\x19\x46ileTypeDescriptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"9\n\x0f\x45nginesResponse\x12&\n\x07\x65ngines\x18\x01 \x03(\x0b\x32\x15.docparser.EngineInfo\"|\n\nEngineInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x1c\n\x14supported_file_types\x18\x03 \x03(\t\x12\x11\n\tavailable\x18\x04 \x01(\x08\x12\x1a\n\x12unavailable_reason\x18\x05 \x01(\t2\xde\x01\n\x0e\x44ocumentParser\x12\x42\n\rParseDocument\x12\x17.docparser.ParseRequest\x1a\x18.docparser.ParseResponse\x12L\n\x13GetSupportedFormats\x12\x10.docparser.Empty\x1a#.docparser.SupportedFormatsResponse\x12:\n\nGetEngines\x12\x10.docparser.Empty\x1a\x1a.docparser.EnginesResponseB\x1aZ\x18x-agents/proto/docparserb\x06proto3') DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x64ocument_parser.proto\x12\tdocparser\"\x87\x02\n\x0cParseRequest\x12\x10\n\x08\x66ile_url\x18\x01 \x01(\t\x12\x11\n\tfile_name\x18\x02 
\x01(\t\x12\x11\n\tfile_type\x18\x03 \x01(\t\x12\x15\n\rparser_engine\x18\x04 \x01(\t\x12\x46\n\x10\x65ngine_overrides\x18\x05 \x03(\x0b\x32,.docparser.ParseRequest.EngineOverridesEntry\x12(\n\nvlm_config\x18\x06 \x01(\x0b\x32\x14.docparser.VLMConfig\x1a\x36\n\x14\x45ngineOverridesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"p\n\tVLMConfig\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12\x10\n\x08provider\x18\x02 \x01(\t\x12\r\n\x05model\x18\x03 \x01(\t\x12\x0f\n\x07\x61pi_key\x18\x04 \x01(\t\x12\x10\n\x08\x62\x61se_url\x18\x05 \x01(\t\x12\x0e\n\x06prompt\x18\x06 \x01(\t\"\x84\x01\n\rParseResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\x12\x0f\n\x07message\x18\x03 \x01(\t\x12\x16\n\x0e\x63ontent_length\x18\x04 \x01(\x05\x12\x11\n\tfile_type\x18\x05 \x01(\t\x12\x15\n\rparser_engine\x18\x06 \x01(\t\"\x07\n\x05\x45mpty\"\xca\x01\n\x18SupportedFormatsResponse\x12\x12\n\nfile_types\x18\x01 \x03(\t\x12]\n\x16\x66ile_type_descriptions\x18\x02 \x03(\x0b\x32=.docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry\x1a;\n\x19\x46ileTypeDescriptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"9\n\x0f\x45nginesResponse\x12&\n\x07\x65ngines\x18\x01 \x03(\x0b\x32\x15.docparser.EngineInfo\"|\n\nEngineInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x1c\n\x14supported_file_types\x18\x03 \x03(\t\x12\x11\n\tavailable\x18\x04 \x01(\x08\x12\x1a\n\x12unavailable_reason\x18\x05 \x01(\t2\xde\x01\n\x0e\x44ocumentParser\x12\x42\n\rParseDocument\x12\x17.docparser.ParseRequest\x1a\x18.docparser.ParseResponse\x12L\n\x13GetSupportedFormats\x12\x10.docparser.Empty\x1a#.docparser.SupportedFormatsResponse\x12:\n\nGetEngines\x12\x10.docparser.Empty\x1a\x1a.docparser.EnginesResponseB\x1aZ\x18x-agents/proto/docparserb\x06proto3')
_globals = globals() _globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -37,21 +37,23 @@ if not _descriptor._USE_C_DESCRIPTORS:
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._loaded_options = None _globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._loaded_options = None
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_options = b'8\001' _globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_options = b'8\001'
_globals['_PARSEREQUEST']._serialized_start=37 _globals['_PARSEREQUEST']._serialized_start=37
_globals['_PARSEREQUEST']._serialized_end=258 _globals['_PARSEREQUEST']._serialized_end=300
_globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_start=204 _globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_start=246
_globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_end=258 _globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_end=300
_globals['_PARSERESPONSE']._serialized_start=261 _globals['_VLMCONFIG']._serialized_start=302
_globals['_PARSERESPONSE']._serialized_end=393 _globals['_VLMCONFIG']._serialized_end=414
_globals['_EMPTY']._serialized_start=395 _globals['_PARSERESPONSE']._serialized_start=417
_globals['_EMPTY']._serialized_end=402 _globals['_PARSERESPONSE']._serialized_end=549
_globals['_SUPPORTEDFORMATSRESPONSE']._serialized_start=405 _globals['_EMPTY']._serialized_start=551
_globals['_SUPPORTEDFORMATSRESPONSE']._serialized_end=607 _globals['_EMPTY']._serialized_end=558
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_start=548 _globals['_SUPPORTEDFORMATSRESPONSE']._serialized_start=561
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_end=607 _globals['_SUPPORTEDFORMATSRESPONSE']._serialized_end=763
_globals['_ENGINESRESPONSE']._serialized_start=609 _globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_start=704
_globals['_ENGINESRESPONSE']._serialized_end=666 _globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_end=763
_globals['_ENGINEINFO']._serialized_start=668 _globals['_ENGINESRESPONSE']._serialized_start=765
_globals['_ENGINEINFO']._serialized_end=792 _globals['_ENGINESRESPONSE']._serialized_end=822
_globals['_DOCUMENTPARSER']._serialized_start=795 _globals['_ENGINEINFO']._serialized_start=824
_globals['_DOCUMENTPARSER']._serialized_end=1017 _globals['_ENGINEINFO']._serialized_end=948
_globals['_DOCUMENTPARSER']._serialized_start=951
_globals['_DOCUMENTPARSER']._serialized_end=1173
# @@protoc_insertion_point(module_scope) # @@protoc_insertion_point(module_scope)

View File

@@ -6,8 +6,9 @@ grpcio-tools>=1.60.0
grpcio-reflection>=1.60.0 grpcio-reflection>=1.60.0
protobuf>=4.25.0 protobuf>=4.25.0
# HTTP 请求 # 配置文件解析
pyyaml>=6.0
requests>=2.31.0 requests>=2.31.0
# 文档解析 # 文档解析 - markitdown 及其所有依赖
markitdown>=0.0.1 markitdown[pdf,docx,pptx,xlsx,all]>=0.0.1

View File

@@ -75,6 +75,21 @@ class DocumentParserServicer:
content_length=0, content_length=0,
) )
# 提取 VLM 配置
vlm_config = None
if hasattr(request, 'vlm_config') and request.vlm_config:
vlm_cfg = request.vlm_config
if vlm_cfg.enabled:
vlm_config = {
"enabled": vlm_cfg.enabled,
"provider": vlm_cfg.provider,
"model": vlm_cfg.model,
"api_key": vlm_cfg.api_key,
"base_url": vlm_cfg.base_url,
"prompt": vlm_cfg.prompt,
}
logger.info(f"VLM config: provider={vlm_cfg.provider}, model={vlm_cfg.model}")
logger.info("Downloading file from URL: %s", file_url) logger.info("Downloading file from URL: %s", file_url)
try: try:
@@ -95,9 +110,9 @@ class DocumentParserServicer:
content_length=0, content_length=0,
) )
logger.info("Parsing file with MarkItDown") logger.info("Parsing file with MarkItDown + VLM")
result = self.parser.parse_bytes(content, file_name) result = self.parser.parse_bytes(content, file_name, vlm_config=vlm_config)
if not result.get("success", False): if not result.get("success", False):
logger.warning("Parser returned failure: %s", result.get("error", "Unknown error")) logger.warning("Parser returned failure: %s", result.get("error", "Unknown error"))

View File

@@ -0,0 +1,498 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.11
// protoc v5.29.3
// source: document_parser.proto
package docparser
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
type ParseRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
FileUrl string `protobuf:"bytes,1,opt,name=file_url,json=fileUrl,proto3" json:"file_url,omitempty"`
FileName string `protobuf:"bytes,2,opt,name=file_name,json=fileName,proto3" json:"file_name,omitempty"`
FileType string `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"`
ParserEngine string `protobuf:"bytes,4,opt,name=parser_engine,json=parserEngine,proto3" json:"parser_engine,omitempty"`
EngineOverrides map[string]string `protobuf:"bytes,5,rep,name=engine_overrides,json=engineOverrides,proto3" json:"engine_overrides,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ParseRequest) Reset() {
*x = ParseRequest{}
mi := &file_document_parser_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *ParseRequest) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*ParseRequest) ProtoMessage() {}
func (x *ParseRequest) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[0]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use ParseRequest.ProtoReflect.Descriptor instead.
func (*ParseRequest) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{0}
}
func (x *ParseRequest) GetFileUrl() string {
if x != nil {
return x.FileUrl
}
return ""
}
func (x *ParseRequest) GetFileName() string {
if x != nil {
return x.FileName
}
return ""
}
func (x *ParseRequest) GetFileType() string {
if x != nil {
return x.FileType
}
return ""
}
func (x *ParseRequest) GetParserEngine() string {
if x != nil {
return x.ParserEngine
}
return ""
}
func (x *ParseRequest) GetEngineOverrides() map[string]string {
if x != nil {
return x.EngineOverrides
}
return nil
}
type ParseResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
Content string `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"`
Message string `protobuf:"bytes,3,opt,name=message,proto3" json:"message,omitempty"`
ContentLength int32 `protobuf:"varint,4,opt,name=content_length,json=contentLength,proto3" json:"content_length,omitempty"`
FileType string `protobuf:"bytes,5,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"`
ParserEngine string `protobuf:"bytes,6,opt,name=parser_engine,json=parserEngine,proto3" json:"parser_engine,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ParseResponse) Reset() {
*x = ParseResponse{}
mi := &file_document_parser_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *ParseResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*ParseResponse) ProtoMessage() {}
func (x *ParseResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use ParseResponse.ProtoReflect.Descriptor instead.
func (*ParseResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{1}
}
func (x *ParseResponse) GetSuccess() bool {
if x != nil {
return x.Success
}
return false
}
func (x *ParseResponse) GetContent() string {
if x != nil {
return x.Content
}
return ""
}
func (x *ParseResponse) GetMessage() string {
if x != nil {
return x.Message
}
return ""
}
func (x *ParseResponse) GetContentLength() int32 {
if x != nil {
return x.ContentLength
}
return 0
}
func (x *ParseResponse) GetFileType() string {
if x != nil {
return x.FileType
}
return ""
}
func (x *ParseResponse) GetParserEngine() string {
if x != nil {
return x.ParserEngine
}
return ""
}
type Empty struct {
state protoimpl.MessageState `protogen:"open.v1"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *Empty) Reset() {
*x = Empty{}
mi := &file_document_parser_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *Empty) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*Empty) ProtoMessage() {}
func (x *Empty) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Empty.ProtoReflect.Descriptor instead.
func (*Empty) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{2}
}
type SupportedFormatsResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
FileTypes []string `protobuf:"bytes,1,rep,name=file_types,json=fileTypes,proto3" json:"file_types,omitempty"`
FileTypeDescriptions map[string]string `protobuf:"bytes,2,rep,name=file_type_descriptions,json=fileTypeDescriptions,proto3" json:"file_type_descriptions,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *SupportedFormatsResponse) Reset() {
*x = SupportedFormatsResponse{}
mi := &file_document_parser_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *SupportedFormatsResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*SupportedFormatsResponse) ProtoMessage() {}
func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[3]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use SupportedFormatsResponse.ProtoReflect.Descriptor instead.
func (*SupportedFormatsResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{3}
}
func (x *SupportedFormatsResponse) GetFileTypes() []string {
if x != nil {
return x.FileTypes
}
return nil
}
func (x *SupportedFormatsResponse) GetFileTypeDescriptions() map[string]string {
if x != nil {
return x.FileTypeDescriptions
}
return nil
}
type EnginesResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
Engines []*EngineInfo `protobuf:"bytes,1,rep,name=engines,proto3" json:"engines,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *EnginesResponse) Reset() {
*x = EnginesResponse{}
mi := &file_document_parser_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *EnginesResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*EnginesResponse) ProtoMessage() {}
func (x *EnginesResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[4]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use EnginesResponse.ProtoReflect.Descriptor instead.
func (*EnginesResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{4}
}
func (x *EnginesResponse) GetEngines() []*EngineInfo {
if x != nil {
return x.Engines
}
return nil
}
type EngineInfo struct {
state protoimpl.MessageState `protogen:"open.v1"`
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
Description string `protobuf:"bytes,2,opt,name=description,proto3" json:"description,omitempty"`
SupportedFileTypes []string `protobuf:"bytes,3,rep,name=supported_file_types,json=supportedFileTypes,proto3" json:"supported_file_types,omitempty"`
Available bool `protobuf:"varint,4,opt,name=available,proto3" json:"available,omitempty"`
UnavailableReason string `protobuf:"bytes,5,opt,name=unavailable_reason,json=unavailableReason,proto3" json:"unavailable_reason,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *EngineInfo) Reset() {
*x = EngineInfo{}
mi := &file_document_parser_proto_msgTypes[5]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *EngineInfo) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*EngineInfo) ProtoMessage() {}
func (x *EngineInfo) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[5]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use EngineInfo.ProtoReflect.Descriptor instead.
func (*EngineInfo) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{5}
}
func (x *EngineInfo) GetName() string {
if x != nil {
return x.Name
}
return ""
}
func (x *EngineInfo) GetDescription() string {
if x != nil {
return x.Description
}
return ""
}
func (x *EngineInfo) GetSupportedFileTypes() []string {
if x != nil {
return x.SupportedFileTypes
}
return nil
}
func (x *EngineInfo) GetAvailable() bool {
if x != nil {
return x.Available
}
return false
}
func (x *EngineInfo) GetUnavailableReason() string {
if x != nil {
return x.UnavailableReason
}
return ""
}
var File_document_parser_proto protoreflect.FileDescriptor
const file_document_parser_proto_rawDesc = "" +
"\n" +
"\x15document_parser.proto\x12\tdocparser\"\xa5\x02\n" +
"\fParseRequest\x12\x19\n" +
"\bfile_url\x18\x01 \x01(\tR\afileUrl\x12\x1b\n" +
"\tfile_name\x18\x02 \x01(\tR\bfileName\x12\x1b\n" +
"\tfile_type\x18\x03 \x01(\tR\bfileType\x12#\n" +
"\rparser_engine\x18\x04 \x01(\tR\fparserEngine\x12W\n" +
"\x10engine_overrides\x18\x05 \x03(\v2,.docparser.ParseRequest.EngineOverridesEntryR\x0fengineOverrides\x1aB\n" +
"\x14EngineOverridesEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xc6\x01\n" +
"\rParseResponse\x12\x18\n" +
"\asuccess\x18\x01 \x01(\bR\asuccess\x12\x18\n" +
"\acontent\x18\x02 \x01(\tR\acontent\x12\x18\n" +
"\amessage\x18\x03 \x01(\tR\amessage\x12%\n" +
"\x0econtent_length\x18\x04 \x01(\x05R\rcontentLength\x12\x1b\n" +
"\tfile_type\x18\x05 \x01(\tR\bfileType\x12#\n" +
"\rparser_engine\x18\x06 \x01(\tR\fparserEngine\"\a\n" +
"\x05Empty\"\xf7\x01\n" +
"\x18SupportedFormatsResponse\x12\x1d\n" +
"\n" +
"file_types\x18\x01 \x03(\tR\tfileTypes\x12s\n" +
"\x16file_type_descriptions\x18\x02 \x03(\v2=.docparser.SupportedFormatsResponse.FileTypeDescriptionsEntryR\x14fileTypeDescriptions\x1aG\n" +
"\x19FileTypeDescriptionsEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"B\n" +
"\x0fEnginesResponse\x12/\n" +
"\aengines\x18\x01 \x03(\v2\x15.docparser.EngineInfoR\aengines\"\xc1\x01\n" +
"\n" +
"EngineInfo\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\x12 \n" +
"\vdescription\x18\x02 \x01(\tR\vdescription\x120\n" +
"\x14supported_file_types\x18\x03 \x03(\tR\x12supportedFileTypes\x12\x1c\n" +
"\tavailable\x18\x04 \x01(\bR\tavailable\x12-\n" +
"\x12unavailable_reason\x18\x05 \x01(\tR\x11unavailableReason2\xde\x01\n" +
"\x0eDocumentParser\x12B\n" +
"\rParseDocument\x12\x17.docparser.ParseRequest\x1a\x18.docparser.ParseResponse\x12L\n" +
"\x13GetSupportedFormats\x12\x10.docparser.Empty\x1a#.docparser.SupportedFormatsResponse\x12:\n" +
"\n" +
"GetEngines\x12\x10.docparser.Empty\x1a\x1a.docparser.EnginesResponseB\x1aZ\x18x-agents/proto/docparserb\x06proto3"
// State for the lazily compressed descriptor returned by
// file_document_parser_proto_rawDescGZIP: rawDescOnce guards the single
// compression run and rawDescData stores its result.
var (
file_document_parser_proto_rawDescOnce sync.Once
file_document_parser_proto_rawDescData []byte
)
func file_document_parser_proto_rawDescGZIP() []byte {
file_document_parser_proto_rawDescOnce.Do(func() {
file_document_parser_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_document_parser_proto_rawDesc), len(file_document_parser_proto_rawDesc)))
})
return file_document_parser_proto_rawDescData
}
// file_document_parser_proto_msgTypes has one MessageInfo slot per message;
// its length (8) matches NumMessages passed to the TypeBuilder in
// file_document_parser_proto_init.
var file_document_parser_proto_msgTypes = make([]protoimpl.MessageInfo, 8)
// file_document_parser_proto_goTypes lists the Go type for each descriptor,
// in the index order given by the trailing comments. The two map-entry
// messages (indexes 6 and 7) are listed as nil. The slice is set to nil once
// file_document_parser_proto_init has consumed it.
var file_document_parser_proto_goTypes = []any{
(*ParseRequest)(nil), // 0: docparser.ParseRequest
(*ParseResponse)(nil), // 1: docparser.ParseResponse
(*Empty)(nil), // 2: docparser.Empty
(*SupportedFormatsResponse)(nil), // 3: docparser.SupportedFormatsResponse
(*EnginesResponse)(nil), // 4: docparser.EnginesResponse
(*EngineInfo)(nil), // 5: docparser.EngineInfo
nil, // 6: docparser.ParseRequest.EngineOverridesEntry
nil, // 7: docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
}
// file_document_parser_proto_depIdxs is the dependency index table handed to
// the TypeBuilder. Per the trailing comments, entries 0-8 each name the
// goTypes index that a field type_name or a method input/output type refers
// to, and the final five entries are the start offsets of the sub-lists
// (method output types, method input types, extension type names, extension
// extendees, field type names). Entry order is significant; do not reorder.
// The slice is set to nil once file_document_parser_proto_init has consumed it.
var file_document_parser_proto_depIdxs = []int32{
6, // 0: docparser.ParseRequest.engine_overrides:type_name -> docparser.ParseRequest.EngineOverridesEntry
7, // 1: docparser.SupportedFormatsResponse.file_type_descriptions:type_name -> docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
5, // 2: docparser.EnginesResponse.engines:type_name -> docparser.EngineInfo
0, // 3: docparser.DocumentParser.ParseDocument:input_type -> docparser.ParseRequest
2, // 4: docparser.DocumentParser.GetSupportedFormats:input_type -> docparser.Empty
2, // 5: docparser.DocumentParser.GetEngines:input_type -> docparser.Empty
1, // 6: docparser.DocumentParser.ParseDocument:output_type -> docparser.ParseResponse
3, // 7: docparser.DocumentParser.GetSupportedFormats:output_type -> docparser.SupportedFormatsResponse
4, // 8: docparser.DocumentParser.GetEngines:output_type -> docparser.EnginesResponse
6, // [6:9] is the sub-list for method output_type
3, // [3:6] is the sub-list for method input_type
3, // [3:3] is the sub-list for extension type_name
3, // [3:3] is the sub-list for extension extendee
0, // [0:3] is the sub-list for field type_name
}
// init runs file_document_parser_proto_init at package initialization.
func init() {
	file_document_parser_proto_init()
}
// file_document_parser_proto_init builds the type information for
// document_parser.proto and stores the resulting file descriptor in
// File_document_parser_proto. It is a no-op when that variable is already set.
func file_document_parser_proto_init() {
// Already initialized: nothing to do.
if File_document_parser_proto != nil {
return
}
// x is a throwaway local type used only to obtain this package's import path
// via reflection.
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
// Pass the descriptor string's bytes as a []byte view without copying.
RawDescriptor: unsafe.Slice(unsafe.StringData(file_document_parser_proto_rawDesc), len(file_document_parser_proto_rawDesc)),
NumEnums: 0,
NumMessages: 8,
NumExtensions: 0,
NumServices: 1,
},
GoTypes: file_document_parser_proto_goTypes,
DependencyIndexes: file_document_parser_proto_depIdxs,
MessageInfos: file_document_parser_proto_msgTypes,
}.Build()
File_document_parser_proto = out.File
// The lookup tables are not referenced again in this function; drop the
// package-level references to them.
file_document_parser_proto_goTypes = nil
file_document_parser_proto_depIdxs = nil
}

View File

@@ -0,0 +1,197 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.6.1
// - protoc v5.29.3
// source: document_parser.proto
package docparser
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
// Full method names ("/<service>/<method>") for the DocumentParser RPCs. The
// client stubs pass them to Invoke and the server handlers place them in
// grpc.UnaryServerInfo.FullMethod.
const (
DocumentParser_ParseDocument_FullMethodName = "/docparser.DocumentParser/ParseDocument"
DocumentParser_GetSupportedFormats_FullMethodName = "/docparser.DocumentParser/GetSupportedFormats"
DocumentParser_GetEngines_FullMethodName = "/docparser.DocumentParser/GetEngines"
)
// DocumentParserClient is the client API for DocumentParser service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type DocumentParserClient interface {
// ParseDocument sends a ParseRequest and returns the ParseResponse.
ParseDocument(ctx context.Context, in *ParseRequest, opts ...grpc.CallOption) (*ParseResponse, error)
// GetSupportedFormats sends an Empty request and returns the SupportedFormatsResponse.
GetSupportedFormats(ctx context.Context, in *Empty, opts ...grpc.CallOption) (*SupportedFormatsResponse, error)
// GetEngines sends an Empty request and returns the EnginesResponse.
GetEngines(ctx context.Context, in *Empty, opts ...grpc.CallOption) (*EnginesResponse, error)
}
// documentParserClient is the DocumentParserClient implementation returned by
// NewDocumentParserClient; every RPC is issued through cc.
type documentParserClient struct {
	cc grpc.ClientConnInterface
}

// NewDocumentParserClient wraps cc in a DocumentParserClient.
func NewDocumentParserClient(cc grpc.ClientConnInterface) DocumentParserClient {
	return &documentParserClient{cc: cc}
}
// ParseDocument invokes the ParseDocument RPC with in and returns the decoded
// ParseResponse, or nil and the error reported by Invoke.
func (c *documentParserClient) ParseDocument(ctx context.Context, in *ParseRequest, opts ...grpc.CallOption) (*ParseResponse, error) {
	// grpc.StaticMethod() goes first, followed by the caller's options.
	callOpts := make([]grpc.CallOption, 0, len(opts)+1)
	callOpts = append(callOpts, grpc.StaticMethod())
	callOpts = append(callOpts, opts...)

	resp := &ParseResponse{}
	if err := c.cc.Invoke(ctx, DocumentParser_ParseDocument_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// GetSupportedFormats invokes the GetSupportedFormats RPC with in and returns
// the decoded SupportedFormatsResponse, or nil and the error reported by Invoke.
func (c *documentParserClient) GetSupportedFormats(ctx context.Context, in *Empty, opts ...grpc.CallOption) (*SupportedFormatsResponse, error) {
	// grpc.StaticMethod() goes first, followed by the caller's options.
	callOpts := make([]grpc.CallOption, 0, len(opts)+1)
	callOpts = append(callOpts, grpc.StaticMethod())
	callOpts = append(callOpts, opts...)

	resp := &SupportedFormatsResponse{}
	if err := c.cc.Invoke(ctx, DocumentParser_GetSupportedFormats_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// GetEngines invokes the GetEngines RPC with in and returns the decoded
// EnginesResponse, or nil and the error reported by Invoke.
func (c *documentParserClient) GetEngines(ctx context.Context, in *Empty, opts ...grpc.CallOption) (*EnginesResponse, error) {
	// grpc.StaticMethod() goes first, followed by the caller's options.
	callOpts := make([]grpc.CallOption, 0, len(opts)+1)
	callOpts = append(callOpts, grpc.StaticMethod())
	callOpts = append(callOpts, opts...)

	resp := &EnginesResponse{}
	if err := c.cc.Invoke(ctx, DocumentParser_GetEngines_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// DocumentParserServer is the server API for DocumentParser service.
// All implementations must embed UnimplementedDocumentParserServer
// for forward compatibility.
type DocumentParserServer interface {
// ParseDocument handles a ParseRequest and returns a ParseResponse.
ParseDocument(context.Context, *ParseRequest) (*ParseResponse, error)
// GetSupportedFormats handles an Empty request and returns a SupportedFormatsResponse.
GetSupportedFormats(context.Context, *Empty) (*SupportedFormatsResponse, error)
// GetEngines handles an Empty request and returns an EnginesResponse.
GetEngines(context.Context, *Empty) (*EnginesResponse, error)
// Unexported marker method; it is provided by UnimplementedDocumentParserServer.
mustEmbedUnimplementedDocumentParserServer()
}
// UnimplementedDocumentParserServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedDocumentParserServer struct{}
// ParseDocument is the default stub; it returns a codes.Unimplemented status error.
func (UnimplementedDocumentParserServer) ParseDocument(context.Context, *ParseRequest) (*ParseResponse, error) {
return nil, status.Error(codes.Unimplemented, "method ParseDocument not implemented")
}
// GetSupportedFormats is the default stub; it returns a codes.Unimplemented status error.
func (UnimplementedDocumentParserServer) GetSupportedFormats(context.Context, *Empty) (*SupportedFormatsResponse, error) {
return nil, status.Error(codes.Unimplemented, "method GetSupportedFormats not implemented")
}
// GetEngines is the default stub; it returns a codes.Unimplemented status error.
func (UnimplementedDocumentParserServer) GetEngines(context.Context, *Empty) (*EnginesResponse, error) {
return nil, status.Error(codes.Unimplemented, "method GetEngines not implemented")
}
// mustEmbedUnimplementedDocumentParserServer is the marker method required by
// DocumentParserServer and UnsafeDocumentParserServer; it does nothing.
func (UnimplementedDocumentParserServer) mustEmbedUnimplementedDocumentParserServer() {}
// testEmbeddedByValue is called by RegisterDocumentParserServer to detect a
// nil pointer embedding at registration time; it does nothing itself.
func (UnimplementedDocumentParserServer) testEmbeddedByValue() {}
// UnsafeDocumentParserServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to DocumentParserServer will
// result in compilation errors.
//
// Its only member is the unexported marker method also required by
// DocumentParserServer.
type UnsafeDocumentParserServer interface {
mustEmbedUnimplementedDocumentParserServer()
}
// RegisterDocumentParserServer registers srv with s under
// DocumentParser_ServiceDesc.
func RegisterDocumentParserServer(s grpc.ServiceRegistrar, srv DocumentParserServer) {
	// Probe for the testEmbeddedByValue hook before registering. A panic here
	// means UnimplementedDocumentParserServer was embedded by pointer and that
	// pointer is nil; failing at registration time is preferable to panicking
	// later when an unimplemented method is invoked at runtime.
	type embeddedByValueProbe interface{ testEmbeddedByValue() }
	if probe, ok := srv.(embeddedByValueProbe); ok {
		probe.testEmbeddedByValue()
	}
	s.RegisterService(&DocumentParser_ServiceDesc, srv)
}
// _DocumentParser_ParseDocument_Handler decodes a ParseRequest with dec and
// dispatches it to srv's ParseDocument method, routing the call through
// interceptor when one is supplied. A decode error is returned unchanged.
func _DocumentParser_ParseDocument_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	req := &ParseRequest{}
	if decodeErr := dec(req); decodeErr != nil {
		return nil, decodeErr
	}

	// invoke performs the actual service call; it is used directly when no
	// interceptor is present and handed to the interceptor otherwise.
	invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
		return srv.(DocumentParserServer).ParseDocument(ctx, msg.(*ParseRequest))
	}
	if interceptor == nil {
		return invoke(ctx, req)
	}

	callInfo := &grpc.UnaryServerInfo{
		Server:     srv,
		FullMethod: DocumentParser_ParseDocument_FullMethodName,
	}
	return interceptor(ctx, req, callInfo, invoke)
}
// _DocumentParser_GetSupportedFormats_Handler decodes an Empty request with
// dec and dispatches it to srv's GetSupportedFormats method, routing the call
// through interceptor when one is supplied. A decode error is returned
// unchanged.
func _DocumentParser_GetSupportedFormats_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	req := &Empty{}
	if decodeErr := dec(req); decodeErr != nil {
		return nil, decodeErr
	}

	// invoke performs the actual service call; it is used directly when no
	// interceptor is present and handed to the interceptor otherwise.
	invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
		return srv.(DocumentParserServer).GetSupportedFormats(ctx, msg.(*Empty))
	}
	if interceptor == nil {
		return invoke(ctx, req)
	}

	callInfo := &grpc.UnaryServerInfo{
		Server:     srv,
		FullMethod: DocumentParser_GetSupportedFormats_FullMethodName,
	}
	return interceptor(ctx, req, callInfo, invoke)
}
// _DocumentParser_GetEngines_Handler decodes an Empty request with dec and
// dispatches it to srv's GetEngines method, routing the call through
// interceptor when one is supplied. A decode error is returned unchanged.
func _DocumentParser_GetEngines_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	req := &Empty{}
	if decodeErr := dec(req); decodeErr != nil {
		return nil, decodeErr
	}

	// invoke performs the actual service call; it is used directly when no
	// interceptor is present and handed to the interceptor otherwise.
	invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
		return srv.(DocumentParserServer).GetEngines(ctx, msg.(*Empty))
	}
	if interceptor == nil {
		return invoke(ctx, req)
	}

	callInfo := &grpc.UnaryServerInfo{
		Server:     srv,
		FullMethod: DocumentParser_GetEngines_FullMethodName,
	}
	return interceptor(ctx, req, callInfo, invoke)
}
// DocumentParser_ServiceDesc is the grpc.ServiceDesc for DocumentParser service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
//
// It declares three unary methods, each bound to its generated handler, and
// no streaming methods.
var DocumentParser_ServiceDesc = grpc.ServiceDesc{
ServiceName: "docparser.DocumentParser",
HandlerType: (*DocumentParserServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "ParseDocument",
Handler: _DocumentParser_ParseDocument_Handler,
},
{
MethodName: "GetSupportedFormats",
Handler: _DocumentParser_GetSupportedFormats_Handler,
},
{
MethodName: "GetEngines",
Handler: _DocumentParser_GetEngines_Handler,
},
},
// No streaming RPCs are defined for this service.
Streams: []grpc.StreamDesc{},
Metadata: "document_parser.proto",
}