feat: 增强 AI-Core 文档解析器

- 添加 VLM 客户端支持
- 优化解析器配置
- 添加配置示例文件
- 生成新的 gRPC protobuf 文件

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-09 15:42:35 +08:00
parent ab7131eb05
commit 5012a25f99
10 changed files with 1177 additions and 42 deletions

View File

@@ -0,0 +1,18 @@
# AI-Core 配置文件示例
# 复制此文件为 config.yaml 并填入实际配置
# VLM 配置(可选)
# 如果配置了 VLM，图片文件会自动使用 VLM 解析
vlm:
enabled: false # 是否启用 VLM
provider: "openai" # openai / anthropic / qwen
model: "gpt-4o" # 模型名称
api_key: "" # API Key
base_url: "" # 自定义 API 地址(可选)
prompt: "" # 自定义提示词(可选)
# 服务配置
server:
port: 50051
max_workers: 10
log_level: INFO

84
ai-core/parser/config.py Normal file
View File

@@ -0,0 +1,84 @@
"""
配置管理模块
"""
import os
import yaml
import logging
from typing import Optional, Dict, Any
logger = logging.getLogger(__name__)
# Built-in defaults. load_config() layers file and environment values on top
# of a *copy* of this mapping; the mapping itself is never modified.
DEFAULT_CONFIG = {
    "vlm": {
        "enabled": False,
        "provider": "openai",
        "model": "gpt-4o",
        "api_key": "",
        "base_url": "",
        "prompt": "",
    },
    "server": {
        "port": 50051,
        "max_workers": 10,
        "log_level": "INFO",
    },
}


def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
    """Build the effective configuration and return it as a new dict.

    Values are layered in this order (later wins):

    1. ``DEFAULT_CONFIG``
    2. the YAML file at ``config_path`` (only sections that exist in
       ``DEFAULT_CONFIG`` and are mappings are merged)
    3. the environment variables ``VLM_API_KEY``, ``VLM_PROVIDER`` and
       ``VLM_MODEL``; a non-empty ``VLM_API_KEY`` also switches VLM on

    Args:
        config_path: Path of the YAML file. When None, ``config.yaml`` in the
            parent directory of this module's directory is used.

    Returns:
        A fresh ``{"vlm": {...}, "server": {...}}`` mapping. Mutating it does
        not affect ``DEFAULT_CONFIG`` or later calls.
    """
    if config_path is None:
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        config_path = os.path.join(base_dir, "config.yaml")

    # Sections are flat dicts, so a two-level copy fully isolates the result
    # from the module-level defaults.
    config: Dict[str, Any] = {
        name: dict(section) for name, section in DEFAULT_CONFIG.items()
    }

    if os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                file_config = yaml.safe_load(f)
        except (OSError, ValueError, yaml.YAMLError) as e:
            # Best effort: an unreadable or malformed file falls back to
            # defaults + environment instead of aborting start-up.
            logger.warning(f"Failed to load config: {e}")
        else:
            if isinstance(file_config, dict):
                for key, section in file_config.items():
                    if key not in config:
                        continue
                    if isinstance(section, dict):
                        config[key].update(section)
                    else:
                        # e.g. a bare "vlm:" line parses to None; skip just
                        # this section rather than discarding the whole file.
                        logger.warning(
                            f"Ignoring config section '{key}': expected a mapping"
                        )
                logger.info(f"Loaded config from {config_path}")
            elif file_config:
                logger.warning("Failed to load config: top level is not a mapping")

    # Environment overrides are applied last so that they really do win over
    # whatever the file contains.
    vlm = config["vlm"]
    vlm_api_key = os.environ.get("VLM_API_KEY", "")
    if vlm_api_key:
        vlm["api_key"] = vlm_api_key
        vlm["enabled"] = True
        logger.info("VLM enabled via environment variable")

    vlm_provider = os.environ.get("VLM_PROVIDER", "")
    if vlm_provider:
        vlm["provider"] = vlm_provider

    vlm_model = os.environ.get("VLM_MODEL", "")
    if vlm_model:
        vlm["model"] = vlm_model

    # An enabled VLM without credentials cannot work; turn it off up front.
    if vlm["enabled"] and not vlm["api_key"]:
        logger.warning("VLM enabled but API key is empty, disabling VLM")
        vlm["enabled"] = False

    return config
def get_vlm_config() -> Optional[Dict[str, Any]]:
    """Return the "vlm" section of the loaded configuration when it is usable.

    Returns:
        The "vlm" mapping produced by ``load_config()`` if its "enabled" value
        is truthy and its "api_key" is non-empty; otherwise None.
    """
    loaded = load_config()
    vlm_section = loaded.get("vlm", {})
    if not vlm_section.get("enabled"):
        return None
    if not vlm_section.get("api_key"):
        return None
    return loaded["vlm"]
def get_server_config() -> Dict[str, Any]:
    """Return the "server" section of the loaded configuration.

    Falls back to ``DEFAULT_CONFIG["server"]`` when the loaded configuration
    has no "server" key.
    """
    fallback = DEFAULT_CONFIG["server"]
    loaded = load_config()
    if "server" in loaded:
        return loaded["server"]
    return fallback

View File

@@ -1,32 +1,61 @@
import logging
import os
import tempfile
from typing import Optional
from typing import Optional, Dict, Any
from markitdown import MarkItDown
from .vlm_client import VLMClient
from .config import get_vlm_config
logger = logging.getLogger(__name__)
class Parser:
"""基于 MarkItDown 的统一文档解析器
"""基于 MarkItDown + VLM 的统一文档解析器
支持格式PDF、DOCX、DOC、PPTX、PPT、XLSX、XLS、CSV、图片、网页、Markdown 等
VLM 解析:
- 方式一启动时配置config.yaml 或环境变量)
- 方式二gRPC 请求时传入 VLM 配置(优先级更高)
"""
def __init__(self):
self.markitdown = MarkItDown()
logger.info("Parser initialized with MarkItDown")
self.vlm_client: Optional[VLMClient] = None
def parse(self, file_path: str, file_type: Optional[str] = None) -> dict:
# 尝试加载配置的 VLM
vlm_config = get_vlm_config()
if vlm_config:
self.vlm_client = VLMClient(vlm_config)
logger.info(f"VLM enabled: provider={vlm_config.get('provider')}, model={vlm_config.get('model')}")
else:
logger.info("VLM not configured, using MarkItDown only")
def set_vlm_config(self, config: Dict[str, Any]) -> None:
    """Replace this parser's VLM client according to ``config``.

    A new ``VLMClient`` is created when ``config`` is non-empty and both its
    "enabled" and "api_key" values are truthy. In every other case the client
    is cleared, so later calls fall back to MarkItDown.

    Args:
        config: VLM settings; "enabled", "api_key", "provider" and "model"
            are the keys read here.
    """
    usable = config and config.get("enabled") and config.get("api_key")
    if not usable:
        self.vlm_client = None
        logger.info("VLM disabled")
        return

    self.vlm_client = VLMClient(config)
    logger.info(f"VLM enabled: provider={config.get('provider')}, model={config.get('model')}")
def parse(self, file_path: str, file_type: Optional[str] = None, vlm_config: Optional[Dict[str, Any]] = None) -> dict:
"""解析文档为 Markdown
Args:
file_path: 文件路径或 URL
file_type: 文件类型可选MarkItDown 会自动检测)
vlm_config: VLM 配置(可选,优先级高于全局配置)
Returns:
dict: 包含 markdown 内容和元数据
"""
# 如果有 VLM 配置,覆盖全局配置
if vlm_config:
self.set_vlm_config(vlm_config)
try:
logger.info(f"Parsing file: {file_path}")
@@ -49,20 +78,31 @@ class Parser:
"error": str(e)
}
def parse_bytes(self, content: bytes, file_name: str, file_type: Optional[str] = None) -> dict:
def parse_bytes(self, content: bytes, file_name: str, file_type: Optional[str] = None, vlm_config: Optional[Dict[str, Any]] = None) -> dict:
"""解析字节内容为 Markdown
Args:
content: 文件字节内容
file_name: 文件名
file_type: 文件类型(可选)
vlm_config: VLM 配置(可选,优先级高于全局配置)
Returns:
dict: 包含 markdown 内容和元数据
"""
# 如果有 VLM 配置,覆盖全局配置
if vlm_config:
self.set_vlm_config(vlm_config)
try:
logger.info(f"Parsing bytes: {file_name}, size: {len(content)} bytes")
# 检查是否应该使用 VLM根据文件名自动判断
if self._should_use_vlm(file_name):
logger.info("Using VLM for parsing")
return self._parse_with_vlm(content, file_name)
# 否则使用 MarkItDown
with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file_name)[1] or '') as temp_file:
temp_file.write(content)
temp_path = temp_file.name
@@ -89,6 +129,65 @@ class Parser:
"error": str(e)
}
def _should_use_vlm(self, file_name: str) -> bool:
    """Tell whether ``file_name`` should be routed to the VLM.

    True only when a VLM client is set on this parser and the file name's
    extension (compared case-insensitively) is one of the listed image types.
    """
    if self.vlm_client:
        suffix = os.path.splitext(file_name)[1].lower()
        return suffix in ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff')
    return False
def _parse_with_vlm(self, content: bytes, file_name: str) -> dict:
    """Send image bytes to the configured VLM client and wrap its answer.

    Args:
        content: Raw image bytes.
        file_name: Name used only to derive the MIME type from its extension;
            unknown extensions are sent as "image/png".

    Returns:
        On success: ``success=True``, the returned text as ``content``, its
        length as ``content_length`` and ``metadata={"vlm_used": True}``.
        On any failure (no client, client-reported failure, or an exception):
        ``success=False``, empty ``content``, ``content_length=0`` and an
        ``error`` message.
    """
    def _failure(reason):
        return {
            "success": False,
            "content": "",
            "content_length": 0,
            "error": reason
        }

    if not self.vlm_client:
        return _failure("VLM not configured")

    # Derive the MIME type from the extension.
    suffix = os.path.splitext(file_name)[1].lower()
    if suffix in ('.jpg', '.jpeg'):
        mime_type = 'image/jpeg'
    elif suffix in ('.png', '.gif', '.bmp', '.webp', '.tiff'):
        mime_type = 'image/' + suffix[1:]
    else:
        mime_type = 'image/png'

    try:
        outcome = self.vlm_client.analyze_image(content, mime_type)
        if not outcome.get("success"):
            return _failure(outcome.get("error", "VLM parsing failed"))
        return {
            "success": True,
            "content": outcome["content"],
            "content_length": len(outcome["content"]),
            "metadata": {"vlm_used": True}
        }
    except Exception as e:
        logger.error(f"VLM parsing error: {e}")
        return _failure(str(e))
if __name__ == "__main__":
parser = Parser()

View File

@@ -0,0 +1,209 @@
"""
VLM 客户端 - 用于调用 VLM 模型进行文档理解
"""
import logging
import base64
import requests
from typing import Optional, Dict, Any
logger = logging.getLogger(__name__)
class VLMClient:
    """Client that sends one image to a vision-language model over HTTP.

    The provider is chosen by ``config["provider"]``; "openai", "anthropic"
    and "qwen" are handled. Every public call returns a result dict instead
    of raising: ``{"success": True, "content": str, "usage": dict}`` on
    success, ``{"success": False, "content": "", "error": str}`` on failure.
    """

    # Seconds to wait for a provider response before giving up.
    _REQUEST_TIMEOUT = 120
    # Completion token cap sent to providers that receive one.
    _MAX_TOKENS = 4096

    def __init__(self, config: Dict[str, Any]):
        """Store the connection settings.

        Args:
            config: VLM settings. Keys read: "provider" (default "openai"),
                "model" (default "gpt-4o"), "api_key", "base_url" and
                "prompt" (an empty prompt selects the built-in one).
        """
        self.config = config
        self.provider = config.get("provider", "openai")
        self.model = config.get("model", "gpt-4o")
        self.api_key = config.get("api_key", "")
        self.base_url = config.get("base_url", "")
        self.prompt = config.get("prompt", "") or self._default_prompt()
        logger.info(f"VLMClient initialized: provider={self.provider}, model={self.model}")

    def _default_prompt(self) -> str:
        """Return the built-in prompt asking for a Markdown transcription."""
        return """请分析这张图片中的文档内容,并将其转换为 Markdown 格式。
要求:
1. 保持原文的格式和结构
2. 表格用 Markdown 表格格式
3. 标题用 # ## ### 标记
4. 代码块用 ``` 标记
5. 尽量保留原文的所有信息"""

    def analyze_image(self, image_data: bytes, mime_type: str = "image/png") -> Dict[str, Any]:
        """Analyze an image with the configured provider.

        Args:
            image_data: Raw image bytes.
            mime_type: MIME type reported to the provider.

        Returns:
            The result dict described on the class; an unknown provider
            yields a failure result without any network call.
        """
        handlers = {
            "openai": self._call_openai,
            "anthropic": self._call_anthropic,
            "qwen": self._call_qwen,
        }
        handler = handlers.get(self.provider)
        if handler is None:
            return {
                "success": False,
                "content": "",
                "error": f"Unsupported provider: {self.provider}"
            }
        return handler(image_data, mime_type)

    def _endpoint(self, default_base: str, path: str) -> str:
        """Join the configured (or default) base URL with ``path``.

        A trailing "/" on the base is dropped so that a ``base_url`` such as
        "http://host/v1/" does not produce "http://host/v1//chat/completions".
        """
        return (self.base_url or default_base).rstrip("/") + path

    def _post_json(self, url: str, headers: Dict[str, str], payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST ``payload`` as JSON and return the decoded JSON response.

        Raises whatever ``requests`` raises for transport errors, non-2xx
        statuses or an undecodable body; callers convert that to a result.
        """
        response = requests.post(url, headers=headers, json=payload, timeout=self._REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _failure(label: str, exc: Exception) -> Dict[str, Any]:
        """Log ``exc`` under the provider ``label`` and build a failure result."""
        logger.error(f"{label} API error: {exc}")
        return {
            "success": False,
            "content": "",
            "error": str(exc)
        }

    def _call_chat_completions(
        self,
        label: str,
        default_base: str,
        image_data: bytes,
        mime_type: str,
        include_max_tokens: bool,
    ) -> Dict[str, Any]:
        """Call a "/chat/completions" endpoint with the image as a data URL.

        Shared by the OpenAI and Qwen paths, which use the same request and
        response layout here.
        """
        try:
            image_base64 = base64.b64encode(image_data).decode("utf-8")
            data_url = f"data:{mime_type};base64,{image_base64}"
            payload: Dict[str, Any] = {
                "model": self.model,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": self.prompt},
                            {"type": "image_url", "image_url": {"url": data_url}}
                        ]
                    }
                ]
            }
            if include_max_tokens:
                payload["max_tokens"] = self._MAX_TOKENS
            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            }
            result = self._post_json(self._endpoint(default_base, "/chat/completions"), headers, payload)
            return {
                "success": True,
                "content": result["choices"][0]["message"]["content"],
                "usage": result.get("usage", {})
            }
        except Exception as e:
            # Provider boundary: callers expect a result dict, never an exception.
            return self._failure(label, e)

    def _call_openai(self, image_data: bytes, mime_type: str) -> Dict[str, Any]:
        """Call the OpenAI chat-completions API (default base api.openai.com/v1)."""
        return self._call_chat_completions(
            "OpenAI", "https://api.openai.com/v1", image_data, mime_type, include_max_tokens=True
        )

    def _call_anthropic(self, image_data: bytes, mime_type: str) -> Dict[str, Any]:
        """Call the Anthropic messages API with the image as a base64 source."""
        try:
            image_base64 = base64.b64encode(image_data).decode("utf-8")
            headers = {
                "x-api-key": self.api_key,
                "anthropic-version": "2023-06-01",
                "Content-Type": "application/json"
            }
            payload = {
                "model": self.model,
                "max_tokens": self._MAX_TOKENS,
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": self.prompt},
                            {
                                "type": "image",
                                "source": {
                                    "type": "base64",
                                    "media_type": mime_type,
                                    "data": image_base64
                                }
                            }
                        ]
                    }
                ]
            }
            result = self._post_json(
                self._endpoint("https://api.anthropic.com/v1", "/messages"), headers, payload
            )
            return {
                "success": True,
                # The first content block is read as the text answer.
                "content": result["content"][0]["text"],
                "usage": result.get("usage", {})
            }
        except Exception as e:
            # Provider boundary: callers expect a result dict, never an exception.
            return self._failure("Anthropic", e)

    def _call_qwen(self, image_data: bytes, mime_type: str) -> Dict[str, Any]:
        """Call the Qwen VL API through DashScope's compatible-mode endpoint.

        No ``max_tokens`` is sent on this path. ``usage`` is passed through
        when the response carries it, otherwise it is ``{}``.
        """
        return self._call_chat_completions(
            "Qwen",
            "https://dashscope.aliyuncs.com/compatible-mode/v1",
            image_data,
            mime_type,
            include_max_tokens=False,
        )

View File

@@ -16,6 +16,18 @@ message ParseRequest {
string file_type = 3;
string parser_engine = 4;
map<string, string> engine_overrides = 5;
// VLM 配置(可选)
VLMConfig vlm_config = 6;
}
// Per-request VLM settings carried in ParseRequest.vlm_config (field 6).
message VLMConfig {
bool enabled = 1; // Whether VLM parsing is enabled
// NOTE(review): the Python VLMClient added in this change dispatches on
// "openai", "anthropic" and "qwen" only; "local" has no handler there — confirm.
string provider = 2; // VLM provider: openai, anthropic, local, etc.
string model = 3; // Model name
string api_key = 4; // API key
string base_url = 5; // Custom API base URL
string prompt = 6; // Custom prompt
}
message ParseResponse {

View File

@@ -24,7 +24,7 @@ _sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x64ocument_parser.proto\x12\tdocparser\"\xdd\x01\n\x0cParseRequest\x12\x10\n\x08\x66ile_url\x18\x01 \x01(\t\x12\x11\n\tfile_name\x18\x02 \x01(\t\x12\x11\n\tfile_type\x18\x03 \x01(\t\x12\x15\n\rparser_engine\x18\x04 \x01(\t\x12\x46\n\x10\x65ngine_overrides\x18\x05 \x03(\x0b\x32,.docparser.ParseRequest.EngineOverridesEntry\x1a\x36\n\x14\x45ngineOverridesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x84\x01\n\rParseResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\x12\x0f\n\x07message\x18\x03 \x01(\t\x12\x16\n\x0e\x63ontent_length\x18\x04 \x01(\x05\x12\x11\n\tfile_type\x18\x05 \x01(\t\x12\x15\n\rparser_engine\x18\x06 \x01(\t\"\x07\n\x05\x45mpty\"\xca\x01\n\x18SupportedFormatsResponse\x12\x12\n\nfile_types\x18\x01 \x03(\t\x12]\n\x16\x66ile_type_descriptions\x18\x02 \x03(\x0b\x32=.docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry\x1a;\n\x19\x46ileTypeDescriptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"9\n\x0f\x45nginesResponse\x12&\n\x07\x65ngines\x18\x01 \x03(\x0b\x32\x15.docparser.EngineInfo\"|\n\nEngineInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x1c\n\x14supported_file_types\x18\x03 \x03(\t\x12\x11\n\tavailable\x18\x04 \x01(\x08\x12\x1a\n\x12unavailable_reason\x18\x05 \x01(\t2\xde\x01\n\x0e\x44ocumentParser\x12\x42\n\rParseDocument\x12\x17.docparser.ParseRequest\x1a\x18.docparser.ParseResponse\x12L\n\x13GetSupportedFormats\x12\x10.docparser.Empty\x1a#.docparser.SupportedFormatsResponse\x12:\n\nGetEngines\x12\x10.docparser.Empty\x1a\x1a.docparser.EnginesResponseB\x1aZ\x18x-agents/proto/docparserb\x06proto3')
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x64ocument_parser.proto\x12\tdocparser\"\x87\x02\n\x0cParseRequest\x12\x10\n\x08\x66ile_url\x18\x01 \x01(\t\x12\x11\n\tfile_name\x18\x02 \x01(\t\x12\x11\n\tfile_type\x18\x03 \x01(\t\x12\x15\n\rparser_engine\x18\x04 \x01(\t\x12\x46\n\x10\x65ngine_overrides\x18\x05 \x03(\x0b\x32,.docparser.ParseRequest.EngineOverridesEntry\x12(\n\nvlm_config\x18\x06 \x01(\x0b\x32\x14.docparser.VLMConfig\x1a\x36\n\x14\x45ngineOverridesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"p\n\tVLMConfig\x12\x0f\n\x07\x65nabled\x18\x01 \x01(\x08\x12\x10\n\x08provider\x18\x02 \x01(\t\x12\r\n\x05model\x18\x03 \x01(\t\x12\x0f\n\x07\x61pi_key\x18\x04 \x01(\t\x12\x10\n\x08\x62\x61se_url\x18\x05 \x01(\t\x12\x0e\n\x06prompt\x18\x06 \x01(\t\"\x84\x01\n\rParseResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\x12\x0f\n\x07message\x18\x03 \x01(\t\x12\x16\n\x0e\x63ontent_length\x18\x04 \x01(\x05\x12\x11\n\tfile_type\x18\x05 \x01(\t\x12\x15\n\rparser_engine\x18\x06 \x01(\t\"\x07\n\x05\x45mpty\"\xca\x01\n\x18SupportedFormatsResponse\x12\x12\n\nfile_types\x18\x01 \x03(\t\x12]\n\x16\x66ile_type_descriptions\x18\x02 \x03(\x0b\x32=.docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry\x1a;\n\x19\x46ileTypeDescriptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"9\n\x0f\x45nginesResponse\x12&\n\x07\x65ngines\x18\x01 \x03(\x0b\x32\x15.docparser.EngineInfo\"|\n\nEngineInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x1c\n\x14supported_file_types\x18\x03 \x03(\t\x12\x11\n\tavailable\x18\x04 \x01(\x08\x12\x1a\n\x12unavailable_reason\x18\x05 
\x01(\t2\xde\x01\n\x0e\x44ocumentParser\x12\x42\n\rParseDocument\x12\x17.docparser.ParseRequest\x1a\x18.docparser.ParseResponse\x12L\n\x13GetSupportedFormats\x12\x10.docparser.Empty\x1a#.docparser.SupportedFormatsResponse\x12:\n\nGetEngines\x12\x10.docparser.Empty\x1a\x1a.docparser.EnginesResponseB\x1aZ\x18x-agents/proto/docparserb\x06proto3')
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
@@ -37,21 +37,23 @@ if not _descriptor._USE_C_DESCRIPTORS:
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._loaded_options = None
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_options = b'8\001'
_globals['_PARSEREQUEST']._serialized_start=37
_globals['_PARSEREQUEST']._serialized_end=258
_globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_start=204
_globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_end=258
_globals['_PARSERESPONSE']._serialized_start=261
_globals['_PARSERESPONSE']._serialized_end=393
_globals['_EMPTY']._serialized_start=395
_globals['_EMPTY']._serialized_end=402
_globals['_SUPPORTEDFORMATSRESPONSE']._serialized_start=405
_globals['_SUPPORTEDFORMATSRESPONSE']._serialized_end=607
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_start=548
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_end=607
_globals['_ENGINESRESPONSE']._serialized_start=609
_globals['_ENGINESRESPONSE']._serialized_end=666
_globals['_ENGINEINFO']._serialized_start=668
_globals['_ENGINEINFO']._serialized_end=792
_globals['_DOCUMENTPARSER']._serialized_start=795
_globals['_DOCUMENTPARSER']._serialized_end=1017
_globals['_PARSEREQUEST']._serialized_end=300
_globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_start=246
_globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_end=300
_globals['_VLMCONFIG']._serialized_start=302
_globals['_VLMCONFIG']._serialized_end=414
_globals['_PARSERESPONSE']._serialized_start=417
_globals['_PARSERESPONSE']._serialized_end=549
_globals['_EMPTY']._serialized_start=551
_globals['_EMPTY']._serialized_end=558
_globals['_SUPPORTEDFORMATSRESPONSE']._serialized_start=561
_globals['_SUPPORTEDFORMATSRESPONSE']._serialized_end=763
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_start=704
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_end=763
_globals['_ENGINESRESPONSE']._serialized_start=765
_globals['_ENGINESRESPONSE']._serialized_end=822
_globals['_ENGINEINFO']._serialized_start=824
_globals['_ENGINEINFO']._serialized_end=948
_globals['_DOCUMENTPARSER']._serialized_start=951
_globals['_DOCUMENTPARSER']._serialized_end=1173
# @@protoc_insertion_point(module_scope)

View File

@@ -6,8 +6,9 @@ grpcio-tools>=1.60.0
grpcio-reflection>=1.60.0
protobuf>=4.25.0
# HTTP 请求
# 配置文件解析
pyyaml>=6.0
requests>=2.31.0
# 文档解析
markitdown>=0.0.1
# 文档解析 - markitdown 及其所有依赖
markitdown[pdf,docx,pptx,xlsx,all]>=0.0.1

View File

@@ -75,6 +75,21 @@ class DocumentParserServicer:
content_length=0,
)
# 提取 VLM 配置
vlm_config = None
if hasattr(request, 'vlm_config') and request.vlm_config:
vlm_cfg = request.vlm_config
if vlm_cfg.enabled:
vlm_config = {
"enabled": vlm_cfg.enabled,
"provider": vlm_cfg.provider,
"model": vlm_cfg.model,
"api_key": vlm_cfg.api_key,
"base_url": vlm_cfg.base_url,
"prompt": vlm_cfg.prompt,
}
logger.info(f"VLM config: provider={vlm_cfg.provider}, model={vlm_cfg.model}")
logger.info("Downloading file from URL: %s", file_url)
try:
@@ -95,9 +110,9 @@ class DocumentParserServicer:
content_length=0,
)
logger.info("Parsing file with MarkItDown")
logger.info("Parsing file with MarkItDown + VLM")
result = self.parser.parse_bytes(content, file_name)
result = self.parser.parse_bytes(content, file_name, vlm_config=vlm_config)
if not result.get("success", False):
logger.warning("Parser returned failure: %s", result.get("error", "Unknown error"))

View File

@@ -0,0 +1,498 @@
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.11
// protoc v5.29.3
// source: document_parser.proto
package docparser
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
type ParseRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
FileUrl string `protobuf:"bytes,1,opt,name=file_url,json=fileUrl,proto3" json:"file_url,omitempty"`
FileName string `protobuf:"bytes,2,opt,name=file_name,json=fileName,proto3" json:"file_name,omitempty"`
FileType string `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"`
ParserEngine string `protobuf:"bytes,4,opt,name=parser_engine,json=parserEngine,proto3" json:"parser_engine,omitempty"`
EngineOverrides map[string]string `protobuf:"bytes,5,rep,name=engine_overrides,json=engineOverrides,proto3" json:"engine_overrides,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ParseRequest) Reset() {
*x = ParseRequest{}
mi := &file_document_parser_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *ParseRequest) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*ParseRequest) ProtoMessage() {}
func (x *ParseRequest) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[0]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use ParseRequest.ProtoReflect.Descriptor instead.
func (*ParseRequest) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{0}
}
func (x *ParseRequest) GetFileUrl() string {
if x != nil {
return x.FileUrl
}
return ""
}
func (x *ParseRequest) GetFileName() string {
if x != nil {
return x.FileName
}
return ""
}
func (x *ParseRequest) GetFileType() string {
if x != nil {
return x.FileType
}
return ""
}
func (x *ParseRequest) GetParserEngine() string {
if x != nil {
return x.ParserEngine
}
return ""
}
func (x *ParseRequest) GetEngineOverrides() map[string]string {
if x != nil {
return x.EngineOverrides
}
return nil
}
type ParseResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
Content string `protobuf:"bytes,2,opt,name=content,proto3" json:"content,omitempty"`
Message string `protobuf:"bytes,3,opt,name=message,proto3" json:"message,omitempty"`
ContentLength int32 `protobuf:"varint,4,opt,name=content_length,json=contentLength,proto3" json:"content_length,omitempty"`
FileType string `protobuf:"bytes,5,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"`
ParserEngine string `protobuf:"bytes,6,opt,name=parser_engine,json=parserEngine,proto3" json:"parser_engine,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ParseResponse) Reset() {
*x = ParseResponse{}
mi := &file_document_parser_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *ParseResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*ParseResponse) ProtoMessage() {}
func (x *ParseResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use ParseResponse.ProtoReflect.Descriptor instead.
func (*ParseResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{1}
}
func (x *ParseResponse) GetSuccess() bool {
if x != nil {
return x.Success
}
return false
}
func (x *ParseResponse) GetContent() string {
if x != nil {
return x.Content
}
return ""
}
func (x *ParseResponse) GetMessage() string {
if x != nil {
return x.Message
}
return ""
}
func (x *ParseResponse) GetContentLength() int32 {
if x != nil {
return x.ContentLength
}
return 0
}
func (x *ParseResponse) GetFileType() string {
if x != nil {
return x.FileType
}
return ""
}
func (x *ParseResponse) GetParserEngine() string {
if x != nil {
return x.ParserEngine
}
return ""
}
type Empty struct {
state protoimpl.MessageState `protogen:"open.v1"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *Empty) Reset() {
*x = Empty{}
mi := &file_document_parser_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *Empty) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*Empty) ProtoMessage() {}
func (x *Empty) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use Empty.ProtoReflect.Descriptor instead.
func (*Empty) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{2}
}
type SupportedFormatsResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
FileTypes []string `protobuf:"bytes,1,rep,name=file_types,json=fileTypes,proto3" json:"file_types,omitempty"`
FileTypeDescriptions map[string]string `protobuf:"bytes,2,rep,name=file_type_descriptions,json=fileTypeDescriptions,proto3" json:"file_type_descriptions,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *SupportedFormatsResponse) Reset() {
*x = SupportedFormatsResponse{}
mi := &file_document_parser_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *SupportedFormatsResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*SupportedFormatsResponse) ProtoMessage() {}
func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[3]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use SupportedFormatsResponse.ProtoReflect.Descriptor instead.
func (*SupportedFormatsResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{3}
}
func (x *SupportedFormatsResponse) GetFileTypes() []string {
if x != nil {
return x.FileTypes
}
return nil
}
func (x *SupportedFormatsResponse) GetFileTypeDescriptions() map[string]string {
if x != nil {
return x.FileTypeDescriptions
}
return nil
}
type EnginesResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
Engines []*EngineInfo `protobuf:"bytes,1,rep,name=engines,proto3" json:"engines,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *EnginesResponse) Reset() {
*x = EnginesResponse{}
mi := &file_document_parser_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *EnginesResponse) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*EnginesResponse) ProtoMessage() {}
func (x *EnginesResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[4]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use EnginesResponse.ProtoReflect.Descriptor instead.
func (*EnginesResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{4}
}
func (x *EnginesResponse) GetEngines() []*EngineInfo {
if x != nil {
return x.Engines
}
return nil
}
type EngineInfo struct {
state protoimpl.MessageState `protogen:"open.v1"`
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
Description string `protobuf:"bytes,2,opt,name=description,proto3" json:"description,omitempty"`
SupportedFileTypes []string `protobuf:"bytes,3,rep,name=supported_file_types,json=supportedFileTypes,proto3" json:"supported_file_types,omitempty"`
Available bool `protobuf:"varint,4,opt,name=available,proto3" json:"available,omitempty"`
UnavailableReason string `protobuf:"bytes,5,opt,name=unavailable_reason,json=unavailableReason,proto3" json:"unavailable_reason,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *EngineInfo) Reset() {
*x = EngineInfo{}
mi := &file_document_parser_proto_msgTypes[5]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
func (x *EngineInfo) String() string {
return protoimpl.X.MessageStringOf(x)
}
func (*EngineInfo) ProtoMessage() {}
func (x *EngineInfo) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[5]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Deprecated: Use EngineInfo.ProtoReflect.Descriptor instead.
func (*EngineInfo) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{5}
}
func (x *EngineInfo) GetName() string {
if x != nil {
return x.Name
}
return ""
}
func (x *EngineInfo) GetDescription() string {
if x != nil {
return x.Description
}
return ""
}
func (x *EngineInfo) GetSupportedFileTypes() []string {
if x != nil {
return x.SupportedFileTypes
}
return nil
}
func (x *EngineInfo) GetAvailable() bool {
if x != nil {
return x.Available
}
return false
}
func (x *EngineInfo) GetUnavailableReason() string {
if x != nil {
return x.UnavailableReason
}
return ""
}
var File_document_parser_proto protoreflect.FileDescriptor
const file_document_parser_proto_rawDesc = "" +
"\n" +
"\x15document_parser.proto\x12\tdocparser\"\xa5\x02\n" +
"\fParseRequest\x12\x19\n" +
"\bfile_url\x18\x01 \x01(\tR\afileUrl\x12\x1b\n" +
"\tfile_name\x18\x02 \x01(\tR\bfileName\x12\x1b\n" +
"\tfile_type\x18\x03 \x01(\tR\bfileType\x12#\n" +
"\rparser_engine\x18\x04 \x01(\tR\fparserEngine\x12W\n" +
"\x10engine_overrides\x18\x05 \x03(\v2,.docparser.ParseRequest.EngineOverridesEntryR\x0fengineOverrides\x1aB\n" +
"\x14EngineOverridesEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xc6\x01\n" +
"\rParseResponse\x12\x18\n" +
"\asuccess\x18\x01 \x01(\bR\asuccess\x12\x18\n" +
"\acontent\x18\x02 \x01(\tR\acontent\x12\x18\n" +
"\amessage\x18\x03 \x01(\tR\amessage\x12%\n" +
"\x0econtent_length\x18\x04 \x01(\x05R\rcontentLength\x12\x1b\n" +
"\tfile_type\x18\x05 \x01(\tR\bfileType\x12#\n" +
"\rparser_engine\x18\x06 \x01(\tR\fparserEngine\"\a\n" +
"\x05Empty\"\xf7\x01\n" +
"\x18SupportedFormatsResponse\x12\x1d\n" +
"\n" +
"file_types\x18\x01 \x03(\tR\tfileTypes\x12s\n" +
"\x16file_type_descriptions\x18\x02 \x03(\v2=.docparser.SupportedFormatsResponse.FileTypeDescriptionsEntryR\x14fileTypeDescriptions\x1aG\n" +
"\x19FileTypeDescriptionsEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"B\n" +
"\x0fEnginesResponse\x12/\n" +
"\aengines\x18\x01 \x03(\v2\x15.docparser.EngineInfoR\aengines\"\xc1\x01\n" +
"\n" +
"EngineInfo\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\x12 \n" +
"\vdescription\x18\x02 \x01(\tR\vdescription\x120\n" +
"\x14supported_file_types\x18\x03 \x03(\tR\x12supportedFileTypes\x12\x1c\n" +
"\tavailable\x18\x04 \x01(\bR\tavailable\x12-\n" +
"\x12unavailable_reason\x18\x05 \x01(\tR\x11unavailableReason2\xde\x01\n" +
"\x0eDocumentParser\x12B\n" +
"\rParseDocument\x12\x17.docparser.ParseRequest\x1a\x18.docparser.ParseResponse\x12L\n" +
"\x13GetSupportedFormats\x12\x10.docparser.Empty\x1a#.docparser.SupportedFormatsResponse\x12:\n" +
"\n" +
"GetEngines\x12\x10.docparser.Empty\x1a\x1a.docparser.EnginesResponseB\x1aZ\x18x-agents/proto/docparserb\x06proto3"
var (
file_document_parser_proto_rawDescOnce sync.Once
file_document_parser_proto_rawDescData []byte
)
func file_document_parser_proto_rawDescGZIP() []byte {
file_document_parser_proto_rawDescOnce.Do(func() {
file_document_parser_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_document_parser_proto_rawDesc), len(file_document_parser_proto_rawDesc)))
})
return file_document_parser_proto_rawDescData
}
var file_document_parser_proto_msgTypes = make([]protoimpl.MessageInfo, 8)
var file_document_parser_proto_goTypes = []any{
(*ParseRequest)(nil), // 0: docparser.ParseRequest
(*ParseResponse)(nil), // 1: docparser.ParseResponse
(*Empty)(nil), // 2: docparser.Empty
(*SupportedFormatsResponse)(nil), // 3: docparser.SupportedFormatsResponse
(*EnginesResponse)(nil), // 4: docparser.EnginesResponse
(*EngineInfo)(nil), // 5: docparser.EngineInfo
nil, // 6: docparser.ParseRequest.EngineOverridesEntry
nil, // 7: docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
}
// Dependency index table passed to protoimpl.TypeBuilder as DependencyIndexes.
// Per the trailing comments, entries 0-8 map a field type_name or a method
// input/output type to a type index (the values line up with the positions in
// file_document_parser_proto_goTypes), and the final five entries mark where
// each sub-list starts. Set to nil by file_document_parser_proto_init after use.
var file_document_parser_proto_depIdxs = []int32{
6, // 0: docparser.ParseRequest.engine_overrides:type_name -> docparser.ParseRequest.EngineOverridesEntry
7, // 1: docparser.SupportedFormatsResponse.file_type_descriptions:type_name -> docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
5, // 2: docparser.EnginesResponse.engines:type_name -> docparser.EngineInfo
0, // 3: docparser.DocumentParser.ParseDocument:input_type -> docparser.ParseRequest
2, // 4: docparser.DocumentParser.GetSupportedFormats:input_type -> docparser.Empty
2, // 5: docparser.DocumentParser.GetEngines:input_type -> docparser.Empty
1, // 6: docparser.DocumentParser.ParseDocument:output_type -> docparser.ParseResponse
3, // 7: docparser.DocumentParser.GetSupportedFormats:output_type -> docparser.SupportedFormatsResponse
4, // 8: docparser.DocumentParser.GetEngines:output_type -> docparser.EnginesResponse
6, // [6:9] is the sub-list for method output_type
3, // [3:6] is the sub-list for method input_type
3, // [3:3] is the sub-list for extension type_name
3, // [3:3] is the sub-list for extension extendee
0, // [0:3] is the sub-list for field type_name
}
// init runs file_document_parser_proto_init at package initialization.
func init() {
	file_document_parser_proto_init()
}
// file_document_parser_proto_init builds the file descriptor for
// document_parser.proto with protoimpl.TypeBuilder and stores the result in
// File_document_parser_proto. It returns immediately if that variable is
// already non-nil, so repeated calls do nothing further.
func file_document_parser_proto_init() {
if File_document_parser_proto != nil {
return
}
// Local empty type used only so reflect can report this package's import path.
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
// Byte view of the raw descriptor string, taken without copying.
RawDescriptor: unsafe.Slice(unsafe.StringData(file_document_parser_proto_rawDesc), len(file_document_parser_proto_rawDesc)),
NumEnums: 0,
NumMessages: 8,
NumExtensions: 0,
NumServices: 1,
},
GoTypes: file_document_parser_proto_goTypes,
DependencyIndexes: file_document_parser_proto_depIdxs,
MessageInfos: file_document_parser_proto_msgTypes,
}.Build()
File_document_parser_proto = out.File
// The lookup tables are not referenced again after Build; clear them
// (presumably so the slices can be garbage-collected).
file_document_parser_proto_goTypes = nil
file_document_parser_proto_depIdxs = nil
}

View File

@@ -0,0 +1,197 @@
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.6.1
// - protoc v5.29.3
// source: document_parser.proto
package docparser
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
// The value is assigned to the blank identifier, so only the reference to the
// grpc constant matters.
const _ = grpc.SupportPackageIsVersion9
// Full method names ("/<service>/<method>") for the DocumentParser RPCs. The
// client methods pass them to Invoke and the server handlers put them in
// grpc.UnaryServerInfo.FullMethod.
const (
DocumentParser_ParseDocument_FullMethodName = "/docparser.DocumentParser/ParseDocument"
DocumentParser_GetSupportedFormats_FullMethodName = "/docparser.DocumentParser/GetSupportedFormats"
DocumentParser_GetEngines_FullMethodName = "/docparser.DocumentParser/GetEngines"
)
// DocumentParserClient is the client API for DocumentParser service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type DocumentParserClient interface {
// ParseDocument sends a ParseRequest and returns the ParseResponse.
ParseDocument(ctx context.Context, in *ParseRequest, opts ...grpc.CallOption) (*ParseResponse, error)
// GetSupportedFormats sends an Empty request and returns a SupportedFormatsResponse.
GetSupportedFormats(ctx context.Context, in *Empty, opts ...grpc.CallOption) (*SupportedFormatsResponse, error)
// GetEngines sends an Empty request and returns an EnginesResponse.
GetEngines(ctx context.Context, in *Empty, opts ...grpc.CallOption) (*EnginesResponse, error)
}
// documentParserClient is the DocumentParserClient implementation returned by
// NewDocumentParserClient.
type documentParserClient struct {
// Connection on which every RPC is issued via Invoke.
cc grpc.ClientConnInterface
}
// NewDocumentParserClient wraps cc in a DocumentParserClient; every RPC made
// through the returned client is issued on cc.
func NewDocumentParserClient(cc grpc.ClientConnInterface) DocumentParserClient {
	client := &documentParserClient{cc: cc}
	return client
}
// ParseDocument performs the unary ParseDocument RPC. grpc.StaticMethod() is
// placed ahead of the caller-supplied options. On failure the response is nil
// and the Invoke error is returned unchanged.
func (c *documentParserClient) ParseDocument(ctx context.Context, in *ParseRequest, opts ...grpc.CallOption) (*ParseResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &ParseResponse{}
	if err := c.cc.Invoke(ctx, DocumentParser_ParseDocument_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// GetSupportedFormats performs the unary GetSupportedFormats RPC.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the response is nil and the Invoke error is returned unchanged.
func (c *documentParserClient) GetSupportedFormats(ctx context.Context, in *Empty, opts ...grpc.CallOption) (*SupportedFormatsResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &SupportedFormatsResponse{}
	if err := c.cc.Invoke(ctx, DocumentParser_GetSupportedFormats_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// GetEngines performs the unary GetEngines RPC. grpc.StaticMethod() is placed
// ahead of the caller-supplied options. On failure the response is nil and the
// Invoke error is returned unchanged.
func (c *documentParserClient) GetEngines(ctx context.Context, in *Empty, opts ...grpc.CallOption) (*EnginesResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &EnginesResponse{}
	if err := c.cc.Invoke(ctx, DocumentParser_GetEngines_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// DocumentParserServer is the server API for DocumentParser service.
// All implementations must embed UnimplementedDocumentParserServer
// for forward compatibility.
type DocumentParserServer interface {
// ParseDocument handles a ParseRequest and returns a ParseResponse.
ParseDocument(context.Context, *ParseRequest) (*ParseResponse, error)
// GetSupportedFormats handles an Empty request and returns a SupportedFormatsResponse.
GetSupportedFormats(context.Context, *Empty) (*SupportedFormatsResponse, error)
// GetEngines handles an Empty request and returns an EnginesResponse.
GetEngines(context.Context, *Empty) (*EnginesResponse, error)
// Unexported marker method; UnimplementedDocumentParserServer provides it.
mustEmbedUnimplementedDocumentParserServer()
}
// UnimplementedDocumentParserServer must be embedded to have
// forward compatible implementations.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedDocumentParserServer struct{}

// ParseDocument is the default implementation; it always fails with
// codes.Unimplemented.
func (UnimplementedDocumentParserServer) ParseDocument(_ context.Context, _ *ParseRequest) (*ParseResponse, error) {
	err := status.Error(codes.Unimplemented, "method ParseDocument not implemented")
	return nil, err
}

// GetSupportedFormats is the default implementation; it always fails with
// codes.Unimplemented.
func (UnimplementedDocumentParserServer) GetSupportedFormats(_ context.Context, _ *Empty) (*SupportedFormatsResponse, error) {
	err := status.Error(codes.Unimplemented, "method GetSupportedFormats not implemented")
	return nil, err
}

// GetEngines is the default implementation; it always fails with
// codes.Unimplemented.
func (UnimplementedDocumentParserServer) GetEngines(_ context.Context, _ *Empty) (*EnginesResponse, error) {
	err := status.Error(codes.Unimplemented, "method GetEngines not implemented")
	return nil, err
}

// mustEmbedUnimplementedDocumentParserServer is the marker method required by
// the DocumentParserServer interface.
func (UnimplementedDocumentParserServer) mustEmbedUnimplementedDocumentParserServer() {
}

// testEmbeddedByValue is called by RegisterDocumentParserServer to check how
// this type was embedded.
func (UnimplementedDocumentParserServer) testEmbeddedByValue() {
}
// UnsafeDocumentParserServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to DocumentParserServer will
// result in compilation errors.
type UnsafeDocumentParserServer interface {
// Same unexported marker method that DocumentParserServer declares.
mustEmbedUnimplementedDocumentParserServer()
}
// RegisterDocumentParserServer registers srv on s under
// DocumentParser_ServiceDesc.
func RegisterDocumentParserServer(s grpc.ServiceRegistrar, srv DocumentParserServer) {
	// If the following call panics, it indicates UnimplementedDocumentParserServer was
	// embedded by pointer and is nil. This will cause panics if an
	// unimplemented method is ever invoked, so we test this at initialization
	// time to prevent it from happening at runtime later due to I/O.
	type embeddedByValueTester interface{ testEmbeddedByValue() }
	if tester, ok := srv.(embeddedByValueTester); ok {
		tester.testEmbeddedByValue()
	}
	s.RegisterService(&DocumentParser_ServiceDesc, srv)
}
// _DocumentParser_ParseDocument_Handler is the unary handler for ParseDocument.
// It decodes the request with dec, then calls srv's ParseDocument either
// directly or through interceptor when one is supplied.
func _DocumentParser_ParseDocument_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(ParseRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// Terminal handler the interceptor calls to reach the service method.
		invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
			return srv.(DocumentParserServer).ParseDocument(ctx, req.(*ParseRequest))
		}
		serverInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: DocumentParser_ParseDocument_FullMethodName,
		}
		return interceptor(ctx, request, serverInfo, invoke)
	}
	return srv.(DocumentParserServer).ParseDocument(ctx, request)
}
// _DocumentParser_GetSupportedFormats_Handler is the unary handler for
// GetSupportedFormats. It decodes the request with dec, then calls srv's
// GetSupportedFormats either directly or through interceptor when one is
// supplied.
func _DocumentParser_GetSupportedFormats_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(Empty)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// Terminal handler the interceptor calls to reach the service method.
		invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
			return srv.(DocumentParserServer).GetSupportedFormats(ctx, req.(*Empty))
		}
		serverInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: DocumentParser_GetSupportedFormats_FullMethodName,
		}
		return interceptor(ctx, request, serverInfo, invoke)
	}
	return srv.(DocumentParserServer).GetSupportedFormats(ctx, request)
}
// _DocumentParser_GetEngines_Handler is the unary handler for GetEngines. It
// decodes the request with dec, then calls srv's GetEngines either directly or
// through interceptor when one is supplied.
func _DocumentParser_GetEngines_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(Empty)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// Terminal handler the interceptor calls to reach the service method.
		invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
			return srv.(DocumentParserServer).GetEngines(ctx, req.(*Empty))
		}
		serverInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: DocumentParser_GetEngines_FullMethodName,
		}
		return interceptor(ctx, request, serverInfo, invoke)
	}
	return srv.(DocumentParserServer).GetEngines(ctx, request)
}
// DocumentParser_ServiceDesc is the grpc.ServiceDesc for DocumentParser service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var DocumentParser_ServiceDesc = grpc.ServiceDesc{
ServiceName: "docparser.DocumentParser",
HandlerType: (*DocumentParserServer)(nil),
// One entry per unary RPC, pairing the method name with its handler function.
Methods: []grpc.MethodDesc{
{
MethodName: "ParseDocument",
Handler: _DocumentParser_ParseDocument_Handler,
},
{
MethodName: "GetSupportedFormats",
Handler: _DocumentParser_GetSupportedFormats_Handler,
},
{
MethodName: "GetEngines",
Handler: _DocumentParser_GetEngines_Handler,
},
},
// No stream descriptors are declared for this service.
Streams: []grpc.StreamDesc{},
Metadata: "document_parser.proto",
}