Files
X-Agents/ai-core/service/grpc_server.py
DESKTOP-72TV0V4\caoxiaozhu 16b6aa0004 chore: 优化 AI-Core 启动脚本
- 更新 start.bat 和 start.sh 启动脚本
- 优化 gRPC 服务器配置

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-09 12:50:33 +08:00

245 lines
8.2 KiB
Python

import logging
import requests
from concurrent import futures
import grpc
from grpc_reflection.v1alpha import reflection
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "proto"))
from parser.parser import Parser
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Placeholders for the generated protobuf modules. They stay None until
# _import_grpc_protobuf() imports the generated code and rebinds them.
docparser_pb2 = None
docparser_pb2_grpc = None
def _import_grpc_protobuf():
    """Bind the generated protobuf modules to the module-level globals.

    Does nothing when both ``docparser_pb2`` and ``docparser_pb2_grpc`` are
    already set. Otherwise imports ``document_parser_pb2`` and
    ``document_parser_pb2_grpc`` and stores them in those globals.

    Raises:
        ImportError: If either generated module cannot be imported. The
            original error is chained as the cause and the message tells the
            operator to run ``generate_grpc.py``.
    """
    global docparser_pb2, docparser_pb2_grpc

    # Both globals must be populated for the earlier import to count as done.
    still_missing = docparser_pb2 is None or docparser_pb2_grpc is None
    if not still_missing:
        return

    try:
        import document_parser_pb2 as messages_module
        import document_parser_pb2_grpc as services_module

        # Rebind only after both imports succeeded, so a partial failure
        # leaves the globals untouched.
        docparser_pb2, docparser_pb2_grpc = messages_module, services_module
        logger.info("Successfully imported gRPC protobuf modules")
    except ImportError as import_error:
        logger.error(f"Failed to import gRPC protobuf: {import_error}")
        raise ImportError(
            "gRPC protobuf files not found. Please run: python generate_grpc.py"
        ) from import_error
class DocumentParserServicer:
    """gRPC service implementation that parses documents with MarkItDown.

    Exposes three RPC handlers: ``ParseDocument``, ``GetSupportedFormats``
    and ``GetEngines``. Response messages are built from the generated
    ``docparser_pb2`` module, which is loaded lazily in ``__init__``.
    """

    # Single source of truth for the formats this service advertises. Both
    # GetSupportedFormats and GetEngines read from it so the two RPCs cannot
    # drift apart. Insertion order is the order reported to clients.
    FILE_TYPE_DESCRIPTIONS = {
        "pdf": "PDF Document",
        "docx": "Microsoft Word Document",
        "doc": "Microsoft Word Document (Legacy)",
        "pptx": "Microsoft PowerPoint Presentation",
        "ppt": "Microsoft PowerPoint Presentation (Legacy)",
        "xlsx": "Microsoft Excel Spreadsheet",
        "xls": "Microsoft Excel Spreadsheet (Legacy)",
        "csv": "Comma-Separated Values",
        "md": "Markdown File",
        "markdown": "Markdown File",
        "jpg": "JPEG Image",
        "jpeg": "JPEG Image",
        "png": "PNG Image",
        "gif": "GIF Image",
        "bmp": "BMP Image",
        "tiff": "TIFF Image",
        "webp": "WebP Image",
        "html": "HTML Document",
        "htm": "HTML Document",
        "txt": "Plain Text File",
    }
    # Extensions only, in the same order as the mapping above.
    SUPPORTED_FILE_TYPES = tuple(FILE_TYPE_DESCRIPTIONS)

    def __init__(self, max_workers: int = 10):
        """Load the protobuf modules and create the document parser.

        Args:
            max_workers: Stored on the instance as ``max_workers``; it is not
                otherwise used by this class.

        Raises:
            ImportError: If the generated protobuf modules are unavailable.
        """
        _import_grpc_protobuf()
        self.parser = Parser()
        self.max_workers = max_workers
        logger.info("DocumentParserServicer initialized")

    @staticmethod
    def _failure_response(message):
        """Build a ``ParseResponse`` that reports failure with *message*."""
        return docparser_pb2.ParseResponse(
            success=False,
            content="",
            message=message,
            content_length=0,
        )

    def ParseDocument(self, request, context):
        """Download the file at ``request.file_url`` and parse it.

        Args:
            request: Incoming message; ``file_url``, ``file_name`` and
                ``file_type`` are read from it.
            context: gRPC call context (unused by this handler).

        Returns:
            A ``ParseResponse``. On success it carries the parsed content,
            its length, the requested file type (``"auto"`` when empty) and
            the engine name. On any failure ``success`` is False and
            ``message`` explains why; no exception escapes this handler.
        """
        try:
            logger.info(
                "ParseDocument request: file_url=%s, file_name=%s, file_type=%s",
                request.file_url,
                request.file_name,
                request.file_type,
            )
            file_url = request.file_url
            file_name = request.file_name

            # Validate required fields before doing any network I/O.
            if not file_url:
                return self._failure_response("file_url is required")
            if not file_name:
                return self._failure_response("file_name is required")

            # NOTE(review): file_url comes straight from the request and is
            # fetched as-is, and response.content buffers the whole body in
            # memory. If callers are untrusted, consider restricting the
            # allowed hosts and capping the download size.
            logger.info("Downloading file from URL: %s", file_url)
            try:
                response = requests.get(
                    file_url,
                    timeout=60,
                    headers={"User-Agent": "DocParser/1.0"},
                )
                response.raise_for_status()
                content = response.content
                logger.info("Downloaded %d bytes", len(content))
            except requests.RequestException as e:
                logger.error("Failed to download file: %s", e)
                return self._failure_response(f"Failed to download file: {e}")

            logger.info("Parsing file with MarkItDown")
            result = self.parser.parse_bytes(content, file_name)

            # The parser result is read like a mapping with "success",
            # "error" and "content" keys.
            if not result.get("success", False):
                logger.warning(
                    "Parser returned failure: %s",
                    result.get("error", "Unknown error"),
                )
                return self._failure_response(result.get("error", "Parse failed"))

            markdown_content = result.get("content", "")
            logger.info(
                "Parse successful: content_length=%d",
                len(markdown_content),
            )
            return docparser_pb2.ParseResponse(
                success=True,
                content=markdown_content,
                message="Parse successful",
                content_length=len(markdown_content),
                file_type=request.file_type or "auto",
                parser_engine="markitdown",
            )
        except Exception as e:
            # Top-level boundary: log with traceback and report the failure
            # in-band instead of letting the RPC abort.
            logger.exception("ParseDocument error: %s", e)
            return self._failure_response(f"Parse error: {e}")

    def GetSupportedFormats(self, request, context):
        """Return the supported file extensions and their descriptions.

        Args:
            request: Incoming message (unused).
            context: gRPC call context; on an unexpected error its status
                code is set to ``INTERNAL`` and its details to the error text.

        Returns:
            A ``SupportedFormatsResponse`` with ``file_types`` and
            ``file_type_descriptions`` filled in, or an empty one on error.
        """
        try:
            logger.info("GetSupportedFormats request")
            return docparser_pb2.SupportedFormatsResponse(
                file_types=list(self.SUPPORTED_FILE_TYPES),
                file_type_descriptions=dict(self.FILE_TYPE_DESCRIPTIONS),
            )
        except Exception as e:
            logger.exception("GetSupportedFormats error: %s", e)
            context.set_code(grpc.StatusCode.INTERNAL)
            context.set_details(str(e))
            return docparser_pb2.SupportedFormatsResponse()

    def GetEngines(self, request, context):
        """Return the list of available parsing engines.

        Only one engine, ``markitdown``, is reported, and it is always marked
        as available.

        Args:
            request: Incoming message (unused).
            context: gRPC call context; on an unexpected error its status
                code is set to ``INTERNAL`` and its details to the error text.

        Returns:
            An ``EnginesResponse`` containing a single ``EngineInfo``, or an
            empty response on error.
        """
        try:
            logger.info("GetEngines request")
            engine_info = docparser_pb2.EngineInfo(
                name="markitdown",
                description="Microsoft MarkItDown - 统一文档解析引擎",
                supported_file_types=list(self.SUPPORTED_FILE_TYPES),
                available=True,
                unavailable_reason="",
            )
            return docparser_pb2.EnginesResponse(engines=[engine_info])
        except Exception as e:
            logger.exception("GetEngines error: %s", e)
            context.set_code(grpc.StatusCode.INTERNAL)
            context.set_details(str(e))
            return docparser_pb2.EnginesResponse()
def serve(port: int = 50051, max_workers: int = 10):
    """Start the DocumentParser gRPC server and block until it terminates.

    Args:
        port: TCP port to listen on; the server binds to ``0.0.0.0``.
        max_workers: Size of the thread pool that handles RPCs. The same
            value is passed to ``DocumentParserServicer``.

    Raises:
        ImportError: If the generated protobuf modules are unavailable.
    """
    _import_grpc_protobuf()

    worker_pool = futures.ThreadPoolExecutor(max_workers=max_workers)
    server = grpc.server(worker_pool)

    handler = DocumentParserServicer(max_workers=max_workers)
    docparser_pb2_grpc.add_DocumentParserServicer_to_server(handler, server)

    # Advertise the parser service and the reflection service itself.
    parser_service = docparser_pb2.DESCRIPTOR.services_by_name["DocumentParser"]
    reflection.enable_server_reflection(
        service_names=[parser_service.full_name, reflection.SERVICE_NAME],
        server=server,
    )

    listen_address = f"0.0.0.0:{port}"
    server.add_insecure_port(listen_address)
    server.start()
    logger.info("DocumentParser gRPC server (MarkItDown) started on port %d", port)
    logger.info("gRPC reflection enabled")

    try:
        server.wait_for_termination()
    except KeyboardInterrupt:
        # Ctrl+C: stop immediately, with no grace period.
        logger.info("Shutting down server...")
        server.stop(0)
        logger.info("Server stopped")
if __name__ == "__main__":
    # Script entry point: set up root logging at INFO level, then start the
    # server with its default port and worker count.
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )
    serve()