refactor: 重构 algorithm 为 ai-core 代码解析服务

- 新增 ai-core 目录,包含代码解析核心服务
- 添加 proto 定义、parser、service 模块
- 添加启动脚本和依赖配置

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-09 10:27:08 +08:00
parent f22f823a4a
commit 797518ec76
15 changed files with 1163 additions and 0 deletions

View File

@@ -0,0 +1,47 @@
syntax = "proto3";
package docparser;
option go_package = "x-agents/proto/docparser";
// DocumentParser is the gRPC service of the `docparser` package. It declares
// three unary RPCs (one request message in, one response message out).
// NOTE(review): the handlers are not part of this file; the per-method
// descriptions below are derived from the method and message names only.
service DocumentParser {
  // Takes a ParseRequest and returns a ParseResponse.
  rpc ParseDocument(ParseRequest) returns (ParseResponse);

  // Takes the field-less Empty message and returns a SupportedFormatsResponse.
  rpc GetSupportedFormats(Empty) returns (SupportedFormatsResponse);

  // Takes the field-less Empty message and returns an EnginesResponse.
  rpc GetEngines(Empty) returns (EnginesResponse);
}
// Request message of DocumentParser.ParseDocument.
//
// All scalar fields are proto3 implicit-presence strings: an empty string is
// indistinguishable from "not set" on the wire.
// NOTE(review): field meanings are inferred from the names; the handler that
// consumes them is not in this file — confirm before relying on them.
message ParseRequest {
  // Presumably the URL the file is fetched from — TODO confirm scheme/access.
  string file_url = 1;
  // Presumably the file's name.
  string file_name = 2;
  // Presumably one of SupportedFormatsResponse.file_types — TODO confirm.
  string file_type = 3;
  // Presumably an EngineInfo.name selecting the engine — TODO confirm what an
  // empty value means.
  string parser_engine = 4;
  // String-to-string overrides. The accepted keys are not defined in this
  // file. Map entries have no defined order on the wire.
  map<string, string> engine_overrides = 5;
}
// Response message of DocumentParser.ParseDocument.
// NOTE(review): field meanings are inferred from the names; the code that
// fills them is not in this file — confirm before relying on them.
message ParseResponse {
  // Presumably true when parsing succeeded. Defaults to false when unset.
  bool success = 1;
  // Presumably the parsed content, carried as UTF-8 text.
  string content = 2;
  // Presumably a human-readable status or error message.
  string message = 3;
  // Presumably the length of `content`; whether this counts characters or
  // bytes is not specified here — TODO confirm. Signed 32-bit on the wire.
  int32 content_length = 4;
  // Same name as ParseRequest.file_type; presumably the type that was parsed.
  string file_type = 5;
  // Same name as ParseRequest.parser_engine; presumably the engine that ran.
  string parser_engine = 6;
}
// Field-less message used as the request type of
// DocumentParser.GetSupportedFormats and DocumentParser.GetEngines.
// NOTE(review): because both RPCs share this type, adding a request field for
// one of them later would also add it for the other.
message Empty {}
// Response message of DocumentParser.GetSupportedFormats.
message SupportedFormatsResponse {
  // List of file type strings. The exact format (extension, MIME type, ...)
  // is not specified in this file — TODO confirm.
  repeated string file_types = 1;
  // String-to-string map; presumably keyed by an entry of `file_types` with a
  // description as the value — TODO confirm. Entry order is undefined.
  map<string, string> file_type_descriptions = 2;
}
// Response message of DocumentParser.GetEngines.
message EnginesResponse {
  // Zero or more EngineInfo entries.
  repeated EngineInfo engines = 1;
}
// One entry of EnginesResponse.engines.
// NOTE(review): field meanings are inferred from the names; the code that
// fills them is not in this file — confirm before relying on them.
message EngineInfo {
  // Engine name; presumably the value accepted by ParseRequest.parser_engine.
  string name = 1;
  // Presumably a human-readable description of the engine.
  string description = 2;
  // List of file type strings; presumably in the same format as
  // SupportedFormatsResponse.file_types.
  repeated string supported_file_types = 3;
  // Presumably whether the engine can currently be used. Defaults to false
  // when unset.
  bool available = 4;
  // Presumably explains why `available` is false; likely empty otherwise.
  string unavailable_reason = 5;
}

View File

@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# NO CHECKED-IN PROTOBUF GENCODE
# source: document_parser.proto
# Protobuf Python Version: 6.31.1
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import runtime_version as _runtime_version
from google.protobuf import symbol_database as _symbol_database
from google.protobuf.internal import builder as _builder
# Runs at import time: hands the gencode's identity to the protobuf runtime so
# it can check compatibility with the installed library. The positional
# arguments are the PUBLIC domain, the version 6.31.1 (major, minor, patch; see
# the "Protobuf Python Version" header line), an empty version suffix, and the
# source file name.
# NOTE(review): presumably raises when the installed protobuf runtime is
# incompatible with 6.31.1 — confirm against the runtime_version docs and make
# sure the dependency pin allows for it.
# NOTE(review): the file header says "NO CHECKED-IN PROTOBUF GENCODE", yet this
# file is committed; consider generating it at build time instead.
_runtime_version.ValidateProtobufRuntimeVersion(
    _runtime_version.Domain.PUBLIC,
    6,
    31,
    1,
    '',
    'document_parser.proto'
)
# @@protoc_insertion_point(imports)
# Default symbol database instance (not referenced again in this module).
_sym_db = _symbol_database.Default()

# Adds the serialized file descriptor of document_parser.proto to the default
# descriptor pool. The bytes literal is generator output: it embeds the
# package, message, field and service names seen in the .proto file and must
# not be edited by hand (the _serialized_start/_end offsets set further down
# index into it).
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x64ocument_parser.proto\x12\tdocparser\"\xdd\x01\n\x0cParseRequest\x12\x10\n\x08\x66ile_url\x18\x01 \x01(\t\x12\x11\n\tfile_name\x18\x02 \x01(\t\x12\x11\n\tfile_type\x18\x03 \x01(\t\x12\x15\n\rparser_engine\x18\x04 \x01(\t\x12\x46\n\x10\x65ngine_overrides\x18\x05 \x03(\x0b\x32,.docparser.ParseRequest.EngineOverridesEntry\x1a\x36\n\x14\x45ngineOverridesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x84\x01\n\rParseResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\x12\x0f\n\x07message\x18\x03 \x01(\t\x12\x16\n\x0e\x63ontent_length\x18\x04 \x01(\x05\x12\x11\n\tfile_type\x18\x05 \x01(\t\x12\x15\n\rparser_engine\x18\x06 \x01(\t\"\x07\n\x05\x45mpty\"\xca\x01\n\x18SupportedFormatsResponse\x12\x12\n\nfile_types\x18\x01 \x03(\t\x12]\n\x16\x66ile_type_descriptions\x18\x02 \x03(\x0b\x32=.docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry\x1a;\n\x19\x46ileTypeDescriptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"9\n\x0f\x45nginesResponse\x12&\n\x07\x65ngines\x18\x01 \x03(\x0b\x32\x15.docparser.EngineInfo\"|\n\nEngineInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x1c\n\x14supported_file_types\x18\x03 \x03(\t\x12\x11\n\tavailable\x18\x04 \x01(\x08\x12\x1a\n\x12unavailable_reason\x18\x05 \x01(\t2\xde\x01\n\x0e\x44ocumentParser\x12\x42\n\rParseDocument\x12\x17.docparser.ParseRequest\x1a\x18.docparser.ParseResponse\x12L\n\x13GetSupportedFormats\x12\x10.docparser.Empty\x1a#.docparser.SupportedFormatsResponse\x12:\n\nGetEngines\x12\x10.docparser.Empty\x1a\x1a.docparser.EnginesResponseB\x1aZ\x18x-agents/proto/docparserb\x06proto3')

# The builder calls receive this module's globals() dict; the entries read
# back from `_globals` further down (e.g. `_globals['_PARSEREQUEST']`) show
# that the builder populates it.
_globals = globals()
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'document_parser_pb2', _globals)
# Runs only when `_descriptor._USE_C_DESCRIPTORS` is falsy (presumably the
# pure-Python descriptor backend). It attaches serialized options and byte
# offsets to the descriptors the builder placed in `_globals`.
if not _descriptor._USE_C_DESCRIPTORS:
  # File-level options: the bytes carry the go_package string from the .proto
  # ('Z', length 0o30 == 24, then "x-agents/proto/docparser").
  _globals['DESCRIPTOR']._loaded_options = None
  _globals['DESCRIPTOR']._serialized_options = b'Z\030x-agents/proto/docparser'
  # Options of the two synthetic *Entry messages backing the map<> fields.
  # b'8\001' presumably encodes the map_entry message option — TODO confirm.
  _globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._loaded_options = None
  _globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_options = b'8\001'
  _globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._loaded_options = None
  _globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_options = b'8\001'
  # Start/end byte offsets of each definition inside the serialized file
  # descriptor passed to AddSerializedFile. Example: ParseRequest starts at
  # byte 37 and its encoded length is 0xdd 0x01 == 221, giving end == 258.
  # Nested *Entry ranges lie inside their parent message's range.
  _globals['_PARSEREQUEST']._serialized_start=37
  _globals['_PARSEREQUEST']._serialized_end=258
  _globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_start=204
  _globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_end=258
  _globals['_PARSERESPONSE']._serialized_start=261
  _globals['_PARSERESPONSE']._serialized_end=393
  _globals['_EMPTY']._serialized_start=395
  _globals['_EMPTY']._serialized_end=402
  _globals['_SUPPORTEDFORMATSRESPONSE']._serialized_start=405
  _globals['_SUPPORTEDFORMATSRESPONSE']._serialized_end=607
  _globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_start=548
  _globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_end=607
  _globals['_ENGINESRESPONSE']._serialized_start=609
  _globals['_ENGINESRESPONSE']._serialized_end=666
  _globals['_ENGINEINFO']._serialized_start=668
  _globals['_ENGINEINFO']._serialized_end=792
  _globals['_DOCUMENTPARSER']._serialized_start=795
  _globals['_DOCUMENTPARSER']._serialized_end=1017
# @@protoc_insertion_point(module_scope)

View File

@@ -0,0 +1,183 @@
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
"""Client and server classes corresponding to protobuf-defined services."""
import grpc
import warnings
import document_parser_pb2 as document__parser__pb2
# Import-time guard: refuse to load this module when the installed grpc
# package is not supported by the generated code.

# Minimum grpcio version this generated code depends on (see the error
# message below).
GRPC_GENERATED_VERSION = '1.78.0'
# Version string of the grpc package that is actually installed.
GRPC_VERSION = grpc.__version__
_version_not_supported = False

try:
    from grpc._utilities import first_version_is_lower
    # Presumably True when GRPC_VERSION is lower than GRPC_GENERATED_VERSION.
    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
    # A grpc package that does not provide the helper is treated as
    # unsupported.
    _version_not_supported = True

if _version_not_supported:
    # The message names both versions and the two ways to reconcile them.
    raise RuntimeError(
        f'The grpc package installed is at version {GRPC_VERSION},'
        + ' but the generated code in document_parser_pb2_grpc.py depends on'
        + f' grpcio>={GRPC_GENERATED_VERSION}.'
        + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
        + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
    )
class DocumentParserStub(object):
    """Client stub for the `docparser.DocumentParser` service.

    After construction the instance has one attribute per RPC
    (`ParseDocument`, `GetSupportedFormats`, `GetEngines`), each being the
    object returned by `channel.unary_unary` for that method path.
    """

    def __init__(self, channel):
        """Constructor.

        Args:
            channel: A grpc.Channel.
        """
        # ParseRequest -> ParseResponse
        self.ParseDocument = channel.unary_unary(
                '/docparser.DocumentParser/ParseDocument',
                request_serializer=document__parser__pb2.ParseRequest.SerializeToString,
                response_deserializer=document__parser__pb2.ParseResponse.FromString,
                _registered_method=True)
        # Empty -> SupportedFormatsResponse
        self.GetSupportedFormats = channel.unary_unary(
                '/docparser.DocumentParser/GetSupportedFormats',
                request_serializer=document__parser__pb2.Empty.SerializeToString,
                response_deserializer=document__parser__pb2.SupportedFormatsResponse.FromString,
                _registered_method=True)
        # Empty -> EnginesResponse
        self.GetEngines = channel.unary_unary(
                '/docparser.DocumentParser/GetEngines',
                request_serializer=document__parser__pb2.Empty.SerializeToString,
                response_deserializer=document__parser__pb2.EnginesResponse.FromString,
                _registered_method=True)
class DocumentParserServicer(object):
    """Server-side base class for the `docparser.DocumentParser` service.

    Every method here is a placeholder: it sets the UNIMPLEMENTED status code
    and a details string on the context, then raises NotImplementedError.
    A concrete service presumably subclasses this and overrides the methods.
    """

    def ParseDocument(self, request, context):
        """Placeholder for the ParseDocument RPC.

        Args:
            request: The incoming request; not read by this placeholder.
            context: Object on which `set_code` and `set_details` are called.

        Raises:
            NotImplementedError: Always.
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def GetSupportedFormats(self, request, context):
        """Placeholder for the GetSupportedFormats RPC.

        Args:
            request: The incoming request; not read by this placeholder.
            context: Object on which `set_code` and `set_details` are called.

        Raises:
            NotImplementedError: Always.
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')

    def GetEngines(self, request, context):
        """Placeholder for the GetEngines RPC.

        Args:
            request: The incoming request; not read by this placeholder.
            context: Object on which `set_code` and `set_details` are called.

        Raises:
            NotImplementedError: Always.
        """
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details('Method not implemented!')
        raise NotImplementedError('Method not implemented!')
def add_DocumentParserServicer_to_server(servicer, server):
    """Register a servicer's three RPC methods on a server.

    Args:
        servicer: Object providing `ParseDocument`, `GetSupportedFormats` and
            `GetEngines` (for example a DocumentParserServicer subclass).
        server: Object providing `add_generic_rpc_handlers` and
            `add_registered_method_handlers` (presumably a grpc.Server).
    """
    # One unary-unary handler per RPC, keyed by method name. Each handler
    # pairs the servicer method with the request parser and response
    # serializer of the matching message types.
    rpc_method_handlers = {
            'ParseDocument': grpc.unary_unary_rpc_method_handler(
                    servicer.ParseDocument,
                    request_deserializer=document__parser__pb2.ParseRequest.FromString,
                    response_serializer=document__parser__pb2.ParseResponse.SerializeToString,
            ),
            'GetSupportedFormats': grpc.unary_unary_rpc_method_handler(
                    servicer.GetSupportedFormats,
                    request_deserializer=document__parser__pb2.Empty.FromString,
                    response_serializer=document__parser__pb2.SupportedFormatsResponse.SerializeToString,
            ),
            'GetEngines': grpc.unary_unary_rpc_method_handler(
                    servicer.GetEngines,
                    request_deserializer=document__parser__pb2.Empty.FromString,
                    response_serializer=document__parser__pb2.EnginesResponse.SerializeToString,
            ),
    }
    generic_handler = grpc.method_handlers_generic_handler(
            'docparser.DocumentParser', rpc_method_handlers)
    # The same handlers are registered twice: once wrapped in a generic
    # handler and once directly under the service name.
    server.add_generic_rpc_handlers((generic_handler,))
    server.add_registered_method_handlers('docparser.DocumentParser', rpc_method_handlers)
 # This class is part of an EXPERIMENTAL API.
class DocumentParser(object):
    """Channel-less convenience wrappers for the `docparser.DocumentParser` RPCs.

    Each static method forwards its arguments to
    `grpc.experimental.unary_unary` together with the method path and the
    serializer/deserializer of the matching message types, and returns that
    call's result.

    Note that the forwarded positional order (`channel_credentials`,
    `insecure`, `call_credentials`) differs from the order in the method
    signatures (`channel_credentials`, `call_credentials`, `insecure`);
    presumably it follows the parameter order of
    `grpc.experimental.unary_unary` — confirm before reordering anything.
    """

    @staticmethod
    def ParseDocument(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Invoke ParseDocument on `target` (ParseRequest -> ParseResponse)."""
        return grpc.experimental.unary_unary(
            request,
            target,
            '/docparser.DocumentParser/ParseDocument',
            document__parser__pb2.ParseRequest.SerializeToString,
            document__parser__pb2.ParseResponse.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)

    @staticmethod
    def GetSupportedFormats(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Invoke GetSupportedFormats on `target` (Empty -> SupportedFormatsResponse)."""
        return grpc.experimental.unary_unary(
            request,
            target,
            '/docparser.DocumentParser/GetSupportedFormats',
            document__parser__pb2.Empty.SerializeToString,
            document__parser__pb2.SupportedFormatsResponse.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)

    @staticmethod
    def GetEngines(request,
            target,
            options=(),
            channel_credentials=None,
            call_credentials=None,
            insecure=False,
            compression=None,
            wait_for_ready=None,
            timeout=None,
            metadata=None):
        """Invoke GetEngines on `target` (Empty -> EnginesResponse)."""
        return grpc.experimental.unary_unary(
            request,
            target,
            '/docparser.DocumentParser/GetEngines',
            document__parser__pb2.Empty.SerializeToString,
            document__parser__pb2.EnginesResponse.FromString,
            options,
            channel_credentials,
            insecure,
            call_credentials,
            compression,
            wait_for_ready,
            timeout,
            metadata,
            _registered_method=True)