refactor: 重构 algorithm 为 ai-core 代码解析服务
- 新增 ai-core 目录,包含代码解析核心服务 - 添加 proto 定义、parser、service 模块 - 添加启动脚本和依赖配置 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
47
ai-core/proto/document_parser.proto
Normal file
47
ai-core/proto/document_parser.proto
Normal file
@@ -0,0 +1,47 @@
|
||||
syntax = "proto3";
|
||||
|
||||
package docparser;
|
||||
|
||||
option go_package = "x-agents/proto/docparser";
|
||||
|
||||
// DocumentParser groups three RPCs; none of them is declared with `stream`,
// so each takes a single request message and returns a single response.
service DocumentParser {
|
||||
// Takes a ParseRequest and returns a ParseResponse.
rpc ParseDocument(ParseRequest) returns (ParseResponse);
|
||||
// Takes the Empty message and returns a SupportedFormatsResponse.
rpc GetSupportedFormats(Empty) returns (SupportedFormatsResponse);
|
||||
// Takes the Empty message and returns an EnginesResponse.
rpc GetEngines(Empty) returns (EnginesResponse);
|
||||
}
|
||||
|
||||
// Request message of the ParseDocument RPC.
// NOTE(review): field meanings below are inferred from the field names only;
// confirm against the service implementation.
message ParseRequest {
|
||||
// Presumably the location the document is fetched from -- TODO confirm.
string file_url = 1;
|
||||
// Presumably the document's file name -- TODO confirm.
string file_name = 2;
|
||||
// Presumably the document format/extension -- TODO confirm accepted values.
string file_type = 3;
|
||||
// Presumably selects one of the engines reported by GetEngines -- TODO confirm.
string parser_engine = 4;
|
||||
// String-to-string map; presumably per-request engine settings -- TODO confirm keys.
map<string, string> engine_overrides = 5;
|
||||
}
|
||||
|
||||
// Response message of the ParseDocument RPC.
// NOTE(review): field meanings below are inferred from the field names only;
// confirm against the service implementation.
message ParseResponse {
|
||||
// Presumably true when parsing succeeded -- TODO confirm.
bool success = 1;
|
||||
// Presumably the parsed document text -- TODO confirm.
string content = 2;
|
||||
// Presumably a status or error message -- TODO confirm.
string message = 3;
|
||||
// 32-bit signed integer; presumably the length of `content` -- TODO confirm
// whether it counts characters or bytes.
int32 content_length = 4;
|
||||
// Same name and type as ParseRequest.file_type; presumably echoes the
// effective value -- TODO confirm.
string file_type = 5;
|
||||
// Same name and type as ParseRequest.parser_engine; presumably the engine
// actually used -- TODO confirm.
string parser_engine = 6;
|
||||
}
|
||||
|
||||
// Field-less message used as the request type of GetSupportedFormats and GetEngines.
message Empty {}
|
||||
|
||||
// Response message of the GetSupportedFormats RPC.
message SupportedFormatsResponse {
|
||||
// List of file type strings; presumably the formats the service can parse -- TODO confirm.
repeated string file_types = 1;
|
||||
// String-to-string map; presumably keyed by file type with a human-readable
// description as the value -- TODO confirm.
map<string, string> file_type_descriptions = 2;
|
||||
}
|
||||
|
||||
// Response message of the GetEngines RPC: a list of EngineInfo entries.
message EnginesResponse {
|
||||
repeated EngineInfo engines = 1;
|
||||
}
|
||||
|
||||
// One entry of EnginesResponse.engines.
// NOTE(review): field meanings below are inferred from the field names only;
// confirm against the service implementation.
message EngineInfo {
|
||||
// Presumably the identifier accepted in ParseRequest.parser_engine -- TODO confirm.
string name = 1;
|
||||
// Presumably a human-readable description of the engine -- TODO confirm.
string description = 2;
|
||||
// Presumably the file types this engine can handle -- TODO confirm.
repeated string supported_file_types = 3;
|
||||
// Presumably whether the engine can currently be used -- TODO confirm.
bool available = 4;
|
||||
// Presumably explains why `available` is false -- TODO confirm.
string unavailable_reason = 5;
|
||||
}
|
||||
57
ai-core/proto/document_parser_pb2.py
Normal file
57
ai-core/proto/document_parser_pb2.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
||||
# NO CHECKED-IN PROTOBUF GENCODE
|
||||
# source: document_parser.proto
|
||||
# Protobuf Python Version: 6.31.1
|
||||
"""Generated protocol buffer code."""
|
||||
from google.protobuf import descriptor as _descriptor
|
||||
from google.protobuf import descriptor_pool as _descriptor_pool
|
||||
from google.protobuf import runtime_version as _runtime_version
|
||||
from google.protobuf import symbol_database as _symbol_database
|
||||
from google.protobuf.internal import builder as _builder
|
||||
# Import-time check of the installed protobuf runtime. The numeric arguments
# 6, 31, 1 match the "Protobuf Python Version: 6.31.1" recorded in this file's
# header; the last argument names the source .proto file.
_runtime_version.ValidateProtobufRuntimeVersion(
|
||||
_runtime_version.Domain.PUBLIC,
|
||||
6,
|
||||
31,
|
||||
1,
|
||||
'',
|
||||
'document_parser.proto'
|
||||
)
|
||||
# @@protoc_insertion_point(imports)
|
||||
|
||||
# Handle on the default symbol database (not referenced again in the visible part of this file).
_sym_db = _symbol_database.Default()
|
||||
|
||||
|
||||
|
||||
|
||||
# Registers the serialized file descriptor for document_parser.proto with the
# default descriptor pool. The bytes literal is generated; do not hand-edit it.
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15\x64ocument_parser.proto\x12\tdocparser\"\xdd\x01\n\x0cParseRequest\x12\x10\n\x08\x66ile_url\x18\x01 \x01(\t\x12\x11\n\tfile_name\x18\x02 \x01(\t\x12\x11\n\tfile_type\x18\x03 \x01(\t\x12\x15\n\rparser_engine\x18\x04 \x01(\t\x12\x46\n\x10\x65ngine_overrides\x18\x05 \x03(\x0b\x32,.docparser.ParseRequest.EngineOverridesEntry\x1a\x36\n\x14\x45ngineOverridesEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\x84\x01\n\rParseResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\t\x12\x0f\n\x07message\x18\x03 \x01(\t\x12\x16\n\x0e\x63ontent_length\x18\x04 \x01(\x05\x12\x11\n\tfile_type\x18\x05 \x01(\t\x12\x15\n\rparser_engine\x18\x06 \x01(\t\"\x07\n\x05\x45mpty\"\xca\x01\n\x18SupportedFormatsResponse\x12\x12\n\nfile_types\x18\x01 \x03(\t\x12]\n\x16\x66ile_type_descriptions\x18\x02 \x03(\x0b\x32=.docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry\x1a;\n\x19\x46ileTypeDescriptionsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"9\n\x0f\x45nginesResponse\x12&\n\x07\x65ngines\x18\x01 \x03(\x0b\x32\x15.docparser.EngineInfo\"|\n\nEngineInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x1c\n\x14supported_file_types\x18\x03 \x03(\t\x12\x11\n\tavailable\x18\x04 \x01(\x08\x12\x1a\n\x12unavailable_reason\x18\x05 \x01(\t2\xde\x01\n\x0e\x44ocumentParser\x12\x42\n\rParseDocument\x12\x17.docparser.ParseRequest\x1a\x18.docparser.ParseResponse\x12L\n\x13GetSupportedFormats\x12\x10.docparser.Empty\x1a#.docparser.SupportedFormatsResponse\x12:\n\nGetEngines\x12\x10.docparser.Empty\x1a\x1a.docparser.EnginesResponseB\x1aZ\x18x-agents/proto/docparserb\x06proto3')
|
||||
|
||||
# The builder calls receive this module's globals() dict together with
# DESCRIPTOR; the `_globals[...]` lookups further down read entries from it.
_globals = globals()
|
||||
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
||||
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'document_parser_pb2', _globals)
|
||||
# Guarded by the `_USE_C_DESCRIPTORS` flag being false.
# NOTE(review): indentation is not visible in this view, so the extent of the
# `if` body cannot be read from here; protoc output normally nests all of the
# assignments below under it -- confirm against the regenerated file.
if not _descriptor._USE_C_DESCRIPTORS:
|
||||
# File-level options: reset the loaded options and store the serialized form
# (the bytes contain the go_package path "x-agents/proto/docparser").
_globals['DESCRIPTOR']._loaded_options = None
|
||||
_globals['DESCRIPTOR']._serialized_options = b'Z\030x-agents/proto/docparser'
|
||||
# Same pair of assignments for the two map-entry descriptors.
_globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._loaded_options = None
|
||||
_globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_options = b'8\001'
|
||||
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._loaded_options = None
|
||||
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_options = b'8\001'
|
||||
# Start/end integers recorded per descriptor; presumably byte offsets into the
# serialized file passed to AddSerializedFile -- TODO confirm.
_globals['_PARSEREQUEST']._serialized_start=37
|
||||
_globals['_PARSEREQUEST']._serialized_end=258
|
||||
_globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_start=204
|
||||
_globals['_PARSEREQUEST_ENGINEOVERRIDESENTRY']._serialized_end=258
|
||||
_globals['_PARSERESPONSE']._serialized_start=261
|
||||
_globals['_PARSERESPONSE']._serialized_end=393
|
||||
_globals['_EMPTY']._serialized_start=395
|
||||
_globals['_EMPTY']._serialized_end=402
|
||||
_globals['_SUPPORTEDFORMATSRESPONSE']._serialized_start=405
|
||||
_globals['_SUPPORTEDFORMATSRESPONSE']._serialized_end=607
|
||||
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_start=548
|
||||
_globals['_SUPPORTEDFORMATSRESPONSE_FILETYPEDESCRIPTIONSENTRY']._serialized_end=607
|
||||
_globals['_ENGINESRESPONSE']._serialized_start=609
|
||||
_globals['_ENGINESRESPONSE']._serialized_end=666
|
||||
_globals['_ENGINEINFO']._serialized_start=668
|
||||
_globals['_ENGINEINFO']._serialized_end=792
|
||||
_globals['_DOCUMENTPARSER']._serialized_start=795
|
||||
_globals['_DOCUMENTPARSER']._serialized_end=1017
|
||||
# @@protoc_insertion_point(module_scope)
|
||||
183
ai-core/proto/document_parser_pb2_grpc.py
Normal file
183
ai-core/proto/document_parser_pb2_grpc.py
Normal file
@@ -0,0 +1,183 @@
|
||||
# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT!
|
||||
"""Client and server classes corresponding to protobuf-defined services."""
|
||||
import grpc
|
||||
import warnings
|
||||
|
||||
import document_parser_pb2 as document__parser__pb2
|
||||
|
||||
# Minimum grpcio version this generated module accepts, and the version that
# is actually installed.
GRPC_GENERATED_VERSION = '1.78.0'
GRPC_VERSION = grpc.__version__

try:
    from grpc._utilities import first_version_is_lower
    _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION)
except ImportError:
    # The comparison helper cannot be imported: treat the runtime as unsupported.
    _version_not_supported = True

if _version_not_supported:
    # Fail at import time rather than on the first RPC.
    raise RuntimeError(
        f'The grpc package installed is at version {GRPC_VERSION},'
        ' but the generated code in document_parser_pb2_grpc.py depends on'
        f' grpcio>={GRPC_GENERATED_VERSION}.'
        f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}'
        f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.'
    )
|
||||
|
||||
|
||||
class DocumentParserStub(object):
    """Client stub for the docparser.DocumentParser service.

    Each RPC is exposed as an instance attribute holding the callable that
    ``channel.unary_unary`` returns for that method path.
    """

    def __init__(self, channel):
        """Bind the three RPC callables to *channel*.

        Args:
            channel: A grpc.Channel.
        """
        pb2 = document__parser__pb2

        def bind(path, request_cls, response_cls):
            # Every RPC of this service is unary-unary and registered.
            return channel.unary_unary(
                path,
                request_serializer=request_cls.SerializeToString,
                response_deserializer=response_cls.FromString,
                _registered_method=True,
            )

        self.ParseDocument = bind(
            '/docparser.DocumentParser/ParseDocument',
            pb2.ParseRequest,
            pb2.ParseResponse,
        )
        self.GetSupportedFormats = bind(
            '/docparser.DocumentParser/GetSupportedFormats',
            pb2.Empty,
            pb2.SupportedFormatsResponse,
        )
        self.GetEngines = bind(
            '/docparser.DocumentParser/GetEngines',
            pb2.Empty,
            pb2.EnginesResponse,
        )
|
||||
|
||||
|
||||
class DocumentParserServicer(object):
    """Server-side base class for the docparser.DocumentParser service.

    Every method here marks the call as UNIMPLEMENTED on *context* and then
    raises NotImplementedError.
    """

    def ParseDocument(self, request, context):
        """Report the ParseDocument RPC as unimplemented."""
        details = 'Method not implemented!'
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(details)
        raise NotImplementedError(details)

    def GetSupportedFormats(self, request, context):
        """Report the GetSupportedFormats RPC as unimplemented."""
        details = 'Method not implemented!'
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(details)
        raise NotImplementedError(details)

    def GetEngines(self, request, context):
        """Report the GetEngines RPC as unimplemented."""
        details = 'Method not implemented!'
        context.set_code(grpc.StatusCode.UNIMPLEMENTED)
        context.set_details(details)
        raise NotImplementedError(details)
|
||||
|
||||
|
||||
def add_DocumentParserServicer_to_server(servicer, server):
    """Register *servicer*'s RPC handlers for docparser.DocumentParser on *server*.

    The same handler mapping is installed twice: once wrapped in a generic
    handler and once as registered method handlers.

    Args:
        servicer: Object providing ParseDocument, GetSupportedFormats and
            GetEngines methods.
        server: Server object exposing add_generic_rpc_handlers and
            add_registered_method_handlers.
    """
    pb2 = document__parser__pb2
    make_handler = grpc.unary_unary_rpc_method_handler

    rpc_method_handlers = {
        'ParseDocument': make_handler(
            servicer.ParseDocument,
            request_deserializer=pb2.ParseRequest.FromString,
            response_serializer=pb2.ParseResponse.SerializeToString,
        ),
        'GetSupportedFormats': make_handler(
            servicer.GetSupportedFormats,
            request_deserializer=pb2.Empty.FromString,
            response_serializer=pb2.SupportedFormatsResponse.SerializeToString,
        ),
        'GetEngines': make_handler(
            servicer.GetEngines,
            request_deserializer=pb2.Empty.FromString,
            response_serializer=pb2.EnginesResponse.SerializeToString,
        ),
    }

    generic_handler = grpc.method_handlers_generic_handler(
        'docparser.DocumentParser', rpc_method_handlers)
    server.add_generic_rpc_handlers((generic_handler,))
    server.add_registered_method_handlers('docparser.DocumentParser', rpc_method_handlers)
|
||||
|
||||
|
||||
# This class is part of an EXPERIMENTAL API.
class DocumentParser(object):
    """Static one-shot callers for the docparser.DocumentParser RPCs.

    Each static method forwards its arguments to
    ``grpc.experimental.unary_unary`` with the method path and the matching
    request serializer / response deserializer filled in.
    """

    @staticmethod
    def ParseDocument(request,
                      target,
                      options=(),
                      channel_credentials=None,
                      call_credentials=None,
                      insecure=False,
                      compression=None,
                      wait_for_ready=None,
                      timeout=None,
                      metadata=None):
        """Invoke ParseDocument on *target* and return the call's result."""
        pb2 = document__parser__pb2
        return grpc.experimental.unary_unary(
            request,
            target,
            '/docparser.DocumentParser/ParseDocument',
            request_serializer=pb2.ParseRequest.SerializeToString,
            response_deserializer=pb2.ParseResponse.FromString,
            options=options,
            channel_credentials=channel_credentials,
            insecure=insecure,
            call_credentials=call_credentials,
            compression=compression,
            wait_for_ready=wait_for_ready,
            timeout=timeout,
            metadata=metadata,
            _registered_method=True,
        )

    @staticmethod
    def GetSupportedFormats(request,
                            target,
                            options=(),
                            channel_credentials=None,
                            call_credentials=None,
                            insecure=False,
                            compression=None,
                            wait_for_ready=None,
                            timeout=None,
                            metadata=None):
        """Invoke GetSupportedFormats on *target* and return the call's result."""
        pb2 = document__parser__pb2
        return grpc.experimental.unary_unary(
            request,
            target,
            '/docparser.DocumentParser/GetSupportedFormats',
            request_serializer=pb2.Empty.SerializeToString,
            response_deserializer=pb2.SupportedFormatsResponse.FromString,
            options=options,
            channel_credentials=channel_credentials,
            insecure=insecure,
            call_credentials=call_credentials,
            compression=compression,
            wait_for_ready=wait_for_ready,
            timeout=timeout,
            metadata=metadata,
            _registered_method=True,
        )

    @staticmethod
    def GetEngines(request,
                   target,
                   options=(),
                   channel_credentials=None,
                   call_credentials=None,
                   insecure=False,
                   compression=None,
                   wait_for_ready=None,
                   timeout=None,
                   metadata=None):
        """Invoke GetEngines on *target* and return the call's result."""
        pb2 = document__parser__pb2
        return grpc.experimental.unary_unary(
            request,
            target,
            '/docparser.DocumentParser/GetEngines',
            request_serializer=pb2.Empty.SerializeToString,
            response_deserializer=pb2.EnginesResponse.FromString,
            options=options,
            channel_credentials=channel_credentials,
            insecure=insecure,
            call_credentials=call_credentials,
            compression=compression,
            wait_for_ready=wait_for_ready,
            timeout=timeout,
            metadata=metadata,
            _registered_method=True,
        )
|
||||
Reference in New Issue
Block a user