feat: 优化后端知识库服务和文档解析

- 更新文档解析客户端
- 优化知识库服务逻辑
- 更新 protobuf 定义

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-10 15:02:55 +08:00
parent d24b29afe4
commit 0a9f6e278e
4 changed files with 236 additions and 53 deletions

View File

@@ -28,8 +28,10 @@ type ParseRequest struct {
FileType string `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"`
ParserEngine string `protobuf:"bytes,4,opt,name=parser_engine,json=parserEngine,proto3" json:"parser_engine,omitempty"`
EngineOverrides map[string]string `protobuf:"bytes,5,rep,name=engine_overrides,json=engineOverrides,proto3" json:"engine_overrides,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
// VLM 配置(可选)
VlmConfig *VLMConfig `protobuf:"bytes,6,opt,name=vlm_config,json=vlmConfig,proto3" json:"vlm_config,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *ParseRequest) Reset() {
@@ -97,6 +99,97 @@ func (x *ParseRequest) GetEngineOverrides() map[string]string {
return nil
}
// GetVlmConfig returns the VlmConfig field of x, or nil when x itself is nil,
// so it can be called safely on a nil *ParseRequest.
func (x *ParseRequest) GetVlmConfig() *VLMConfig {
	if x == nil {
		return nil
	}
	return x.VlmConfig
}
// VLMConfig is the Go type for the docparser.VLMConfig protobuf message.
// ParseRequest carries it as its optional vlm_config field (field number 6).
//
// NOTE(review): ApiKey is an ordinary string field, so it is presumably
// included whenever this message is printed or logged — confirm that callers
// do not log requests carrying real credentials.
type VLMConfig struct {
state protoimpl.MessageState `protogen:"open.v1"`
Enabled bool `protobuf:"varint,1,opt,name=enabled,proto3" json:"enabled,omitempty"` // Whether VLM is enabled
Provider string `protobuf:"bytes,2,opt,name=provider,proto3" json:"provider,omitempty"` // VLM provider: openai, anthropic, local, etc.
Model string `protobuf:"bytes,3,opt,name=model,proto3" json:"model,omitempty"` // Model name
ApiKey string `protobuf:"bytes,4,opt,name=api_key,json=apiKey,proto3" json:"api_key,omitempty"` // API key
BaseUrl string `protobuf:"bytes,5,opt,name=base_url,json=baseUrl,proto3" json:"base_url,omitempty"` // Custom API address
Prompt string `protobuf:"bytes,6,opt,name=prompt,proto3" json:"prompt,omitempty"` // Custom prompt
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites x with the zero VLMConfig and then stores the message info
// for this type (entry 1 of file_document_parser_proto_msgTypes) back into
// the freshly zeroed message state.
func (x *VLMConfig) Reset() {
	*x = VLMConfig{}
	info := &file_document_parser_proto_msgTypes[1]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}
// String returns the string form of x as produced by
// protoimpl.X.MessageStringOf.
func (x *VLMConfig) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage is an empty method with no behavior of its own.
// NOTE(review): presumably a marker identifying *VLMConfig as a protobuf
// message type — confirm against the protobuf-go documentation.
func (*VLMConfig) ProtoMessage() {}
// ProtoReflect returns the protoreflect.Message view of x.
//
// For a nil receiver the result comes from the type's message info via
// MessageOf. Otherwise the message state embedded in x is returned, after
// storing the message info into it if none has been loaded yet.
func (x *VLMConfig) ProtoReflect() protoreflect.Message {
	info := &file_document_parser_proto_msgTypes[1]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		// First reflective use of this value: attach the type's message info.
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the bytes produced by
// file_document_parser_proto_rawDescGZIP together with the index path
// []int{1}, which is this message's position in the file's message list.
//
// Deprecated: Use VLMConfig.ProtoReflect.Descriptor instead.
func (*VLMConfig) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{1}
}
// GetEnabled returns the Enabled field of x, or false when x is nil.
func (x *VLMConfig) GetEnabled() bool {
	if x == nil {
		return false
	}
	return x.Enabled
}
// GetProvider returns the Provider field of x, or "" when x is nil.
func (x *VLMConfig) GetProvider() string {
	if x == nil {
		return ""
	}
	return x.Provider
}
// GetModel returns the Model field of x, or "" when x is nil.
func (x *VLMConfig) GetModel() string {
	if x == nil {
		return ""
	}
	return x.Model
}
// GetApiKey returns the ApiKey field of x, or "" when x is nil.
func (x *VLMConfig) GetApiKey() string {
	if x == nil {
		return ""
	}
	return x.ApiKey
}
// GetBaseUrl returns the BaseUrl field of x, or "" when x is nil.
func (x *VLMConfig) GetBaseUrl() string {
	if x == nil {
		return ""
	}
	return x.BaseUrl
}
// GetPrompt returns the Prompt field of x, or "" when x is nil.
func (x *VLMConfig) GetPrompt() string {
	if x == nil {
		return ""
	}
	return x.Prompt
}
type ParseResponse struct {
state protoimpl.MessageState `protogen:"open.v1"`
Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
@@ -111,7 +204,7 @@ type ParseResponse struct {
func (x *ParseResponse) Reset() {
*x = ParseResponse{}
mi := &file_document_parser_proto_msgTypes[1]
mi := &file_document_parser_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -123,7 +216,7 @@ func (x *ParseResponse) String() string {
func (*ParseResponse) ProtoMessage() {}
func (x *ParseResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[1]
mi := &file_document_parser_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -136,7 +229,7 @@ func (x *ParseResponse) ProtoReflect() protoreflect.Message {
// Deprecated: Use ParseResponse.ProtoReflect.Descriptor instead.
func (*ParseResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{1}
return file_document_parser_proto_rawDescGZIP(), []int{2}
}
func (x *ParseResponse) GetSuccess() bool {
@@ -189,7 +282,7 @@ type Empty struct {
func (x *Empty) Reset() {
*x = Empty{}
mi := &file_document_parser_proto_msgTypes[2]
mi := &file_document_parser_proto_msgTypes[3]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -201,7 +294,7 @@ func (x *Empty) String() string {
func (*Empty) ProtoMessage() {}
func (x *Empty) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[2]
mi := &file_document_parser_proto_msgTypes[3]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -214,7 +307,7 @@ func (x *Empty) ProtoReflect() protoreflect.Message {
// Deprecated: Use Empty.ProtoReflect.Descriptor instead.
func (*Empty) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{2}
return file_document_parser_proto_rawDescGZIP(), []int{3}
}
type SupportedFormatsResponse struct {
@@ -227,7 +320,7 @@ type SupportedFormatsResponse struct {
func (x *SupportedFormatsResponse) Reset() {
*x = SupportedFormatsResponse{}
mi := &file_document_parser_proto_msgTypes[3]
mi := &file_document_parser_proto_msgTypes[4]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -239,7 +332,7 @@ func (x *SupportedFormatsResponse) String() string {
func (*SupportedFormatsResponse) ProtoMessage() {}
func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[3]
mi := &file_document_parser_proto_msgTypes[4]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -252,7 +345,7 @@ func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {
// Deprecated: Use SupportedFormatsResponse.ProtoReflect.Descriptor instead.
func (*SupportedFormatsResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{3}
return file_document_parser_proto_rawDescGZIP(), []int{4}
}
func (x *SupportedFormatsResponse) GetFileTypes() []string {
@@ -278,7 +371,7 @@ type EnginesResponse struct {
func (x *EnginesResponse) Reset() {
*x = EnginesResponse{}
mi := &file_document_parser_proto_msgTypes[4]
mi := &file_document_parser_proto_msgTypes[5]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -290,7 +383,7 @@ func (x *EnginesResponse) String() string {
func (*EnginesResponse) ProtoMessage() {}
func (x *EnginesResponse) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[4]
mi := &file_document_parser_proto_msgTypes[5]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -303,7 +396,7 @@ func (x *EnginesResponse) ProtoReflect() protoreflect.Message {
// Deprecated: Use EnginesResponse.ProtoReflect.Descriptor instead.
func (*EnginesResponse) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{4}
return file_document_parser_proto_rawDescGZIP(), []int{5}
}
func (x *EnginesResponse) GetEngines() []*EngineInfo {
@@ -326,7 +419,7 @@ type EngineInfo struct {
func (x *EngineInfo) Reset() {
*x = EngineInfo{}
mi := &file_document_parser_proto_msgTypes[5]
mi := &file_document_parser_proto_msgTypes[6]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
@@ -338,7 +431,7 @@ func (x *EngineInfo) String() string {
func (*EngineInfo) ProtoMessage() {}
func (x *EngineInfo) ProtoReflect() protoreflect.Message {
mi := &file_document_parser_proto_msgTypes[5]
mi := &file_document_parser_proto_msgTypes[6]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
if ms.LoadMessageInfo() == nil {
@@ -351,7 +444,7 @@ func (x *EngineInfo) ProtoReflect() protoreflect.Message {
// Deprecated: Use EngineInfo.ProtoReflect.Descriptor instead.
func (*EngineInfo) Descriptor() ([]byte, []int) {
return file_document_parser_proto_rawDescGZIP(), []int{5}
return file_document_parser_proto_rawDescGZIP(), []int{6}
}
func (x *EngineInfo) GetName() string {
@@ -393,16 +486,25 @@ var File_document_parser_proto protoreflect.FileDescriptor
const file_document_parser_proto_rawDesc = "" +
"\n" +
"\x15document_parser.proto\x12\tdocparser\"\xa5\x02\n" +
"\x15document_parser.proto\x12\tdocparser\"\xda\x02\n" +
"\fParseRequest\x12\x19\n" +
"\bfile_url\x18\x01 \x01(\tR\afileUrl\x12\x1b\n" +
"\tfile_name\x18\x02 \x01(\tR\bfileName\x12\x1b\n" +
"\tfile_type\x18\x03 \x01(\tR\bfileType\x12#\n" +
"\rparser_engine\x18\x04 \x01(\tR\fparserEngine\x12W\n" +
"\x10engine_overrides\x18\x05 \x03(\v2,.docparser.ParseRequest.EngineOverridesEntryR\x0fengineOverrides\x1aB\n" +
"\x10engine_overrides\x18\x05 \x03(\v2,.docparser.ParseRequest.EngineOverridesEntryR\x0fengineOverrides\x123\n" +
"\n" +
"vlm_config\x18\x06 \x01(\v2\x14.docparser.VLMConfigR\tvlmConfig\x1aB\n" +
"\x14EngineOverridesEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xc6\x01\n" +
"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xa3\x01\n" +
"\tVLMConfig\x12\x18\n" +
"\aenabled\x18\x01 \x01(\bR\aenabled\x12\x1a\n" +
"\bprovider\x18\x02 \x01(\tR\bprovider\x12\x14\n" +
"\x05model\x18\x03 \x01(\tR\x05model\x12\x17\n" +
"\aapi_key\x18\x04 \x01(\tR\x06apiKey\x12\x19\n" +
"\bbase_url\x18\x05 \x01(\tR\abaseUrl\x12\x16\n" +
"\x06prompt\x18\x06 \x01(\tR\x06prompt\"\xc6\x01\n" +
"\rParseResponse\x12\x18\n" +
"\asuccess\x18\x01 \x01(\bR\asuccess\x12\x18\n" +
"\acontent\x18\x02 \x01(\tR\acontent\x12\x18\n" +
@@ -445,32 +547,34 @@ func file_document_parser_proto_rawDescGZIP() []byte {
return file_document_parser_proto_rawDescData
}
var file_document_parser_proto_msgTypes = make([]protoimpl.MessageInfo, 8)
var file_document_parser_proto_msgTypes = make([]protoimpl.MessageInfo, 9)
var file_document_parser_proto_goTypes = []any{
(*ParseRequest)(nil), // 0: docparser.ParseRequest
(*ParseResponse)(nil), // 1: docparser.ParseResponse
(*Empty)(nil), // 2: docparser.Empty
(*SupportedFormatsResponse)(nil), // 3: docparser.SupportedFormatsResponse
(*EnginesResponse)(nil), // 4: docparser.EnginesResponse
(*EngineInfo)(nil), // 5: docparser.EngineInfo
nil, // 6: docparser.ParseRequest.EngineOverridesEntry
nil, // 7: docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
(*VLMConfig)(nil), // 1: docparser.VLMConfig
(*ParseResponse)(nil), // 2: docparser.ParseResponse
(*Empty)(nil), // 3: docparser.Empty
(*SupportedFormatsResponse)(nil), // 4: docparser.SupportedFormatsResponse
(*EnginesResponse)(nil), // 5: docparser.EnginesResponse
(*EngineInfo)(nil), // 6: docparser.EngineInfo
nil, // 7: docparser.ParseRequest.EngineOverridesEntry
nil, // 8: docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
}
var file_document_parser_proto_depIdxs = []int32{
6, // 0: docparser.ParseRequest.engine_overrides:type_name -> docparser.ParseRequest.EngineOverridesEntry
7, // 1: docparser.SupportedFormatsResponse.file_type_descriptions:type_name -> docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
5, // 2: docparser.EnginesResponse.engines:type_name -> docparser.EngineInfo
0, // 3: docparser.DocumentParser.ParseDocument:input_type -> docparser.ParseRequest
2, // 4: docparser.DocumentParser.GetSupportedFormats:input_type -> docparser.Empty
2, // 5: docparser.DocumentParser.GetEngines:input_type -> docparser.Empty
1, // 6: docparser.DocumentParser.ParseDocument:output_type -> docparser.ParseResponse
3, // 7: docparser.DocumentParser.GetSupportedFormats:output_type -> docparser.SupportedFormatsResponse
4, // 8: docparser.DocumentParser.GetEngines:output_type -> docparser.EnginesResponse
6, // [6:9] is the sub-list for method output_type
3, // [3:6] is the sub-list for method input_type
3, // [3:3] is the sub-list for extension type_name
3, // [3:3] is the sub-list for extension extendee
0, // [0:3] is the sub-list for field type_name
7, // 0: docparser.ParseRequest.engine_overrides:type_name -> docparser.ParseRequest.EngineOverridesEntry
1, // 1: docparser.ParseRequest.vlm_config:type_name -> docparser.VLMConfig
8, // 2: docparser.SupportedFormatsResponse.file_type_descriptions:type_name -> docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
6, // 3: docparser.EnginesResponse.engines:type_name -> docparser.EngineInfo
0, // 4: docparser.DocumentParser.ParseDocument:input_type -> docparser.ParseRequest
3, // 5: docparser.DocumentParser.GetSupportedFormats:input_type -> docparser.Empty
3, // 6: docparser.DocumentParser.GetEngines:input_type -> docparser.Empty
2, // 7: docparser.DocumentParser.ParseDocument:output_type -> docparser.ParseResponse
4, // 8: docparser.DocumentParser.GetSupportedFormats:output_type -> docparser.SupportedFormatsResponse
5, // 9: docparser.DocumentParser.GetEngines:output_type -> docparser.EnginesResponse
7, // [7:10] is the sub-list for method output_type
4, // [4:7] is the sub-list for method input_type
4, // [4:4] is the sub-list for extension type_name
4, // [4:4] is the sub-list for extension extendee
0, // [0:4] is the sub-list for field type_name
}
// init calls file_document_parser_proto_init when the package is loaded.
func init() { file_document_parser_proto_init() }
@@ -484,7 +588,7 @@ func file_document_parser_proto_init() {
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_document_parser_proto_rawDesc), len(file_document_parser_proto_rawDesc)),
NumEnums: 0,
NumMessages: 8,
NumMessages: 9,
NumExtensions: 0,
NumServices: 1,
},