feat: 优化后端知识库服务和文档解析
- 更新文档解析客户端 - 优化知识库服务逻辑 - 更新 protobuf 定义 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -28,8 +28,10 @@ type ParseRequest struct {
|
|||||||
FileType string `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"`
|
FileType string `protobuf:"bytes,3,opt,name=file_type,json=fileType,proto3" json:"file_type,omitempty"`
|
||||||
ParserEngine string `protobuf:"bytes,4,opt,name=parser_engine,json=parserEngine,proto3" json:"parser_engine,omitempty"`
|
ParserEngine string `protobuf:"bytes,4,opt,name=parser_engine,json=parserEngine,proto3" json:"parser_engine,omitempty"`
|
||||||
EngineOverrides map[string]string `protobuf:"bytes,5,rep,name=engine_overrides,json=engineOverrides,proto3" json:"engine_overrides,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
|
EngineOverrides map[string]string `protobuf:"bytes,5,rep,name=engine_overrides,json=engineOverrides,proto3" json:"engine_overrides,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
|
||||||
unknownFields protoimpl.UnknownFields
|
// VLM 配置(可选)
|
||||||
sizeCache protoimpl.SizeCache
|
VlmConfig *VLMConfig `protobuf:"bytes,6,opt,name=vlm_config,json=vlmConfig,proto3" json:"vlm_config,omitempty"`
|
||||||
|
unknownFields protoimpl.UnknownFields
|
||||||
|
sizeCache protoimpl.SizeCache
|
||||||
}
|
}
|
||||||
|
|
||||||
func (x *ParseRequest) Reset() {
|
func (x *ParseRequest) Reset() {
|
||||||
@@ -97,6 +99,97 @@ func (x *ParseRequest) GetEngineOverrides() map[string]string {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetVlmConfig returns x.VlmConfig, or nil when the receiver x is nil.
func (x *ParseRequest) GetVlmConfig() *VLMConfig {
|
||||||
|
if x != nil {
|
||||||
|
return x.VlmConfig
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VLMConfig is the Go type for the docparser.VLMConfig message.
// Its exported fields carry the VLM settings; state, unknownFields and
// sizeCache are protoimpl bookkeeping fields.
type VLMConfig struct {
|
||||||
|
state protoimpl.MessageState `protogen:"open.v1"`
|
||||||
|
Enabled bool `protobuf:"varint,1,opt,name=enabled,proto3" json:"enabled,omitempty"` // whether VLM is enabled
|
||||||
|
Provider string `protobuf:"bytes,2,opt,name=provider,proto3" json:"provider,omitempty"` // VLM provider: openai, anthropic, local, etc.
|
||||||
|
Model string `protobuf:"bytes,3,opt,name=model,proto3" json:"model,omitempty"` // model name
|
||||||
|
ApiKey string `protobuf:"bytes,4,opt,name=api_key,json=apiKey,proto3" json:"api_key,omitempty"` // API key
|
||||||
|
BaseUrl string `protobuf:"bytes,5,opt,name=base_url,json=baseUrl,proto3" json:"base_url,omitempty"` // custom API base URL
|
||||||
|
Prompt string `protobuf:"bytes,6,opt,name=prompt,proto3" json:"prompt,omitempty"` // custom prompt
|
||||||
|
unknownFields protoimpl.UnknownFields
|
||||||
|
sizeCache protoimpl.SizeCache
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset overwrites *x with an empty VLMConfig and then stores the message
// info taken from file_document_parser_proto_msgTypes[1] in x's message state.
func (x *VLMConfig) Reset() {
|
||||||
|
*x = VLMConfig{}
|
||||||
|
mi := &file_document_parser_proto_msgTypes[1]
|
||||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
|
ms.StoreMessageInfo(mi)
|
||||||
|
}
|
||||||
|
|
||||||
|
// String returns the string form of x as produced by protoimpl.X.MessageStringOf.
func (x *VLMConfig) String() string {
|
||||||
|
return protoimpl.X.MessageStringOf(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ProtoMessage is an empty method on *VLMConfig; presumably it only marks the
// type as a protobuf message — confirm against the protobuf runtime docs.
func (*VLMConfig) ProtoMessage() {}
|
||||||
|
|
||||||
|
// ProtoReflect returns a protoreflect.Message for x.
// For a non-nil x it returns x's message state, first storing the message
// info from file_document_parser_proto_msgTypes[1] when none is loaded yet.
// For a nil x it returns mi.MessageOf(x).
func (x *VLMConfig) ProtoReflect() protoreflect.Message {
|
||||||
|
mi := &file_document_parser_proto_msgTypes[1]
|
||||||
|
if x != nil {
|
||||||
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
|
if ms.LoadMessageInfo() == nil {
|
||||||
|
ms.StoreMessageInfo(mi)
|
||||||
|
}
|
||||||
|
return ms
|
||||||
|
}
|
||||||
|
return mi.MessageOf(x)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Descriptor returns the result of file_document_parser_proto_rawDescGZIP
// together with the index path []int{1}.
//
// Deprecated: Use VLMConfig.ProtoReflect.Descriptor instead.
|
||||||
|
func (*VLMConfig) Descriptor() ([]byte, []int) {
|
||||||
|
return file_document_parser_proto_rawDescGZIP(), []int{1}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetEnabled returns x.Enabled, or false when the receiver x is nil.
func (x *VLMConfig) GetEnabled() bool {
|
||||||
|
if x != nil {
|
||||||
|
return x.Enabled
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetProvider returns x.Provider, or the empty string when the receiver x is nil.
func (x *VLMConfig) GetProvider() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.Provider
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetModel returns x.Model, or the empty string when the receiver x is nil.
func (x *VLMConfig) GetModel() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.Model
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetApiKey returns x.ApiKey, or the empty string when the receiver x is nil.
func (x *VLMConfig) GetApiKey() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.ApiKey
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetBaseUrl returns x.BaseUrl, or the empty string when the receiver x is nil.
func (x *VLMConfig) GetBaseUrl() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.BaseUrl
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetPrompt returns x.Prompt, or the empty string when the receiver x is nil.
func (x *VLMConfig) GetPrompt() string {
|
||||||
|
if x != nil {
|
||||||
|
return x.Prompt
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
type ParseResponse struct {
|
type ParseResponse struct {
|
||||||
state protoimpl.MessageState `protogen:"open.v1"`
|
state protoimpl.MessageState `protogen:"open.v1"`
|
||||||
Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
|
Success bool `protobuf:"varint,1,opt,name=success,proto3" json:"success,omitempty"`
|
||||||
@@ -111,7 +204,7 @@ type ParseResponse struct {
|
|||||||
|
|
||||||
func (x *ParseResponse) Reset() {
|
func (x *ParseResponse) Reset() {
|
||||||
*x = ParseResponse{}
|
*x = ParseResponse{}
|
||||||
mi := &file_document_parser_proto_msgTypes[1]
|
mi := &file_document_parser_proto_msgTypes[2]
|
||||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
ms.StoreMessageInfo(mi)
|
ms.StoreMessageInfo(mi)
|
||||||
}
|
}
|
||||||
@@ -123,7 +216,7 @@ func (x *ParseResponse) String() string {
|
|||||||
func (*ParseResponse) ProtoMessage() {}
|
func (*ParseResponse) ProtoMessage() {}
|
||||||
|
|
||||||
func (x *ParseResponse) ProtoReflect() protoreflect.Message {
|
func (x *ParseResponse) ProtoReflect() protoreflect.Message {
|
||||||
mi := &file_document_parser_proto_msgTypes[1]
|
mi := &file_document_parser_proto_msgTypes[2]
|
||||||
if x != nil {
|
if x != nil {
|
||||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
if ms.LoadMessageInfo() == nil {
|
if ms.LoadMessageInfo() == nil {
|
||||||
@@ -136,7 +229,7 @@ func (x *ParseResponse) ProtoReflect() protoreflect.Message {
|
|||||||
|
|
||||||
// Deprecated: Use ParseResponse.ProtoReflect.Descriptor instead.
|
// Deprecated: Use ParseResponse.ProtoReflect.Descriptor instead.
|
||||||
func (*ParseResponse) Descriptor() ([]byte, []int) {
|
func (*ParseResponse) Descriptor() ([]byte, []int) {
|
||||||
return file_document_parser_proto_rawDescGZIP(), []int{1}
|
return file_document_parser_proto_rawDescGZIP(), []int{2}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (x *ParseResponse) GetSuccess() bool {
|
func (x *ParseResponse) GetSuccess() bool {
|
||||||
@@ -189,7 +282,7 @@ type Empty struct {
|
|||||||
|
|
||||||
func (x *Empty) Reset() {
|
func (x *Empty) Reset() {
|
||||||
*x = Empty{}
|
*x = Empty{}
|
||||||
mi := &file_document_parser_proto_msgTypes[2]
|
mi := &file_document_parser_proto_msgTypes[3]
|
||||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
ms.StoreMessageInfo(mi)
|
ms.StoreMessageInfo(mi)
|
||||||
}
|
}
|
||||||
@@ -201,7 +294,7 @@ func (x *Empty) String() string {
|
|||||||
func (*Empty) ProtoMessage() {}
|
func (*Empty) ProtoMessage() {}
|
||||||
|
|
||||||
func (x *Empty) ProtoReflect() protoreflect.Message {
|
func (x *Empty) ProtoReflect() protoreflect.Message {
|
||||||
mi := &file_document_parser_proto_msgTypes[2]
|
mi := &file_document_parser_proto_msgTypes[3]
|
||||||
if x != nil {
|
if x != nil {
|
||||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
if ms.LoadMessageInfo() == nil {
|
if ms.LoadMessageInfo() == nil {
|
||||||
@@ -214,7 +307,7 @@ func (x *Empty) ProtoReflect() protoreflect.Message {
|
|||||||
|
|
||||||
// Deprecated: Use Empty.ProtoReflect.Descriptor instead.
|
// Deprecated: Use Empty.ProtoReflect.Descriptor instead.
|
||||||
func (*Empty) Descriptor() ([]byte, []int) {
|
func (*Empty) Descriptor() ([]byte, []int) {
|
||||||
return file_document_parser_proto_rawDescGZIP(), []int{2}
|
return file_document_parser_proto_rawDescGZIP(), []int{3}
|
||||||
}
|
}
|
||||||
|
|
||||||
type SupportedFormatsResponse struct {
|
type SupportedFormatsResponse struct {
|
||||||
@@ -227,7 +320,7 @@ type SupportedFormatsResponse struct {
|
|||||||
|
|
||||||
func (x *SupportedFormatsResponse) Reset() {
|
func (x *SupportedFormatsResponse) Reset() {
|
||||||
*x = SupportedFormatsResponse{}
|
*x = SupportedFormatsResponse{}
|
||||||
mi := &file_document_parser_proto_msgTypes[3]
|
mi := &file_document_parser_proto_msgTypes[4]
|
||||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
ms.StoreMessageInfo(mi)
|
ms.StoreMessageInfo(mi)
|
||||||
}
|
}
|
||||||
@@ -239,7 +332,7 @@ func (x *SupportedFormatsResponse) String() string {
|
|||||||
func (*SupportedFormatsResponse) ProtoMessage() {}
|
func (*SupportedFormatsResponse) ProtoMessage() {}
|
||||||
|
|
||||||
func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {
|
func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {
|
||||||
mi := &file_document_parser_proto_msgTypes[3]
|
mi := &file_document_parser_proto_msgTypes[4]
|
||||||
if x != nil {
|
if x != nil {
|
||||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
if ms.LoadMessageInfo() == nil {
|
if ms.LoadMessageInfo() == nil {
|
||||||
@@ -252,7 +345,7 @@ func (x *SupportedFormatsResponse) ProtoReflect() protoreflect.Message {
|
|||||||
|
|
||||||
// Deprecated: Use SupportedFormatsResponse.ProtoReflect.Descriptor instead.
|
// Deprecated: Use SupportedFormatsResponse.ProtoReflect.Descriptor instead.
|
||||||
func (*SupportedFormatsResponse) Descriptor() ([]byte, []int) {
|
func (*SupportedFormatsResponse) Descriptor() ([]byte, []int) {
|
||||||
return file_document_parser_proto_rawDescGZIP(), []int{3}
|
return file_document_parser_proto_rawDescGZIP(), []int{4}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (x *SupportedFormatsResponse) GetFileTypes() []string {
|
func (x *SupportedFormatsResponse) GetFileTypes() []string {
|
||||||
@@ -278,7 +371,7 @@ type EnginesResponse struct {
|
|||||||
|
|
||||||
func (x *EnginesResponse) Reset() {
|
func (x *EnginesResponse) Reset() {
|
||||||
*x = EnginesResponse{}
|
*x = EnginesResponse{}
|
||||||
mi := &file_document_parser_proto_msgTypes[4]
|
mi := &file_document_parser_proto_msgTypes[5]
|
||||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
ms.StoreMessageInfo(mi)
|
ms.StoreMessageInfo(mi)
|
||||||
}
|
}
|
||||||
@@ -290,7 +383,7 @@ func (x *EnginesResponse) String() string {
|
|||||||
func (*EnginesResponse) ProtoMessage() {}
|
func (*EnginesResponse) ProtoMessage() {}
|
||||||
|
|
||||||
func (x *EnginesResponse) ProtoReflect() protoreflect.Message {
|
func (x *EnginesResponse) ProtoReflect() protoreflect.Message {
|
||||||
mi := &file_document_parser_proto_msgTypes[4]
|
mi := &file_document_parser_proto_msgTypes[5]
|
||||||
if x != nil {
|
if x != nil {
|
||||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
if ms.LoadMessageInfo() == nil {
|
if ms.LoadMessageInfo() == nil {
|
||||||
@@ -303,7 +396,7 @@ func (x *EnginesResponse) ProtoReflect() protoreflect.Message {
|
|||||||
|
|
||||||
// Deprecated: Use EnginesResponse.ProtoReflect.Descriptor instead.
|
// Deprecated: Use EnginesResponse.ProtoReflect.Descriptor instead.
|
||||||
func (*EnginesResponse) Descriptor() ([]byte, []int) {
|
func (*EnginesResponse) Descriptor() ([]byte, []int) {
|
||||||
return file_document_parser_proto_rawDescGZIP(), []int{4}
|
return file_document_parser_proto_rawDescGZIP(), []int{5}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (x *EnginesResponse) GetEngines() []*EngineInfo {
|
func (x *EnginesResponse) GetEngines() []*EngineInfo {
|
||||||
@@ -326,7 +419,7 @@ type EngineInfo struct {
|
|||||||
|
|
||||||
func (x *EngineInfo) Reset() {
|
func (x *EngineInfo) Reset() {
|
||||||
*x = EngineInfo{}
|
*x = EngineInfo{}
|
||||||
mi := &file_document_parser_proto_msgTypes[5]
|
mi := &file_document_parser_proto_msgTypes[6]
|
||||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
ms.StoreMessageInfo(mi)
|
ms.StoreMessageInfo(mi)
|
||||||
}
|
}
|
||||||
@@ -338,7 +431,7 @@ func (x *EngineInfo) String() string {
|
|||||||
func (*EngineInfo) ProtoMessage() {}
|
func (*EngineInfo) ProtoMessage() {}
|
||||||
|
|
||||||
func (x *EngineInfo) ProtoReflect() protoreflect.Message {
|
func (x *EngineInfo) ProtoReflect() protoreflect.Message {
|
||||||
mi := &file_document_parser_proto_msgTypes[5]
|
mi := &file_document_parser_proto_msgTypes[6]
|
||||||
if x != nil {
|
if x != nil {
|
||||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||||
if ms.LoadMessageInfo() == nil {
|
if ms.LoadMessageInfo() == nil {
|
||||||
@@ -351,7 +444,7 @@ func (x *EngineInfo) ProtoReflect() protoreflect.Message {
|
|||||||
|
|
||||||
// Deprecated: Use EngineInfo.ProtoReflect.Descriptor instead.
|
// Deprecated: Use EngineInfo.ProtoReflect.Descriptor instead.
|
||||||
func (*EngineInfo) Descriptor() ([]byte, []int) {
|
func (*EngineInfo) Descriptor() ([]byte, []int) {
|
||||||
return file_document_parser_proto_rawDescGZIP(), []int{5}
|
return file_document_parser_proto_rawDescGZIP(), []int{6}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (x *EngineInfo) GetName() string {
|
func (x *EngineInfo) GetName() string {
|
||||||
@@ -393,16 +486,25 @@ var File_document_parser_proto protoreflect.FileDescriptor
|
|||||||
|
|
||||||
const file_document_parser_proto_rawDesc = "" +
|
const file_document_parser_proto_rawDesc = "" +
|
||||||
"\n" +
|
"\n" +
|
||||||
"\x15document_parser.proto\x12\tdocparser\"\xa5\x02\n" +
|
"\x15document_parser.proto\x12\tdocparser\"\xda\x02\n" +
|
||||||
"\fParseRequest\x12\x19\n" +
|
"\fParseRequest\x12\x19\n" +
|
||||||
"\bfile_url\x18\x01 \x01(\tR\afileUrl\x12\x1b\n" +
|
"\bfile_url\x18\x01 \x01(\tR\afileUrl\x12\x1b\n" +
|
||||||
"\tfile_name\x18\x02 \x01(\tR\bfileName\x12\x1b\n" +
|
"\tfile_name\x18\x02 \x01(\tR\bfileName\x12\x1b\n" +
|
||||||
"\tfile_type\x18\x03 \x01(\tR\bfileType\x12#\n" +
|
"\tfile_type\x18\x03 \x01(\tR\bfileType\x12#\n" +
|
||||||
"\rparser_engine\x18\x04 \x01(\tR\fparserEngine\x12W\n" +
|
"\rparser_engine\x18\x04 \x01(\tR\fparserEngine\x12W\n" +
|
||||||
"\x10engine_overrides\x18\x05 \x03(\v2,.docparser.ParseRequest.EngineOverridesEntryR\x0fengineOverrides\x1aB\n" +
|
"\x10engine_overrides\x18\x05 \x03(\v2,.docparser.ParseRequest.EngineOverridesEntryR\x0fengineOverrides\x123\n" +
|
||||||
|
"\n" +
|
||||||
|
"vlm_config\x18\x06 \x01(\v2\x14.docparser.VLMConfigR\tvlmConfig\x1aB\n" +
|
||||||
"\x14EngineOverridesEntry\x12\x10\n" +
|
"\x14EngineOverridesEntry\x12\x10\n" +
|
||||||
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
|
"\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n" +
|
||||||
"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xc6\x01\n" +
|
"\x05value\x18\x02 \x01(\tR\x05value:\x028\x01\"\xa3\x01\n" +
|
||||||
|
"\tVLMConfig\x12\x18\n" +
|
||||||
|
"\aenabled\x18\x01 \x01(\bR\aenabled\x12\x1a\n" +
|
||||||
|
"\bprovider\x18\x02 \x01(\tR\bprovider\x12\x14\n" +
|
||||||
|
"\x05model\x18\x03 \x01(\tR\x05model\x12\x17\n" +
|
||||||
|
"\aapi_key\x18\x04 \x01(\tR\x06apiKey\x12\x19\n" +
|
||||||
|
"\bbase_url\x18\x05 \x01(\tR\abaseUrl\x12\x16\n" +
|
||||||
|
"\x06prompt\x18\x06 \x01(\tR\x06prompt\"\xc6\x01\n" +
|
||||||
"\rParseResponse\x12\x18\n" +
|
"\rParseResponse\x12\x18\n" +
|
||||||
"\asuccess\x18\x01 \x01(\bR\asuccess\x12\x18\n" +
|
"\asuccess\x18\x01 \x01(\bR\asuccess\x12\x18\n" +
|
||||||
"\acontent\x18\x02 \x01(\tR\acontent\x12\x18\n" +
|
"\acontent\x18\x02 \x01(\tR\acontent\x12\x18\n" +
|
||||||
@@ -445,32 +547,34 @@ func file_document_parser_proto_rawDescGZIP() []byte {
|
|||||||
return file_document_parser_proto_rawDescData
|
return file_document_parser_proto_rawDescData
|
||||||
}
|
}
|
||||||
|
|
||||||
var file_document_parser_proto_msgTypes = make([]protoimpl.MessageInfo, 8)
|
var file_document_parser_proto_msgTypes = make([]protoimpl.MessageInfo, 9)
|
||||||
var file_document_parser_proto_goTypes = []any{
|
var file_document_parser_proto_goTypes = []any{
|
||||||
(*ParseRequest)(nil), // 0: docparser.ParseRequest
|
(*ParseRequest)(nil), // 0: docparser.ParseRequest
|
||||||
(*ParseResponse)(nil), // 1: docparser.ParseResponse
|
(*VLMConfig)(nil), // 1: docparser.VLMConfig
|
||||||
(*Empty)(nil), // 2: docparser.Empty
|
(*ParseResponse)(nil), // 2: docparser.ParseResponse
|
||||||
(*SupportedFormatsResponse)(nil), // 3: docparser.SupportedFormatsResponse
|
(*Empty)(nil), // 3: docparser.Empty
|
||||||
(*EnginesResponse)(nil), // 4: docparser.EnginesResponse
|
(*SupportedFormatsResponse)(nil), // 4: docparser.SupportedFormatsResponse
|
||||||
(*EngineInfo)(nil), // 5: docparser.EngineInfo
|
(*EnginesResponse)(nil), // 5: docparser.EnginesResponse
|
||||||
nil, // 6: docparser.ParseRequest.EngineOverridesEntry
|
(*EngineInfo)(nil), // 6: docparser.EngineInfo
|
||||||
nil, // 7: docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
|
nil, // 7: docparser.ParseRequest.EngineOverridesEntry
|
||||||
|
nil, // 8: docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
|
||||||
}
|
}
|
||||||
var file_document_parser_proto_depIdxs = []int32{
|
var file_document_parser_proto_depIdxs = []int32{
|
||||||
6, // 0: docparser.ParseRequest.engine_overrides:type_name -> docparser.ParseRequest.EngineOverridesEntry
|
7, // 0: docparser.ParseRequest.engine_overrides:type_name -> docparser.ParseRequest.EngineOverridesEntry
|
||||||
7, // 1: docparser.SupportedFormatsResponse.file_type_descriptions:type_name -> docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
|
1, // 1: docparser.ParseRequest.vlm_config:type_name -> docparser.VLMConfig
|
||||||
5, // 2: docparser.EnginesResponse.engines:type_name -> docparser.EngineInfo
|
8, // 2: docparser.SupportedFormatsResponse.file_type_descriptions:type_name -> docparser.SupportedFormatsResponse.FileTypeDescriptionsEntry
|
||||||
0, // 3: docparser.DocumentParser.ParseDocument:input_type -> docparser.ParseRequest
|
6, // 3: docparser.EnginesResponse.engines:type_name -> docparser.EngineInfo
|
||||||
2, // 4: docparser.DocumentParser.GetSupportedFormats:input_type -> docparser.Empty
|
0, // 4: docparser.DocumentParser.ParseDocument:input_type -> docparser.ParseRequest
|
||||||
2, // 5: docparser.DocumentParser.GetEngines:input_type -> docparser.Empty
|
3, // 5: docparser.DocumentParser.GetSupportedFormats:input_type -> docparser.Empty
|
||||||
1, // 6: docparser.DocumentParser.ParseDocument:output_type -> docparser.ParseResponse
|
3, // 6: docparser.DocumentParser.GetEngines:input_type -> docparser.Empty
|
||||||
3, // 7: docparser.DocumentParser.GetSupportedFormats:output_type -> docparser.SupportedFormatsResponse
|
2, // 7: docparser.DocumentParser.ParseDocument:output_type -> docparser.ParseResponse
|
||||||
4, // 8: docparser.DocumentParser.GetEngines:output_type -> docparser.EnginesResponse
|
4, // 8: docparser.DocumentParser.GetSupportedFormats:output_type -> docparser.SupportedFormatsResponse
|
||||||
6, // [6:9] is the sub-list for method output_type
|
5, // 9: docparser.DocumentParser.GetEngines:output_type -> docparser.EnginesResponse
|
||||||
3, // [3:6] is the sub-list for method input_type
|
7, // [7:10] is the sub-list for method output_type
|
||||||
3, // [3:3] is the sub-list for extension type_name
|
4, // [4:7] is the sub-list for method input_type
|
||||||
3, // [3:3] is the sub-list for extension extendee
|
4, // [4:4] is the sub-list for extension type_name
|
||||||
0, // [0:3] is the sub-list for field type_name
|
4, // [4:4] is the sub-list for extension extendee
|
||||||
|
0, // [0:4] is the sub-list for field type_name
|
||||||
}
|
}
|
||||||
|
|
||||||
func init() { file_document_parser_proto_init() }
|
func init() { file_document_parser_proto_init() }
|
||||||
@@ -484,7 +588,7 @@ func file_document_parser_proto_init() {
|
|||||||
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
|
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
|
||||||
RawDescriptor: unsafe.Slice(unsafe.StringData(file_document_parser_proto_rawDesc), len(file_document_parser_proto_rawDesc)),
|
RawDescriptor: unsafe.Slice(unsafe.StringData(file_document_parser_proto_rawDesc), len(file_document_parser_proto_rawDesc)),
|
||||||
NumEnums: 0,
|
NumEnums: 0,
|
||||||
NumMessages: 8,
|
NumMessages: 9,
|
||||||
NumExtensions: 0,
|
NumExtensions: 0,
|
||||||
NumServices: 1,
|
NumServices: 1,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -10,9 +10,17 @@ import (
|
|||||||
// ParsingConfig is the document parsing configuration.
|
// ParsingConfig is the document parsing configuration.
// Besides the parser engine settings it carries the VLM settings
// (VLMEnabled .. VLMPrompt); every field is mapped to a JSON key by its tag.
|
||||||
type ParsingConfig struct {
|
type ParsingConfig struct {
|
||||||
Engine string `json:"engine"` // markitdown / docling
|
Engine string `json:"engine"` // markitdown / docling
|
||||||
DoclingURL string `json:"docling_url"` // Docling service URL
|
DoclingURL string `json:"docling_url"` // Docling service URL
|
||||||
EnablePDF bool `json:"enable_pdf"` // whether PDF parsing is enabled
|
EnablePDF bool `json:"enable_pdf"` // whether PDF parsing is enabled
|
||||||
Pandoc bool `json:"pandoc"` // whether Pandoc is enabled
|
Pandoc bool `json:"pandoc"` // whether Pandoc is enabled
|
||||||
|
|
||||||
|
// VLM settings (used for image OCR and similar tasks)
|
||||||
|
VLMEnabled bool `json:"vlm_enabled"` // whether VLM is enabled
|
||||||
|
VLMProvider string `json:"vlm_provider"` // VLM provider: openai, anthropic, local, etc.
|
||||||
|
VLMModel string `json:"vlm_model"` // model name
|
||||||
|
// NOTE(review): the API key is serialized under "vlm_api_key" like any other
|
// field — confirm this struct is never returned to clients or written to logs.
|
VLMAPIKey string `json:"vlm_api_key"` // API key
|
||||||
|
VLMBaseURL string `json:"vlm_base_url"` // custom API base URL
|
||||||
|
VLMPrompt string `json:"vlm_prompt"` // custom prompt
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan 实现 sql.Scanner 接口
|
// Scan 实现 sql.Scanner 接口
|
||||||
|
|||||||
@@ -27,6 +27,16 @@ type ParseResult struct {
|
|||||||
ParserEngine string
|
ParserEngine string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VLMConfig holds the VLM model settings passed to ParseDocument, which
// copies each field into the docparser.VLMConfig attached to the request.
|
||||||
|
type VLMConfig struct {
|
||||||
|
Enabled bool // copied to the request's Enabled field
|
||||||
|
Provider string // openai, anthropic, local, etc.
|
||||||
|
Model string // copied to the request's Model field
|
||||||
|
APIKey string // copied to the request's ApiKey field
|
||||||
|
BaseURL string // copied to the request's BaseUrl field
|
||||||
|
Prompt string // copied to the request's Prompt field
|
||||||
|
}
|
||||||
|
|
||||||
// NewAICoreClient 创建 AI-Core 客户端
|
// NewAICoreClient 创建 AI-Core 客户端
|
||||||
func NewAICoreClient(address string) (*AICoreClient, error) {
|
func NewAICoreClient(address string) (*AICoreClient, error) {
|
||||||
return &AICoreClient{address: address}, nil
|
return &AICoreClient{address: address}, nil
|
||||||
@@ -56,7 +66,8 @@ func (c *AICoreClient) Close() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ParseDocument 解析文档 - 使用生成的 protobuf 代码
|
// ParseDocument 解析文档 - 使用生成的 protobuf 代码
|
||||||
func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*ParseResult, error) {
|
// vlmConfig 可选,如果不使用 VLM 传 nil
|
||||||
|
func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string, vlmConfig *VLMConfig) (*ParseResult, error) {
|
||||||
if c.conn == nil {
|
if c.conn == nil {
|
||||||
if err := c.Connect(); err != nil {
|
if err := c.Connect(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -72,6 +83,18 @@ func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*Parse
|
|||||||
FileType: fileType,
|
FileType: fileType,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 如果提供了 VLM 配置,添加到请求中
|
||||||
|
if vlmConfig != nil {
|
||||||
|
req.VlmConfig = &docparser.VLMConfig{
|
||||||
|
Enabled: vlmConfig.Enabled,
|
||||||
|
Provider: vlmConfig.Provider,
|
||||||
|
Model: vlmConfig.Model,
|
||||||
|
ApiKey: vlmConfig.APIKey,
|
||||||
|
BaseUrl: vlmConfig.BaseURL,
|
||||||
|
Prompt: vlmConfig.Prompt,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package service
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"mime/multipart"
|
"mime/multipart"
|
||||||
"net/http"
|
"net/http"
|
||||||
@@ -19,8 +20,15 @@ import (
|
|||||||
var knowledgeDebugLog *log.Logger
|
var knowledgeDebugLog *log.Logger
|
||||||
|
|
||||||
// init points knowledgeDebugLog at logs/debug.log, opened for appending and
// created when missing. The updated body first creates the logs directory and
// falls back to io.Discard when the file cannot be opened.
func init() {
|
func init() {
|
||||||
debugFile, _ := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
|
// Make sure the logs directory exists. The MkdirAll error is not checked
// here; only the OpenFile error below is inspected.
|
||||||
knowledgeDebugLog = log.New(debugFile, "", log.Ldate|log.Ltime)
|
os.MkdirAll("logs", 0755)
|
||||||
|
debugFile, err := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
|
||||||
|
if err != nil {
|
||||||
|
// If the file cannot be opened, log to io.Discard so debug output does not reach the console
|
||||||
|
knowledgeDebugLog = log.New(io.Discard, "", log.Ldate|log.Ltime)
|
||||||
|
} else {
|
||||||
|
knowledgeDebugLog = log.New(debugFile, "", log.Ldate|log.Ltime)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type KnowledgeService struct {
|
type KnowledgeService struct {
|
||||||
@@ -133,10 +141,36 @@ func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) e
|
|||||||
|
|
||||||
// Delete deletes the knowledge base.
|
// Delete deletes the knowledge base identified by id.
// It loads the knowledge base and its documents, removes each document's
// stored file (only when the storage type is "minio") and its local Markdown
// file, then deletes the document records and finally the knowledge base.
// Errors from the two lookups and the two repository deletes are returned;
// the results of the per-file cleanup calls are not checked.
|
||||||
func (s *KnowledgeService) Delete(id string) error {
|
func (s *KnowledgeService) Delete(id string) error {
|
||||||
// Delete the associated documents first
|
// Load the knowledge base
|
||||||
|
kb, err := s.repo.FindByID(id)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load all documents under the knowledge base
// (the empty second argument presumably means "no filter" — TODO confirm)
|
||||||
|
docs, err := s.repo.FindDocumentsByKBID(id, "")
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove each document's MinIO file and local Markdown file.
// NOTE(review): files are removed before the database records; if a later
// repository delete fails, the records remain while the files are gone.
|
||||||
|
for _, doc := range docs {
|
||||||
|
// Remove the MinIO file (the call's result, if any, is not checked)
|
||||||
|
if doc.FileKey != "" && kb.StorageConfig.Type == "minio" {
|
||||||
|
s.uploadService.DeleteFileWithConfig(doc.FileKey, kb.StorageConfig)
|
||||||
|
}
|
||||||
|
// Remove the local Markdown file
|
||||||
|
if s.markdownLocalPath != "" {
|
||||||
|
// NOTE(review): path is built by string concatenation with "/"; filepath.Join would be the portable form
|
markdownPath := s.markdownLocalPath + "/" + doc.ID + ".md"
|
||||||
|
// The error from os.Remove is discarded
|
os.Remove(markdownPath)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete the associated documents (database records)
|
||||||
if err := s.repo.DeleteDocumentsByKBID(id); err != nil {
|
if err := s.repo.DeleteDocumentsByKBID(id); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return s.repo.Delete(id)
|
return s.repo.Delete(id)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -233,7 +267,7 @@ func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeade
|
|||||||
go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)
|
go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)
|
||||||
|
|
||||||
// 异步调用 AI-Core gRPC 服务解析文档(获取 Markdown)
|
// 异步调用 AI-Core gRPC 服务解析文档(获取 Markdown)
|
||||||
go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name)
|
go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name, kb.ParsingConfig)
|
||||||
|
|
||||||
return doc, result.URL, nil
|
return doc, result.URL, nil
|
||||||
}
|
}
|
||||||
@@ -293,7 +327,7 @@ func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config mod
|
|||||||
}
|
}
|
||||||
|
|
||||||
// parseDocumentWithAICore 调用 AI-Core gRPC 服务解析文档
|
// parseDocumentWithAICore 调用 AI-Core gRPC 服务解析文档
|
||||||
func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string) {
|
func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string, config model.ParsingConfig) {
|
||||||
if s.aiCoreClient == nil {
|
if s.aiCoreClient == nil {
|
||||||
knowledgeDebugLog.Printf("[AICore] AI-Core 客户端未初始化")
|
knowledgeDebugLog.Printf("[AICore] AI-Core 客户端未初始化")
|
||||||
return
|
return
|
||||||
@@ -301,7 +335,21 @@ func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName stri
|
|||||||
|
|
||||||
knowledgeDebugLog.Printf("[AICore] 开始解析文档: docID=%s, fileURL=%s, fileName=%s", docID, fileURL, fileName)
|
knowledgeDebugLog.Printf("[AICore] 开始解析文档: docID=%s, fileURL=%s, fileName=%s", docID, fileURL, fileName)
|
||||||
|
|
||||||
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "")
|
// 构建 VLM 配置
|
||||||
|
var vlmConfig *VLMConfig
|
||||||
|
if config.VLMEnabled {
|
||||||
|
vlmConfig = &VLMConfig{
|
||||||
|
Enabled: config.VLMEnabled,
|
||||||
|
Provider: config.VLMProvider,
|
||||||
|
Model: config.VLMModel,
|
||||||
|
APIKey: config.VLMAPIKey,
|
||||||
|
BaseURL: config.VLMBaseURL,
|
||||||
|
Prompt: config.VLMPrompt,
|
||||||
|
}
|
||||||
|
knowledgeDebugLog.Printf("[AICore] VLM 配置: provider=%s, model=%s, enabled=%v", config.VLMProvider, config.VLMModel, config.VLMEnabled)
|
||||||
|
}
|
||||||
|
|
||||||
|
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", vlmConfig)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
knowledgeDebugLog.Printf("[AICore] 解析失败: docID=%s, err=%v", docID, err)
|
knowledgeDebugLog.Printf("[AICore] 解析失败: docID=%s, err=%v", docID, err)
|
||||||
return
|
return
|
||||||
@@ -462,7 +510,7 @@ func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*mo
|
|||||||
// Office文件调用解析服务转换为HTML
|
// Office文件调用解析服务转换为HTML
|
||||||
if isOffice && s.aiCoreClient != nil {
|
if isOffice && s.aiCoreClient != nil {
|
||||||
knowledgeDebugLog.Printf("[Preview] Parsing office file: %s, URL: %s", fileName, fileURL)
|
knowledgeDebugLog.Printf("[Preview] Parsing office file: %s, URL: %s", fileName, fileURL)
|
||||||
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "")
|
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", nil) // Preview 不使用 VLM
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// 解析失败,返回文件URL
|
// 解析失败,返回文件URL
|
||||||
knowledgeDebugLog.Printf("[Preview] Parse document failed: %v", err)
|
knowledgeDebugLog.Printf("[Preview] Parse document failed: %v", err)
|
||||||
|
|||||||
Reference in New Issue
Block a user