feat: 优化后端知识库服务和文档解析

- 更新文档解析客户端
- 优化知识库服务逻辑
- 更新 protobuf 定义

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-10 15:02:55 +08:00
parent d24b29afe4
commit 0a9f6e278e
4 changed files with 236 additions and 53 deletions

View File

@@ -27,6 +27,16 @@ type ParseResult struct {
ParserEngine string
}
// VLMConfig holds the VLM model settings that a caller may attach to a
// document-parsing request. Its fields are copied one-to-one into the
// protobuf VLM config when ParseDocument receives a non-nil value.
type VLMConfig struct {
	// Enabled reports whether VLM processing is switched on.
	Enabled bool
	// Provider names the VLM backend, e.g. openai, anthropic, local.
	Provider string
	// Model is the model identifier passed through to the parser.
	Model string
	// APIKey is forwarded as-is in the request.
	// NOTE(review): presumably the provider credential — avoid logging it.
	APIKey string
	// BaseURL is forwarded as-is in the request.
	// NOTE(review): presumably the provider endpoint — confirm.
	BaseURL string
	// Prompt is the prompt text forwarded with the request.
	Prompt string
}
// NewAICoreClient 创建 AI-Core 客户端
func NewAICoreClient(address string) (*AICoreClient, error) {
return &AICoreClient{address: address}, nil
@@ -56,7 +66,8 @@ func (c *AICoreClient) Close() {
}
// ParseDocument 解析文档 - 使用生成的 protobuf 代码
func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*ParseResult, error) {
// vlmConfig 可选,如果不使用 VLM 传 nil
func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string, vlmConfig *VLMConfig) (*ParseResult, error) {
if c.conn == nil {
if err := c.Connect(); err != nil {
return nil, err
@@ -72,6 +83,18 @@ func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*Parse
FileType: fileType,
}
// 如果提供了 VLM 配置,添加到请求中
if vlmConfig != nil {
req.VlmConfig = &docparser.VLMConfig{
Enabled: vlmConfig.Enabled,
Provider: vlmConfig.Provider,
Model: vlmConfig.Model,
ApiKey: vlmConfig.APIKey,
BaseUrl: vlmConfig.BaseURL,
Prompt: vlmConfig.Prompt,
}
}
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()

View File

@@ -3,6 +3,7 @@ package service
import (
"bytes"
"encoding/json"
"io"
"log"
"mime/multipart"
"net/http"
@@ -19,8 +20,15 @@ import (
// knowledgeDebugLog is the package-level debug logger used by the knowledge
// service. It is assigned in init and is never nil afterwards.
var knowledgeDebugLog *log.Logger

// init points knowledgeDebugLog at logs/debug.log (append mode), creating the
// logs directory first. If the directory or the file cannot be prepared, the
// logger silently discards its output so debug messages never reach the
// console.
func init() {
	// Start with a discarding logger so every failure path below leaves a
	// usable, non-nil logger behind.
	knowledgeDebugLog = log.New(io.Discard, "", log.Ldate|log.Ltime)

	// Make sure the logs directory exists before opening the file.
	if err := os.MkdirAll("logs", 0755); err != nil {
		return
	}

	// Open the file exactly once; the handle is kept for the lifetime of the
	// process because the logger writes to it.
	debugFile, err := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
	if err != nil {
		return
	}
	knowledgeDebugLog.SetOutput(debugFile)
}
type KnowledgeService struct {
@@ -133,10 +141,36 @@ func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) e
// Delete removes the knowledge base identified by id together with the
// artifacts of its documents.
//
// Order of operations:
//  1. load the knowledge base and the list of its documents;
//  2. for each document, delete the stored object (only when the document has
//     a file key and the storage type is "minio") and remove the local
//     Markdown file <markdownLocalPath>/<docID>.md when a local path is set;
//  3. delete the document records, then the knowledge base record.
//
// File cleanup is best-effort: the results of DeleteFileWithConfig and
// os.Remove are not inspected. Errors from the repository calls are returned
// unchanged.
//
// NOTE(review): files are removed before the database records, so a failing
// repository delete leaves records whose files are already gone.
func (s *KnowledgeService) Delete(id string) error {
	// Storage settings of the knowledge base decide how files are cleaned up.
	kb, err := s.repo.FindByID(id)
	if err != nil {
		return err
	}

	// NOTE(review): the empty second argument presumably means "no filter" —
	// confirm against the repository.
	docs, err := s.repo.FindDocumentsByKBID(id, "")
	if err != nil {
		return err
	}

	removeMarkdown := s.markdownLocalPath != ""
	for _, d := range docs {
		// Stored object: only for documents that actually have a file key.
		if d.FileKey != "" {
			if kb.StorageConfig.Type == "minio" {
				s.uploadService.DeleteFileWithConfig(d.FileKey, kb.StorageConfig)
			}
		}

		if !removeMarkdown {
			continue
		}
		// Local Markdown copy of the document.
		os.Remove(s.markdownLocalPath + "/" + d.ID + ".md")
	}

	// Database rows: documents first, then the knowledge base itself.
	if err := s.repo.DeleteDocumentsByKBID(id); err != nil {
		return err
	}

	return s.repo.Delete(id)
}
@@ -233,7 +267,7 @@ func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeade
go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)
// 异步调用 AI-Core gRPC 服务解析文档(获取 Markdown)
go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name)
go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name, kb.ParsingConfig)
return doc, result.URL, nil
}
@@ -293,7 +327,7 @@ func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config mod
}
// parseDocumentWithAICore 调用 AI-Core gRPC 服务解析文档
func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string) {
func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string, config model.ParsingConfig) {
if s.aiCoreClient == nil {
knowledgeDebugLog.Printf("[AICore] AI-Core 客户端未初始化")
return
@@ -301,7 +335,21 @@ func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName stri
knowledgeDebugLog.Printf("[AICore] 开始解析文档: docID=%s, fileURL=%s, fileName=%s", docID, fileURL, fileName)
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "")
// 构建 VLM 配置
var vlmConfig *VLMConfig
if config.VLMEnabled {
vlmConfig = &VLMConfig{
Enabled: config.VLMEnabled,
Provider: config.VLMProvider,
Model: config.VLMModel,
APIKey: config.VLMAPIKey,
BaseURL: config.VLMBaseURL,
Prompt: config.VLMPrompt,
}
knowledgeDebugLog.Printf("[AICore] VLM 配置: provider=%s, model=%s, enabled=%v", config.VLMProvider, config.VLMModel, config.VLMEnabled)
}
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", vlmConfig)
if err != nil {
knowledgeDebugLog.Printf("[AICore] 解析失败: docID=%s, err=%v", docID, err)
return
@@ -462,7 +510,7 @@ func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*mo
// Office文件调用解析服务转换为HTML
if isOffice && s.aiCoreClient != nil {
knowledgeDebugLog.Printf("[Preview] Parsing office file: %s, URL: %s", fileName, fileURL)
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "")
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", nil) // Preview 不使用 VLM
if err != nil {
// 解析失败返回文件URL
knowledgeDebugLog.Printf("[Preview] Parse document failed: %v", err)