feat: 完善后端知识库服务和配置

- 优化 AI-Core 客户端调用
- 添加更多知识库配置选项
- 完善文档解析逻辑

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-09 15:42:42 +08:00
parent 5012a25f99
commit 4a7199de93
21 changed files with 3892 additions and 72 deletions

View File

@@ -7,6 +7,8 @@ import (
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
docparser "x-agents/server"
)
// AICoreClient AI-Core 文档解析服务客户端
@@ -53,7 +55,7 @@ func (c *AICoreClient) Close() {
}
}
// ParseDocument 解析文档
// ParseDocument 解析文档 - 使用生成的 protobuf 代码
func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*ParseResult, error) {
if c.conn == nil {
if err := c.Connect(); err != nil {
@@ -61,17 +63,16 @@ func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*Parse
}
}
// 使用 gRPC raw bytes 调用
// 由于没有生成 protobuf 代码,使用 raw bytes 方式调用
client := NewDocumentParserClient(c.conn)
// 使用生成的 protobuf 客户端
client := docparser.NewDocumentParserClient(c.conn)
req := &ParseRequest{
req := &docparser.ParseRequest{
FileUrl: fileURL,
FileName: fileName,
FileType: fileType,
}
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
defer cancel()
resp, err := client.ParseDocument(ctx, req)
@@ -80,53 +81,11 @@ func (c *AICoreClient) ParseDocument(fileURL, fileName, fileType string) (*Parse
}
return &ParseResult{
Success: resp.Success,
Content: resp.Content,
Message: resp.Message,
ContentLength: resp.ContentLength,
FileType: resp.FileType,
ParserEngine: resp.ParserEngine,
Success: resp.GetSuccess(),
Content: resp.GetContent(),
Message: resp.GetMessage(),
ContentLength: resp.GetContentLength(),
FileType: resp.GetFileType(),
ParserEngine: resp.GetParserEngine(),
}, nil
}
// 以下是手动定义的 protobuf messages(与 proto 文件一致)
// 不需要生成 .pb.go 文件,直接手动定义
// ParseRequest is the hand-written request message that is passed to
// DocumentParserClient.ParseDocument.
//
// NOTE(review): field meanings below are inferred from the field names only;
// confirm against the service's .proto definition.
type ParseRequest struct {
	// Presumably the location, display name and type of the file to parse.
	FileUrl, FileName, FileType string

	// Presumably selects which parsing engine the service should use.
	ParserEngine string
}
// ParseResponse is the hand-written response message that
// DocumentParserClient.ParseDocument returns.
//
// NOTE(review): field meanings below are inferred from the field names only;
// confirm against the service's .proto definition.
type ParseResponse struct {
	// Presumably reports whether parsing succeeded.
	Success bool

	// Presumably the parsed document text and a status/diagnostic message.
	Content, Message string

	// Presumably the length of Content as reported by the service.
	ContentLength int32

	// Presumably the detected file type and the engine that did the parsing.
	FileType, ParserEngine string
}
// DocumentParserClient is the hand-written client-side interface for the
// DocumentParser gRPC service (written by hand rather than generated).
type DocumentParserClient interface {
// ParseDocument sends in to the service and returns its ParseResponse, or an
// error if the call fails. opts are passed through as per-call gRPC options.
ParseDocument(ctx context.Context, in *ParseRequest, opts ...grpc.CallOption) (*ParseResponse, error)
}
// documentParserClient is the unexported implementation returned by
// NewDocumentParserClient; it only wraps the connection that calls go out on.
type documentParserClient struct {
cc grpc.ClientConnInterface // connection on which ParseDocument invokes the RPC
}
// NewDocumentParserClient returns a DocumentParserClient that issues its calls
// on cc. The connection is stored as given; nothing is validated or dialed here.
func NewDocumentParserClient(cc grpc.ClientConnInterface) DocumentParserClient {
	client := new(documentParserClient)
	client.cc = cc
	return client
}
// ParseDocument performs a unary call to the
// "/docparser.DocumentParser/ParseDocument" method on the wrapped connection,
// sending in as the request and decoding the reply into a fresh ParseResponse.
//
// It returns (nil, err) when the invocation fails and the populated response
// otherwise.
//
// NOTE(review): ParseRequest and ParseResponse are plain Go structs here. Unless
// a custom codec is configured on the connection, gRPC's default protobuf codec
// will likely refuse to (un)marshal them — confirm before relying on this path.
func (c *documentParserClient) ParseDocument(ctx context.Context, in *ParseRequest, opts ...grpc.CallOption) (*ParseResponse, error) {
	resp := &ParseResponse{}
	if err := c.cc.Invoke(ctx, "/docparser.DocumentParser/ParseDocument", in, resp, opts...); err != nil {
		return nil, err
	}
	return resp, nil
}

View File

@@ -29,9 +29,10 @@ type KnowledgeService struct {
uploadService *UploadService
pythonServiceURL string
aiCoreClient *AICoreClient
markdownLocalPath string // Markdown 本地存储路径
}
func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService, pythonServiceURL, aiCoreServiceAddr string) *KnowledgeService {
func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService, pythonServiceURL, aiCoreServiceAddr, markdownLocalPath string) *KnowledgeService {
aiCoreClient, _ := NewAICoreClient(aiCoreServiceAddr)
return &KnowledgeService{
repo: repo,
@@ -39,6 +40,7 @@ func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *reposi
uploadService: uploadService,
pythonServiceURL: pythonServiceURL,
aiCoreClient: aiCoreClient,
markdownLocalPath: markdownLocalPath,
}
}
@@ -307,6 +309,13 @@ func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName stri
if result.Success && result.Content != "" {
knowledgeDebugLog.Printf("[AICore] 解析成功: docID=%s, contentLength=%d", docID, len(result.Content))
// 保存到本地文件
markdownPath := s.saveMarkdownToFile(docID, fileName, result.Content)
if markdownPath != "" {
knowledgeDebugLog.Printf("[AICore] Markdown 保存到本地: docID=%s, path=%s", docID, markdownPath)
}
// 更新文档的 Content 字段
s.repo.UpdateDocument(docID, map[string]interface{}{
"content": result.Content,
@@ -316,6 +325,31 @@ func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName stri
}
}
// saveMarkdownToFile writes content to "<markdownLocalPath>/<docID>.md" and
// returns the path that was written.
//
// It returns "" when the directory cannot be created or the file cannot be
// written; both failures are logged through knowledgeDebugLog rather than
// returned to the caller.
//
// If s.markdownLocalPath is empty it is first set to "resource/markdown". Note
// that this default is stored on the service itself, not only used for this
// call. fileName is accepted but does not influence the target file name.
func (s *KnowledgeService) saveMarkdownToFile(docID, fileName, content string) string {
	// Fall back to the default directory when none was configured.
	if s.markdownLocalPath == "" {
		s.markdownLocalPath = "resource/markdown"
	}
	dir := s.markdownLocalPath

	// The target directory must exist before the file can be written.
	if mkErr := os.MkdirAll(dir, 0755); mkErr != nil {
		knowledgeDebugLog.Printf("[AICore] 创建目录失败: path=%s, err=%v", dir, mkErr)
		return ""
	}

	// The file is named after the document ID with a ".md" extension.
	target := dir + "/" + docID + ".md"
	if wrErr := os.WriteFile(target, []byte(content), 0644); wrErr != nil {
		knowledgeDebugLog.Printf("[AICore] 保存 Markdown 失败: path=%s, err=%v", target, wrErr)
		return ""
	}

	return target
}
// DeleteDocument 删除文档
func (s *KnowledgeService) DeleteDocument(kbID, docID string) error {
// 验证文档存在
@@ -400,24 +434,66 @@ func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*mo
return nil, nil
}
// 如果已解析,返回解析内容;否则返回文件 URL
if doc.Status == "parsed" {
// TODO: 从存储中读取解析内容(可以存到数据库或文件)
// 暂时返回文件 URL
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
// 获取文件URL
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
// 根据文件类型决定预览方式
fileName := doc.Name
isPDF := strings.HasSuffix(strings.ToLower(fileName), ".pdf")
isOffice := false
officeExts := []string{".csv", ".xlsx", ".xls", ".docx", ".doc", ".pptx", ".ppt", ".txt", ".md"}
for _, ext := range officeExts {
if strings.HasSuffix(strings.ToLower(fileName), ext) {
isOffice = true
break
}
}
// PDF文件返回文件URL
if isPDF {
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: fileURL,
ContentType: "url",
}, nil
}
// 未解析,返回文件 URL
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
// Office文件调用解析服务转换为HTML
if isOffice && s.aiCoreClient != nil {
knowledgeDebugLog.Printf("[Preview] Parsing office file: %s, URL: %s", fileName, fileURL)
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "")
if err != nil {
// 解析失败返回文件URL
knowledgeDebugLog.Printf("[Preview] Parse document failed: %v", err)
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: fileURL,
ContentType: "url",
}, nil
}
knowledgeDebugLog.Printf("[Preview] Parse result: success=%v, content_length=%d", result.Success, len(result.Content))
// 返回HTML内容
if result.Success && result.Content != "" {
knowledgeDebugLog.Printf("[Preview] Returning HTML content, length: %d", len(result.Content))
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: result.Content,
ContentType: "html",
}, nil
}
}
// 其他情况返回文件URL
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: fileURL,
ContentType: "url",
}, nil
}