Files
X-Agents/server/internal/service/knowledge_service.go
DESKTOP-72TV0V4\caoxiaozhu fdd6b2c17d fix: 优化后端各模块 handler
- database_handler, knowledge_handler, model_handler
- neo4j_handler, sub_table_handler
- system_handler, upload_handler
- knowledge_service, upload_service

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 14:26:04 +08:00

571 lines
17 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package service
import (
"bytes"
"encoding/json"
"io"
"log"
"mime/multipart"
"net/http"
"os"
"strings"
"time"
"github.com/google/uuid"
"x-agents/server/internal/model"
"x-agents/server/internal/repository"
)
// debugLog 专用调试日志
var knowledgeDebugLog *log.Logger
func init() {
// 确保 logs 目录存在
os.MkdirAll("logs", 0755)
debugFile, err := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
if err != nil {
// 如果文件打开失败,使用 discard 避免输出到控制台
knowledgeDebugLog = log.New(io.Discard, "", log.Ldate|log.Ltime)
} else {
knowledgeDebugLog = log.New(debugFile, "", log.Ldate|log.Ltime)
}
}
type KnowledgeService struct {
repo *repository.KnowledgeRepository
modelRepo *repository.ModelRepository
uploadService *UploadService
pythonServiceURL string
aiCoreClient *AICoreClient
markdownLocalPath string // Markdown 本地存储路径
}
func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService, pythonServiceURL, aiCoreServiceAddr, markdownLocalPath string) *KnowledgeService {
aiCoreClient, _ := NewAICoreClient(aiCoreServiceAddr)
return &KnowledgeService{
repo: repo,
modelRepo: modelRepo,
uploadService: uploadService,
pythonServiceURL: pythonServiceURL,
aiCoreClient: aiCoreClient,
markdownLocalPath: markdownLocalPath,
}
}
// Create 创建知识库
func (s *KnowledgeService) Create(req model.CreateKnowledgeRequest) (*model.KnowledgeBase, error) {
// 验证 LLM 模型存在
if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil {
return nil, err
}
// 验证 Embedding 模型存在
if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil {
return nil, err
}
kb := &model.KnowledgeBase{
ID: uuid.New().String(),
Name: req.Name,
Description: req.Description,
LLMModelID: req.LLMModelID,
EmbeddingModelID: req.EmbeddingModelID,
ParsingConfig: req.ParsingConfig,
StorageConfig: req.StorageConfig,
Status: "active",
DocumentCount: 0,
ChunkCount: 0,
}
// 设置默认值
if kb.ParsingConfig.Engine == "" {
kb.ParsingConfig.Engine = "markitdown"
}
if kb.ParsingConfig.EnablePDF != false {
kb.ParsingConfig.EnablePDF = true
}
if kb.ParsingConfig.Pandoc != false {
kb.ParsingConfig.Pandoc = true
}
if err := s.repo.Create(kb); err != nil {
return nil, err
}
return kb, nil
}
// List 获取知识库列表
func (s *KnowledgeService) List() ([]model.KnowledgeBase, error) {
return s.repo.FindAll()
}
// GetByID 获取知识库详情
func (s *KnowledgeService) GetByID(id string) (*model.KnowledgeBase, error) {
return s.repo.FindByID(id)
}
// Update 更新知识库
func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) error {
updates := make(map[string]interface{})
if req.Name != "" {
updates["name"] = req.Name
}
if req.Description != "" {
updates["description"] = req.Description
}
if req.LLMModelID != "" {
// 验证模型存在
if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil {
return err
}
updates["llm_model_id"] = req.LLMModelID
}
if req.EmbeddingModelID != "" {
// 验证模型存在
if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil {
return err
}
updates["embedding_model_id"] = req.EmbeddingModelID
}
if req.ParsingConfig.Engine != "" {
updates["parsing_config"] = req.ParsingConfig
}
if req.StorageConfig.Type != "" {
updates["storage_config"] = req.StorageConfig
}
if req.Status != "" {
updates["status"] = req.Status
}
return s.repo.Update(id, updates)
}
// Delete 删除知识库
func (s *KnowledgeService) Delete(id string) error {
// 获取知识库信息
kb, err := s.repo.FindByID(id)
if err != nil {
return err
}
// 获取知识库下所有文档
docs, err := s.repo.FindDocumentsByKBID(id, "")
if err != nil {
return err
}
// 删除每个文档的 MinIO 文件和本地 Markdown 文件
for _, doc := range docs {
// 删除存储文件MinIO 或本地)
if doc.FileKey != "" {
s.uploadService.DeleteFileWithConfig(doc.FileKey, kb.StorageConfig, kb.Name)
}
// 删除本地 Markdown 文件
if s.markdownLocalPath != "" {
markdownPath := s.markdownLocalPath + "/" + doc.ID + ".md"
os.Remove(markdownPath)
}
}
// 删除关联的文档(数据库记录)
if err := s.repo.DeleteDocumentsByKBID(id); err != nil {
return err
}
return s.repo.Delete(id)
}
// ListDocuments 获取知识库下的文档列表
func (s *KnowledgeService) ListDocuments(kbID string, status string) ([]model.KnowledgeDocument, error) {
docs, err := s.repo.FindDocumentsByKBID(kbID, status)
if err != nil {
knowledgeDebugLog.Printf("[Knowledge ListDocuments] 错误: %v", err)
return nil, err
}
knowledgeDebugLog.Printf("[Knowledge ListDocuments] kbID=%s, count=%d", kbID, len(docs))
for i, doc := range docs {
knowledgeDebugLog.Printf("[Knowledge ListDocuments] doc[%d]: id=%s, name=%s, file_url=%q", i, doc.ID, doc.Name, doc.FileURL)
// 如果是 MinIO 内网地址,转换为代理 URL
if doc.FileURL != "" && (strings.Contains(doc.FileURL, "10.10.10.189") || strings.Contains(doc.FileURL, ":9768")) {
// 从 URL 中提取 fileKey包含扩展名
parts := strings.Split(doc.FileURL, "/")
if len(parts) > 0 {
fileName := parts[len(parts)-1]
fileKey := strings.Split(fileName, "?")[0]
doc.FileURL = "/api/file_proxy?kb_id=" + kbID + "&key=" + fileKey
}
}
}
return docs, nil
}
// UploadDocument 上传文档到知识库
func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, string, error) {
knowledgeDebugLog.Printf("[Knowledge Upload] 开始上传文件: kbID=%s, filename=%s, size=%d", kbID, file.Filename, file.Size)
// 验证知识库存在
kb, err := s.repo.FindByID(kbID)
if err != nil {
knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 知识库不存在, kbID=%s, err=%v", kbID, err)
return nil, "", err
}
knowledgeDebugLog.Printf("[Knowledge Upload] 知识库配置: kbID=%s, storage_config.Type=%q, storage_config=%+v",
kbID, kb.StorageConfig.Type, kb.StorageConfig)
// 上传文件(根据知识库的 storage_config 选择存储方式)
var result *UploadResponse
if kb.StorageConfig.Type != "" && kb.StorageConfig.Type != "local" {
// 使用知识库的存储配置MinIO
knowledgeDebugLog.Printf("[Knowledge Upload] 使用知识库存储配置: type=%s, endpoint=%s, bucket=%s",
kb.StorageConfig.Type, kb.StorageConfig.Endpoint, kb.StorageConfig.Bucket)
result, err = s.uploadService.UploadWithConfig(file, kb.StorageConfig)
} else {
// 本地存储,使用知识库名称作为子目录
knowledgeDebugLog.Printf("[Knowledge Upload] 使用本地存储,路径: resources/%s/", kb.Name)
result, err = s.uploadService.UploadToKnowledgeBase(file, kb.StorageConfig, kb.Name)
}
if err != nil {
knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 上传失败, err=%v", err)
return nil, "", err
}
knowledgeDebugLog.Printf("[Knowledge Upload] 上传结果: success=%v, url=%s, message=%s",
result.Success, result.URL, result.Message)
if !result.Success {
knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 上传返回失败, message=%s", result.Message)
return nil, "", nil
}
// 获取文件扩展名
ext := getFileExt(file.Filename)
knowledgeDebugLog.Printf("[Knowledge Upload] 准备创建文档记录: result.URL=%q, fileKey=%s, ext=%s",
result.URL, result.FileKey, ext)
// 创建文档记录
doc := &model.KnowledgeDocument{
ID: uuid.New().String(),
KnowledgeBaseID: kbID,
Name: file.Filename,
FileKey: result.FileKey + ext,
FileURL: result.URL,
FileSize: file.Size,
Status: "parsing",
UploadedAt: time.Now(),
}
knowledgeDebugLog.Printf("[Knowledge Upload] 文档创建完成: doc.FileURL=%q", doc.FileURL)
if err := s.repo.CreateDocument(doc); err != nil {
return nil, "", err
}
// 更新知识库文档数
s.updateDocumentCount(kbID)
// 异步调用 Python 服务解析文档
go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)
// 异步调用 AI-Core gRPC 服务解析文档(获取 Markdown
go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name, kb.ParsingConfig)
return doc, result.URL, nil
}
// parseDocument 调用 Python 服务解析文档
func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config model.ParsingConfig) {
// 构建请求
reqBody := map[string]interface{}{
"file_url": fileURL,
"engine": config.Engine,
}
if config.Engine == "docling" && config.DoclingURL != "" {
reqBody["docling_url"] = config.DoclingURL
}
body, _ := json.Marshal(reqBody)
resp, err := http.Post(s.pythonServiceURL+"/parse", "application/json", bytes.NewBuffer(body))
if err != nil {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
return
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
return
}
// 解析响应
var result map[string]interface{}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
return
}
if success, ok := result["success"].(bool); ok && success {
// 解析成功,更新状态
chunks := []string{}
if c, ok := result["chunks"].([]interface{}); ok {
for _, chunk := range c {
if c, ok := chunk.(string); ok {
chunks = append(chunks, c)
}
}
}
s.repo.UpdateDocument(docID, map[string]interface{}{
"status": "parsed",
"chunk_count": len(chunks),
})
// 更新知识库的 chunk_count
s.updateChunkCount(kbID)
} else {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
}
}
// parseDocumentWithAICore 调用 AI-Core gRPC 服务解析文档
func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string, config model.ParsingConfig) {
if s.aiCoreClient == nil {
knowledgeDebugLog.Printf("[AICore] AI-Core 客户端未初始化")
return
}
knowledgeDebugLog.Printf("[AICore] 开始解析文档: docID=%s, fileURL=%s, fileName=%s", docID, fileURL, fileName)
// 构建 VLM 配置
var vlmConfig *VLMConfig
if config.VLMEnabled {
vlmConfig = &VLMConfig{
Enabled: config.VLMEnabled,
Provider: config.VLMProvider,
Model: config.VLMModel,
APIKey: config.VLMAPIKey,
BaseURL: config.VLMBaseURL,
Prompt: config.VLMPrompt,
}
knowledgeDebugLog.Printf("[AICore] VLM 配置: provider=%s, model=%s, enabled=%v", config.VLMProvider, config.VLMModel, config.VLMEnabled)
}
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", vlmConfig)
if err != nil {
knowledgeDebugLog.Printf("[AICore] 解析失败: docID=%s, err=%v", docID, err)
return
}
if result.Success && result.Content != "" {
knowledgeDebugLog.Printf("[AICore] 解析成功: docID=%s, contentLength=%d", docID, len(result.Content))
// 保存到本地文件
markdownPath := s.saveMarkdownToFile(docID, fileName, result.Content)
if markdownPath != "" {
knowledgeDebugLog.Printf("[AICore] Markdown 保存到本地: docID=%s, path=%s", docID, markdownPath)
}
// 更新文档的 Content 字段
s.repo.UpdateDocument(docID, map[string]interface{}{
"content": result.Content,
})
} else {
knowledgeDebugLog.Printf("[AICore] 解析返回失败: docID=%s, message=%s", docID, result.Message)
}
}
// saveMarkdownToFile 保存 Markdown 内容到本地文件
func (s *KnowledgeService) saveMarkdownToFile(docID, fileName, content string) string {
if s.markdownLocalPath == "" {
s.markdownLocalPath = "resource/markdown"
}
// 创建目录
if err := os.MkdirAll(s.markdownLocalPath, 0755); err != nil {
knowledgeDebugLog.Printf("[AICore] 创建目录失败: path=%s, err=%v", s.markdownLocalPath, err)
return ""
}
// 生成文件名(用 docID + .md
markdownFileName := docID + ".md"
markdownPath := s.markdownLocalPath + "/" + markdownFileName
// 写入文件
if err := os.WriteFile(markdownPath, []byte(content), 0644); err != nil {
knowledgeDebugLog.Printf("[AICore] 保存 Markdown 失败: path=%s, err=%v", markdownPath, err)
return ""
}
return markdownPath
}
// DeleteDocument 删除文档
func (s *KnowledgeService) DeleteDocument(kbID, docID string) error {
// 验证文档存在
doc, err := s.repo.FindDocumentByID(docID)
if err != nil {
return err
}
if doc.KnowledgeBaseID != kbID {
return nil
}
// 获取知识库配置
kb, err := s.repo.FindByID(kbID)
if err != nil {
return err
}
// 删除文件
if doc.FileKey != "" {
knowledgeDebugLog.Printf("[Knowledge DeleteDocument] 删除文件: kbID=%s, docID=%s, fileKey=%s, storageType=%s",
kbID, docID, doc.FileKey, kb.StorageConfig.Type)
// 使用知识库的存储配置删除(传入知识库名称)
s.uploadService.DeleteFileWithConfig(doc.FileKey, kb.StorageConfig, kb.Name)
}
// 删除文档记录
if err := s.repo.DeleteDocument(docID); err != nil {
return err
}
// 更新知识库文档数
s.updateDocumentCount(kbID)
return nil
}
// ReparseDocument 重新解析文档
func (s *KnowledgeService) ReparseDocument(kbID, docID string) error {
// 验证文档存在
doc, err := s.repo.FindDocumentByID(docID)
if err != nil {
return err
}
if doc.KnowledgeBaseID != kbID {
return nil
}
// 获取知识库配置
kb, err := s.repo.FindByID(kbID)
if err != nil {
return err
}
// 获取文件 URL
fileURL, err := s.uploadService.GetFileURL(doc.FileKey)
if err != nil {
return err
}
// 重置状态为 parsing
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"})
// 异步重新解析
go s.parseDocument(kbID, docID, fileURL, kb.ParsingConfig)
return nil
}
// GetDocumentPreview 获取文档预览
func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*model.DocumentPreviewResponse, error) {
// 验证文档存在
doc, err := s.repo.FindDocumentByID(docID)
if err != nil {
return nil, err
}
if doc.KnowledgeBaseID != kbID {
return nil, nil
}
// 获取文件URL
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
// 根据文件类型决定预览方式
fileName := doc.Name
isPDF := strings.HasSuffix(strings.ToLower(fileName), ".pdf")
isOffice := false
officeExts := []string{".csv", ".xlsx", ".xls", ".docx", ".doc", ".pptx", ".ppt", ".txt", ".md"}
for _, ext := range officeExts {
if strings.HasSuffix(strings.ToLower(fileName), ext) {
isOffice = true
break
}
}
// PDF文件返回文件URL
if isPDF {
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: fileURL,
ContentType: "url",
}, nil
}
// Office文件调用解析服务转换为HTML
if isOffice && s.aiCoreClient != nil {
knowledgeDebugLog.Printf("[Preview] Parsing office file: %s, URL: %s", fileName, fileURL)
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", nil) // Preview 不使用 VLM
if err != nil {
// 解析失败返回文件URL
knowledgeDebugLog.Printf("[Preview] Parse document failed: %v", err)
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: fileURL,
ContentType: "url",
}, nil
}
knowledgeDebugLog.Printf("[Preview] Parse result: success=%v, content_length=%d", result.Success, len(result.Content))
// 返回HTML内容
if result.Success && result.Content != "" {
knowledgeDebugLog.Printf("[Preview] Returning HTML content, length: %d", len(result.Content))
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: result.Content,
ContentType: "html",
}, nil
}
}
// 其他情况返回文件URL
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: fileURL,
ContentType: "url",
}, nil
}
// updateDocumentCount 更新知识库文档数
func (s *KnowledgeService) updateDocumentCount(kbID string) {
count, _ := s.repo.CountDocumentsByKBID(kbID)
s.repo.Update(kbID, map[string]interface{}{"document_count": int(count)})
}
// updateChunkCount 更新知识库 chunk 数
func (s *KnowledgeService) updateChunkCount(kbID string) {
docs, _ := s.repo.FindDocumentsByKBID(kbID, "parsed")
totalChunks := 0
for _, doc := range docs {
totalChunks += doc.ChunkCount
}
s.repo.Update(kbID, map[string]interface{}{"chunk_count": totalChunks})
}
func getFileExt(filename string) string {
exts := []string{".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".html"}
for _, ext := range exts {
if len(filename) >= len(ext) && filename[len(filename)-len(ext):] == ext {
return ext
}
}
if len(filename) > 4 {
return filename[len(filename)-4:]
}
return ""
}