package service import ( "bytes" "encoding/json" "io" "log" "mime/multipart" "net/http" "os" "strings" "time" "github.com/google/uuid" "x-agents/server/internal/model" "x-agents/server/internal/repository" ) // debugLog 专用调试日志 var knowledgeDebugLog *log.Logger func init() { // 确保 logs 目录存在 os.MkdirAll("logs", 0755) debugFile, err := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) if err != nil { // 如果文件打开失败,使用 discard 避免输出到控制台 knowledgeDebugLog = log.New(io.Discard, "", log.Ldate|log.Ltime) } else { knowledgeDebugLog = log.New(debugFile, "", log.Ldate|log.Ltime) } } type KnowledgeService struct { repo *repository.KnowledgeRepository modelRepo *repository.ModelRepository uploadService *UploadService pythonServiceURL string aiCoreClient *AICoreClient markdownLocalPath string // Markdown 本地存储路径 } func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService, pythonServiceURL, aiCoreServiceAddr, markdownLocalPath string) *KnowledgeService { aiCoreClient, _ := NewAICoreClient(aiCoreServiceAddr) return &KnowledgeService{ repo: repo, modelRepo: modelRepo, uploadService: uploadService, pythonServiceURL: pythonServiceURL, aiCoreClient: aiCoreClient, markdownLocalPath: markdownLocalPath, } } // Create 创建知识库 func (s *KnowledgeService) Create(req model.CreateKnowledgeRequest) (*model.KnowledgeBase, error) { // 验证 LLM 模型存在 if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil { return nil, err } // 验证 Embedding 模型存在 if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil { return nil, err } kb := &model.KnowledgeBase{ ID: uuid.New().String(), Name: req.Name, Description: req.Description, LLMModelID: req.LLMModelID, EmbeddingModelID: req.EmbeddingModelID, ParsingConfig: req.ParsingConfig, StorageConfig: req.StorageConfig, Status: "active", DocumentCount: 0, ChunkCount: 0, } // 设置默认值 if kb.ParsingConfig.Engine == "" { kb.ParsingConfig.Engine = "markitdown" } if kb.ParsingConfig.EnablePDF != false { kb.ParsingConfig.EnablePDF = true } if kb.ParsingConfig.Pandoc != false { kb.ParsingConfig.Pandoc = true } if err := s.repo.Create(kb); err != nil { return nil, err } return kb, nil } // List 获取知识库列表 func (s *KnowledgeService) List() ([]model.KnowledgeBase, error) { return s.repo.FindAll() } // GetByID 获取知识库详情 func (s *KnowledgeService) GetByID(id string) (*model.KnowledgeBase, error) { return s.repo.FindByID(id) } // Update 更新知识库 func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) error { updates := make(map[string]interface{}) if req.Name != "" { updates["name"] = req.Name } if req.Description != "" { updates["description"] = req.Description } if req.LLMModelID != "" { // 验证模型存在 if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil { return err } updates["llm_model_id"] = req.LLMModelID } if req.EmbeddingModelID != "" { // 验证模型存在 if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil { return err } updates["embedding_model_id"] = req.EmbeddingModelID } if req.ParsingConfig.Engine != "" { updates["parsing_config"] = req.ParsingConfig } if req.StorageConfig.Type != "" { updates["storage_config"] = req.StorageConfig } if req.Status != "" { updates["status"] = req.Status } return s.repo.Update(id, updates) } // Delete 删除知识库 func (s *KnowledgeService) Delete(id string) error { // 获取知识库信息 kb, err := s.repo.FindByID(id) if err != nil { return err } // 获取知识库下所有文档 docs, err := s.repo.FindDocumentsByKBID(id, "") if err != nil { return err } // 删除每个文档的 MinIO 文件和本地 Markdown 文件 for _, doc := range docs { // 删除存储文件(MinIO 或本地) if doc.FileKey != "" { s.uploadService.DeleteFileWithConfig(doc.FileKey, kb.StorageConfig, kb.Name) } // 删除本地 Markdown 文件 if s.markdownLocalPath != "" { markdownPath := s.markdownLocalPath + "/" + doc.ID + ".md" os.Remove(markdownPath) } } // 删除关联的文档(数据库记录) if err := s.repo.DeleteDocumentsByKBID(id); err != nil { return err } return s.repo.Delete(id) } // ListDocuments 获取知识库下的文档列表 func (s *KnowledgeService) ListDocuments(kbID string, status string) ([]model.KnowledgeDocument, error) { docs, err := s.repo.FindDocumentsByKBID(kbID, status) if err != nil { knowledgeDebugLog.Printf("[Knowledge ListDocuments] 错误: %v", err) return nil, err } knowledgeDebugLog.Printf("[Knowledge ListDocuments] kbID=%s, count=%d", kbID, len(docs)) for i, doc := range docs { knowledgeDebugLog.Printf("[Knowledge ListDocuments] doc[%d]: id=%s, name=%s, file_url=%q", i, doc.ID, doc.Name, doc.FileURL) // 如果是 MinIO 内网地址,转换为代理 URL if doc.FileURL != "" && (strings.Contains(doc.FileURL, "10.10.10.189") || strings.Contains(doc.FileURL, ":9768")) { // 从 URL 中提取 fileKey(包含扩展名) parts := strings.Split(doc.FileURL, "/") if len(parts) > 0 { fileName := parts[len(parts)-1] fileKey := strings.Split(fileName, "?")[0] doc.FileURL = "/api/file_proxy?kb_id=" + kbID + "&key=" + fileKey } } } return docs, nil } // UploadDocument 上传文档到知识库 func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, string, error) { knowledgeDebugLog.Printf("[Knowledge Upload] 开始上传文件: kbID=%s, filename=%s, size=%d", kbID, file.Filename, file.Size) // 验证知识库存在 kb, err := s.repo.FindByID(kbID) if err != nil { knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 知识库不存在, kbID=%s, err=%v", kbID, err) return nil, "", err } knowledgeDebugLog.Printf("[Knowledge Upload] 知识库配置: kbID=%s, storage_config.Type=%q, storage_config=%+v", kbID, kb.StorageConfig.Type, kb.StorageConfig) // 上传文件(根据知识库的 storage_config 选择存储方式) var result *UploadResponse if kb.StorageConfig.Type != "" && kb.StorageConfig.Type != "local" { // 使用知识库的存储配置(MinIO) knowledgeDebugLog.Printf("[Knowledge Upload] 使用知识库存储配置: type=%s, endpoint=%s, bucket=%s", kb.StorageConfig.Type, kb.StorageConfig.Endpoint, kb.StorageConfig.Bucket) result, err = s.uploadService.UploadWithConfig(file, kb.StorageConfig) } else { // 本地存储,使用知识库名称作为子目录 knowledgeDebugLog.Printf("[Knowledge Upload] 使用本地存储,路径: resources/%s/", kb.Name) result, err = s.uploadService.UploadToKnowledgeBase(file, kb.StorageConfig, kb.Name) } if err != nil { knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 上传失败, err=%v", err) return nil, "", err } knowledgeDebugLog.Printf("[Knowledge Upload] 上传结果: success=%v, url=%s, message=%s", result.Success, result.URL, result.Message) if !result.Success { knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 上传返回失败, message=%s", result.Message) return nil, "", nil } // 获取文件扩展名 ext := getFileExt(file.Filename) knowledgeDebugLog.Printf("[Knowledge Upload] 准备创建文档记录: result.URL=%q, fileKey=%s, ext=%s", result.URL, result.FileKey, ext) // 创建文档记录 doc := &model.KnowledgeDocument{ ID: uuid.New().String(), KnowledgeBaseID: kbID, Name: file.Filename, FileKey: result.FileKey + ext, FileURL: result.URL, FileSize: file.Size, Status: "parsing", UploadedAt: time.Now(), } knowledgeDebugLog.Printf("[Knowledge Upload] 文档创建完成: doc.FileURL=%q", doc.FileURL) if err := s.repo.CreateDocument(doc); err != nil { return nil, "", err } // 更新知识库文档数 s.updateDocumentCount(kbID) // 异步调用 Python 服务解析文档 go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig) // 异步调用 AI-Core gRPC 服务解析文档(获取 Markdown) go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name, kb.ParsingConfig) return doc, result.URL, nil } // parseDocument 调用 Python 服务解析文档 func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config model.ParsingConfig) { // 构建请求 reqBody := map[string]interface{}{ "file_url": fileURL, "engine": config.Engine, } if config.Engine == "docling" && config.DoclingURL != "" { reqBody["docling_url"] = config.DoclingURL } body, _ := json.Marshal(reqBody) resp, err := http.Post(s.pythonServiceURL+"/parse", "application/json", bytes.NewBuffer(body)) if err != nil { s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"}) return } defer resp.Body.Close() if resp.StatusCode != 200 { s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"}) return } // 解析响应 var result map[string]interface{} if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"}) return } if success, ok := result["success"].(bool); ok && success { // 解析成功,更新状态 chunks := []string{} if c, ok := result["chunks"].([]interface{}); ok { for _, chunk := range c { if c, ok := chunk.(string); ok { chunks = append(chunks, c) } } } s.repo.UpdateDocument(docID, map[string]interface{}{ "status": "parsed", "chunk_count": len(chunks), }) // 更新知识库的 chunk_count s.updateChunkCount(kbID) } else { s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"}) } } // parseDocumentWithAICore 调用 AI-Core gRPC 服务解析文档 func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string, config model.ParsingConfig) { if s.aiCoreClient == nil { knowledgeDebugLog.Printf("[AICore] AI-Core 客户端未初始化") return } knowledgeDebugLog.Printf("[AICore] 开始解析文档: docID=%s, fileURL=%s, fileName=%s", docID, fileURL, fileName) // 构建 VLM 配置 var vlmConfig *VLMConfig if config.VLMEnabled { vlmConfig = &VLMConfig{ Enabled: config.VLMEnabled, Provider: config.VLMProvider, Model: config.VLMModel, APIKey: config.VLMAPIKey, BaseURL: config.VLMBaseURL, Prompt: config.VLMPrompt, } knowledgeDebugLog.Printf("[AICore] VLM 配置: provider=%s, model=%s, enabled=%v", config.VLMProvider, config.VLMModel, config.VLMEnabled) } result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", vlmConfig) if err != nil { knowledgeDebugLog.Printf("[AICore] 解析失败: docID=%s, err=%v", docID, err) return } if result.Success && result.Content != "" { knowledgeDebugLog.Printf("[AICore] 解析成功: docID=%s, contentLength=%d", docID, len(result.Content)) // 保存到本地文件 markdownPath := s.saveMarkdownToFile(docID, fileName, result.Content) if markdownPath != "" { knowledgeDebugLog.Printf("[AICore] Markdown 保存到本地: docID=%s, path=%s", docID, markdownPath) } // 更新文档的 Content 字段 s.repo.UpdateDocument(docID, map[string]interface{}{ "content": result.Content, }) } else { knowledgeDebugLog.Printf("[AICore] 解析返回失败: docID=%s, message=%s", docID, result.Message) } } // saveMarkdownToFile 保存 Markdown 内容到本地文件 func (s *KnowledgeService) saveMarkdownToFile(docID, fileName, content string) string { if s.markdownLocalPath == "" { s.markdownLocalPath = "resource/markdown" } // 创建目录 if err := os.MkdirAll(s.markdownLocalPath, 0755); err != nil { knowledgeDebugLog.Printf("[AICore] 创建目录失败: path=%s, err=%v", s.markdownLocalPath, err) return "" } // 生成文件名(用 docID + .md) markdownFileName := docID + ".md" markdownPath := s.markdownLocalPath + "/" + markdownFileName // 写入文件 if err := os.WriteFile(markdownPath, []byte(content), 0644); err != nil { knowledgeDebugLog.Printf("[AICore] 保存 Markdown 失败: path=%s, err=%v", markdownPath, err) return "" } return markdownPath } // DeleteDocument 删除文档 func (s *KnowledgeService) DeleteDocument(kbID, docID string) error { // 验证文档存在 doc, err := s.repo.FindDocumentByID(docID) if err != nil { return err } if doc.KnowledgeBaseID != kbID { return nil } // 获取知识库配置 kb, err := s.repo.FindByID(kbID) if err != nil { return err } // 删除文件 if doc.FileKey != "" { knowledgeDebugLog.Printf("[Knowledge DeleteDocument] 删除文件: kbID=%s, docID=%s, fileKey=%s, storageType=%s", kbID, docID, doc.FileKey, kb.StorageConfig.Type) // 使用知识库的存储配置删除(传入知识库名称) s.uploadService.DeleteFileWithConfig(doc.FileKey, kb.StorageConfig, kb.Name) } // 删除文档记录 if err := s.repo.DeleteDocument(docID); err != nil { return err } // 更新知识库文档数 s.updateDocumentCount(kbID) return nil } // ReparseDocument 重新解析文档 func (s *KnowledgeService) ReparseDocument(kbID, docID string) error { // 验证文档存在 doc, err := s.repo.FindDocumentByID(docID) if err != nil { return err } if doc.KnowledgeBaseID != kbID { return nil } // 获取知识库配置 kb, err := s.repo.FindByID(kbID) if err != nil { return err } // 获取文件 URL fileURL, err := s.uploadService.GetFileURL(doc.FileKey) if err != nil { return err } // 重置状态为 parsing s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"}) // 异步重新解析 go s.parseDocument(kbID, docID, fileURL, kb.ParsingConfig) return nil } // GetDocumentPreview 获取文档预览 func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*model.DocumentPreviewResponse, error) { // 验证文档存在 doc, err := s.repo.FindDocumentByID(docID) if err != nil { return nil, err } if doc.KnowledgeBaseID != kbID { return nil, nil } // 获取文件URL fileURL, _ := s.uploadService.GetFileURL(doc.FileKey) // 根据文件类型决定预览方式 fileName := doc.Name isPDF := strings.HasSuffix(strings.ToLower(fileName), ".pdf") isOffice := false officeExts := []string{".csv", ".xlsx", ".xls", ".docx", ".doc", ".pptx", ".ppt", ".txt", ".md"} for _, ext := range officeExts { if strings.HasSuffix(strings.ToLower(fileName), ext) { isOffice = true break } } // PDF文件返回文件URL if isPDF { return &model.DocumentPreviewResponse{ TotalPages: 1, CurrentPage: page, Content: fileURL, ContentType: "url", }, nil } // Office文件调用解析服务转换为HTML if isOffice && s.aiCoreClient != nil { knowledgeDebugLog.Printf("[Preview] Parsing office file: %s, URL: %s", fileName, fileURL) result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "", nil) // Preview 不使用 VLM if err != nil { // 解析失败,返回文件URL knowledgeDebugLog.Printf("[Preview] Parse document failed: %v", err) return &model.DocumentPreviewResponse{ TotalPages: 1, CurrentPage: page, Content: fileURL, ContentType: "url", }, nil } knowledgeDebugLog.Printf("[Preview] Parse result: success=%v, content_length=%d", result.Success, len(result.Content)) // 返回HTML内容 if result.Success && result.Content != "" { knowledgeDebugLog.Printf("[Preview] Returning HTML content, length: %d", len(result.Content)) return &model.DocumentPreviewResponse{ TotalPages: 1, CurrentPage: page, Content: result.Content, ContentType: "html", }, nil } } // 其他情况返回文件URL return &model.DocumentPreviewResponse{ TotalPages: 1, CurrentPage: page, Content: fileURL, ContentType: "url", }, nil } // updateDocumentCount 更新知识库文档数 func (s *KnowledgeService) updateDocumentCount(kbID string) { count, _ := s.repo.CountDocumentsByKBID(kbID) s.repo.Update(kbID, map[string]interface{}{"document_count": int(count)}) } // updateChunkCount 更新知识库 chunk 数 func (s *KnowledgeService) updateChunkCount(kbID string) { docs, _ := s.repo.FindDocumentsByKBID(kbID, "parsed") totalChunks := 0 for _, doc := range docs { totalChunks += doc.ChunkCount } s.repo.Update(kbID, map[string]interface{}{"chunk_count": totalChunks}) } func getFileExt(filename string) string { exts := []string{".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".html"} for _, ext := range exts { if len(filename) >= len(ext) && filename[len(filename)-len(ext):] == ext { return ext } } if len(filename) > 4 { return filename[len(filename)-4:] } return "" }