package service import ( "bytes" "encoding/json" "log" "mime/multipart" "net/http" "os" "strings" "time" "github.com/google/uuid" "x-agents/server/internal/model" "x-agents/server/internal/repository" ) // debugLog 专用调试日志 var knowledgeDebugLog *log.Logger func init() { debugFile, _ := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) knowledgeDebugLog = log.New(debugFile, "", log.Ldate|log.Ltime) } type KnowledgeService struct { repo *repository.KnowledgeRepository modelRepo *repository.ModelRepository uploadService *UploadService pythonServiceURL string } func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService, pythonServiceURL string) *KnowledgeService { return &KnowledgeService{ repo: repo, modelRepo: modelRepo, uploadService: uploadService, pythonServiceURL: pythonServiceURL, } } // Create 创建知识库 func (s *KnowledgeService) Create(req model.CreateKnowledgeRequest) (*model.KnowledgeBase, error) { // 验证 LLM 模型存在 if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil { return nil, err } // 验证 Embedding 模型存在 if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil { return nil, err } kb := &model.KnowledgeBase{ ID: uuid.New().String(), Name: req.Name, Description: req.Description, LLMModelID: req.LLMModelID, EmbeddingModelID: req.EmbeddingModelID, ParsingConfig: req.ParsingConfig, StorageConfig: req.StorageConfig, Status: "active", DocumentCount: 0, ChunkCount: 0, } // 设置默认值 if kb.ParsingConfig.Engine == "" { kb.ParsingConfig.Engine = "markitdown" } if kb.ParsingConfig.EnablePDF != false { kb.ParsingConfig.EnablePDF = true } if kb.ParsingConfig.Pandoc != false { kb.ParsingConfig.Pandoc = true } if err := s.repo.Create(kb); err != nil { return nil, err } return kb, nil } // List 获取知识库列表 func (s *KnowledgeService) List() ([]model.KnowledgeBase, error) { return s.repo.FindAll() } // GetByID 获取知识库详情 func (s *KnowledgeService) GetByID(id string) (*model.KnowledgeBase, error) { return s.repo.FindByID(id) } // Update 更新知识库 func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) error { updates := make(map[string]interface{}) if req.Name != "" { updates["name"] = req.Name } if req.Description != "" { updates["description"] = req.Description } if req.LLMModelID != "" { // 验证模型存在 if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil { return err } updates["llm_model_id"] = req.LLMModelID } if req.EmbeddingModelID != "" { // 验证模型存在 if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil { return err } updates["embedding_model_id"] = req.EmbeddingModelID } if req.ParsingConfig.Engine != "" { updates["parsing_config"] = req.ParsingConfig } if req.StorageConfig.Type != "" { updates["storage_config"] = req.StorageConfig } if req.Status != "" { updates["status"] = req.Status } return s.repo.Update(id, updates) } // Delete 删除知识库 func (s *KnowledgeService) Delete(id string) error { // 先删除关联的文档 if err := s.repo.DeleteDocumentsByKBID(id); err != nil { return err } return s.repo.Delete(id) } // ListDocuments 获取知识库下的文档列表 func (s *KnowledgeService) ListDocuments(kbID string, status string) ([]model.KnowledgeDocument, error) { docs, err := s.repo.FindDocumentsByKBID(kbID, status) if err != nil { knowledgeDebugLog.Printf("[Knowledge ListDocuments] 错误: %v", err) return nil, err } knowledgeDebugLog.Printf("[Knowledge ListDocuments] kbID=%s, count=%d", kbID, len(docs)) for i, doc := range docs { knowledgeDebugLog.Printf("[Knowledge ListDocuments] doc[%d]: id=%s, name=%s, file_url=%q", i, doc.ID, doc.Name, doc.FileURL) // 如果是 MinIO 内网地址,转换为代理 URL if doc.FileURL != "" && (strings.Contains(doc.FileURL, "10.10.10.189") || strings.Contains(doc.FileURL, ":9768")) { // 从 URL 中提取 fileKey(包含扩展名) parts := strings.Split(doc.FileURL, "/") if len(parts) > 0 { fileName := parts[len(parts)-1] fileKey := strings.Split(fileName, "?")[0] doc.FileURL = "/api/file_proxy?kb_id=" + kbID + "&key=" + fileKey } } } return docs, nil } // UploadDocument 上传文档到知识库 func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, string, error) { knowledgeDebugLog.Printf("[Knowledge Upload] 开始上传文件: kbID=%s, filename=%s, size=%d", kbID, file.Filename, file.Size) // 验证知识库存在 kb, err := s.repo.FindByID(kbID) if err != nil { knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 知识库不存在, kbID=%s, err=%v", kbID, err) return nil, "", err } knowledgeDebugLog.Printf("[Knowledge Upload] 知识库配置: kbID=%s, storage_config.Type=%q, storage_config=%+v", kbID, kb.StorageConfig.Type, kb.StorageConfig) // 上传文件(根据知识库的 storage_config 选择存储方式) var result *UploadResponse if kb.StorageConfig.Type != "" { // 使用知识库的存储配置 knowledgeDebugLog.Printf("[Knowledge Upload] 使用知识库存储配置: type=%s, endpoint=%s, bucket=%s", kb.StorageConfig.Type, kb.StorageConfig.Endpoint, kb.StorageConfig.Bucket) result, err = s.uploadService.UploadWithConfig(file, kb.StorageConfig) } else { // 使用全局配置 knowledgeDebugLog.Printf("[Knowledge Upload] 使用全局存储配置") result, err = s.uploadService.Upload(file) } if err != nil { knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 上传失败, err=%v", err) return nil, "", err } knowledgeDebugLog.Printf("[Knowledge Upload] 上传结果: success=%v, url=%s, message=%s", result.Success, result.URL, result.Message) if !result.Success { knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 上传返回失败, message=%s", result.Message) return nil, "", nil } // 获取文件扩展名 ext := getFileExt(file.Filename) knowledgeDebugLog.Printf("[Knowledge Upload] 准备创建文档记录: result.URL=%q, fileKey=%s, ext=%s", result.URL, result.FileKey, ext) // 创建文档记录 doc := &model.KnowledgeDocument{ ID: uuid.New().String(), KnowledgeBaseID: kbID, Name: file.Filename, FileKey: result.FileKey + ext, FileURL: result.URL, FileSize: file.Size, Status: "parsing", UploadedAt: time.Now(), } knowledgeDebugLog.Printf("[Knowledge Upload] 文档创建完成: doc.FileURL=%q", doc.FileURL) if err := s.repo.CreateDocument(doc); err != nil { return nil, "", err } // 更新知识库文档数 s.updateDocumentCount(kbID) // 异步调用 Python 服务解析文档 go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig) return doc, result.URL, nil } // parseDocument 调用 Python 服务解析文档 func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config model.ParsingConfig) { // 构建请求 reqBody := map[string]interface{}{ "file_url": fileURL, "engine": config.Engine, } if config.Engine == "docling" && config.DoclingURL != "" { reqBody["docling_url"] = config.DoclingURL } body, _ := json.Marshal(reqBody) resp, err := http.Post(s.pythonServiceURL+"/parse", "application/json", bytes.NewBuffer(body)) if err != nil { s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"}) return } defer resp.Body.Close() if resp.StatusCode != 200 { s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"}) return } // 解析响应 var result map[string]interface{} if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"}) return } if success, ok := result["success"].(bool); ok && success { // 解析成功,更新状态 chunks := []string{} if c, ok := result["chunks"].([]interface{}); ok { for _, chunk := range c { if c, ok := chunk.(string); ok { chunks = append(chunks, c) } } } s.repo.UpdateDocument(docID, map[string]interface{}{ "status": "parsed", "chunk_count": len(chunks), }) // 更新知识库的 chunk_count s.updateChunkCount(kbID) } else { s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"}) } } // DeleteDocument 删除文档 func (s *KnowledgeService) DeleteDocument(kbID, docID string) error { // 验证文档存在 doc, err := s.repo.FindDocumentByID(docID) if err != nil { return err } if doc.KnowledgeBaseID != kbID { return nil } // 获取知识库配置 kb, err := s.repo.FindByID(kbID) if err != nil { return err } // 删除文件 if doc.FileKey != "" { knowledgeDebugLog.Printf("[Knowledge DeleteDocument] 删除文件: kbID=%s, docID=%s, fileKey=%s, storageType=%s", kbID, docID, doc.FileKey, kb.StorageConfig.Type) if kb.StorageConfig.Type != "" { // 使用知识库的存储配置删除 s.uploadService.DeleteFileWithConfig(doc.FileKey, kb.StorageConfig) } else { // 使用全局配置删除 s.uploadService.DeleteFile(doc.FileKey) } } // 删除文档记录 if err := s.repo.DeleteDocument(docID); err != nil { return err } // 更新知识库文档数 s.updateDocumentCount(kbID) return nil } // ReparseDocument 重新解析文档 func (s *KnowledgeService) ReparseDocument(kbID, docID string) error { // 验证文档存在 doc, err := s.repo.FindDocumentByID(docID) if err != nil { return err } if doc.KnowledgeBaseID != kbID { return nil } // 获取知识库配置 kb, err := s.repo.FindByID(kbID) if err != nil { return err } // 获取文件 URL fileURL, err := s.uploadService.GetFileURL(doc.FileKey) if err != nil { return err } // 重置状态为 parsing s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"}) // 异步重新解析 go s.parseDocument(kbID, docID, fileURL, kb.ParsingConfig) return nil } // GetDocumentPreview 获取文档预览 func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*model.DocumentPreviewResponse, error) { // 验证文档存在 doc, err := s.repo.FindDocumentByID(docID) if err != nil { return nil, err } if doc.KnowledgeBaseID != kbID { return nil, nil } // 如果已解析,返回解析内容;否则返回文件 URL if doc.Status == "parsed" { // TODO: 从存储中读取解析内容(可以存到数据库或文件) // 暂时返回文件 URL fileURL, _ := s.uploadService.GetFileURL(doc.FileKey) return &model.DocumentPreviewResponse{ TotalPages: 1, CurrentPage: page, Content: fileURL, }, nil } // 未解析,返回文件 URL fileURL, _ := s.uploadService.GetFileURL(doc.FileKey) return &model.DocumentPreviewResponse{ TotalPages: 1, CurrentPage: page, Content: fileURL, }, nil } // updateDocumentCount 更新知识库文档数 func (s *KnowledgeService) updateDocumentCount(kbID string) { count, _ := s.repo.CountDocumentsByKBID(kbID) s.repo.Update(kbID, map[string]interface{}{"document_count": int(count)}) } // updateChunkCount 更新知识库 chunk 数 func (s *KnowledgeService) updateChunkCount(kbID string) { docs, _ := s.repo.FindDocumentsByKBID(kbID, "parsed") totalChunks := 0 for _, doc := range docs { totalChunks += doc.ChunkCount } s.repo.Update(kbID, map[string]interface{}{"chunk_count": totalChunks}) } func getFileExt(filename string) string { exts := []string{".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".html"} for _, ext := range exts { if len(filename) >= len(ext) && filename[len(filename)-len(ext):] == ext { return ext } } if len(filename) > 4 { return filename[len(filename)-4:] } return "" }