diff --git a/server/cmd/api/main.go b/server/cmd/api/main.go
index 127bfd7..de37472 100644
--- a/server/cmd/api/main.go
+++ b/server/cmd/api/main.go
@@ -70,12 +70,13 @@ func main() {
 	}
 
 	// 3. 自动迁移表
-	db.AutoMigrate(&model.DatabaseInfo{}, &model.SubTableInfo{}, &model.ModelInfo{})
+	db.AutoMigrate(&model.DatabaseInfo{}, &model.SubTableInfo{}, &model.ModelInfo{}, &model.KnowledgeBase{}, &model.KnowledgeDocument{})
 
 	// 4. 初始化 Repository
 	dbRepo := repository.NewDatabaseRepository(db)
 	subTableRepo := repository.NewSubTableRepository(db)
 	modelRepo := repository.NewModelRepository(db)
+	knowledgeRepo := repository.NewKnowledgeRepository(db)
 
 	// 5. 初始化 Service
 	dbService := service.NewDatabaseService(dbRepo, subTableRepo)
@@ -86,6 +87,7 @@ func main() {
 	if err != nil {
 		log.Printf("Warning: Failed to initialize upload service: %v (files will not be available)", err)
 	}
+	knowledgeService := service.NewKnowledgeService(knowledgeRepo, modelRepo, uploadService, cfg.PythonServiceURL)
 
 	// 6. 初始化 Handler
 	dbHandler := handler.NewDatabaseHandler(dbService)
@@ -93,6 +95,7 @@ func main() {
 	neo4jHandler := handler.NewNeo4jHandler(neo4jService)
 	modelHandler := handler.NewModelHandler(modelService)
 	systemHandler := handler.NewSystemHandler()
+	knowledgeHandler := handler.NewKnowledgeHandler(knowledgeService)
 	var uploadHandler *handler.UploadHandler
 	if uploadService != nil {
 		uploadHandler = handler.NewUploadHandler(uploadService)
@@ -185,6 +188,22 @@ func main() {
 		modelGroup.DELETE("/:id", modelHandler.Delete)
 	}
 
+	// Knowledge base management
+	knowledgeGroup := r.Group("/api/knowledge")
+	{
+		knowledgeGroup.POST("/create", knowledgeHandler.Create)
+		knowledgeGroup.GET("/list", knowledgeHandler.List)
+		knowledgeGroup.GET("/:id", knowledgeHandler.GetByID)
+		knowledgeGroup.PUT("/:id", knowledgeHandler.Update)
+		knowledgeGroup.DELETE("/:id", knowledgeHandler.Delete)
+		// Document management
+		knowledgeGroup.GET("/:id/documents", knowledgeHandler.ListDocuments)
+		knowledgeGroup.POST("/:id/documents", knowledgeHandler.UploadDocument)
+		knowledgeGroup.DELETE("/:id/documents/:doc_id", knowledgeHandler.DeleteDocument)
+		knowledgeGroup.POST("/:id/documents/:doc_id/reparse", knowledgeHandler.ReparseDocument)
+		knowledgeGroup.GET("/:id/documents/:doc_id/preview", knowledgeHandler.GetDocumentPreview)
+	}
+
 	// 系统信息模块
 	r.GET("/system/info", systemHandler.GetSystemInfo)
 
@@ -195,8 +214,8 @@ func main() {
 			r.Static("/files", cfg.UploadLocalPath)
 		}
 		// 上传路由
-		r.POST("/upload", uploadHandler.Upload)
-		r.DELETE("/upload/:filename", uploadHandler.Delete)
+		r.POST("/api/file_upload", uploadHandler.Upload)
+		r.DELETE("/api/file_upload/:filename", uploadHandler.Delete)
 	}
 
 	// 8. 启动服务
diff --git a/server/internal/handler/knowledge_handler.go b/server/internal/handler/knowledge_handler.go
new file mode 100644
index 0000000..721d3ec
--- /dev/null
+++ b/server/internal/handler/knowledge_handler.go
@@ -0,0 +1,229 @@
+package handler
+
+import (
+	"net/http"
+	"strconv"
+
+	"github.com/gin-gonic/gin"
+	"x-agents/server/internal/model"
+	"x-agents/server/internal/service"
+)
+
+// KnowledgeHandler exposes the knowledge base HTTP endpoints.
+type KnowledgeHandler struct {
+	service *service.KnowledgeService
+}
+
+// NewKnowledgeHandler returns a KnowledgeHandler backed by s.
+func NewKnowledgeHandler(s *service.KnowledgeService) *KnowledgeHandler {
+	return &KnowledgeHandler{service: s}
+}
+
+// Create creates a knowledge base from the JSON request body.
+func (h *KnowledgeHandler) Create(c *gin.Context) {
+	var req model.CreateKnowledgeRequest
+	if err := c.ShouldBindJSON(&req); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+
+	kb, err := h.service.Create(req)
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{
+		"success": true,
+		"id":      kb.ID,
+		"message": "Knowledge base created successfully",
+	})
+}
+
+// List returns all knowledge bases.
+func (h *KnowledgeHandler) List(c *gin.Context) {
+	list, err := h.service.List()
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"success": true, "data": list})
+}
+
+// GetByID returns the knowledge base identified by the :id path parameter.
+func (h *KnowledgeHandler) GetByID(c *gin.Context) {
+	id := c.Param("id")
+	if id == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": "id is required"})
+		return
+	}
+
+	kb, err := h.service.GetByID(id)
+	if err != nil {
+		c.JSON(http.StatusNotFound, gin.H{"success": false, "message": "Knowledge base not found"})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"success": true, "data": kb})
+}
+
+// Update applies the JSON request body to the knowledge base identified by :id.
+func (h *KnowledgeHandler) Update(c *gin.Context) {
+	id := c.Param("id")
+	if id == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": "id is required"})
+		return
+	}
+
+	var req model.UpdateKnowledgeRequest
+	if err := c.ShouldBindJSON(&req); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+
+	if err := h.service.Update(id, req); err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"success": true, "message": "Knowledge base updated"})
+}
+
+// Delete removes the knowledge base identified by :id.
+func (h *KnowledgeHandler) Delete(c *gin.Context) {
+	id := c.Param("id")
+	if id == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": "id is required"})
+		return
+	}
+
+	if err := h.service.Delete(id); err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"success": true, "message": "Knowledge base deleted"})
+}
+
+// ListDocuments lists a knowledge base's documents, filtered by the optional status query parameter.
+func (h *KnowledgeHandler) ListDocuments(c *gin.Context) {
+	id := c.Param("id")
+	if id == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": "id is required"})
+		return
+	}
+
+	status := c.Query("status")
+	list, err := h.service.ListDocuments(id, status)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"success": true, "data": list})
+}
+
+// UploadDocument stores the multipart "file" field as a document of the knowledge base.
+func (h *KnowledgeHandler) UploadDocument(c *gin.Context) {
+	id := c.Param("id")
+	if id == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": "id is required"})
+		return
+	}
+
+	file, err := c.FormFile("file")
+	if err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": "No file uploaded"})
+		return
+	}
+
+	// Reject files larger than 100MB.
+	if file.Size > 100*1024*1024 {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": "File too large (max 100MB)"})
+		return
+	}
+
+	doc, fileURL, err := h.service.UploadDocument(id, file)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{
+		"success":  true,
+		"id":       doc.ID,
+		"url":      fileURL,
+		"document": doc,
+		"message":  "Document uploaded",
+	})
+}
+
+// DeleteDocument removes the document :doc_id from the knowledge base :id.
+func (h *KnowledgeHandler) DeleteDocument(c *gin.Context) {
+	id := c.Param("id")
+	docID := c.Param("doc_id")
+
+	if id == "" || docID == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": "id and doc_id are required"})
+		return
+	}
+
+	if err := h.service.DeleteDocument(id, docID); err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"success": true, "message": "Document deleted"})
+}
+
+// ReparseDocument asks the service to parse the document :doc_id again.
+func (h *KnowledgeHandler) ReparseDocument(c *gin.Context) {
+	id := c.Param("id")
+	docID := c.Param("doc_id")
+
+	if id == "" || docID == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": "id and doc_id are required"})
+		return
+	}
+
+	if err := h.service.ReparseDocument(id, docID); err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"success": true, "message": "Document reparse started"})
+}
+
+// GetDocumentPreview returns the preview of document :doc_id for the optional page query parameter.
+func (h *KnowledgeHandler) GetDocumentPreview(c *gin.Context) {
+	id := c.Param("id")
+	docID := c.Param("doc_id")
+
+	if id == "" || docID == "" {
+		c.JSON(http.StatusBadRequest, gin.H{"success": false, "message": "id and doc_id are required"})
+		return
+	}
+
+	// Malformed or non-positive page numbers fall back to the first page.
+	page := 1
+	if p := c.Query("page"); p != "" {
+		if parsed, err := strconv.Atoi(p); err == nil && parsed > 0 {
+			page = parsed
+		}
+	}
+
+	preview, err := h.service.GetDocumentPreview(id, docID, page)
+	if err != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"success": false, "message": err.Error()})
+		return
+	}
+	// The service can return (nil, nil) — presumably when the document is not
+	// part of this knowledge base (confirm); do not report that as success.
+	if preview == nil {
+		c.JSON(http.StatusNotFound, gin.H{"success": false, "message": "Document not found"})
+		return
+	}
+
+	c.JSON(http.StatusOK, gin.H{"success": true, "data": preview})
+}
diff --git a/server/internal/model/knowledge_info.go b/server/internal/model/knowledge_info.go
index 9a7e639..b8a3b0c 100644
--- a/server/internal/model/knowledge_info.go
+++ b/server/internal/model/knowledge_info.go
@@ -57,6 +57,7 @@ type KnowledgeDocument struct {
 	KnowledgeBaseID string `json:"knowledge_base_id" gorm:"type:varchar(36);not null;index"`
 	Name            string `json:"name" gorm:"type:varchar(255);not null"`
 	FileKey         string `json:"file_key" gorm:"type:varchar(500)"`
+	FileURL         string `json:"file_url" gorm:"type:varchar(500)"` // file access URL
 	FileSize        int64  `json:"file_size" gorm:"type:bigint;default:0"`
 	Status          string `json:"status" gorm:"type:varchar(20);default:parsing"` // parsing / parsed / failed
 	ChunkCount      int    `json:"chunk_count" gorm:"default:0"`
diff --git a/server/internal/service/knowledge_service.go b/server/internal/service/knowledge_service.go
index c23f703..4c4b2ca 100644
--- a/server/internal/service/knowledge_service.go
+++ b/server/internal/service/knowledge_service.go
@@ -1,7 +1,12 @@
 package service
 
 import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"log"
 	"mime/multipart"
+	"net/http"
 	"time"
 
 	"github.com/google/uuid"
@@ -10,16 +15,18 @@ import (
 )
 
 type KnowledgeService struct {
-	repo          *repository.KnowledgeRepository
-	modelRepo     *repository.ModelRepository
-	uploadService *UploadService
+	repo             *repository.KnowledgeRepository
+	modelRepo        *repository.ModelRepository
+	uploadService    *UploadService
+	pythonServiceURL string
 }
 
-func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService) *KnowledgeService {
+func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService, pythonServiceURL string) *KnowledgeService {
 	return &KnowledgeService{
-		repo:          repo,
-		modelRepo:     modelRepo,
-		uploadService: uploadService,
+		repo:             repo,
+		modelRepo:        modelRepo,
+		uploadService:    uploadService,
+		pythonServiceURL: pythonServiceURL,
 	}
 }
 
@@ -35,22 +42,22 @@ func (s *KnowledgeService) Create(req model.CreateKnowledgeRequest) (*model.Know
 	}
 
 	kb := &model.KnowledgeBase{
-		ID: uuid.New().String(),
-		Name: req.Name,
-		Description: req.Description,
-		LLMModelID: req.LLMModelID,
-		EmbeddingModelID: req.EmbeddingModelID,
-		ParsingConfig: req.ParsingConfig,
-		Status: "active",
-		DocumentCount: 0,
-		ChunkCount: 0,
+		ID:               uuid.New().String(),
+		Name:             req.Name,
+		Description:      req.Description,
+		LLMModelID:       req.LLMModelID,
+		EmbeddingModelID: req.EmbeddingModelID,
+		ParsingConfig:    req.ParsingConfig,
+		Status:           "active",
+		DocumentCount:    0,
+		ChunkCount:       0,
 	}
 
 	// 设置默认值
-	if kb.ParsingConfig.EnablePDF == false && kb.ParsingConfig.EnablePDF != true {
-		kb.ParsingConfig.EnablePDF = true
-	}
-	if kb.ParsingConfig.Pandoc == false && kb.ParsingConfig.Pandoc != true {
-		kb.ParsingConfig.Pandoc = true
+	if kb.ParsingConfig.Engine == "" {
+		kb.ParsingConfig.Engine = "markitdown"
 	}
+	// NOTE(review): EnablePDF and Pandoc are plain bools, so an omitted value
+	// cannot be told apart from an explicit false; they are stored as sent.
+	// Defaulting them to true would need *bool fields — confirm the intent.
 
@@ -118,41 +125,132 @@ func (s *KnowledgeService) ListDocuments(kbID string, status string) ([]model.Kn
 }
 
 // UploadDocument 上传文档到知识库
-func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, error) {
+func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, string, error) {
+	// main wires this service up even when the upload service failed to
+	// initialize, so guard against a nil dependency instead of panicking.
+	if s.uploadService == nil {
+		return nil, "", errors.New("upload service is not available")
+	}
+
 	// 验证知识库存在
-	_, err := s.repo.FindByID(kbID)
+	kb, err := s.repo.FindByID(kbID)
 	if err != nil {
-		return nil, err
+		return nil, "", err
 	}
 
 	// 上传文件
 	result, err := s.uploadService.Upload(file)
 	if err != nil {
-		return nil, err
+		return nil, "", err
 	}
 	if !result.Success {
-		return nil, nil
+		// Never return a nil document with a nil error: the HTTP handler
+		// dereferences the document as soon as err == nil.
+		return nil, "", errors.New("file upload failed")
 	}
 
+	// The extension is stored as part of the file key. NOTE(review): rows
+	// created before this change hold the key without it — confirm whether
+	// they need a migration.
+	ext := getFileExt(file.Filename)
+
 	// 创建文档记录
 	doc := &model.KnowledgeDocument{
 		ID:              uuid.New().String(),
 		KnowledgeBaseID: kbID,
 		Name:            file.Filename,
-		FileKey:         result.FileKey,
+		FileKey:         result.FileKey + ext,
+		FileURL:         result.URL,
 		FileSize:        file.Size,
 		Status:          "parsing",
 		UploadedAt:      time.Now(),
 	}
 
 	if err := s.repo.CreateDocument(doc); err != nil {
-		return nil, err
+		return nil, "", err
 	}
 
 	// 更新知识库文档数
 	s.updateDocumentCount(kbID)
 
-	return doc, nil
+	// Parse the document asynchronously via the Python service.
+	go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)
+
+	return doc, result.URL, nil
+}
+
+// parseDocument asks the Python service to parse the file at fileURL and
+// records the outcome on the document row. It runs in its own goroutine, so
+// failures are logged and reported by setting the document status to "failed".
+func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config model.ParsingConfig) {
+	markFailed := func(reason string) {
+		log.Printf("Warning: Failed to parse document %s: %s", docID, reason)
+		if err := s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"}); err != nil {
+			log.Printf("Warning: Failed to mark document %s as failed: %v", docID, err)
+		}
+	}
+
+	// Build the request payload.
+	reqBody := map[string]interface{}{
+		"file_url": fileURL,
+		"engine":   config.Engine,
+	}
+	if config.Engine == "docling" && config.DoclingURL != "" {
+		reqBody["docling_url"] = config.DoclingURL
+	}
+
+	body, err := json.Marshal(reqBody)
+	if err != nil {
+		markFailed("encoding request: " + err.Error())
+		return
+	}
+
+	// Bound the call so an unresponsive parser cannot pin this goroutine and
+	// leave the document in "parsing" forever.
+	client := &http.Client{Timeout: 30 * time.Minute}
+	resp, err := client.Post(s.pythonServiceURL+"/parse", "application/json", bytes.NewReader(body))
+	if err != nil {
+		markFailed("calling parse service: " + err.Error())
+		return
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		markFailed("parse service returned " + resp.Status)
+		return
+	}
+
+	// Decode the response.
+	var result map[string]interface{}
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		markFailed("decoding response: " + err.Error())
+		return
+	}
+	if success, ok := result["success"].(bool); !ok || !success {
+		markFailed("parse service reported failure")
+		return
+	}
+
+	// Only string entries of "chunks" are counted.
+	chunkCount := 0
+	if items, ok := result["chunks"].([]interface{}); ok {
+		for _, item := range items {
+			if _, isString := item.(string); isString {
+				chunkCount++
+			}
+		}
+	}
+
+	update := map[string]interface{}{
+		"status":      "parsed",
+		"chunk_count": chunkCount,
+	}
+	if err := s.repo.UpdateDocument(docID, update); err != nil {
+		log.Printf("Warning: Failed to mark document %s as parsed: %v", docID, err)
+	}
+
+	// Refresh the knowledge base's aggregated chunk_count.
+	s.updateChunkCount(kbID)
 }
 
 // DeleteDocument 删除文档
@@ -168,7 +266,7 @@ func (s *KnowledgeService) DeleteDocument(kbID, docID string) error {
 
 	// 删除文件
-	if doc.FileKey != "" {
-		s.uploadService.DeleteFile(doc.FileKey + getFileExt(doc.Name))
+	if doc.FileKey != "" && s.uploadService != nil {
+		s.uploadService.DeleteFile(doc.FileKey)
 	}
 
 	// 删除文档记录
@@ -193,8 +291,31 @@ func (s *KnowledgeService) ReparseDocument(kbID, docID string) error {
 		return nil
 	}
 
+	if s.uploadService == nil {
+		return errors.New("upload service is not available")
+	}
+
+	// Load the knowledge base to get its parsing configuration.
+	kb, err := s.repo.FindByID(kbID)
+	if err != nil {
+		return err
+	}
+
+	// Resolve the file URL.
+	fileURL, err := s.uploadService.GetFileURL(doc.FileKey)
+	if err != nil {
+		return err
+	}
+
 	// 重置状态为 parsing
-	return s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"})
+	if err := s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"}); err != nil {
+		return err
+	}
+
+	// Re-parse asynchronously.
+	go s.parseDocument(kbID, docID, fileURL, kb.ParsingConfig)
+
+	return nil
 }
 
 // GetDocumentPreview 获取文档预览
@@ -208,12 +329,17 @@ func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*mo
 		return nil, nil
 	}
 
-	// 简单实现:返回文件 URL(实际应由 Python 服务处理)
-	fileURL, err := s.uploadService.GetFileURL(doc.FileKey + getFileExt(doc.Name))
+	if s.uploadService == nil {
+		return nil, errors.New("upload service is not available")
+	}
+
+	// TODO: return the parsed content once it is persisted (database or file).
+	// Until then the preview is the file URL, whatever the document status.
+	fileURL, err := s.uploadService.GetFileURL(doc.FileKey)
 	if err != nil {
 		return nil, err
 	}
 
 	return &model.DocumentPreviewResponse{
 		TotalPages:  1,
 		CurrentPage: page,
@@ -227,7 +353,31 @@ func (s *KnowledgeService) updateDocumentCount(kbID string) {
 	s.repo.Update(kbID, map[string]interface{}{"document_count": int(count)})
 }
 
+// updateChunkCount recomputes the knowledge base's chunk_count as the sum of
+// the chunk counts of its parsed documents.
+func (s *KnowledgeService) updateChunkCount(kbID string) {
+	docs, err := s.repo.FindDocumentsByKBID(kbID, "parsed")
+	if err != nil {
+		// Keep the stored total rather than overwriting it with 0.
+		log.Printf("Warning: Failed to list parsed documents of knowledge base %s: %v", kbID, err)
+		return
+	}
+	totalChunks := 0
+	for _, doc := range docs {
+		totalChunks += doc.ChunkCount
+	}
+	s.repo.Update(kbID, map[string]interface{}{"chunk_count": totalChunks})
+}
+
 func getFileExt(filename string) string {
+	// Known extensions first: the fallback below always takes the last four
+	// bytes, which is wrong for any extension that is not four bytes long.
+	exts := []string{".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".html"}
+	for _, ext := range exts {
+		if len(filename) >= len(ext) && filename[len(filename)-len(ext):] == ext {
+			return ext
+		}
+	}
 	if len(filename) > 4 {
 		return filename[len(filename)-4:]
 	}