// File: server/internal/model/knowledge_info.go (package model).
// Imports used below: database/sql/driver, encoding/json, errors, time.

// ParsingConfig holds the document-parsing settings of a knowledge base.
// It is stored in a single JSON column through the Scan and Value methods.
type ParsingConfig struct {
	Engine     string `json:"engine"`      // parsing engine: markitdown / docling
	DoclingURL string `json:"docling_url"` // Docling service URL
	EnablePDF  bool   `json:"enable_pdf"`  // whether PDF parsing is enabled
	Pandoc     bool   `json:"pandoc"`      // whether Pandoc is enabled
}

// Scan implements sql.Scanner by decoding a JSON document into p.
//
// Accepted sources are nil, []byte and string; drivers differ in which of the
// latter two they hand back for JSON/TEXT columns. A nil or empty source
// leaves p untouched so that rows without a stored config keep whatever
// values p already had. Any other source type, or malformed JSON, is an error.
func (p *ParsingConfig) Scan(value interface{}) error {
	var raw []byte
	switch v := value.(type) {
	case nil:
		return nil
	case []byte:
		raw = v
	case string:
		raw = []byte(v)
	default:
		return errors.New("parsing config scan: expected []byte or string")
	}
	if len(raw) == 0 {
		// An empty column is treated like NULL rather than as invalid JSON.
		return nil
	}
	return json.Unmarshal(raw, p)
}

// Value implements driver.Valuer by encoding p as JSON bytes.
func (p ParsingConfig) Value() (driver.Value, error) {
	return json.Marshal(p)
}

// KnowledgeBase is the persisted knowledge-base record.
type KnowledgeBase struct {
	ID               string        `json:"id" gorm:"primaryKey;type:varchar(36)"`
	Name             string        `json:"name" gorm:"type:varchar(255);not null"`
	Description      string        `json:"description" gorm:"type:text"`
	LLMModelID       string        `json:"llm_model_id" gorm:"type:varchar(36);not null"`
	EmbeddingModelID string        `json:"embedding_model_id" gorm:"type:varchar(36);not null"`
	ParsingConfig    ParsingConfig `json:"parsing_config" gorm:"type:json"`
	Status           string        `json:"status" gorm:"type:varchar(20);default:active"` // active / inactive
	DocumentCount    int           `json:"document_count" gorm:"default:0"`
	ChunkCount       int           `json:"chunk_count" gorm:"default:0"`
	CreatedAt        time.Time     `json:"created_at"`
	UpdatedAt        time.Time     `json:"updated_at"`
}

// TableName returns the database table backing KnowledgeBase.
func (KnowledgeBase) TableName() string {
	return "knowledge_base"
}

// KnowledgeDocument is a document that belongs to a knowledge base.
type KnowledgeDocument struct {
	ID              string    `json:"id" gorm:"primaryKey;type:varchar(36)"`
	KnowledgeBaseID string    `json:"knowledge_base_id" gorm:"type:varchar(36);not null;index"`
	Name            string    `json:"name" gorm:"type:varchar(255);not null"`
	FileKey         string    `json:"file_key" gorm:"type:varchar(500)"`
	FileSize        int64     `json:"file_size" gorm:"type:bigint;default:0"`
	Status          string    `json:"status" gorm:"type:varchar(20);default:parsing"` // parsing / parsed / failed
	ChunkCount      int       `json:"chunk_count" gorm:"default:0"`
	UploadedAt      time.Time `json:"uploaded_at"`
}

// TableName returns the database table backing KnowledgeDocument.
func (KnowledgeDocument) TableName() string {
	return "knowledge_document"
}

// ========== Request/Response ==========

// CreateKnowledgeRequest is the payload for creating a knowledge base.
type CreateKnowledgeRequest struct {
	Name             string        `json:"name" binding:"required"`
	Description      string        `json:"description"`
	LLMModelID       string        `json:"llm_model_id" binding:"required"`
	EmbeddingModelID string        `json:"embedding_model_id" binding:"required"`
	ParsingConfig    ParsingConfig `json:"parsing_config" binding:"required"`
}

// UpdateKnowledgeRequest is the payload for updating a knowledge base.
// None of the fields carries a binding constraint.
type UpdateKnowledgeRequest struct {
	Name             string        `json:"name"`
	Description      string        `json:"description"`
	LLMModelID       string        `json:"llm_model_id"`
	EmbeddingModelID string        `json:"embedding_model_id"`
	ParsingConfig    ParsingConfig `json:"parsing_config"`
	Status           string        `json:"status"`
}

// KnowledgeListResponse wraps a list of knowledge bases under the "data" key.
type KnowledgeListResponse struct {
	List []KnowledgeBase `json:"data"`
}

// KnowledgeDetailResponse wraps a single knowledge base under the "data" key.
type KnowledgeDetailResponse struct {
	KnowledgeBase KnowledgeBase `json:"data"`
}

// DocumentListResponse wraps a list of documents under the "data" key.
type DocumentListResponse struct {
	List []KnowledgeDocument `json:"data"`
}

// UploadDocumentResponse is the response returned after a document upload.
type UploadDocumentResponse struct {
	Success  bool              `json:"success"`
	ID       string            `json:"id"`
	Document KnowledgeDocument `json:"document"`
	Message  string            `json:"message"`
}

// DocumentPreviewResponse is the response for a document preview request.
type DocumentPreviewResponse struct {
	TotalPages  int    `json:"total_pages"`
	CurrentPage int    `json:"current_page"`
	Content     string `json:"content"`
}
--git a/server/internal/repository/knowledge_repo.go b/server/internal/repository/knowledge_repo.go new file mode 100644 index 0000000..99691af --- /dev/null +++ b/server/internal/repository/knowledge_repo.go @@ -0,0 +1,97 @@ +package repository + +import ( + "x-agents/server/internal/model" + + "gorm.io/gorm" +) + +type KnowledgeRepository struct { + db *gorm.DB +} + +func NewKnowledgeRepository(db *gorm.DB) *KnowledgeRepository { + return &KnowledgeRepository{db: db} +} + +// Create 创建知识库 +func (r *KnowledgeRepository) Create(kb *model.KnowledgeBase) error { + return r.db.Create(kb).Error +} + +// FindByID 根据ID查询 +func (r *KnowledgeRepository) FindByID(id string) (*model.KnowledgeBase, error) { + var kb model.KnowledgeBase + err := r.db.First(&kb, "id = ?", id).Error + if err != nil { + return nil, err + } + return &kb, nil +} + +// FindAll 查询所有 +func (r *KnowledgeRepository) FindAll() ([]model.KnowledgeBase, error) { + var list []model.KnowledgeBase + err := r.db.Order("created_at DESC").Find(&list).Error + return list, err +} + +// Update 更新知识库 +func (r *KnowledgeRepository) Update(id string, updates map[string]interface{}) error { + return r.db.Model(&model.KnowledgeBase{}).Where("id = ?", id).Updates(updates).Error +} + +// Delete 删除知识库 +func (r *KnowledgeRepository) Delete(id string) error { + return r.db.Delete(&model.KnowledgeBase{}, "id = ?", id).Error +} + +// ========== Document ========== + +// CreateDocument 创建文档 +func (r *KnowledgeRepository) CreateDocument(doc *model.KnowledgeDocument) error { + return r.db.Create(doc).Error +} + +// FindDocumentByID 根据ID查询文档 +func (r *KnowledgeRepository) FindDocumentByID(id string) (*model.KnowledgeDocument, error) { + var doc model.KnowledgeDocument + err := r.db.First(&doc, "id = ?", id).Error + if err != nil { + return nil, err + } + return &doc, nil +} + +// FindDocumentsByKBID 根据知识库ID查询文档 +func (r *KnowledgeRepository) FindDocumentsByKBID(kbID string, status string) ([]model.KnowledgeDocument, error) { + var 
list []model.KnowledgeDocument + query := r.db.Where("knowledge_base_id = ?", kbID).Order("uploaded_at DESC") + if status != "" && status != "all" { + query = query.Where("status = ?", status) + } + err := query.Find(&list).Error + return list, err +} + +// UpdateDocument 更新文档 +func (r *KnowledgeRepository) UpdateDocument(id string, updates map[string]interface{}) error { + return r.db.Model(&model.KnowledgeDocument{}).Where("id = ?", id).Updates(updates).Error +} + +// DeleteDocument 删除文档 +func (r *KnowledgeRepository) DeleteDocument(id string) error { + return r.db.Delete(&model.KnowledgeDocument{}, "id = ?", id).Error +} + +// DeleteDocumentsByKBID 删除知识库下所有文档 +func (r *KnowledgeRepository) DeleteDocumentsByKBID(kbID string) error { + return r.db.Delete(&model.KnowledgeDocument{}, "knowledge_base_id = ?", kbID).Error +} + +// CountDocumentsByKBID 统计知识库下文档数 +func (r *KnowledgeRepository) CountDocumentsByKBID(kbID string) (int64, error) { + var count int64 + err := r.db.Model(&model.KnowledgeDocument{}).Where("knowledge_base_id = ?", kbID).Count(&count).Error + return count, err +} diff --git a/server/internal/service/knowledge_service.go b/server/internal/service/knowledge_service.go new file mode 100644 index 0000000..c23f703 --- /dev/null +++ b/server/internal/service/knowledge_service.go @@ -0,0 +1,235 @@ +package service + +import ( + "mime/multipart" + "time" + + "github.com/google/uuid" + "x-agents/server/internal/model" + "x-agents/server/internal/repository" +) + +type KnowledgeService struct { + repo *repository.KnowledgeRepository + modelRepo *repository.ModelRepository + uploadService *UploadService +} + +func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService) *KnowledgeService { + return &KnowledgeService{ + repo: repo, + modelRepo: modelRepo, + uploadService: uploadService, + } +} + +// Create 创建知识库 +func (s *KnowledgeService) Create(req model.CreateKnowledgeRequest) 
(*model.KnowledgeBase, error) { + // 验证 LLM 模型存在 + if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil { + return nil, err + } + // 验证 Embedding 模型存在 + if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil { + return nil, err + } + + kb := &model.KnowledgeBase{ + ID: uuid.New().String(), + Name: req.Name, + Description: req.Description, + LLMModelID: req.LLMModelID, + EmbeddingModelID: req.EmbeddingModelID, + ParsingConfig: req.ParsingConfig, + Status: "active", + DocumentCount: 0, + ChunkCount: 0, + } + + // 设置默认值 + if kb.ParsingConfig.EnablePDF == false && kb.ParsingConfig.EnablePDF != true { + kb.ParsingConfig.EnablePDF = true + } + if kb.ParsingConfig.Pandoc == false && kb.ParsingConfig.Pandoc != true { + kb.ParsingConfig.Pandoc = true + } + + if err := s.repo.Create(kb); err != nil { + return nil, err + } + return kb, nil +} + +// List 获取知识库列表 +func (s *KnowledgeService) List() ([]model.KnowledgeBase, error) { + return s.repo.FindAll() +} + +// GetByID 获取知识库详情 +func (s *KnowledgeService) GetByID(id string) (*model.KnowledgeBase, error) { + return s.repo.FindByID(id) +} + +// Update 更新知识库 +func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) error { + updates := make(map[string]interface{}) + if req.Name != "" { + updates["name"] = req.Name + } + if req.Description != "" { + updates["description"] = req.Description + } + if req.LLMModelID != "" { + // 验证模型存在 + if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil { + return err + } + updates["llm_model_id"] = req.LLMModelID + } + if req.EmbeddingModelID != "" { + // 验证模型存在 + if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil { + return err + } + updates["embedding_model_id"] = req.EmbeddingModelID + } + if req.ParsingConfig.Engine != "" { + updates["parsing_config"] = req.ParsingConfig + } + if req.Status != "" { + updates["status"] = req.Status + } + + return s.repo.Update(id, updates) +} + +// Delete 删除知识库 +func (s *KnowledgeService) 
Delete(id string) error { + // 先删除关联的文档 + if err := s.repo.DeleteDocumentsByKBID(id); err != nil { + return err + } + return s.repo.Delete(id) +} + +// ListDocuments 获取知识库下的文档列表 +func (s *KnowledgeService) ListDocuments(kbID string, status string) ([]model.KnowledgeDocument, error) { + return s.repo.FindDocumentsByKBID(kbID, status) +} + +// UploadDocument 上传文档到知识库 +func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, error) { + // 验证知识库存在 + _, err := s.repo.FindByID(kbID) + if err != nil { + return nil, err + } + + // 上传文件 + result, err := s.uploadService.Upload(file) + if err != nil { + return nil, err + } + if !result.Success { + return nil, nil + } + + // 创建文档记录 + doc := &model.KnowledgeDocument{ + ID: uuid.New().String(), + KnowledgeBaseID: kbID, + Name: file.Filename, + FileKey: result.FileKey, + FileSize: file.Size, + Status: "parsing", + UploadedAt: time.Now(), + } + + if err := s.repo.CreateDocument(doc); err != nil { + return nil, err + } + + // 更新知识库文档数 + s.updateDocumentCount(kbID) + + return doc, nil +} + +// DeleteDocument 删除文档 +func (s *KnowledgeService) DeleteDocument(kbID, docID string) error { + // 验证文档存在 + doc, err := s.repo.FindDocumentByID(docID) + if err != nil { + return err + } + if doc.KnowledgeBaseID != kbID { + return nil + } + + // 删除文件 + if doc.FileKey != "" { + s.uploadService.DeleteFile(doc.FileKey + getFileExt(doc.Name)) + } + + // 删除文档记录 + if err := s.repo.DeleteDocument(docID); err != nil { + return err + } + + // 更新知识库文档数 + s.updateDocumentCount(kbID) + + return nil +} + +// ReparseDocument 重新解析文档 +func (s *KnowledgeService) ReparseDocument(kbID, docID string) error { + // 验证文档存在 + doc, err := s.repo.FindDocumentByID(docID) + if err != nil { + return err + } + if doc.KnowledgeBaseID != kbID { + return nil + } + + // 重置状态为 parsing + return s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"}) +} + +// GetDocumentPreview 获取文档预览 +func (s *KnowledgeService) 
GetDocumentPreview(kbID, docID string, page int) (*model.DocumentPreviewResponse, error) { + // 验证文档存在 + doc, err := s.repo.FindDocumentByID(docID) + if err != nil { + return nil, err + } + if doc.KnowledgeBaseID != kbID { + return nil, nil + } + + // 简单实现:返回文件 URL(实际应由 Python 服务处理) + fileURL, err := s.uploadService.GetFileURL(doc.FileKey + getFileExt(doc.Name)) + if err != nil { + return nil, err + } + + return &model.DocumentPreviewResponse{ + TotalPages: 1, + CurrentPage: page, + Content: fileURL, + }, nil +} + +// updateDocumentCount 更新知识库文档数 +func (s *KnowledgeService) updateDocumentCount(kbID string) { + count, _ := s.repo.CountDocumentsByKBID(kbID) + s.repo.Update(kbID, map[string]interface{}{"document_count": int(count)}) +} + +func getFileExt(filename string) string { + if len(filename) > 4 { + return filename[len(filename)-4:] + } + return "" +}