Files
X-Agents/server/internal/service/knowledge_service.go

346 lines
8.6 KiB
Go
Raw Normal View History

package service
import (
"bytes"
"encoding/json"
"mime/multipart"
"net/http"
"time"
"github.com/google/uuid"
"x-agents/server/internal/model"
"x-agents/server/internal/repository"
)
type KnowledgeService struct {
repo *repository.KnowledgeRepository
modelRepo *repository.ModelRepository
uploadService *UploadService
pythonServiceURL string
}
func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService, pythonServiceURL string) *KnowledgeService {
return &KnowledgeService{
repo: repo,
modelRepo: modelRepo,
uploadService: uploadService,
pythonServiceURL: pythonServiceURL,
}
}
// Create 创建知识库
func (s *KnowledgeService) Create(req model.CreateKnowledgeRequest) (*model.KnowledgeBase, error) {
// 验证 LLM 模型存在
if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil {
return nil, err
}
// 验证 Embedding 模型存在
if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil {
return nil, err
}
kb := &model.KnowledgeBase{
ID: uuid.New().String(),
Name: req.Name,
Description: req.Description,
LLMModelID: req.LLMModelID,
EmbeddingModelID: req.EmbeddingModelID,
ParsingConfig: req.ParsingConfig,
Status: "active",
DocumentCount: 0,
ChunkCount: 0,
}
// 设置默认值
if kb.ParsingConfig.Engine == "" {
kb.ParsingConfig.Engine = "markitdown"
}
if kb.ParsingConfig.EnablePDF != false {
kb.ParsingConfig.EnablePDF = true
}
if kb.ParsingConfig.Pandoc != false {
kb.ParsingConfig.Pandoc = true
}
if err := s.repo.Create(kb); err != nil {
return nil, err
}
return kb, nil
}
// List 获取知识库列表
func (s *KnowledgeService) List() ([]model.KnowledgeBase, error) {
return s.repo.FindAll()
}
// GetByID 获取知识库详情
func (s *KnowledgeService) GetByID(id string) (*model.KnowledgeBase, error) {
return s.repo.FindByID(id)
}
// Update 更新知识库
func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) error {
updates := make(map[string]interface{})
if req.Name != "" {
updates["name"] = req.Name
}
if req.Description != "" {
updates["description"] = req.Description
}
if req.LLMModelID != "" {
// 验证模型存在
if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil {
return err
}
updates["llm_model_id"] = req.LLMModelID
}
if req.EmbeddingModelID != "" {
// 验证模型存在
if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil {
return err
}
updates["embedding_model_id"] = req.EmbeddingModelID
}
if req.ParsingConfig.Engine != "" {
updates["parsing_config"] = req.ParsingConfig
}
if req.Status != "" {
updates["status"] = req.Status
}
return s.repo.Update(id, updates)
}
// Delete 删除知识库
func (s *KnowledgeService) Delete(id string) error {
// 先删除关联的文档
if err := s.repo.DeleteDocumentsByKBID(id); err != nil {
return err
}
return s.repo.Delete(id)
}
// ListDocuments 获取知识库下的文档列表
func (s *KnowledgeService) ListDocuments(kbID string, status string) ([]model.KnowledgeDocument, error) {
return s.repo.FindDocumentsByKBID(kbID, status)
}
// UploadDocument 上传文档到知识库
func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, string, error) {
// 验证知识库存在
kb, err := s.repo.FindByID(kbID)
if err != nil {
return nil, "", err
}
// 上传文件
result, err := s.uploadService.Upload(file)
if err != nil {
return nil, "", err
}
if !result.Success {
return nil, "", nil
}
// 获取文件扩展名
ext := getFileExt(file.Filename)
// 创建文档记录
doc := &model.KnowledgeDocument{
ID: uuid.New().String(),
KnowledgeBaseID: kbID,
Name: file.Filename,
FileKey: result.FileKey + ext,
FileURL: result.URL,
FileSize: file.Size,
Status: "parsing",
UploadedAt: time.Now(),
}
if err := s.repo.CreateDocument(doc); err != nil {
return nil, "", err
}
// 更新知识库文档数
s.updateDocumentCount(kbID)
// 异步调用 Python 服务解析文档
go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)
return doc, result.URL, nil
}
// parseDocument 调用 Python 服务解析文档
func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config model.ParsingConfig) {
// 构建请求
reqBody := map[string]interface{}{
"file_url": fileURL,
"engine": config.Engine,
}
if config.Engine == "docling" && config.DoclingURL != "" {
reqBody["docling_url"] = config.DoclingURL
}
body, _ := json.Marshal(reqBody)
resp, err := http.Post(s.pythonServiceURL+"/parse", "application/json", bytes.NewBuffer(body))
if err != nil {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
return
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
return
}
// 解析响应
var result map[string]interface{}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
return
}
if success, ok := result["success"].(bool); ok && success {
// 解析成功,更新状态
chunks := []string{}
if c, ok := result["chunks"].([]interface{}); ok {
for _, chunk := range c {
if c, ok := chunk.(string); ok {
chunks = append(chunks, c)
}
}
}
s.repo.UpdateDocument(docID, map[string]interface{}{
"status": "parsed",
"chunk_count": len(chunks),
})
// 更新知识库的 chunk_count
s.updateChunkCount(kbID)
} else {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
}
}
// DeleteDocument 删除文档
func (s *KnowledgeService) DeleteDocument(kbID, docID string) error {
// 验证文档存在
doc, err := s.repo.FindDocumentByID(docID)
if err != nil {
return err
}
if doc.KnowledgeBaseID != kbID {
return nil
}
// 删除文件
if doc.FileKey != "" {
s.uploadService.DeleteFile(doc.FileKey)
}
// 删除文档记录
if err := s.repo.DeleteDocument(docID); err != nil {
return err
}
// 更新知识库文档数
s.updateDocumentCount(kbID)
return nil
}
// ReparseDocument 重新解析文档
func (s *KnowledgeService) ReparseDocument(kbID, docID string) error {
// 验证文档存在
doc, err := s.repo.FindDocumentByID(docID)
if err != nil {
return err
}
if doc.KnowledgeBaseID != kbID {
return nil
}
// 获取知识库配置
kb, err := s.repo.FindByID(kbID)
if err != nil {
return err
}
// 获取文件 URL
fileURL, err := s.uploadService.GetFileURL(doc.FileKey)
if err != nil {
return err
}
// 重置状态为 parsing
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"})
// 异步重新解析
go s.parseDocument(kbID, docID, fileURL, kb.ParsingConfig)
return nil
}
// GetDocumentPreview 获取文档预览
func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*model.DocumentPreviewResponse, error) {
// 验证文档存在
doc, err := s.repo.FindDocumentByID(docID)
if err != nil {
return nil, err
}
if doc.KnowledgeBaseID != kbID {
return nil, nil
}
// 如果已解析,返回解析内容;否则返回文件 URL
if doc.Status == "parsed" {
// TODO: 从存储中读取解析内容(可以存到数据库或文件)
// 暂时返回文件 URL
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: fileURL,
}, nil
}
// 未解析,返回文件 URL
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: fileURL,
}, nil
}
// updateDocumentCount 更新知识库文档数
func (s *KnowledgeService) updateDocumentCount(kbID string) {
count, _ := s.repo.CountDocumentsByKBID(kbID)
s.repo.Update(kbID, map[string]interface{}{"document_count": int(count)})
}
// updateChunkCount 更新知识库 chunk 数
func (s *KnowledgeService) updateChunkCount(kbID string) {
docs, _ := s.repo.FindDocumentsByKBID(kbID, "parsed")
totalChunks := 0
for _, doc := range docs {
totalChunks += doc.ChunkCount
}
s.repo.Update(kbID, map[string]interface{}{"chunk_count": totalChunks})
}
func getFileExt(filename string) string {
exts := []string{".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".html"}
for _, ext := range exts {
if len(filename) >= len(ext) && filename[len(filename)-len(ext):] == ext {
return ext
}
}
if len(filename) > 4 {
return filename[len(filename)-4:]
}
return ""
}