Files
X-Agents/server/internal/service/knowledge_service.go
DESKTOP-72TV0V4\caoxiaozhu bb04c4afd0 feat: 完善知识库数据模型和服务
- 添加知识库更多字段配置
- 优化知识库服务逻辑
- 添加文档解析相关接口

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-08 23:02:09 +08:00

420 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package service
import (
"bytes"
"encoding/json"
"log"
"mime/multipart"
"net/http"
"os"
"strings"
"time"
"github.com/google/uuid"
"x-agents/server/internal/model"
"x-agents/server/internal/repository"
)
// debugLog 专用调试日志
var knowledgeDebugLog *log.Logger
func init() {
debugFile, _ := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
knowledgeDebugLog = log.New(debugFile, "", log.Ldate|log.Ltime)
}
type KnowledgeService struct {
repo *repository.KnowledgeRepository
modelRepo *repository.ModelRepository
uploadService *UploadService
pythonServiceURL string
}
func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService, pythonServiceURL string) *KnowledgeService {
return &KnowledgeService{
repo: repo,
modelRepo: modelRepo,
uploadService: uploadService,
pythonServiceURL: pythonServiceURL,
}
}
// Create 创建知识库
func (s *KnowledgeService) Create(req model.CreateKnowledgeRequest) (*model.KnowledgeBase, error) {
// 验证 LLM 模型存在
if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil {
return nil, err
}
// 验证 Embedding 模型存在
if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil {
return nil, err
}
kb := &model.KnowledgeBase{
ID: uuid.New().String(),
Name: req.Name,
Description: req.Description,
LLMModelID: req.LLMModelID,
EmbeddingModelID: req.EmbeddingModelID,
ParsingConfig: req.ParsingConfig,
StorageConfig: req.StorageConfig,
Status: "active",
DocumentCount: 0,
ChunkCount: 0,
}
// 设置默认值
if kb.ParsingConfig.Engine == "" {
kb.ParsingConfig.Engine = "markitdown"
}
if kb.ParsingConfig.EnablePDF != false {
kb.ParsingConfig.EnablePDF = true
}
if kb.ParsingConfig.Pandoc != false {
kb.ParsingConfig.Pandoc = true
}
if err := s.repo.Create(kb); err != nil {
return nil, err
}
return kb, nil
}
// List 获取知识库列表
func (s *KnowledgeService) List() ([]model.KnowledgeBase, error) {
return s.repo.FindAll()
}
// GetByID 获取知识库详情
func (s *KnowledgeService) GetByID(id string) (*model.KnowledgeBase, error) {
return s.repo.FindByID(id)
}
// Update 更新知识库
func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) error {
updates := make(map[string]interface{})
if req.Name != "" {
updates["name"] = req.Name
}
if req.Description != "" {
updates["description"] = req.Description
}
if req.LLMModelID != "" {
// 验证模型存在
if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil {
return err
}
updates["llm_model_id"] = req.LLMModelID
}
if req.EmbeddingModelID != "" {
// 验证模型存在
if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil {
return err
}
updates["embedding_model_id"] = req.EmbeddingModelID
}
if req.ParsingConfig.Engine != "" {
updates["parsing_config"] = req.ParsingConfig
}
if req.StorageConfig.Type != "" {
updates["storage_config"] = req.StorageConfig
}
if req.Status != "" {
updates["status"] = req.Status
}
return s.repo.Update(id, updates)
}
// Delete 删除知识库
func (s *KnowledgeService) Delete(id string) error {
// 先删除关联的文档
if err := s.repo.DeleteDocumentsByKBID(id); err != nil {
return err
}
return s.repo.Delete(id)
}
// ListDocuments 获取知识库下的文档列表
func (s *KnowledgeService) ListDocuments(kbID string, status string) ([]model.KnowledgeDocument, error) {
docs, err := s.repo.FindDocumentsByKBID(kbID, status)
if err != nil {
knowledgeDebugLog.Printf("[Knowledge ListDocuments] 错误: %v", err)
return nil, err
}
knowledgeDebugLog.Printf("[Knowledge ListDocuments] kbID=%s, count=%d", kbID, len(docs))
for i, doc := range docs {
knowledgeDebugLog.Printf("[Knowledge ListDocuments] doc[%d]: id=%s, name=%s, file_url=%q", i, doc.ID, doc.Name, doc.FileURL)
// 如果是 MinIO 内网地址,转换为代理 URL
if doc.FileURL != "" && (strings.Contains(doc.FileURL, "10.10.10.189") || strings.Contains(doc.FileURL, ":9768")) {
// 从 URL 中提取 fileKey包含扩展名
parts := strings.Split(doc.FileURL, "/")
if len(parts) > 0 {
fileName := parts[len(parts)-1]
fileKey := strings.Split(fileName, "?")[0]
doc.FileURL = "/api/file_proxy?kb_id=" + kbID + "&key=" + fileKey
}
}
}
return docs, nil
}
// UploadDocument 上传文档到知识库
func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, string, error) {
knowledgeDebugLog.Printf("[Knowledge Upload] 开始上传文件: kbID=%s, filename=%s, size=%d", kbID, file.Filename, file.Size)
// 验证知识库存在
kb, err := s.repo.FindByID(kbID)
if err != nil {
knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 知识库不存在, kbID=%s, err=%v", kbID, err)
return nil, "", err
}
knowledgeDebugLog.Printf("[Knowledge Upload] 知识库配置: kbID=%s, storage_config.Type=%q, storage_config=%+v",
kbID, kb.StorageConfig.Type, kb.StorageConfig)
// 上传文件(根据知识库的 storage_config 选择存储方式)
var result *UploadResponse
if kb.StorageConfig.Type != "" {
// 使用知识库的存储配置
knowledgeDebugLog.Printf("[Knowledge Upload] 使用知识库存储配置: type=%s, endpoint=%s, bucket=%s",
kb.StorageConfig.Type, kb.StorageConfig.Endpoint, kb.StorageConfig.Bucket)
result, err = s.uploadService.UploadWithConfig(file, kb.StorageConfig)
} else {
// 使用全局配置
knowledgeDebugLog.Printf("[Knowledge Upload] 使用全局存储配置")
result, err = s.uploadService.Upload(file)
}
if err != nil {
knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 上传失败, err=%v", err)
return nil, "", err
}
knowledgeDebugLog.Printf("[Knowledge Upload] 上传结果: success=%v, url=%s, message=%s",
result.Success, result.URL, result.Message)
if !result.Success {
knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 上传返回失败, message=%s", result.Message)
return nil, "", nil
}
// 获取文件扩展名
ext := getFileExt(file.Filename)
knowledgeDebugLog.Printf("[Knowledge Upload] 准备创建文档记录: result.URL=%q, fileKey=%s, ext=%s",
result.URL, result.FileKey, ext)
// 创建文档记录
doc := &model.KnowledgeDocument{
ID: uuid.New().String(),
KnowledgeBaseID: kbID,
Name: file.Filename,
FileKey: result.FileKey + ext,
FileURL: result.URL,
FileSize: file.Size,
Status: "parsing",
UploadedAt: time.Now(),
}
knowledgeDebugLog.Printf("[Knowledge Upload] 文档创建完成: doc.FileURL=%q", doc.FileURL)
if err := s.repo.CreateDocument(doc); err != nil {
return nil, "", err
}
// 更新知识库文档数
s.updateDocumentCount(kbID)
// 异步调用 Python 服务解析文档
go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)
return doc, result.URL, nil
}
// parseDocument 调用 Python 服务解析文档
func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config model.ParsingConfig) {
// 构建请求
reqBody := map[string]interface{}{
"file_url": fileURL,
"engine": config.Engine,
}
if config.Engine == "docling" && config.DoclingURL != "" {
reqBody["docling_url"] = config.DoclingURL
}
body, _ := json.Marshal(reqBody)
resp, err := http.Post(s.pythonServiceURL+"/parse", "application/json", bytes.NewBuffer(body))
if err != nil {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
return
}
defer resp.Body.Close()
if resp.StatusCode != 200 {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
return
}
// 解析响应
var result map[string]interface{}
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
return
}
if success, ok := result["success"].(bool); ok && success {
// 解析成功,更新状态
chunks := []string{}
if c, ok := result["chunks"].([]interface{}); ok {
for _, chunk := range c {
if c, ok := chunk.(string); ok {
chunks = append(chunks, c)
}
}
}
s.repo.UpdateDocument(docID, map[string]interface{}{
"status": "parsed",
"chunk_count": len(chunks),
})
// 更新知识库的 chunk_count
s.updateChunkCount(kbID)
} else {
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
}
}
// DeleteDocument 删除文档
func (s *KnowledgeService) DeleteDocument(kbID, docID string) error {
// 验证文档存在
doc, err := s.repo.FindDocumentByID(docID)
if err != nil {
return err
}
if doc.KnowledgeBaseID != kbID {
return nil
}
// 获取知识库配置
kb, err := s.repo.FindByID(kbID)
if err != nil {
return err
}
// 删除文件
if doc.FileKey != "" {
knowledgeDebugLog.Printf("[Knowledge DeleteDocument] 删除文件: kbID=%s, docID=%s, fileKey=%s, storageType=%s",
kbID, docID, doc.FileKey, kb.StorageConfig.Type)
if kb.StorageConfig.Type != "" {
// 使用知识库的存储配置删除
s.uploadService.DeleteFileWithConfig(doc.FileKey, kb.StorageConfig)
} else {
// 使用全局配置删除
s.uploadService.DeleteFile(doc.FileKey)
}
}
// 删除文档记录
if err := s.repo.DeleteDocument(docID); err != nil {
return err
}
// 更新知识库文档数
s.updateDocumentCount(kbID)
return nil
}
// ReparseDocument 重新解析文档
func (s *KnowledgeService) ReparseDocument(kbID, docID string) error {
// 验证文档存在
doc, err := s.repo.FindDocumentByID(docID)
if err != nil {
return err
}
if doc.KnowledgeBaseID != kbID {
return nil
}
// 获取知识库配置
kb, err := s.repo.FindByID(kbID)
if err != nil {
return err
}
// 获取文件 URL
fileURL, err := s.uploadService.GetFileURL(doc.FileKey)
if err != nil {
return err
}
// 重置状态为 parsing
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"})
// 异步重新解析
go s.parseDocument(kbID, docID, fileURL, kb.ParsingConfig)
return nil
}
// GetDocumentPreview 获取文档预览
func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*model.DocumentPreviewResponse, error) {
// 验证文档存在
doc, err := s.repo.FindDocumentByID(docID)
if err != nil {
return nil, err
}
if doc.KnowledgeBaseID != kbID {
return nil, nil
}
// 如果已解析,返回解析内容;否则返回文件 URL
if doc.Status == "parsed" {
// TODO: 从存储中读取解析内容(可以存到数据库或文件)
// 暂时返回文件 URL
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: fileURL,
}, nil
}
// 未解析,返回文件 URL
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
return &model.DocumentPreviewResponse{
TotalPages: 1,
CurrentPage: page,
Content: fileURL,
}, nil
}
// updateDocumentCount 更新知识库文档数
func (s *KnowledgeService) updateDocumentCount(kbID string) {
count, _ := s.repo.CountDocumentsByKBID(kbID)
s.repo.Update(kbID, map[string]interface{}{"document_count": int(count)})
}
// updateChunkCount 更新知识库 chunk 数
func (s *KnowledgeService) updateChunkCount(kbID string) {
docs, _ := s.repo.FindDocumentsByKBID(kbID, "parsed")
totalChunks := 0
for _, doc := range docs {
totalChunks += doc.ChunkCount
}
s.repo.Update(kbID, map[string]interface{}{"chunk_count": totalChunks})
}
func getFileExt(filename string) string {
exts := []string{".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".html"}
for _, ext := range exts {
if len(filename) >= len(ext) && filename[len(filename)-len(ext):] == ext {
return ext
}
}
if len(filename) > 4 {
return filename[len(filename)-4:]
}
return ""
}