2026-03-08 16:56:24 +08:00
|
|
|
|
package service
|
|
|
|
|
|
|
|
|
|
|
|
import (
	"bytes"
	"encoding/json"
	"errors"
	"log"
	"mime/multipart"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/google/uuid"

	"x-agents/server/internal/model"
	"x-agents/server/internal/repository"
)
|
|
|
|
|
|
|
2026-03-08 23:02:09 +08:00
|
|
|
|
// debugLog 专用调试日志
|
|
|
|
|
|
var knowledgeDebugLog *log.Logger
|
|
|
|
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
|
|
debugFile, _ := os.OpenFile("logs/debug.log", os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
|
|
|
|
|
|
knowledgeDebugLog = log.New(debugFile, "", log.Ldate|log.Ltime)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 16:56:24 +08:00
|
|
|
|
type KnowledgeService struct {
|
2026-03-09 12:50:27 +08:00
|
|
|
|
repo *repository.KnowledgeRepository
|
|
|
|
|
|
modelRepo *repository.ModelRepository
|
|
|
|
|
|
uploadService *UploadService
|
2026-03-08 20:34:15 +08:00
|
|
|
|
pythonServiceURL string
|
2026-03-09 12:50:27 +08:00
|
|
|
|
aiCoreClient *AICoreClient
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-09 12:50:27 +08:00
|
|
|
|
func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService, pythonServiceURL, aiCoreServiceAddr string) *KnowledgeService {
|
|
|
|
|
|
aiCoreClient, _ := NewAICoreClient(aiCoreServiceAddr)
|
2026-03-08 16:56:24 +08:00
|
|
|
|
return &KnowledgeService{
|
2026-03-09 12:50:27 +08:00
|
|
|
|
repo: repo,
|
|
|
|
|
|
modelRepo: modelRepo,
|
|
|
|
|
|
uploadService: uploadService,
|
2026-03-08 20:34:15 +08:00
|
|
|
|
pythonServiceURL: pythonServiceURL,
|
2026-03-09 12:50:27 +08:00
|
|
|
|
aiCoreClient: aiCoreClient,
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Create 创建知识库
|
|
|
|
|
|
func (s *KnowledgeService) Create(req model.CreateKnowledgeRequest) (*model.KnowledgeBase, error) {
|
|
|
|
|
|
// 验证 LLM 模型存在
|
|
|
|
|
|
if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
|
|
|
|
|
// 验证 Embedding 模型存在
|
|
|
|
|
|
if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
kb := &model.KnowledgeBase{
|
2026-03-08 20:34:15 +08:00
|
|
|
|
ID: uuid.New().String(),
|
|
|
|
|
|
Name: req.Name,
|
|
|
|
|
|
Description: req.Description,
|
|
|
|
|
|
LLMModelID: req.LLMModelID,
|
|
|
|
|
|
EmbeddingModelID: req.EmbeddingModelID,
|
|
|
|
|
|
ParsingConfig: req.ParsingConfig,
|
2026-03-08 23:02:09 +08:00
|
|
|
|
StorageConfig: req.StorageConfig,
|
2026-03-08 20:34:15 +08:00
|
|
|
|
Status: "active",
|
|
|
|
|
|
DocumentCount: 0,
|
|
|
|
|
|
ChunkCount: 0,
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 设置默认值
|
2026-03-08 20:34:15 +08:00
|
|
|
|
if kb.ParsingConfig.Engine == "" {
|
|
|
|
|
|
kb.ParsingConfig.Engine = "markitdown"
|
|
|
|
|
|
}
|
|
|
|
|
|
if kb.ParsingConfig.EnablePDF != false {
|
2026-03-08 16:56:24 +08:00
|
|
|
|
kb.ParsingConfig.EnablePDF = true
|
|
|
|
|
|
}
|
2026-03-08 20:34:15 +08:00
|
|
|
|
if kb.ParsingConfig.Pandoc != false {
|
2026-03-08 16:56:24 +08:00
|
|
|
|
kb.ParsingConfig.Pandoc = true
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if err := s.repo.Create(kb); err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
|
|
|
|
|
return kb, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// List 获取知识库列表
|
|
|
|
|
|
func (s *KnowledgeService) List() ([]model.KnowledgeBase, error) {
|
|
|
|
|
|
return s.repo.FindAll()
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// GetByID 获取知识库详情
|
|
|
|
|
|
func (s *KnowledgeService) GetByID(id string) (*model.KnowledgeBase, error) {
|
|
|
|
|
|
return s.repo.FindByID(id)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Update 更新知识库
|
|
|
|
|
|
func (s *KnowledgeService) Update(id string, req model.UpdateKnowledgeRequest) error {
|
|
|
|
|
|
updates := make(map[string]interface{})
|
|
|
|
|
|
if req.Name != "" {
|
|
|
|
|
|
updates["name"] = req.Name
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.Description != "" {
|
|
|
|
|
|
updates["description"] = req.Description
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.LLMModelID != "" {
|
|
|
|
|
|
// 验证模型存在
|
|
|
|
|
|
if _, err := s.modelRepo.FindByID(req.LLMModelID); err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
updates["llm_model_id"] = req.LLMModelID
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.EmbeddingModelID != "" {
|
|
|
|
|
|
// 验证模型存在
|
|
|
|
|
|
if _, err := s.modelRepo.FindByID(req.EmbeddingModelID); err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
updates["embedding_model_id"] = req.EmbeddingModelID
|
|
|
|
|
|
}
|
|
|
|
|
|
if req.ParsingConfig.Engine != "" {
|
|
|
|
|
|
updates["parsing_config"] = req.ParsingConfig
|
|
|
|
|
|
}
|
2026-03-08 23:02:09 +08:00
|
|
|
|
if req.StorageConfig.Type != "" {
|
|
|
|
|
|
updates["storage_config"] = req.StorageConfig
|
|
|
|
|
|
}
|
2026-03-08 16:56:24 +08:00
|
|
|
|
if req.Status != "" {
|
|
|
|
|
|
updates["status"] = req.Status
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return s.repo.Update(id, updates)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Delete 删除知识库
|
|
|
|
|
|
func (s *KnowledgeService) Delete(id string) error {
|
|
|
|
|
|
// 先删除关联的文档
|
|
|
|
|
|
if err := s.repo.DeleteDocumentsByKBID(id); err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
return s.repo.Delete(id)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ListDocuments 获取知识库下的文档列表
|
|
|
|
|
|
func (s *KnowledgeService) ListDocuments(kbID string, status string) ([]model.KnowledgeDocument, error) {
|
2026-03-08 23:02:09 +08:00
|
|
|
|
docs, err := s.repo.FindDocumentsByKBID(kbID, status)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge ListDocuments] 错误: %v", err)
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
|
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge ListDocuments] kbID=%s, count=%d", kbID, len(docs))
|
|
|
|
|
|
for i, doc := range docs {
|
|
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge ListDocuments] doc[%d]: id=%s, name=%s, file_url=%q", i, doc.ID, doc.Name, doc.FileURL)
|
|
|
|
|
|
|
|
|
|
|
|
// 如果是 MinIO 内网地址,转换为代理 URL
|
|
|
|
|
|
if doc.FileURL != "" && (strings.Contains(doc.FileURL, "10.10.10.189") || strings.Contains(doc.FileURL, ":9768")) {
|
|
|
|
|
|
// 从 URL 中提取 fileKey(包含扩展名)
|
|
|
|
|
|
parts := strings.Split(doc.FileURL, "/")
|
|
|
|
|
|
if len(parts) > 0 {
|
|
|
|
|
|
fileName := parts[len(parts)-1]
|
|
|
|
|
|
fileKey := strings.Split(fileName, "?")[0]
|
|
|
|
|
|
doc.FileURL = "/api/file_proxy?kb_id=" + kbID + "&key=" + fileKey
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
return docs, nil
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// UploadDocument 上传文档到知识库
|
2026-03-08 20:34:15 +08:00
|
|
|
|
func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, string, error) {
|
2026-03-08 23:02:09 +08:00
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge Upload] 开始上传文件: kbID=%s, filename=%s, size=%d", kbID, file.Filename, file.Size)
|
|
|
|
|
|
|
2026-03-08 16:56:24 +08:00
|
|
|
|
// 验证知识库存在
|
2026-03-08 20:34:15 +08:00
|
|
|
|
kb, err := s.repo.FindByID(kbID)
|
2026-03-08 16:56:24 +08:00
|
|
|
|
if err != nil {
|
2026-03-08 23:02:09 +08:00
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 知识库不存在, kbID=%s, err=%v", kbID, err)
|
2026-03-08 20:34:15 +08:00
|
|
|
|
return nil, "", err
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 23:02:09 +08:00
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge Upload] 知识库配置: kbID=%s, storage_config.Type=%q, storage_config=%+v",
|
|
|
|
|
|
kbID, kb.StorageConfig.Type, kb.StorageConfig)
|
|
|
|
|
|
|
|
|
|
|
|
// 上传文件(根据知识库的 storage_config 选择存储方式)
|
|
|
|
|
|
var result *UploadResponse
|
|
|
|
|
|
if kb.StorageConfig.Type != "" {
|
|
|
|
|
|
// 使用知识库的存储配置
|
|
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge Upload] 使用知识库存储配置: type=%s, endpoint=%s, bucket=%s",
|
|
|
|
|
|
kb.StorageConfig.Type, kb.StorageConfig.Endpoint, kb.StorageConfig.Bucket)
|
|
|
|
|
|
result, err = s.uploadService.UploadWithConfig(file, kb.StorageConfig)
|
|
|
|
|
|
} else {
|
|
|
|
|
|
// 使用全局配置
|
|
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge Upload] 使用全局存储配置")
|
|
|
|
|
|
result, err = s.uploadService.Upload(file)
|
|
|
|
|
|
}
|
2026-03-08 16:56:24 +08:00
|
|
|
|
if err != nil {
|
2026-03-08 23:02:09 +08:00
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 上传失败, err=%v", err)
|
2026-03-08 20:34:15 +08:00
|
|
|
|
return nil, "", err
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
2026-03-08 23:02:09 +08:00
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge Upload] 上传结果: success=%v, url=%s, message=%s",
|
|
|
|
|
|
result.Success, result.URL, result.Message)
|
2026-03-08 16:56:24 +08:00
|
|
|
|
if !result.Success {
|
2026-03-08 23:02:09 +08:00
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge Upload] 错误: 上传返回失败, message=%s", result.Message)
|
2026-03-08 20:34:15 +08:00
|
|
|
|
return nil, "", nil
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 20:34:15 +08:00
|
|
|
|
// 获取文件扩展名
|
|
|
|
|
|
ext := getFileExt(file.Filename)
|
|
|
|
|
|
|
2026-03-08 23:02:09 +08:00
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge Upload] 准备创建文档记录: result.URL=%q, fileKey=%s, ext=%s",
|
|
|
|
|
|
result.URL, result.FileKey, ext)
|
|
|
|
|
|
|
2026-03-08 16:56:24 +08:00
|
|
|
|
// 创建文档记录
|
|
|
|
|
|
doc := &model.KnowledgeDocument{
|
|
|
|
|
|
ID: uuid.New().String(),
|
|
|
|
|
|
KnowledgeBaseID: kbID,
|
|
|
|
|
|
Name: file.Filename,
|
2026-03-08 20:34:15 +08:00
|
|
|
|
FileKey: result.FileKey + ext,
|
|
|
|
|
|
FileURL: result.URL,
|
2026-03-08 16:56:24 +08:00
|
|
|
|
FileSize: file.Size,
|
|
|
|
|
|
Status: "parsing",
|
|
|
|
|
|
UploadedAt: time.Now(),
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 23:02:09 +08:00
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge Upload] 文档创建完成: doc.FileURL=%q", doc.FileURL)
|
|
|
|
|
|
|
2026-03-08 16:56:24 +08:00
|
|
|
|
if err := s.repo.CreateDocument(doc); err != nil {
|
2026-03-08 20:34:15 +08:00
|
|
|
|
return nil, "", err
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 更新知识库文档数
|
|
|
|
|
|
s.updateDocumentCount(kbID)
|
|
|
|
|
|
|
2026-03-08 20:34:15 +08:00
|
|
|
|
// 异步调用 Python 服务解析文档
|
|
|
|
|
|
go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)
|
|
|
|
|
|
|
2026-03-09 12:50:27 +08:00
|
|
|
|
// 异步调用 AI-Core gRPC 服务解析文档(获取 Markdown)
|
|
|
|
|
|
go s.parseDocumentWithAICore(doc.ID, result.URL, doc.Name)
|
|
|
|
|
|
|
2026-03-08 20:34:15 +08:00
|
|
|
|
return doc, result.URL, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// parseDocument 调用 Python 服务解析文档
|
|
|
|
|
|
func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config model.ParsingConfig) {
|
|
|
|
|
|
// 构建请求
|
|
|
|
|
|
reqBody := map[string]interface{}{
|
|
|
|
|
|
"file_url": fileURL,
|
|
|
|
|
|
"engine": config.Engine,
|
|
|
|
|
|
}
|
|
|
|
|
|
if config.Engine == "docling" && config.DoclingURL != "" {
|
|
|
|
|
|
reqBody["docling_url"] = config.DoclingURL
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
body, _ := json.Marshal(reqBody)
|
|
|
|
|
|
resp, err := http.Post(s.pythonServiceURL+"/parse", "application/json", bytes.NewBuffer(body))
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
defer resp.Body.Close()
|
|
|
|
|
|
|
|
|
|
|
|
if resp.StatusCode != 200 {
|
|
|
|
|
|
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 解析响应
|
|
|
|
|
|
var result map[string]interface{}
|
|
|
|
|
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
|
|
|
|
|
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if success, ok := result["success"].(bool); ok && success {
|
|
|
|
|
|
// 解析成功,更新状态
|
|
|
|
|
|
chunks := []string{}
|
|
|
|
|
|
if c, ok := result["chunks"].([]interface{}); ok {
|
|
|
|
|
|
for _, chunk := range c {
|
|
|
|
|
|
if c, ok := chunk.(string); ok {
|
|
|
|
|
|
chunks = append(chunks, c)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
s.repo.UpdateDocument(docID, map[string]interface{}{
|
|
|
|
|
|
"status": "parsed",
|
|
|
|
|
|
"chunk_count": len(chunks),
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
// 更新知识库的 chunk_count
|
|
|
|
|
|
s.updateChunkCount(kbID)
|
|
|
|
|
|
} else {
|
|
|
|
|
|
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
|
|
|
|
|
|
}
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-09 12:50:27 +08:00
|
|
|
|
// parseDocumentWithAICore 调用 AI-Core gRPC 服务解析文档
|
|
|
|
|
|
func (s *KnowledgeService) parseDocumentWithAICore(docID, fileURL, fileName string) {
|
|
|
|
|
|
if s.aiCoreClient == nil {
|
|
|
|
|
|
knowledgeDebugLog.Printf("[AICore] AI-Core 客户端未初始化")
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
knowledgeDebugLog.Printf("[AICore] 开始解析文档: docID=%s, fileURL=%s, fileName=%s", docID, fileURL, fileName)
|
|
|
|
|
|
|
|
|
|
|
|
result, err := s.aiCoreClient.ParseDocument(fileURL, fileName, "")
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
knowledgeDebugLog.Printf("[AICore] 解析失败: docID=%s, err=%v", docID, err)
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if result.Success && result.Content != "" {
|
|
|
|
|
|
knowledgeDebugLog.Printf("[AICore] 解析成功: docID=%s, contentLength=%d", docID, len(result.Content))
|
|
|
|
|
|
// 更新文档的 Content 字段
|
|
|
|
|
|
s.repo.UpdateDocument(docID, map[string]interface{}{
|
|
|
|
|
|
"content": result.Content,
|
|
|
|
|
|
})
|
|
|
|
|
|
} else {
|
|
|
|
|
|
knowledgeDebugLog.Printf("[AICore] 解析返回失败: docID=%s, message=%s", docID, result.Message)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 16:56:24 +08:00
|
|
|
|
// DeleteDocument 删除文档
|
|
|
|
|
|
func (s *KnowledgeService) DeleteDocument(kbID, docID string) error {
|
|
|
|
|
|
// 验证文档存在
|
|
|
|
|
|
doc, err := s.repo.FindDocumentByID(docID)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
if doc.KnowledgeBaseID != kbID {
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 23:02:09 +08:00
|
|
|
|
// 获取知识库配置
|
|
|
|
|
|
kb, err := s.repo.FindByID(kbID)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 16:56:24 +08:00
|
|
|
|
// 删除文件
|
|
|
|
|
|
if doc.FileKey != "" {
|
2026-03-08 23:02:09 +08:00
|
|
|
|
knowledgeDebugLog.Printf("[Knowledge DeleteDocument] 删除文件: kbID=%s, docID=%s, fileKey=%s, storageType=%s",
|
|
|
|
|
|
kbID, docID, doc.FileKey, kb.StorageConfig.Type)
|
|
|
|
|
|
if kb.StorageConfig.Type != "" {
|
|
|
|
|
|
// 使用知识库的存储配置删除
|
|
|
|
|
|
s.uploadService.DeleteFileWithConfig(doc.FileKey, kb.StorageConfig)
|
|
|
|
|
|
} else {
|
|
|
|
|
|
// 使用全局配置删除
|
|
|
|
|
|
s.uploadService.DeleteFile(doc.FileKey)
|
|
|
|
|
|
}
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 删除文档记录
|
|
|
|
|
|
if err := s.repo.DeleteDocument(docID); err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 更新知识库文档数
|
|
|
|
|
|
s.updateDocumentCount(kbID)
|
|
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ReparseDocument 重新解析文档
|
|
|
|
|
|
func (s *KnowledgeService) ReparseDocument(kbID, docID string) error {
|
|
|
|
|
|
// 验证文档存在
|
|
|
|
|
|
doc, err := s.repo.FindDocumentByID(docID)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
if doc.KnowledgeBaseID != kbID {
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 20:34:15 +08:00
|
|
|
|
// 获取知识库配置
|
|
|
|
|
|
kb, err := s.repo.FindByID(kbID)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 获取文件 URL
|
|
|
|
|
|
fileURL, err := s.uploadService.GetFileURL(doc.FileKey)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 16:56:24 +08:00
|
|
|
|
// 重置状态为 parsing
|
2026-03-08 20:34:15 +08:00
|
|
|
|
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"})
|
|
|
|
|
|
|
|
|
|
|
|
// 异步重新解析
|
|
|
|
|
|
go s.parseDocument(kbID, docID, fileURL, kb.ParsingConfig)
|
|
|
|
|
|
|
|
|
|
|
|
return nil
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// GetDocumentPreview 获取文档预览
|
|
|
|
|
|
func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*model.DocumentPreviewResponse, error) {
|
|
|
|
|
|
// 验证文档存在
|
|
|
|
|
|
doc, err := s.repo.FindDocumentByID(docID)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return nil, err
|
|
|
|
|
|
}
|
|
|
|
|
|
if doc.KnowledgeBaseID != kbID {
|
|
|
|
|
|
return nil, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 20:34:15 +08:00
|
|
|
|
// 如果已解析,返回解析内容;否则返回文件 URL
|
|
|
|
|
|
if doc.Status == "parsed" {
|
|
|
|
|
|
// TODO: 从存储中读取解析内容(可以存到数据库或文件)
|
|
|
|
|
|
// 暂时返回文件 URL
|
|
|
|
|
|
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
|
|
|
|
|
|
return &model.DocumentPreviewResponse{
|
|
|
|
|
|
TotalPages: 1,
|
|
|
|
|
|
CurrentPage: page,
|
|
|
|
|
|
Content: fileURL,
|
|
|
|
|
|
}, nil
|
2026-03-08 16:56:24 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 20:34:15 +08:00
|
|
|
|
// 未解析,返回文件 URL
|
|
|
|
|
|
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
|
2026-03-08 16:56:24 +08:00
|
|
|
|
return &model.DocumentPreviewResponse{
|
|
|
|
|
|
TotalPages: 1,
|
|
|
|
|
|
CurrentPage: page,
|
|
|
|
|
|
Content: fileURL,
|
|
|
|
|
|
}, nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// updateDocumentCount 更新知识库文档数
|
|
|
|
|
|
func (s *KnowledgeService) updateDocumentCount(kbID string) {
|
|
|
|
|
|
count, _ := s.repo.CountDocumentsByKBID(kbID)
|
|
|
|
|
|
s.repo.Update(kbID, map[string]interface{}{"document_count": int(count)})
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 20:34:15 +08:00
|
|
|
|
// updateChunkCount 更新知识库 chunk 数
|
|
|
|
|
|
func (s *KnowledgeService) updateChunkCount(kbID string) {
|
|
|
|
|
|
docs, _ := s.repo.FindDocumentsByKBID(kbID, "parsed")
|
|
|
|
|
|
totalChunks := 0
|
|
|
|
|
|
for _, doc := range docs {
|
|
|
|
|
|
totalChunks += doc.ChunkCount
|
|
|
|
|
|
}
|
|
|
|
|
|
s.repo.Update(kbID, map[string]interface{}{"chunk_count": totalChunks})
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-08 16:56:24 +08:00
|
|
|
|
// getFileExt returns the extension of filename including the leading dot
// (for example ".pdf"), with its original letter case preserved.
//
// The extension is everything from the last "." onwards. An empty string is
// returned when filename has no dot, ends in a dot, or when the last dot sits
// in a directory component rather than in the final path element.
func getFileExt(filename string) string {
	dot := strings.LastIndexByte(filename, '.')
	if dot < 0 || dot == len(filename)-1 {
		return ""
	}
	ext := filename[dot:]
	// A separator after the dot means the dot belongs to a directory name.
	if strings.ContainsAny(ext, `/\`) {
		return ""
	}
	return ext
}
|