feat: 完善知识库后端 API
- 添加 KnowledgeHandler 处理知识库请求
- 注册知识库 CRUD 路由
- 添加文档上传、删除、解析、预览接口
- 更新数据库模型和迁移

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"mime/multipart"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
@@ -10,16 +13,18 @@ import (
|
||||
)
|
||||
|
||||
type KnowledgeService struct {
|
||||
repo *repository.KnowledgeRepository
|
||||
modelRepo *repository.ModelRepository
|
||||
uploadService *UploadService
|
||||
repo *repository.KnowledgeRepository
|
||||
modelRepo *repository.ModelRepository
|
||||
uploadService *UploadService
|
||||
pythonServiceURL string
|
||||
}
|
||||
|
||||
func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService) *KnowledgeService {
|
||||
func NewKnowledgeService(repo *repository.KnowledgeRepository, modelRepo *repository.ModelRepository, uploadService *UploadService, pythonServiceURL string) *KnowledgeService {
|
||||
return &KnowledgeService{
|
||||
repo: repo,
|
||||
modelRepo: modelRepo,
|
||||
uploadService: uploadService,
|
||||
repo: repo,
|
||||
modelRepo: modelRepo,
|
||||
uploadService: uploadService,
|
||||
pythonServiceURL: pythonServiceURL,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,22 +40,25 @@ func (s *KnowledgeService) Create(req model.CreateKnowledgeRequest) (*model.Know
|
||||
}
|
||||
|
||||
kb := &model.KnowledgeBase{
|
||||
ID: uuid.New().String(),
|
||||
Name: req.Name,
|
||||
Description: req.Description,
|
||||
LLMModelID: req.LLMModelID,
|
||||
EmbeddingModelID: req.EmbeddingModelID,
|
||||
ParsingConfig: req.ParsingConfig,
|
||||
Status: "active",
|
||||
DocumentCount: 0,
|
||||
ChunkCount: 0,
|
||||
ID: uuid.New().String(),
|
||||
Name: req.Name,
|
||||
Description: req.Description,
|
||||
LLMModelID: req.LLMModelID,
|
||||
EmbeddingModelID: req.EmbeddingModelID,
|
||||
ParsingConfig: req.ParsingConfig,
|
||||
Status: "active",
|
||||
DocumentCount: 0,
|
||||
ChunkCount: 0,
|
||||
}
|
||||
|
||||
// 设置默认值
|
||||
if kb.ParsingConfig.EnablePDF == false && kb.ParsingConfig.EnablePDF != true {
|
||||
if kb.ParsingConfig.Engine == "" {
|
||||
kb.ParsingConfig.Engine = "markitdown"
|
||||
}
|
||||
if kb.ParsingConfig.EnablePDF != false {
|
||||
kb.ParsingConfig.EnablePDF = true
|
||||
}
|
||||
if kb.ParsingConfig.Pandoc == false && kb.ParsingConfig.Pandoc != true {
|
||||
if kb.ParsingConfig.Pandoc != false {
|
||||
kb.ParsingConfig.Pandoc = true
|
||||
}
|
||||
|
||||
@@ -118,41 +126,102 @@ func (s *KnowledgeService) ListDocuments(kbID string, status string) ([]model.Kn
|
||||
}
|
||||
|
||||
// UploadDocument 上传文档到知识库
|
||||
func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, error) {
|
||||
func (s *KnowledgeService) UploadDocument(kbID string, file *multipart.FileHeader) (*model.KnowledgeDocument, string, error) {
|
||||
// 验证知识库存在
|
||||
_, err := s.repo.FindByID(kbID)
|
||||
kb, err := s.repo.FindByID(kbID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
// 上传文件
|
||||
result, err := s.uploadService.Upload(file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, "", err
|
||||
}
|
||||
if !result.Success {
|
||||
return nil, nil
|
||||
return nil, "", nil
|
||||
}
|
||||
|
||||
// 获取文件扩展名
|
||||
ext := getFileExt(file.Filename)
|
||||
|
||||
// 创建文档记录
|
||||
doc := &model.KnowledgeDocument{
|
||||
ID: uuid.New().String(),
|
||||
KnowledgeBaseID: kbID,
|
||||
Name: file.Filename,
|
||||
FileKey: result.FileKey,
|
||||
FileKey: result.FileKey + ext,
|
||||
FileURL: result.URL,
|
||||
FileSize: file.Size,
|
||||
Status: "parsing",
|
||||
UploadedAt: time.Now(),
|
||||
}
|
||||
|
||||
if err := s.repo.CreateDocument(doc); err != nil {
|
||||
return nil, err
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
// 更新知识库文档数
|
||||
s.updateDocumentCount(kbID)
|
||||
|
||||
return doc, nil
|
||||
// 异步调用 Python 服务解析文档
|
||||
go s.parseDocument(kbID, doc.ID, result.URL, kb.ParsingConfig)
|
||||
|
||||
return doc, result.URL, nil
|
||||
}
|
||||
|
||||
// parseDocument 调用 Python 服务解析文档
|
||||
func (s *KnowledgeService) parseDocument(kbID, docID, fileURL string, config model.ParsingConfig) {
|
||||
// 构建请求
|
||||
reqBody := map[string]interface{}{
|
||||
"file_url": fileURL,
|
||||
"engine": config.Engine,
|
||||
}
|
||||
if config.Engine == "docling" && config.DoclingURL != "" {
|
||||
reqBody["docling_url"] = config.DoclingURL
|
||||
}
|
||||
|
||||
body, _ := json.Marshal(reqBody)
|
||||
resp, err := http.Post(s.pythonServiceURL+"/parse", "application/json", bytes.NewBuffer(body))
|
||||
if err != nil {
|
||||
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
|
||||
return
|
||||
}
|
||||
|
||||
// 解析响应
|
||||
var result map[string]interface{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
||||
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
|
||||
return
|
||||
}
|
||||
|
||||
if success, ok := result["success"].(bool); ok && success {
|
||||
// 解析成功,更新状态
|
||||
chunks := []string{}
|
||||
if c, ok := result["chunks"].([]interface{}); ok {
|
||||
for _, chunk := range c {
|
||||
if c, ok := chunk.(string); ok {
|
||||
chunks = append(chunks, c)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.repo.UpdateDocument(docID, map[string]interface{}{
|
||||
"status": "parsed",
|
||||
"chunk_count": len(chunks),
|
||||
})
|
||||
|
||||
// 更新知识库的 chunk_count
|
||||
s.updateChunkCount(kbID)
|
||||
} else {
|
||||
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "failed"})
|
||||
}
|
||||
}
|
||||
|
||||
// DeleteDocument 删除文档
|
||||
@@ -168,7 +237,7 @@ func (s *KnowledgeService) DeleteDocument(kbID, docID string) error {
|
||||
|
||||
// 删除文件
|
||||
if doc.FileKey != "" {
|
||||
s.uploadService.DeleteFile(doc.FileKey + getFileExt(doc.Name))
|
||||
s.uploadService.DeleteFile(doc.FileKey)
|
||||
}
|
||||
|
||||
// 删除文档记录
|
||||
@@ -193,8 +262,25 @@ func (s *KnowledgeService) ReparseDocument(kbID, docID string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// 获取知识库配置
|
||||
kb, err := s.repo.FindByID(kbID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 获取文件 URL
|
||||
fileURL, err := s.uploadService.GetFileURL(doc.FileKey)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 重置状态为 parsing
|
||||
return s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"})
|
||||
s.repo.UpdateDocument(docID, map[string]interface{}{"status": "parsing"})
|
||||
|
||||
// 异步重新解析
|
||||
go s.parseDocument(kbID, docID, fileURL, kb.ParsingConfig)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetDocumentPreview 获取文档预览
|
||||
@@ -208,12 +294,20 @@ func (s *KnowledgeService) GetDocumentPreview(kbID, docID string, page int) (*mo
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// 简单实现:返回文件 URL(实际应由 Python 服务处理)
|
||||
fileURL, err := s.uploadService.GetFileURL(doc.FileKey + getFileExt(doc.Name))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
// 如果已解析,返回解析内容;否则返回文件 URL
|
||||
if doc.Status == "parsed" {
|
||||
// TODO: 从存储中读取解析内容(可以存到数据库或文件)
|
||||
// 暂时返回文件 URL
|
||||
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
|
||||
return &model.DocumentPreviewResponse{
|
||||
TotalPages: 1,
|
||||
CurrentPage: page,
|
||||
Content: fileURL,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// 未解析,返回文件 URL
|
||||
fileURL, _ := s.uploadService.GetFileURL(doc.FileKey)
|
||||
return &model.DocumentPreviewResponse{
|
||||
TotalPages: 1,
|
||||
CurrentPage: page,
|
||||
@@ -227,7 +321,23 @@ func (s *KnowledgeService) updateDocumentCount(kbID string) {
|
||||
s.repo.Update(kbID, map[string]interface{}{"document_count": int(count)})
|
||||
}
|
||||
|
||||
// updateChunkCount 更新知识库 chunk 数
|
||||
func (s *KnowledgeService) updateChunkCount(kbID string) {
|
||||
docs, _ := s.repo.FindDocumentsByKBID(kbID, "parsed")
|
||||
totalChunks := 0
|
||||
for _, doc := range docs {
|
||||
totalChunks += doc.ChunkCount
|
||||
}
|
||||
s.repo.Update(kbID, map[string]interface{}{"chunk_count": totalChunks})
|
||||
}
|
||||
|
||||
func getFileExt(filename string) string {
|
||||
exts := []string{".pdf", ".docx", ".xlsx", ".pptx", ".txt", ".md", ".html"}
|
||||
for _, ext := range exts {
|
||||
if len(filename) >= len(ext) && filename[len(filename)-len(ext):] == ext {
|
||||
return ext
|
||||
}
|
||||
}
|
||||
if len(filename) > 4 {
|
||||
return filename[len(filename)-4:]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user