# Git patch touching backend/app/api/v1/chunks/__init__.py and backend/app/api/v1/projects/__init__.py.
# - SplitRequest gains optional embedding_* fields plus similarity_threshold and min_chunk_size.
# - split_text copies those values into kwargs for get_splitter when request.method == "semantic_embedding".
# - delete_project removes Path("/data/code/YG-Datasets/data") / str(project_id) with shutil.rmtree after the CRUD delete.
# NOTE: the patch's line breaks are collapsed here; the hunk header "@@ -2,6 +2,8 @@" is split across the two lines below.
# NOTE(review): the provider default is "openai" and the model default is "text-embedding-3-small", but the base URL
#   default is "https://api.minimax.chat/v1" - these look inconsistent; confirm the intended defaults.
# NOTE(review): the project data directory is a hard-coded absolute path - presumably it should come from configuration; confirm.
# NOTE(review): shutil.rmtree is a blocking call inside an async handler and runs after project_crud.delete with no
#   error handling, so a filesystem failure would surface after the delete call has already run - confirm this is acceptable.
diff --git a/backend/app/api/v1/chunks/__init__.py b/backend/app/api/v1/chunks/__init__.py index 590a084..2c1986d 100644 --- a/backend/app/api/v1/chunks/__init__.py +++ b/backend/app/api/v1/chunks/__init__.py @@ -44,6 +44,14 @@ class SplitRequest(BaseModel): chunk_size: int = Field(500, ge=50, le=5000) overlap: int = Field(50, ge=0, le=500) separator: Optional[str] = None + # Embedding-related parameters (used by the semantic_embedding method) + embedding_provider: Optional[str] = Field(None, description="embedding provider: openai, minimax") + embedding_api_key: Optional[str] = Field(None, description="API key for embedding") + embedding_base_url: Optional[str] = Field(None, description="API base URL") + embedding_model: Optional[str] = Field(None, description="Embedding model name") + # Semantic splitting parameters + similarity_threshold: float = Field(0.3, ge=0.0, le=1.0, description="Similarity threshold for semantic split") + min_chunk_size: int = Field(100, ge=10, le=1000, description="Minimum chunk size") async def process_file_by_type(file: File) -> str: @@ -111,6 +119,15 @@ async def split_text( if request.method == "custom" and request.separator: kwargs["separator"] = request.separator + # When the semantic_embedding method is used, pass the embedding parameters through + if request.method == "semantic_embedding": + kwargs["embedding_provider_type"] = request.embedding_provider or "openai" + kwargs["embedding_api_key"] = request.embedding_api_key + kwargs["embedding_base_url"] = request.embedding_base_url or "https://api.minimax.chat/v1" + kwargs["embedding_model"] = request.embedding_model or "text-embedding-3-small" + kwargs["similarity_threshold"] = request.similarity_threshold + kwargs["min_chunk_size"] = request.min_chunk_size + splitter = get_splitter(request.method, **kwargs) split_results = splitter.split(text) diff --git a/backend/app/api/v1/projects/__init__.py b/backend/app/api/v1/projects/__init__.py index 0998c3c..300213d 100644 --- a/backend/app/api/v1/projects/__init__.py +++ b/backend/app/api/v1/projects/__init__.py @@ -2,6 
+2,8 @@ Projects API Router """ import logging +import shutil +from pathlib import Path from typing import List, Optional from uuid import UUID from fastapi import APIRouter, Depends, Query @@ -107,5 +109,12 @@ async def delete_project( logger.info(f"Deleting project: id={project_id}") await project_crud.get_or_raise(db, project_id, "Project") await project_crud.delete(db, project_id) + + # Remove the local data directory that belongs to the project + project_data_dir = Path("/data/code/YG-Datasets/data") / str(project_id) + if project_data_dir.exists(): + shutil.rmtree(project_data_dir) + logger.info(f"Project data directory deleted: {project_data_dir}") + logger.info(f"Project deleted: id={project_id}") return ApiResponse.ok(message="Project deleted successfully")