feat(backend): 更新 API 支持语义分割和 embedding 配置

- chunks API 添加 embedding 配置字段
- projects API 更新路由和方法

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Developer
2026-03-18 16:08:08 +08:00
parent da2887d913
commit cc2e73c595
2 changed files with 26 additions and 0 deletions

View File

@@ -44,6 +44,14 @@ class SplitRequest(BaseModel):
chunk_size: int = Field(500, ge=50, le=5000)
overlap: int = Field(50, ge=0, le=500)
separator: Optional[str] = None
# Embedding 相关参数(用于 semantic_embedding 方法)
embedding_provider: Optional[str] = Field(None, description="embedding provider: openai, minimax")
embedding_api_key: Optional[str] = Field(None, description="API key for embedding")
embedding_base_url: Optional[str] = Field(None, description="API base URL")
embedding_model: Optional[str] = Field(None, description="Embedding model name")
# 语义分割参数
similarity_threshold: float = Field(0.3, ge=0.0, le=1.0, description="Similarity threshold for semantic split")
min_chunk_size: int = Field(100, ge=10, le=1000, description="Minimum chunk size")
async def process_file_by_type(file: File) -> str:
@@ -111,6 +119,15 @@ async def split_text(
if request.method == "custom" and request.separator:
kwargs["separator"] = request.separator
# 如果使用 semantic_embedding 方法,传递 embedding 参数
if request.method == "semantic_embedding":
kwargs["embedding_provider_type"] = request.embedding_provider or "openai"
kwargs["embedding_api_key"] = request.embedding_api_key
kwargs["embedding_base_url"] = request.embedding_base_url or "https://api.minimax.chat/v1"
kwargs["embedding_model"] = request.embedding_model or "text-embedding-3-small"
kwargs["similarity_threshold"] = request.similarity_threshold
kwargs["min_chunk_size"] = request.min_chunk_size
splitter = get_splitter(request.method, **kwargs)
split_results = splitter.split(text)