feat(backend): 更新 API 支持语义分割和 embedding 配置
- chunks API 添加 embedding 配置字段
- projects API 更新路由和方法

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -44,6 +44,14 @@ class SplitRequest(BaseModel):
|
||||
chunk_size: int = Field(500, ge=50, le=5000)
|
||||
overlap: int = Field(50, ge=0, le=500)
|
||||
separator: Optional[str] = None
|
||||
# Embedding 相关参数(用于 semantic_embedding 方法)
|
||||
embedding_provider: Optional[str] = Field(None, description="embedding provider: openai, minimax")
|
||||
embedding_api_key: Optional[str] = Field(None, description="API key for embedding")
|
||||
embedding_base_url: Optional[str] = Field(None, description="API base URL")
|
||||
embedding_model: Optional[str] = Field(None, description="Embedding model name")
|
||||
# 语义分割参数
|
||||
similarity_threshold: float = Field(0.3, ge=0.0, le=1.0, description="Similarity threshold for semantic split")
|
||||
min_chunk_size: int = Field(100, ge=10, le=1000, description="Minimum chunk size")
|
||||
|
||||
|
||||
async def process_file_by_type(file: File) -> str:
|
||||
@@ -111,6 +119,15 @@ async def split_text(
|
||||
if request.method == "custom" and request.separator:
|
||||
kwargs["separator"] = request.separator
|
||||
|
||||
# 如果使用 semantic_embedding 方法,传递 embedding 参数
|
||||
if request.method == "semantic_embedding":
|
||||
kwargs["embedding_provider_type"] = request.embedding_provider or "openai"
|
||||
kwargs["embedding_api_key"] = request.embedding_api_key
|
||||
kwargs["embedding_base_url"] = request.embedding_base_url or "https://api.minimax.chat/v1"
|
||||
kwargs["embedding_model"] = request.embedding_model or "text-embedding-3-small"
|
||||
kwargs["similarity_threshold"] = request.similarity_threshold
|
||||
kwargs["min_chunk_size"] = request.min_chunk_size
|
||||
|
||||
splitter = get_splitter(request.method, **kwargs)
|
||||
split_results = splitter.split(text)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user