refactor: 前端架构重构 - 提取 CSS 和逻辑到独立模块
前端重构:
- 删除旧的大体积 Vue 组件(HomeView, FileManage, TextSplit 等)
- 删除旧的 composables(useFormatters, useModels, useProjects)
- 新增 core/, page-logic/, pages/, shared/ 模块化目录结构
- 提取 CSS 到 styles/pages/ 目录
- 添加全局样式 variables.css 和 common.css

后端 API 更新:
- chunks: 语义分割 API 增强
- files: 文件处理 API 更新
- models: 模型管理 API 更新
- questions: 问答管理 API 更新
- database: 数据库连接优化
- semantic_embedding: 语义嵌入服务优化

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.api.response import ApiResponse, PaginatedResponse
|
||||
from app.core.database import get_db
|
||||
from app.core.database import get_db, AsyncSessionLocal
|
||||
from app.core.exceptions import NotFoundException
|
||||
from app.core.crud import CRUDBase
|
||||
from app.core.logging import log_success, log_failure
|
||||
@@ -80,6 +80,106 @@ async def process_file_by_type(file: File) -> str:
|
||||
return content
|
||||
|
||||
|
||||
def _build_splitter_kwargs(request: SplitRequest) -> dict:
    """Translate a split request into keyword arguments for ``get_splitter``."""
    kwargs = {"chunk_size": request.chunk_size, "overlap": request.overlap}

    if request.method == "custom" and request.separator:
        kwargs["separator"] = request.separator

    if request.method == "semantic_embedding":
        kwargs["embedding_provider_type"] = request.embedding_provider or "openai"
        kwargs["embedding_api_key"] = request.embedding_api_key
        kwargs["embedding_base_url"] = request.embedding_base_url or "https://api.minimax.chat/v1"
        kwargs["embedding_model"] = request.embedding_model or "text-embedding-3-small"
        kwargs["similarity_threshold"] = request.similarity_threshold
        kwargs["min_chunk_size"] = request.min_chunk_size

    return kwargs


async def process_split_async(
    project_id: UUID,
    request: SplitRequest,
):
    """Run chunk splitting in background.

    Opens its own database session, loads the file identified by
    ``request.file_id`` within ``project_id``, extracts its text, splits it
    with the splitter selected by ``request.method``, replaces the file's
    existing chunks, writes the extracted text to ``<ready_dir>/<file_id>.md``
    and marks the file ``"completed"``.

    Any exception is caught and logged via ``log_failure``; the file is then
    marked ``"failed"`` on a best-effort basis. Nothing is raised to the
    caller and nothing is returned. If the file cannot be found the function
    returns silently.

    Args:
        project_id: Project that owns the file.
        request: Split parameters (file id, method, chunk size, overlap and
            the optional separator / embedding settings).
    """
    async with AsyncSessionLocal() as db:
        file = None
        try:
            result = await db.execute(
                select(File).where(File.id == request.file_id, File.project_id == project_id)
            )
            file = result.scalar_one_or_none()
            if not file:
                return

            # Snapshot plain values now. Reading ORM attributes after a
            # commit may require a lazy load if the session expires
            # instances on commit (session configuration is not visible
            # here), which is not possible implicitly on an async session.
            file_id = file.id
            filename = file.filename

            text = await process_file_by_type(file)

            splitter = get_splitter(request.method, **_build_splitter_kwargs(request))

            # The splitter is synchronous; run it in the default executor so
            # a long split does not stall the event loop.
            loop = asyncio.get_running_loop()
            split_results = await loop.run_in_executor(None, splitter.split, text)

            # Replace any chunks left over from a previous split of this file.
            await db.execute(
                Chunk.__table__.delete().where(
                    Chunk.project_id == project_id,
                    Chunk.file_id == file_id
                )
            )

            chunks = []
            for position, chunk_data in enumerate(split_results):
                content = chunk_data["content"]
                # Build the fallback name lazily so a chunk that already has
                # a name does not also need an "index" key.
                if "name" in chunk_data:
                    name = chunk_data["name"]
                else:
                    name = f"Chunk {chunk_data.get('index', position) + 1}"
                db_chunk = Chunk(
                    project_id=project_id,
                    file_id=file_id,
                    name=name,
                    content=content,
                    word_count=chunk_data.get("word_count", len(content.split()))
                )
                db.add(db_chunk)
                chunks.append(db_chunk)

            await db.commit()

            ready_dir = get_project_ready_dir(str(project_id))

            # Remove old markdown files; both "<id>.md" and the earlier
            # "<id>_<filename>.md" naming match this pattern.
            for old_file in ready_dir.glob(f"{file_id}*.md"):
                try:
                    old_file.unlink()
                except OSError:
                    # Best-effort cleanup: a stale file must not fail the split.
                    pass

            md_filename = f"{file_id}.md"
            md_path = ready_dir / md_filename

            await loop.run_in_executor(
                None,
                lambda: md_path.write_text(text, encoding='utf-8')
            )

            file.file_path = str(md_path)
            file.status = "completed"
            await db.commit()

            log_success(
                "文件分割完成",
                project_id=str(project_id),
                file_id=str(file_id),
                filename=filename,
                method=request.method,
                chunk_count=len(chunks),
                text_length=len(text),
                ready_path=str(md_path)
            )
        except Exception as e:
            error = str(e)
            if file:
                try:
                    # Discard the failed transaction first; otherwise the
                    # commit below can fail again and mask the real error.
                    await db.rollback()
                    file.status = "failed"
                    await db.commit()
                except Exception as status_error:
                    error = f"{error}; status update failed: {status_error}"

            log_failure(
                "文件分割失败",
                project_id=str(project_id),
                file_id=str(request.file_id),
                method=request.method,
                error=error
            )
|
||||
|
||||
|
||||
@router.post("/split", response_model=ApiResponse)
|
||||
async def split_text(
|
||||
project_id: UUID,
|
||||
@@ -88,7 +188,6 @@ async def split_text(
|
||||
):
|
||||
"""Split text into chunks"""
|
||||
try:
|
||||
# Get file
|
||||
result = await db.execute(
|
||||
select(File).where(File.id == request.file_id, File.project_id == project_id)
|
||||
)
|
||||
@@ -107,81 +206,27 @@ async def split_text(
|
||||
overlap=request.overlap
|
||||
)
|
||||
|
||||
# Process file
|
||||
text = await process_file_by_type(file)
|
||||
|
||||
# Update file status
|
||||
file.status = "processing"
|
||||
await db.commit()
|
||||
|
||||
# Split text
|
||||
kwargs = {"chunk_size": request.chunk_size, "overlap": request.overlap}
|
||||
if request.method == "custom" and request.separator:
|
||||
kwargs["separator"] = request.separator
|
||||
|
||||
# 如果使用 semantic_embedding 方法,传递 embedding 参数
|
||||
if request.method == "semantic_embedding":
|
||||
kwargs["embedding_provider_type"] = request.embedding_provider or "openai"
|
||||
kwargs["embedding_api_key"] = request.embedding_api_key
|
||||
kwargs["embedding_base_url"] = request.embedding_base_url or "https://api.minimax.chat/v1"
|
||||
kwargs["embedding_model"] = request.embedding_model or "text-embedding-3-small"
|
||||
kwargs["similarity_threshold"] = request.similarity_threshold
|
||||
kwargs["min_chunk_size"] = request.min_chunk_size
|
||||
|
||||
splitter = get_splitter(request.method, **kwargs)
|
||||
split_results = splitter.split(text)
|
||||
|
||||
# Save chunks
|
||||
chunks = []
|
||||
for chunk_data in split_results:
|
||||
db_chunk = Chunk(
|
||||
asyncio.create_task(
|
||||
process_split_async(
|
||||
project_id=project_id,
|
||||
file_id=file.id,
|
||||
name=chunk_data.get("name", f"Chunk {chunk_data['index'] + 1}"),
|
||||
content=chunk_data["content"],
|
||||
word_count=chunk_data.get("word_count", len(chunk_data["content"].split()))
|
||||
request=request,
|
||||
)
|
||||
db.add(db_chunk)
|
||||
chunks.append(db_chunk)
|
||||
|
||||
await db.commit()
|
||||
|
||||
# Save processed markdown to ready directory
|
||||
ready_dir = get_project_ready_dir(str(project_id))
|
||||
md_filename = f"{file.id}_{file.filename}.md"
|
||||
md_path = ready_dir / md_filename
|
||||
|
||||
# Write markdown content to file
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(
|
||||
None,
|
||||
lambda: md_path.write_text(text, encoding='utf-8')
|
||||
)
|
||||
|
||||
# Update file path to ready location
|
||||
file.file_path = str(md_path)
|
||||
file.status = "completed"
|
||||
await db.commit()
|
||||
|
||||
# 记录成功日志
|
||||
log_success(
|
||||
"文件处理完成",
|
||||
project_id=str(project_id),
|
||||
file_id=str(file.id),
|
||||
filename=file.filename,
|
||||
chunk_count=len(chunks),
|
||||
text_length=len(text),
|
||||
ready_path=str(md_path)
|
||||
)
|
||||
|
||||
return ApiResponse.ok(
|
||||
data={"chunks": len(chunks)},
|
||||
message=f"Successfully split into {len(chunks)} chunks"
|
||||
data={"file_id": str(file.id), "status": file.status},
|
||||
message="Split task started, processing in background"
|
||||
)
|
||||
except Exception as e:
|
||||
# 记录失败日志
|
||||
if 'file' in locals() and file:
|
||||
file.status = "failed"
|
||||
await db.commit()
|
||||
|
||||
log_failure(
|
||||
"文件处理失败",
|
||||
"分割任务启动失败",
|
||||
project_id=str(project_id),
|
||||
file_id=str(request.file_id),
|
||||
error=str(e)
|
||||
|
||||
Reference in New Issue
Block a user