refactor: 前端架构重构 - 提取 CSS 和逻辑到独立模块

前端重构： - 删除旧的大体积 Vue 组件（HomeView, FileManage, TextSplit 等） - 删除旧的 composables（useFormatters, useModels, useProjects） - 新增 core/, page-logic/, pages/, shared/ 模块化目录结构 - 提取 CSS 到 styles/pages/ 目录 - 添加全局样式 variables.css 和 common.css 后端 API 更新： - chunks: 语义分割 API 增强 - files: 文件处理 API 更新 - models: 模型管理 API 更新 - questions: 问答管理 API 更新 - database: 数据库连接优化 - semantic_embedding: 语义嵌入服务优化 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 14:23:34 +08:00
parent a280b4f014
commit 6aa271c4f7
75 changed files with 22636 additions and 6519 deletions
--- a/backend/app/api/v1/chunks/init.py
+++ b/backend/app/api/v1/chunks/init.py
@@ -11,7 +11,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy import select

 from app.api.response import ApiResponse, PaginatedResponse
-from app.core.database import get_db
+from app.core.database import get_db, AsyncSessionLocal
 from app.core.exceptions import NotFoundException
 from app.core.crud import CRUDBase
 from app.core.logging import log_success, log_failure
@@ -80,6 +80,106 @@ async def process_file_by_type(file: File) -> str:
    return content


+async def process_split_async(
+    project_id: UUID,
+    request: SplitRequest,
+):
+    """Split a file into chunks in the background and persist the result.
+
+    Uses its own ``AsyncSessionLocal`` session. On success the file's chunks
+    are replaced, the extracted text is written to ``<ready_dir>/<file id>.md``
+    and the file is marked "completed". On any error the failure is logged,
+    uncommitted changes are rolled back and the file is marked "failed".
+
+    Args:
+        project_id: Project the file must belong to.
+        request: Split parameters (file id, method, chunk size, overlap and
+            the optional separator / embedding settings).
+    """
+    async with AsyncSessionLocal() as db:
+        file = None
+        try:
+            result = await db.execute(
+                select(File).where(File.id == request.file_id, File.project_id == project_id)
+            )
+            file = result.scalar_one_or_none()
+            if not file:
+                # Raise so the handler below logs the miss instead of exiting silently.
+                raise LookupError("File not found")
+
+            text = await process_file_by_type(file)
+
+            kwargs = {"chunk_size": request.chunk_size, "overlap": request.overlap}
+            if request.method == "custom" and request.separator:
+                kwargs["separator"] = request.separator
+
+            if request.method == "semantic_embedding":
+                kwargs["embedding_provider_type"] = request.embedding_provider or "openai"
+                kwargs["embedding_api_key"] = request.embedding_api_key
+                kwargs["embedding_base_url"] = request.embedding_base_url or "https://api.minimax.chat/v1"
+                kwargs["embedding_model"] = request.embedding_model or "text-embedding-3-small"
+                kwargs["similarity_threshold"] = request.similarity_threshold
+                kwargs["min_chunk_size"] = request.min_chunk_size
+
+            splitter = get_splitter(request.method, **kwargs)
+            # NOTE(review): split() is called synchronously on the event loop;
+            # confirm it does no long blocking work (e.g. embedding requests).
+            split_results = splitter.split(text)
+
+            # Old chunks are deleted in the same transaction as the inserts below.
+            await db.execute(
+                Chunk.__table__.delete().where(
+                    Chunk.project_id == project_id,
+                    Chunk.file_id == file.id
+                )
+            )
+
+            chunks = []
+            for chunk_data in split_results:
+                db_chunk = Chunk(
+                    project_id=project_id,
+                    file_id=file.id,
+                    name=chunk_data.get("name", f"Chunk {chunk_data['index'] + 1}"),
+                    content=chunk_data["content"],
+                    word_count=chunk_data.get("word_count", len(chunk_data["content"].split()))
+                )
+                db.add(db_chunk)
+                chunks.append(db_chunk)
+
+            await db.commit()
+
+            ready_dir = get_project_ready_dir(str(project_id))
+
+            # Remove stale markdown files (two naming formats may exist).
+            for old_file in list(ready_dir.glob(f"{file.id}*.md")):
+                try:
+                    old_file.unlink()
+                except OSError:
+                    # Best-effort cleanup: a leftover file must not fail the split.
+                    pass
+
+            md_path = ready_dir / f"{file.id}.md"
+
+            # Write in the default executor so disk I/O does not block the loop.
+            loop = asyncio.get_running_loop()
+            await loop.run_in_executor(
+                None,
+                lambda: md_path.write_text(text, encoding='utf-8')
+            )
+
+            file.file_path = str(md_path)
+            file.status = "completed"
+            await db.commit()
+
+            log_success(
+                "文件分割完成",
+                project_id=str(project_id),
+                file_id=str(file.id),
+                filename=file.filename,
+                method=request.method,
+                chunk_count=len(chunks),
+                text_length=len(text),
+                ready_path=str(md_path)
+            )
+        except Exception as e:
+            # Log before touching the database so the original error is
+            # recorded even if the status update below fails as well.
+            log_failure(
+                "文件分割失败",
+                project_id=str(project_id),
+                file_id=str(request.file_id),
+                method=request.method,
+                error=str(e)
+            )
+
+            if file:
+                # Roll back first: a failure between the chunk delete and its
+                # commit would otherwise persist a half-written chunk set along
+                # with the status, and a session whose flush failed rejects
+                # further commits until it has been rolled back.
+                await db.rollback()
+                file.status = "failed"
+                await db.commit()
+
+
@router.post("/split", response_model=ApiResponse)
 async def split_text(
    project_id: UUID,
@@ -88,7 +188,6 @@ async def split_text(
 ):
    """Split text into chunks"""
    try:
-        # Get file
        result = await db.execute(
            select(File).where(File.id == request.file_id, File.project_id == project_id)
        )
@@ -107,81 +206,27 @@ async def split_text(
            overlap=request.overlap
        )

-        # Process file
-        text = await process_file_by_type(file)
-
-        # Update file status
        file.status = "processing"
        await db.commit()

-        # Split text
-        kwargs = {"chunk_size": request.chunk_size, "overlap": request.overlap}
-        if request.method == "custom" and request.separator:
-            kwargs["separator"] = request.separator
-
-        # 如果使用 semantic_embedding 方法，传递 embedding 参数
-        if request.method == "semantic_embedding":
-            kwargs["embedding_provider_type"] = request.embedding_provider or "openai"
-            kwargs["embedding_api_key"] = request.embedding_api_key
-            kwargs["embedding_base_url"] = request.embedding_base_url or "https://api.minimax.chat/v1"
-            kwargs["embedding_model"] = request.embedding_model or "text-embedding-3-small"
-            kwargs["similarity_threshold"] = request.similarity_threshold
-            kwargs["min_chunk_size"] = request.min_chunk_size
-
-        splitter = get_splitter(request.method, **kwargs)
-        split_results = splitter.split(text)
-
-        # Save chunks
-        chunks = []
-        for chunk_data in split_results:
-            db_chunk = Chunk(
+        asyncio.create_task(
+            process_split_async(
                project_id=project_id,
-                file_id=file.id,
-                name=chunk_data.get("name", f"Chunk {chunk_data['index'] + 1}"),
-                content=chunk_data["content"],
-                word_count=chunk_data.get("word_count", len(chunk_data["content"].split()))
+                request=request,
            )
-            db.add(db_chunk)
-            chunks.append(db_chunk)
-
-        await db.commit()
-
-        # Save processed markdown to ready directory
-        ready_dir = get_project_ready_dir(str(project_id))
-        md_filename = f"{file.id}_{file.filename}.md"
-        md_path = ready_dir / md_filename
-
-        # Write markdown content to file
-        loop = asyncio.get_event_loop()
-        await loop.run_in_executor(
-            None,
-            lambda: md_path.write_text(text, encoding='utf-8')
-        )
-
-        # Update file path to ready location
-        file.file_path = str(md_path)
-        file.status = "completed"
-        await db.commit()
-
-        # 记录成功日志
-        log_success(
-            "文件处理完成",
-            project_id=str(project_id),
-            file_id=str(file.id),
-            filename=file.filename,
-            chunk_count=len(chunks),
-            text_length=len(text),
-            ready_path=str(md_path)
        )

        return ApiResponse.ok(
-            data={"chunks": len(chunks)},
-            message=f"Successfully split into {len(chunks)} chunks"
+            data={"file_id": str(file.id), "status": file.status},
+            message="Split task started, processing in background"
        )
    except Exception as e:
-        # 记录失败日志
+        if 'file' in locals() and file:
+            file.status = "failed"
+            await db.commit()
+
        log_failure(
-            "文件处理失败",
+            "分割任务启动失败",
            project_id=str(project_id),
            file_id=str(request.file_id),
            error=str(e)