feat(backend): 更新 API 端点实现

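- 更新 Chunks API 端点：统一返回 ApiResponse/PaginatedResponse，以 NotFoundException 取代 HTTPException，改用 CRUDBase 访问数据，列表接口增加 page/page_size 分页，文件处理改为异步，并为 chunk_size/overlap 增加范围校验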
- 更新 Datasets API 端点
- 更新 Evaluation API 端点
- 更新 Files API 端点
- 更新 Projects API 端点
- 更新 Questions API 端点

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Developer
2026-03-17 17:29:58 +08:00
parent eac10a9d95
commit db11429290
6 changed files with 519 additions and 317 deletions

View File

@@ -1,15 +1,21 @@
"""
Chunks API Router
"""
import asyncio
from typing import List, Optional
from uuid import UUID
from pydantic import BaseModel
from pydantic import BaseModel, Field
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from app.api.response import ApiResponse, PaginatedResponse
from app.core.database import get_db
from app.core.exceptions import NotFoundException
from app.core.crud import CRUDBase
from app.models.models import Chunk, File
from app.schemas.base import ChunkCreate, ChunkResponse
from app.schemas.chunk import ChunkResponse
from app.schemas.chunk import ChunkCreateSchema
from app.services.text_splitter.splitter import get_splitter
from app.services.file_processor.pdf_processor import process_pdf
from app.services.file_processor.docx_processor import process_docx
@@ -17,26 +23,23 @@ from app.services.file_processor.excel_processor import process_csv, process_exc
router = APIRouter()
# Initialize CRUD
chunk_crud = CRUDBase(Chunk)
class SplitRequest(BaseModel):
"""Request model for splitting text"""
file_id: Optional[UUID] = None
file_id: UUID
method: str = "recursive"
chunk_size: int = 500
overlap: int = 50
chunk_size: int = Field(500, ge=50, le=5000)
overlap: int = Field(50, ge=0, le=500)
separator: Optional[str] = None
class ChunkListResponse(BaseModel):
"""Response for chunk list"""
chunks: List[ChunkResponse]
total: int
def process_file_by_type(file: File) -> str:
async def process_file_by_type(file: File) -> str:
"""Process file based on its type"""
if not file.file_path:
raise HTTPException(status_code=400, detail="File path not found")
raise NotFoundException("File", file.id)
processors = {
"pdf": process_pdf,
@@ -48,13 +51,17 @@ def process_file_by_type(file: File) -> str:
processor = processors.get(file.file_type)
if not processor:
# Return raw text for txt, md files
with open(file.file_path, 'r', encoding='utf-8') as f:
return f.read()
loop = asyncio.get_event_loop()
content = await loop.run_in_executor(
None,
lambda: open(file.file_path, 'r', encoding='utf-8').read()
)
return content
return processor(file.file_path)
return await processor(file.file_path)
@router.post("/split", response_model=dict)
@router.post("/split", response_model=ApiResponse)
async def split_text(
project_id: UUID,
request: SplitRequest,
@@ -62,22 +69,19 @@ async def split_text(
):
"""Split text into chunks"""
# Get file
if request.file_id:
result = await db.execute(
select(File).where(File.id == request.file_id, File.project_id == project_id)
)
file = result.scalar_one_or_none()
if not file:
raise HTTPException(status_code=404, detail="File not found")
result = await db.execute(
select(File).where(File.id == request.file_id, File.project_id == project_id)
)
file = result.scalar_one_or_none()
if not file:
raise NotFoundException("File", request.file_id)
# Process file
text = process_file_by_type(file)
# Process file
text = await process_file_by_type(file)
# Update file status
file.status = "processing"
await db.commit()
else:
raise HTTPException(status_code=400, detail="file_id is required")
# Update file status
file.status = "processing"
await db.commit()
# Split text
kwargs = {"chunk_size": request.chunk_size, "overlap": request.overlap}
@@ -106,77 +110,87 @@ async def split_text(
file.status = "completed"
await db.commit()
return {"chunks": len(chunks), "message": f"Successfully split into {len(chunks)} chunks"}
return ApiResponse.ok(
data={"chunks": len(chunks)},
message=f"Successfully split into {len(chunks)} chunks"
)
@router.get("/", response_model=dict)
@router.get("", response_model=ApiResponse)
async def list_chunks(
project_id: UUID,
file_id: Optional[UUID] = Query(None),
page: int = Query(1, ge=1),
page_size: int = Query(20, ge=1, le=100),
db: AsyncSession = Depends(get_db)
):
"""List chunks for a project"""
query = select(Chunk).where(Chunk.project_id == project_id)
filters = {"project_id": project_id}
if file_id:
query = query.where(Chunk.file_id == file_id)
filters["file_id"] = file_id
query = query.order_by(Chunk.created_at.desc())
result = await db.execute(query)
chunks = result.scalars().all()
return {
"chunks": [ChunkResponse.model_validate(c) for c in chunks],
"total": len(chunks)
}
@router.get("/{chunk_id}", response_model=dict)
async def get_chunk(project_id: UUID, chunk_id: UUID, db: AsyncSession = Depends(get_db)):
"""Get chunk by ID"""
result = await db.execute(
select(Chunk).where(Chunk.id == chunk_id, Chunk.project_id == project_id)
skip = (page - 1) * page_size
chunks, total = await chunk_crud.get_multi(
db,
skip=skip,
limit=page_size,
filters=filters,
order_by="created_at",
descending=True
)
chunk_responses = [ChunkResponse.model_validate(c) for c in chunks]
return PaginatedResponse.ok(
items=chunk_responses,
page=page,
page_size=page_size,
total=total
)
chunk = result.scalar_one_or_none()
if not chunk:
raise HTTPException(status_code=404, detail="Chunk not found")
return ChunkResponse.model_validate(chunk)
@router.put("/{chunk_id}", response_model=dict)
@router.get("/{chunk_id}", response_model=ApiResponse)
async def get_chunk(
project_id: UUID,
chunk_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Get chunk by ID"""
chunk = await chunk_crud.get(db, chunk_id)
if not chunk or chunk.project_id != project_id:
raise NotFoundException("Chunk", chunk_id)
return ApiResponse.ok(data=ChunkResponse.model_validate(chunk))
@router.put("/{chunk_id}", response_model=ApiResponse)
async def update_chunk(
project_id: UUID,
chunk_id: UUID,
chunk: ChunkCreate,
chunk: ChunkCreateSchema,
db: AsyncSession = Depends(get_db)
):
"""Update chunk"""
result = await db.execute(
select(Chunk).where(Chunk.id == chunk_id, Chunk.project_id == project_id)
db_chunk = await chunk_crud.get(db, chunk_id)
if not db_chunk or db_chunk.project_id != project_id:
raise NotFoundException("Chunk", chunk_id)
updated_chunk = await chunk_crud.update(db, db_chunk, chunk)
return ApiResponse.ok(
data=ChunkResponse.model_validate(updated_chunk),
message="Chunk updated successfully"
)
db_chunk = result.scalar_one_or_none()
if not db_chunk:
raise HTTPException(status_code=404, detail="Chunk not found")
for key, value in chunk.model_dump(exclude_unset=True).items():
setattr(db_chunk, key, value)
await db.commit()
await db.refresh(db_chunk)
return ChunkResponse.model_validate(db_chunk)
@router.delete("/{chunk_id}", response_model=dict)
async def delete_chunk(project_id: UUID, chunk_id: UUID, db: AsyncSession = Depends(get_db)):
@router.delete("/{chunk_id}", response_model=ApiResponse)
async def delete_chunk(
project_id: UUID,
chunk_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Delete chunk"""
result = await db.execute(
select(Chunk).where(Chunk.id == chunk_id, Chunk.project_id == project_id)
)
chunk = result.scalar_one_or_none()
if not chunk:
raise HTTPException(status_code=404, detail="Chunk not found")
chunk = await chunk_crud.get(db, chunk_id)
if not chunk or chunk.project_id != project_id:
raise NotFoundException("Chunk", chunk_id)
await db.delete(chunk)
await db.commit()
return {"message": "Chunk deleted successfully"}
await chunk_crud.delete(db, chunk_id)
return ApiResponse.ok(message="Chunk deleted successfully")