feat(backend): 更新 API 端点实现
- 更新 Chunks API 端点
- 更新 Datasets API 端点
- 更新 Evaluation API 端点
- 更新 Files API 端点
- 更新 Projects API 端点
- 更新 Questions API 端点

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,15 +1,21 @@
|
||||
"""
|
||||
Chunks API Router
|
||||
"""
|
||||
import asyncio
|
||||
from typing import List, Optional
|
||||
from uuid import UUID
|
||||
from pydantic import BaseModel
|
||||
from pydantic import BaseModel, Field
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.api.response import ApiResponse, PaginatedResponse
|
||||
from app.core.database import get_db
|
||||
from app.core.exceptions import NotFoundException
|
||||
from app.core.crud import CRUDBase
|
||||
from app.models.models import Chunk, File
|
||||
from app.schemas.base import ChunkCreate, ChunkResponse
|
||||
from app.schemas.chunk import ChunkResponse
|
||||
from app.schemas.chunk import ChunkCreateSchema
|
||||
from app.services.text_splitter.splitter import get_splitter
|
||||
from app.services.file_processor.pdf_processor import process_pdf
|
||||
from app.services.file_processor.docx_processor import process_docx
|
||||
@@ -17,26 +23,23 @@ from app.services.file_processor.excel_processor import process_csv, process_exc
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
# Initialize CRUD
|
||||
chunk_crud = CRUDBase(Chunk)
|
||||
|
||||
|
||||
class SplitRequest(BaseModel):
|
||||
"""Request model for splitting text"""
|
||||
file_id: Optional[UUID] = None
|
||||
file_id: UUID
|
||||
method: str = "recursive"
|
||||
chunk_size: int = 500
|
||||
overlap: int = 50
|
||||
chunk_size: int = Field(500, ge=50, le=5000)
|
||||
overlap: int = Field(50, ge=0, le=500)
|
||||
separator: Optional[str] = None
|
||||
|
||||
|
||||
class ChunkListResponse(BaseModel):
|
||||
"""Response for chunk list"""
|
||||
chunks: List[ChunkResponse]
|
||||
total: int
|
||||
|
||||
|
||||
def process_file_by_type(file: File) -> str:
|
||||
async def process_file_by_type(file: File) -> str:
|
||||
"""Process file based on its type"""
|
||||
if not file.file_path:
|
||||
raise HTTPException(status_code=400, detail="File path not found")
|
||||
raise NotFoundException("File", file.id)
|
||||
|
||||
processors = {
|
||||
"pdf": process_pdf,
|
||||
@@ -48,13 +51,17 @@ def process_file_by_type(file: File) -> str:
|
||||
processor = processors.get(file.file_type)
|
||||
if not processor:
|
||||
# Return raw text for txt, md files
|
||||
with open(file.file_path, 'r', encoding='utf-8') as f:
|
||||
return f.read()
|
||||
loop = asyncio.get_event_loop()
|
||||
content = await loop.run_in_executor(
|
||||
None,
|
||||
lambda: open(file.file_path, 'r', encoding='utf-8').read()
|
||||
)
|
||||
return content
|
||||
|
||||
return processor(file.file_path)
|
||||
return await processor(file.file_path)
|
||||
|
||||
|
||||
@router.post("/split", response_model=dict)
|
||||
@router.post("/split", response_model=ApiResponse)
|
||||
async def split_text(
|
||||
project_id: UUID,
|
||||
request: SplitRequest,
|
||||
@@ -62,22 +69,19 @@ async def split_text(
|
||||
):
|
||||
"""Split text into chunks"""
|
||||
# Get file
|
||||
if request.file_id:
|
||||
result = await db.execute(
|
||||
select(File).where(File.id == request.file_id, File.project_id == project_id)
|
||||
)
|
||||
file = result.scalar_one_or_none()
|
||||
if not file:
|
||||
raise HTTPException(status_code=404, detail="File not found")
|
||||
result = await db.execute(
|
||||
select(File).where(File.id == request.file_id, File.project_id == project_id)
|
||||
)
|
||||
file = result.scalar_one_or_none()
|
||||
if not file:
|
||||
raise NotFoundException("File", request.file_id)
|
||||
|
||||
# Process file
|
||||
text = process_file_by_type(file)
|
||||
# Process file
|
||||
text = await process_file_by_type(file)
|
||||
|
||||
# Update file status
|
||||
file.status = "processing"
|
||||
await db.commit()
|
||||
else:
|
||||
raise HTTPException(status_code=400, detail="file_id is required")
|
||||
# Update file status
|
||||
file.status = "processing"
|
||||
await db.commit()
|
||||
|
||||
# Split text
|
||||
kwargs = {"chunk_size": request.chunk_size, "overlap": request.overlap}
|
||||
@@ -106,77 +110,87 @@ async def split_text(
|
||||
file.status = "completed"
|
||||
await db.commit()
|
||||
|
||||
return {"chunks": len(chunks), "message": f"Successfully split into {len(chunks)} chunks"}
|
||||
return ApiResponse.ok(
|
||||
data={"chunks": len(chunks)},
|
||||
message=f"Successfully split into {len(chunks)} chunks"
|
||||
)
|
||||
|
||||
|
||||
@router.get("/", response_model=dict)
|
||||
@router.get("", response_model=ApiResponse)
|
||||
async def list_chunks(
|
||||
project_id: UUID,
|
||||
file_id: Optional[UUID] = Query(None),
|
||||
page: int = Query(1, ge=1),
|
||||
page_size: int = Query(20, ge=1, le=100),
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""List chunks for a project"""
|
||||
query = select(Chunk).where(Chunk.project_id == project_id)
|
||||
|
||||
filters = {"project_id": project_id}
|
||||
if file_id:
|
||||
query = query.where(Chunk.file_id == file_id)
|
||||
filters["file_id"] = file_id
|
||||
|
||||
query = query.order_by(Chunk.created_at.desc())
|
||||
|
||||
result = await db.execute(query)
|
||||
chunks = result.scalars().all()
|
||||
|
||||
return {
|
||||
"chunks": [ChunkResponse.model_validate(c) for c in chunks],
|
||||
"total": len(chunks)
|
||||
}
|
||||
|
||||
|
||||
@router.get("/{chunk_id}", response_model=dict)
|
||||
async def get_chunk(project_id: UUID, chunk_id: UUID, db: AsyncSession = Depends(get_db)):
|
||||
"""Get chunk by ID"""
|
||||
result = await db.execute(
|
||||
select(Chunk).where(Chunk.id == chunk_id, Chunk.project_id == project_id)
|
||||
skip = (page - 1) * page_size
|
||||
chunks, total = await chunk_crud.get_multi(
|
||||
db,
|
||||
skip=skip,
|
||||
limit=page_size,
|
||||
filters=filters,
|
||||
order_by="created_at",
|
||||
descending=True
|
||||
)
|
||||
|
||||
chunk_responses = [ChunkResponse.model_validate(c) for c in chunks]
|
||||
return PaginatedResponse.ok(
|
||||
items=chunk_responses,
|
||||
page=page,
|
||||
page_size=page_size,
|
||||
total=total
|
||||
)
|
||||
chunk = result.scalar_one_or_none()
|
||||
if not chunk:
|
||||
raise HTTPException(status_code=404, detail="Chunk not found")
|
||||
return ChunkResponse.model_validate(chunk)
|
||||
|
||||
|
||||
@router.put("/{chunk_id}", response_model=dict)
|
||||
@router.get("/{chunk_id}", response_model=ApiResponse)
|
||||
async def get_chunk(
|
||||
project_id: UUID,
|
||||
chunk_id: UUID,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Get chunk by ID"""
|
||||
chunk = await chunk_crud.get(db, chunk_id)
|
||||
if not chunk or chunk.project_id != project_id:
|
||||
raise NotFoundException("Chunk", chunk_id)
|
||||
|
||||
return ApiResponse.ok(data=ChunkResponse.model_validate(chunk))
|
||||
|
||||
|
||||
@router.put("/{chunk_id}", response_model=ApiResponse)
|
||||
async def update_chunk(
|
||||
project_id: UUID,
|
||||
chunk_id: UUID,
|
||||
chunk: ChunkCreate,
|
||||
chunk: ChunkCreateSchema,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Update chunk"""
|
||||
result = await db.execute(
|
||||
select(Chunk).where(Chunk.id == chunk_id, Chunk.project_id == project_id)
|
||||
db_chunk = await chunk_crud.get(db, chunk_id)
|
||||
if not db_chunk or db_chunk.project_id != project_id:
|
||||
raise NotFoundException("Chunk", chunk_id)
|
||||
|
||||
updated_chunk = await chunk_crud.update(db, db_chunk, chunk)
|
||||
return ApiResponse.ok(
|
||||
data=ChunkResponse.model_validate(updated_chunk),
|
||||
message="Chunk updated successfully"
|
||||
)
|
||||
db_chunk = result.scalar_one_or_none()
|
||||
if not db_chunk:
|
||||
raise HTTPException(status_code=404, detail="Chunk not found")
|
||||
|
||||
for key, value in chunk.model_dump(exclude_unset=True).items():
|
||||
setattr(db_chunk, key, value)
|
||||
|
||||
await db.commit()
|
||||
await db.refresh(db_chunk)
|
||||
return ChunkResponse.model_validate(db_chunk)
|
||||
|
||||
|
||||
@router.delete("/{chunk_id}", response_model=dict)
|
||||
async def delete_chunk(project_id: UUID, chunk_id: UUID, db: AsyncSession = Depends(get_db)):
|
||||
@router.delete("/{chunk_id}", response_model=ApiResponse)
|
||||
async def delete_chunk(
|
||||
project_id: UUID,
|
||||
chunk_id: UUID,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""Delete chunk"""
|
||||
result = await db.execute(
|
||||
select(Chunk).where(Chunk.id == chunk_id, Chunk.project_id == project_id)
|
||||
)
|
||||
chunk = result.scalar_one_or_none()
|
||||
if not chunk:
|
||||
raise HTTPException(status_code=404, detail="Chunk not found")
|
||||
chunk = await chunk_crud.get(db, chunk_id)
|
||||
if not chunk or chunk.project_id != project_id:
|
||||
raise NotFoundException("Chunk", chunk_id)
|
||||
|
||||
await db.delete(chunk)
|
||||
await db.commit()
|
||||
return {"message": "Chunk deleted successfully"}
|
||||
await chunk_crud.delete(db, chunk_id)
|
||||
return ApiResponse.ok(message="Chunk deleted successfully")
|
||||
|
||||
Reference in New Issue
Block a user