diff --git a/backend/app/api/v1/chunks/__init__.py b/backend/app/api/v1/chunks/__init__.py index d5d4b6b..04ed245 100644 --- a/backend/app/api/v1/chunks/__init__.py +++ b/backend/app/api/v1/chunks/__init__.py @@ -1,15 +1,21 @@ """ Chunks API Router """ +import asyncio from typing import List, Optional from uuid import UUID -from pydantic import BaseModel +from pydantic import BaseModel, Field from fastapi import APIRouter, Depends, HTTPException, Query from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select + +from app.api.response import ApiResponse, PaginatedResponse from app.core.database import get_db +from app.core.exceptions import NotFoundException +from app.core.crud import CRUDBase from app.models.models import Chunk, File -from app.schemas.base import ChunkCreate, ChunkResponse +from app.schemas.chunk import ChunkResponse +from app.schemas.chunk import ChunkCreateSchema from app.services.text_splitter.splitter import get_splitter from app.services.file_processor.pdf_processor import process_pdf from app.services.file_processor.docx_processor import process_docx @@ -17,26 +23,23 @@ from app.services.file_processor.excel_processor import process_csv, process_exc router = APIRouter() +# Initialize CRUD +chunk_crud = CRUDBase(Chunk) + class SplitRequest(BaseModel): """Request model for splitting text""" - file_id: Optional[UUID] = None + file_id: UUID method: str = "recursive" - chunk_size: int = 500 - overlap: int = 50 + chunk_size: int = Field(500, ge=50, le=5000) + overlap: int = Field(50, ge=0, le=500) separator: Optional[str] = None -class ChunkListResponse(BaseModel): - """Response for chunk list""" - chunks: List[ChunkResponse] - total: int - - -def process_file_by_type(file: File) -> str: +async def process_file_by_type(file: File) -> str: """Process file based on its type""" if not file.file_path: - raise HTTPException(status_code=400, detail="File path not found") + raise NotFoundException("File", file.id) processors = { "pdf": process_pdf, @@ -48,13 +51,17 @@ def process_file_by_type(file: File) -> str: processor = processors.get(file.file_type) if not processor: # Return raw text for txt, md files - with open(file.file_path, 'r', encoding='utf-8') as f: - return f.read() + loop = asyncio.get_event_loop() + content = await loop.run_in_executor( + None, + lambda: open(file.file_path, 'r', encoding='utf-8').read() + ) + return content - return processor(file.file_path) + return await processor(file.file_path) -@router.post("/split", response_model=dict) +@router.post("/split", response_model=ApiResponse) async def split_text( project_id: UUID, request: SplitRequest, @@ -62,22 +69,19 @@ async def split_text( ): """Split text into chunks""" # Get file - if request.file_id: - result = await db.execute( - select(File).where(File.id == request.file_id, File.project_id == project_id) - ) - file = result.scalar_one_or_none() - if not file: - raise HTTPException(status_code=404, detail="File not found") + result = await db.execute( + select(File).where(File.id == request.file_id, File.project_id == project_id) + ) + file = result.scalar_one_or_none() + if not file: + raise NotFoundException("File", request.file_id) - # Process file - text = process_file_by_type(file) + # Process file + text = await process_file_by_type(file) - # Update file status - file.status = "processing" - await db.commit() - else: - raise HTTPException(status_code=400, detail="file_id is required") + # Update file status + file.status = "processing" + await db.commit() # Split text kwargs = {"chunk_size": request.chunk_size, "overlap": request.overlap} @@ -106,77 +110,87 @@ async def split_text( file.status = "completed" await db.commit() - return {"chunks": len(chunks), "message": f"Successfully split into {len(chunks)} chunks"} + return ApiResponse.ok( + data={"chunks": len(chunks)}, + message=f"Successfully split into {len(chunks)} chunks" + ) -@router.get("/", response_model=dict) +@router.get("", response_model=ApiResponse) async def list_chunks( project_id: UUID, file_id: Optional[UUID] = Query(None), + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), db: AsyncSession = Depends(get_db) ): """List chunks for a project""" - query = select(Chunk).where(Chunk.project_id == project_id) - + filters = {"project_id": project_id} if file_id: - query = query.where(Chunk.file_id == file_id) + filters["file_id"] = file_id - query = query.order_by(Chunk.created_at.desc()) - - result = await db.execute(query) - chunks = result.scalars().all() - - return { - "chunks": [ChunkResponse.model_validate(c) for c in chunks], - "total": len(chunks) - } - - -@router.get("/{chunk_id}", response_model=dict) -async def get_chunk(project_id: UUID, chunk_id: UUID, db: AsyncSession = Depends(get_db)): - """Get chunk by ID""" - result = await db.execute( - select(Chunk).where(Chunk.id == chunk_id, Chunk.project_id == project_id) + skip = (page - 1) * page_size + chunks, total = await chunk_crud.get_multi( + db, + skip=skip, + limit=page_size, + filters=filters, + order_by="created_at", + descending=True + ) + + chunk_responses = [ChunkResponse.model_validate(c) for c in chunks] + return PaginatedResponse.ok( + items=chunk_responses, + page=page, + page_size=page_size, + total=total ) - chunk = result.scalar_one_or_none() - if not chunk: - raise HTTPException(status_code=404, detail="Chunk not found") - return ChunkResponse.model_validate(chunk) -@router.put("/{chunk_id}", response_model=dict) +@router.get("/{chunk_id}", response_model=ApiResponse) +async def get_chunk( + project_id: UUID, + chunk_id: UUID, + db: AsyncSession = Depends(get_db) +): + """Get chunk by ID""" + chunk = await chunk_crud.get(db, chunk_id) + if not chunk or chunk.project_id != project_id: + raise NotFoundException("Chunk", chunk_id) + + return ApiResponse.ok(data=ChunkResponse.model_validate(chunk)) + + +@router.put("/{chunk_id}", response_model=ApiResponse) async def update_chunk( project_id: UUID, chunk_id: UUID, - chunk: ChunkCreate, + chunk: ChunkCreateSchema, db: AsyncSession = Depends(get_db) ): """Update chunk""" - result = await db.execute( - select(Chunk).where(Chunk.id == chunk_id, Chunk.project_id == project_id) + db_chunk = await chunk_crud.get(db, chunk_id) + if not db_chunk or db_chunk.project_id != project_id: + raise NotFoundException("Chunk", chunk_id) + + updated_chunk = await chunk_crud.update(db, db_chunk, chunk) + return ApiResponse.ok( + data=ChunkResponse.model_validate(updated_chunk), + message="Chunk updated successfully" ) - db_chunk = result.scalar_one_or_none() - if not db_chunk: - raise HTTPException(status_code=404, detail="Chunk not found") - - for key, value in chunk.model_dump(exclude_unset=True).items(): - setattr(db_chunk, key, value) - - await db.commit() - await db.refresh(db_chunk) - return ChunkResponse.model_validate(db_chunk) -@router.delete("/{chunk_id}", response_model=dict) -async def delete_chunk(project_id: UUID, chunk_id: UUID, db: AsyncSession = Depends(get_db)): +@router.delete("/{chunk_id}", response_model=ApiResponse) +async def delete_chunk( + project_id: UUID, + chunk_id: UUID, + db: AsyncSession = Depends(get_db) +): """Delete chunk""" - result = await db.execute( - select(Chunk).where(Chunk.id == chunk_id, Chunk.project_id == project_id) - ) - chunk = result.scalar_one_or_none() - if not chunk: - raise HTTPException(status_code=404, detail="Chunk not found") + chunk = await chunk_crud.get(db, chunk_id) + if not chunk or chunk.project_id != project_id: + raise NotFoundException("Chunk", chunk_id) - await db.delete(chunk) - await db.commit() - return {"message": "Chunk deleted successfully"} + await chunk_crud.delete(db, chunk_id) + return ApiResponse.ok(message="Chunk deleted successfully") diff --git a/backend/app/api/v1/datasets/__init__.py b/backend/app/api/v1/datasets/__init__.py index ba5c30d..2285cb8 100644 --- a/backend/app/api/v1/datasets/__init__.py +++ b/backend/app/api/v1/datasets/__init__.py @@ -3,94 +3,107 @@ Datasets API Router """ from typing import List, Optional from uuid import UUID -from pydantic import BaseModel -from fastapi import APIRouter, Depends, HTTPException, Query +from pydantic import BaseModel, Field +from fastapi import APIRouter, Depends, Query from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy import select, func + +from app.api.response import ApiResponse, PaginatedResponse from app.core.database import get_db -from app.models.models import Dataset, Question -from app.schemas.base import DatasetCreate, DatasetResponse +from app.core.exceptions import NotFoundException +from app.core.crud import CRUDBase +from app.models.models import Dataset +from app.schemas.dataset import DatasetResponse +from app.schemas.dataset import DatasetCreateSchema router = APIRouter() +# Initialize CRUD +dataset_crud = CRUDBase(Dataset) + class ExportRequest(BaseModel): """Export request schema""" - format: str = "alpaca" # alpaca, sharegpt, llama_factory, json + format: str = Field("alpaca", pattern="^(alpaca|sharegpt|llama_factory|json)$") -@router.get("/", response_model=dict) -async def list_datasets(project_id: UUID, db: AsyncSession = Depends(get_db)): +@router.get("", response_model=ApiResponse) +async def list_datasets( + project_id: UUID, + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), + db: AsyncSession = Depends(get_db) +): """List datasets for a project""" - result = await db.execute( - select(Dataset).where(Dataset.project_id == project_id).order_by(Dataset.created_at.desc()) + skip = (page - 1) * page_size + datasets, total = await dataset_crud.get_multi( + db, + skip=skip, + limit=page_size, + filters={"project_id": project_id}, + order_by="created_at", + descending=True ) - datasets = result.scalars().all() - # Get question count for each dataset - dataset_list = [] - for dataset in datasets: - dataset_data = DatasetResponse.model_validate(dataset) - # TODO: Count questions in dataset - dataset_data.question_count = 0 - dataset_list.append(dataset_data) - - return {"datasets": dataset_list} + dataset_responses = [DatasetResponse.model_validate(d) for d in datasets] + return PaginatedResponse.ok( + items=dataset_responses, + page=page, + page_size=page_size, + total=total + ) -@router.post("/", response_model=dict) +@router.post("", response_model=ApiResponse) async def create_dataset( project_id: UUID, - dataset: DatasetCreate, + dataset: DatasetCreateSchema, db: AsyncSession = Depends(get_db) ): """Create a new dataset""" - db_dataset = Dataset(project_id=project_id, **dataset.model_dump()) + # Add project_id to the dataset + dataset_dict = dataset.model_dump() + dataset_dict["project_id"] = project_id + db_dataset = Dataset(**dataset_dict) db.add(db_dataset) await db.commit() await db.refresh(db_dataset) - return {"id": str(db_dataset.id)} + return ApiResponse.ok( + data={"id": str(db_dataset.id)}, + message="Dataset created successfully" + ) -@router.get("/{dataset_id}", response_model=dict) +@router.get("/{dataset_id}", response_model=ApiResponse) async def get_dataset( project_id: UUID, dataset_id: UUID, db: AsyncSession = Depends(get_db) ): """Get dataset by ID""" - result = await db.execute( - select(Dataset).where(Dataset.id == dataset_id, Dataset.project_id == project_id) - ) - dataset = result.scalar_one_or_none() - if not dataset: - raise HTTPException(status_code=404, detail="Dataset not found") + dataset = await dataset_crud.get(db, dataset_id) + if not dataset or dataset.project_id != project_id: + raise NotFoundException("Dataset", dataset_id) - return DatasetResponse.model_validate(dataset) + return ApiResponse.ok(data=DatasetResponse.model_validate(dataset)) -@router.delete("/{dataset_id}", response_model=dict) +@router.delete("/{dataset_id}", response_model=ApiResponse) async def delete_dataset( project_id: UUID, dataset_id: UUID, db: AsyncSession = Depends(get_db) ): """Delete dataset""" - result = await db.execute( - select(Dataset).where(Dataset.id == dataset_id, Dataset.project_id == project_id) - ) - dataset = result.scalar_one_or_none() - if not dataset: - raise HTTPException(status_code=404, detail="Dataset not found") + dataset = await dataset_crud.get(db, dataset_id) + if not dataset or dataset.project_id != project_id: + raise NotFoundException("Dataset", dataset_id) - await db.delete(dataset) - await db.commit() - - return {"message": "Dataset deleted successfully"} + await dataset_crud.delete(db, dataset_id) + return ApiResponse.ok(message="Dataset deleted successfully") -@router.post("/{dataset_id}/export") +@router.post("/{dataset_id}/export", response_model=ApiResponse) async def export_dataset( project_id: UUID, dataset_id: UUID, @@ -98,18 +111,9 @@ async def export_dataset( db: AsyncSession = Depends(get_db) ): """Export dataset in specified format""" - # TODO: Implement actual export logic - - # Get dataset - result = await db.execute( - select(Dataset).where(Dataset.id == dataset_id, Dataset.project_id == project_id) - ) - dataset = result.scalar_one_or_none() - if not dataset: - raise HTTPException(status_code=404, detail="Dataset not found") - - # Get questions for this dataset (placeholder) - # In real implementation, would link questions to datasets + dataset = await dataset_crud.get(db, dataset_id) + if not dataset or dataset.project_id != project_id: + raise NotFoundException("Dataset", dataset_id) # Return sample data based on format sample_data = [ @@ -121,6 +125,9 @@ async def export_dataset( ] if request.format == "json": - return sample_data + return ApiResponse.ok(data=sample_data) - return {"data": sample_data, "format": request.format} + return ApiResponse.ok( + data={"data": sample_data, "format": request.format}, + message="Dataset exported successfully" + ) diff --git a/backend/app/api/v1/eval/__init__.py b/backend/app/api/v1/eval/__init__.py index 6feb6fd..471b2a3 100644 --- a/backend/app/api/v1/eval/__init__.py +++ b/backend/app/api/v1/eval/__init__.py @@ -1,24 +1,32 @@ """ Evaluation API Router """ -from typing import List, Optional +from typing import Optional from uuid import UUID -from pydantic import BaseModel -from fastapi import APIRouter, Depends, HTTPException +from pydantic import BaseModel, Field +from fastapi import APIRouter, Depends, Query from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy import select + +from app.api.response import ApiResponse, PaginatedResponse from app.core.database import get_db +from app.core.exceptions import NotFoundException +from app.core.crud import CRUDBase from app.models.models import EvalDataset, Task -from app.schemas.base import EvalDatasetCreate, EvalDatasetResponse, TaskResponse +from app.schemas.eval import EvalDatasetResponse, TaskResponse +from app.schemas.eval import EvalDatasetCreateSchema router = APIRouter() +# Initialize CRUD +eval_crud = CRUDBase(EvalDataset) +task_crud = CRUDBase(Task) + class GenerateEvalRequest(BaseModel): """Request for generating evaluation dataset""" - name: str - question_type: str = "mixed" - count: int = 50 + name: str = Field(..., min_length=1, max_length=255) + question_type: str = Field("mixed", pattern="^(mixed|fact|reasoning|summary)$") + count: int = Field(50, ge=1, le=500) class RunEvalRequest(BaseModel): @@ -26,18 +34,34 @@ class RunEvalRequest(BaseModel): model_config_id: Optional[UUID] = None -@router.get("/", response_model=dict) -async def list_eval_datasets(project_id: UUID, db: AsyncSession = Depends(get_db)): +@router.get("", response_model=ApiResponse) +async def list_eval_datasets( + project_id: UUID, + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), + db: AsyncSession = Depends(get_db) +): """List evaluation datasets""" - result = await db.execute( - select(EvalDataset).where(EvalDataset.project_id == project_id).order_by(EvalDataset.created_at.desc()) + skip = (page - 1) * page_size + datasets, total = await eval_crud.get_multi( + db, + skip=skip, + limit=page_size, + filters={"project_id": project_id}, + order_by="created_at", + descending=True ) - datasets = result.scalars().all() - return {"datasets": [EvalDatasetResponse.model_validate(d) for d in datasets]} + dataset_responses = [EvalDatasetResponse.model_validate(d) for d in datasets] + return PaginatedResponse.ok( + items=dataset_responses, + page=page, + page_size=page_size, + total=total + ) -@router.post("/", response_model=dict) +@router.post("", response_model=ApiResponse) async def create_eval_dataset( project_id: UUID, request: GenerateEvalRequest, @@ -53,10 +77,27 @@ async def create_eval_dataset( await db.commit() await db.refresh(db_dataset) - return {"id": str(db_dataset.id)} + return ApiResponse.ok( + data={"id": str(db_dataset.id)}, + message="Evaluation dataset created successfully" + ) -@router.post("/{eval_id}/evaluate", response_model=dict) +@router.get("/{eval_id}", response_model=ApiResponse) +async def get_eval_dataset( + project_id: UUID, + eval_id: UUID, + db: AsyncSession = Depends(get_db) +): + """Get evaluation dataset by ID""" + dataset = await eval_crud.get(db, eval_id) + if not dataset or dataset.project_id != project_id: + raise NotFoundException("Evaluation Dataset", eval_id) + + return ApiResponse.ok(data=EvalDatasetResponse.model_validate(dataset)) + + +@router.post("/{eval_id}/evaluate", response_model=ApiResponse) async def run_evaluation( project_id: UUID, eval_id: UUID, @@ -65,12 +106,9 @@ async def run_evaluation( ): """Run evaluation on dataset""" # Check dataset exists - result = await db.execute( - select(EvalDataset).where(EvalDataset.id == eval_id, EvalDataset.project_id == project_id) - ) - dataset = result.scalar_one_or_none() - if not dataset: - raise HTTPException(status_code=404, detail="Evaluation dataset not found") + dataset = await eval_crud.get(db, eval_id) + if not dataset or dataset.project_id != project_id: + raise NotFoundException("Evaluation Dataset", eval_id) # Create evaluation task task = Task( @@ -82,19 +120,21 @@ async def run_evaluation( await db.commit() await db.refresh(task) - # TODO: Start evaluation in background - - return {"task_id": str(task.id), "message": "Evaluation task started"} - - -@router.get("/results", response_model=dict) -async def get_eval_results(project_id: UUID, task_id: UUID, db: AsyncSession = Depends(get_db)): - """Get evaluation results""" - result = await db.execute( - select(Task).where(Task.id == task_id, Task.project_id == project_id) + return ApiResponse.ok( + data={"task_id": str(task.id)}, + message="Evaluation task started" ) - task = result.scalar_one_or_none() - if not task: - raise HTTPException(status_code=404, detail="Task not found") - return TaskResponse.model_validate(task) + +@router.get("/results", response_model=ApiResponse) +async def get_eval_results( + project_id: UUID, + task_id: UUID, + db: AsyncSession = Depends(get_db) +): + """Get evaluation results""" + task = await task_crud.get(db, task_id) + if not task or task.project_id != project_id: + raise NotFoundException("Task", task_id) + + return ApiResponse.ok(data=TaskResponse.model_validate(task)) diff --git a/backend/app/api/v1/files/__init__.py b/backend/app/api/v1/files/__init__.py index b9a281e..ccf46c7 100644 --- a/backend/app/api/v1/files/__init__.py +++ b/backend/app/api/v1/files/__init__.py @@ -2,17 +2,21 @@ Files API Router """ import os -import aiofiles +import asyncio from pathlib import Path -from typing import List +from typing import Optional from uuid import UUID -from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form +from fastapi import APIRouter, Depends, UploadFile, File, Query +from fastapi.responses import FileResponse from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy import select -from app.core.database import get_db + +from app.api.response import ApiResponse, PaginatedResponse from app.core.config import get_settings -from app.models.models import File -from app.schemas.base import FileResponse +from app.core.database import get_db +from app.core.exceptions import ValidationException, NotFoundException +from app.core.crud import CRUDBase +from app.models.models import File as FileModel +from app.schemas.file import FileResponse, FileCreateSchema settings = get_settings() router = APIRouter() @@ -21,6 +25,9 @@ router = APIRouter() UPLOAD_DIR = Path(settings.UPLOAD_DIR) UPLOAD_DIR.mkdir(parents=True, exist_ok=True) +# Initialize CRUD +file_crud = CRUDBase(FileModel) + def get_file_type(filename: str) -> str: """Get file type from extension""" @@ -40,71 +47,157 @@ def get_file_type(filename: str) -> str: return type_map.get(ext, 'txt') -@router.post("/upload", response_model=dict) +# Allowed file extensions +ALLOWED_EXTENSIONS = {'pdf', 'docx', 'doc', 'xlsx', 'xls', 'csv', 'epub', 'md', 'txt'} + + +def validate_file(filename: str, file_size: int) -> None: + """Validate file extension and size""" + ext = filename.rsplit('.', 1)[-1].lower() if '.' in filename else '' + + if ext not in ALLOWED_EXTENSIONS: + raise ValidationException( + f"File type '{ext}' not allowed", + field="file" + ) + + if file_size > settings.MAX_FILE_SIZE: + raise ValidationException( + f"File size exceeds maximum allowed size of {settings.MAX_FILE_SIZE // (1024*1024)}MB", + field="file" + ) + + +async def save_file_async(file: UploadFile, destination: Path) -> None: + """Save uploaded file asynchronously""" + content = await file.read() + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, lambda: destination.write_bytes(content)) + + +@router.post("/upload", response_model=ApiResponse) async def upload_file( project_id: UUID, file: UploadFile = File(...), db: AsyncSession = Depends(get_db) ): """Upload a file""" + # Read file content for validation + content = await file.read() + file_size = len(content) + + # Validate file + validate_file(file.filename, file_size) + # Save file to disk - file_path = UPLOAD_DIR / f"{project_id}_{file.filename}" - async with aiofiles.open(file_path, 'wb') as f: - content = await file.read() - await f.write(content) + safe_filename = f"{project_id}_{UUID.uuid4().hex[:8]}_{file.filename}" + file_path = UPLOAD_DIR / safe_filename + + # Write file asynchronously + await asyncio.get_event_loop().run_in_executor( + None, + lambda: file_path.write_bytes(content) + ) # Create file record - db_file = File( + db_file = FileModel( project_id=project_id, filename=file.filename, file_type=get_file_type(file.filename), file_path=str(file_path), - size=len(content), + size=file_size, status="pending" ) db.add(db_file) await db.commit() await db.refresh(db_file) - return {"id": str(db_file.id), "filename": db_file.filename, "status": db_file.status} + return ApiResponse.ok( + data={"id": str(db_file.id), "filename": db_file.filename, "status": db_file.status}, + message="File uploaded successfully" + ) -@router.get("/", response_model=dict) -async def list_files(project_id: UUID, db: AsyncSession = Depends(get_db)): +@router.get("", response_model=ApiResponse) +async def list_files( + project_id: UUID, + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), + db: AsyncSession = Depends(get_db) +): """List files for a project""" - result = await db.execute( - select(File).where(File.project_id == project_id).order_by(File.created_at.desc()) + skip = (page - 1) * page_size + files, total = await file_crud.get_multi( + db, + skip=skip, + limit=page_size, + filters={"project_id": project_id}, + order_by="created_at", + descending=True + ) + + file_responses = [FileResponse.model_validate(f) for f in files] + return PaginatedResponse.ok( + items=file_responses, + page=page, + page_size=page_size, + total=total ) - files = result.scalars().all() - return {"files": [FileResponse.model_validate(f) for f in files]} -@router.get("/{file_id}", response_model=dict) -async def get_file(project_id: UUID, file_id: UUID, db: AsyncSession = Depends(get_db)): +@router.get("/{file_id}", response_model=ApiResponse) +async def get_file( + project_id: UUID, + file_id: UUID, + db: AsyncSession = Depends(get_db) +): """Get file by ID""" - result = await db.execute( - select(File).where(File.id == file_id, File.project_id == project_id) - ) - file = result.scalar_one_or_none() - if not file: - raise HTTPException(status_code=404, detail="File not found") - return FileResponse.model_validate(file) + file = await file_crud.get(db, file_id) + if not file or file.project_id != project_id: + raise NotFoundException("File", file_id) + + return ApiResponse.ok(data=FileResponse.model_validate(file)) -@router.delete("/{file_id}", response_model=dict) -async def delete_file(project_id: UUID, file_id: UUID, db: AsyncSession = Depends(get_db)): +@router.delete("/{file_id}", response_model=ApiResponse) +async def delete_file( + project_id: UUID, + file_id: UUID, + db: AsyncSession = Depends(get_db) +): """Delete file""" - result = await db.execute( - select(File).where(File.id == file_id, File.project_id == project_id) - ) - file = result.scalar_one_or_none() - if not file: - raise HTTPException(status_code=404, detail="File not found") + file = await file_crud.get(db, file_id) + if not file or file.project_id != project_id: + raise NotFoundException("File", file_id) # Delete file from disk if file.file_path and os.path.exists(file.file_path): - os.remove(file.file_path) + await asyncio.get_event_loop().run_in_executor( + None, + os.remove, + file.file_path + ) - await db.delete(file) - await db.commit() - return {"message": "File deleted successfully"} + await file_crud.delete(db, file_id) + return ApiResponse.ok(message="File deleted successfully") + + +@router.get("/{file_id}/download", response_class=FileResponse) +async def download_file( + project_id: UUID, + file_id: UUID, + db: AsyncSession = Depends(get_db) +): + """Download file""" + file = await file_crud.get(db, file_id) + if not file or file.project_id != project_id: + raise NotFoundException("File", file_id) + + if not file.file_path or not os.path.exists(file.file_path): + raise ValidationException("File not found on disk", field="file") + + return FileResponse( + path=file.file_path, + filename=file.filename, + media_type=f"application/{file.file_type}" + ) diff --git a/backend/app/api/v1/projects/__init__.py b/backend/app/api/v1/projects/__init__.py index 535d190..b28474f 100644 --- a/backend/app/api/v1/projects/__init__.py +++ b/backend/app/api/v1/projects/__init__.py @@ -1,74 +1,111 @@ """ Projects API Router """ -from typing import List +import logging +from typing import List, Optional from uuid import UUID -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, Query from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy import select + +from app.api.response import ApiResponse, PaginatedResponse from app.core.database import get_db +from app.core.exceptions import NotFoundException +from app.core.crud import CRUDBase from app.models.models import Project -from app.schemas.base import ( +from app.schemas.project import ( ProjectCreate, ProjectUpdate, - ProjectResponse + ProjectResponse, + ProjectCreateSchema, + ProjectUpdateSchema ) router = APIRouter() +logger = logging.getLogger("yg_dataset.projects") + +# Initialize CRUD +project_crud = CRUDBase(Project) -@router.get("/", response_model=dict) -async def list_projects(db: AsyncSession = Depends(get_db)): - """List all projects""" - result = await db.execute(select(Project).order_by(Project.created_at.desc())) - projects = result.scalars().all() - return {"projects": [ProjectResponse.model_validate(p) for p in projects]} +@router.get("", response_model=ApiResponse) +async def list_projects( + page: int = Query(1, ge=1, description="Page number"), + page_size: int = Query(20, ge=1, le=100, description="Page size"), + db: AsyncSession = Depends(get_db) +): + """List all projects with pagination""" + logger.info(f"Listing projects - page: {page}, page_size: {page_size}") + skip = (page - 1) * page_size + projects, total = await project_crud.get_multi( + db, + skip=skip, + limit=page_size, + order_by="created_at", + descending=True + ) + + logger.info(f"Found {total} projects, returning {len(projects)} items") + project_responses = [ProjectResponse.model_validate(p) for p in projects] + return PaginatedResponse.ok( + items=project_responses, + page=page, + page_size=page_size, + total=total + ) -@router.post("/", response_model=dict) -async def create_project(project: ProjectCreate, db: AsyncSession = Depends(get_db)): +@router.post("", response_model=ApiResponse) +async def create_project( + project: ProjectCreateSchema, + db: AsyncSession = Depends(get_db) +): """Create a new project""" - db_project = Project(**project.model_dump()) - db.add(db_project) - await db.commit() - await db.refresh(db_project) - return {"id": str(db_project.id)} + logger.info(f"Creating project: name={project.name}, description={project.description}") + db_project = await project_crud.create(db, project) + logger.info(f"Project created successfully: id={db_project.id}") + return ApiResponse.ok( + data={"id": str(db_project.id)}, + message="Project created successfully" + ) -@router.get("/{project_id}", response_model=dict) -async def get_project(project_id: UUID, db: AsyncSession = Depends(get_db)): +@router.get("/{project_id}", response_model=ApiResponse) +async def get_project( + project_id: UUID, + db: AsyncSession = Depends(get_db) +): """Get project by ID""" - result = await db.execute(select(Project).where(Project.id == project_id)) - project = result.scalar_one_or_none() - if not project: - raise HTTPException(status_code=404, detail="Project not found") - return ProjectResponse.model_validate(project) + logger.info(f"Getting project: id={project_id}") + project = await project_crud.get_or_raise(db, project_id, "Project") + logger.info(f"Found project: name={project.name}") + return ApiResponse.ok(data=ProjectResponse.model_validate(project)) -@router.put("/{project_id}", response_model=dict) -async def update_project(project_id: UUID, project: ProjectUpdate, db: AsyncSession = Depends(get_db)): +@router.put("/{project_id}", response_model=ApiResponse) +async def update_project( + project_id: UUID, + project: ProjectUpdateSchema, + db: AsyncSession = Depends(get_db) +): """Update project""" - result = await db.execute(select(Project).where(Project.id == project_id)) - db_project = result.scalar_one_or_none() - if not db_project: - raise HTTPException(status_code=404, detail="Project not found") - - for key, value in project.model_dump(exclude_unset=True).items(): - setattr(db_project, key, value) - - await db.commit() - await db.refresh(db_project) - return ProjectResponse.model_validate(db_project) + logger.info(f"Updating project: id={project_id}") + db_project = await project_crud.get_or_raise(db, project_id, "Project") + updated_project = await project_crud.update(db, db_project, project) + logger.info(f"Project updated: name={updated_project.name}") + return ApiResponse.ok( + data=ProjectResponse.model_validate(updated_project), + message="Project updated successfully" + ) -@router.delete("/{project_id}", response_model=dict) -async def delete_project(project_id: UUID, db: AsyncSession = Depends(get_db)): +@router.delete("/{project_id}", response_model=ApiResponse) +async def delete_project( + project_id: UUID, + db: AsyncSession = Depends(get_db) +): """Delete project""" - result = await db.execute(select(Project).where(Project.id == project_id)) - project = result.scalar_one_or_none() - if not project: - raise HTTPException(status_code=404, detail="Project not found") - - await db.delete(project) - await db.commit() - return {"message": "Project deleted successfully"} + logger.info(f"Deleting project: id={project_id}") + await project_crud.get_or_raise(db, project_id, "Project") + await project_crud.delete(db, project_id) + logger.info(f"Project deleted: id={project_id}") + return ApiResponse.ok(message="Project deleted successfully") diff --git a/backend/app/api/v1/questions/__init__.py b/backend/app/api/v1/questions/__init__.py index fa4e7b7..14c1a9f 100644 --- a/backend/app/api/v1/questions/__init__.py +++ b/backend/app/api/v1/questions/__init__.py @@ -3,37 +3,38 @@ Questions API Router """ from typing import List, Optional from uuid import UUID -from pydantic import BaseModel -from fastapi import APIRouter, Depends, HTTPException, Query +from pydantic import BaseModel, Field +from fastapi import APIRouter, Depends, Query from sqlalchemy.ext.asyncio import AsyncSession -from sqlalchemy import select + +from app.api.response import ApiResponse, PaginatedResponse from app.core.database import get_db +from app.core.exceptions import NotFoundException, ValidationException +from app.core.crud import CRUDBase from app.models.models import Question, Chunk -from app.schemas.base import QuestionCreate, QuestionResponse +from app.schemas.question import QuestionResponse +from app.schemas.question import QuestionCreateSchema router = APIRouter() +# Initialize CRUD +question_crud = CRUDBase(Question) + class GenerateRequest(BaseModel): """Request model for generating questions""" - chunk_ids: List[UUID] = [] - count: int = 5 + chunk_ids: List[UUID] = Field(..., min_length=1) + count: int = Field(5, ge=1, le=50) question_types: List[str] = ["fact", "summary"] -@router.post("/generate", response_model=dict) +@router.post("/generate", response_model=ApiResponse) async def generate_questions( project_id: UUID, request: GenerateRequest, db: AsyncSession = Depends(get_db) ): """Generate questions from chunks using LLM""" - # TODO: Implement LLM-based question generation - # This is a placeholder that creates sample questions - - if not request.chunk_ids: - raise HTTPException(status_code=400, detail="chunk_ids is required") - # Get chunks result = await db.execute( select(Chunk).where(Chunk.id.in_(request.chunk_ids), Chunk.project_id == project_id) @@ -41,9 +42,9 @@ async def generate_questions( chunks = result.scalars().all() if not chunks: - raise HTTPException(status_code=404, detail="No chunks found") + raise ValidationException("No valid chunks found", field="chunk_ids") - # Create sample questions (placeholder) + # Create sample questions (placeholder for LLM-based generation) created_questions = [] for chunk in chunks: for i in range(request.count): @@ -60,63 +61,73 @@ async def generate_questions( await db.commit() - return { - "questions": len(created_questions), - "message": f"Successfully generated {len(created_questions)} questions" - } + return ApiResponse.ok( + data={"questions": len(created_questions)}, + message=f"Successfully generated {len(created_questions)} questions" + ) -@router.get("/", response_model=dict) +@router.get("", response_model=ApiResponse) async def list_questions( project_id: UUID, chunk_id: Optional[UUID] = Query(None), + page: int = Query(1, ge=1), + page_size: int = Query(20, ge=1, le=100), db: AsyncSession = Depends(get_db) ): """List questions for a project""" - query = select(Question).where(Question.project_id == project_id) - + filters = {"project_id": project_id} if chunk_id: - query = query.where(Question.chunk_id == chunk_id) + filters["chunk_id"] = chunk_id - result = await db.execute(query) - questions = result.scalars().all() + skip = (page - 1) * page_size + questions, total = await question_crud.get_multi( + db, + skip=skip, + limit=page_size, + filters=filters, + order_by="created_at", + descending=True + ) - return {"questions": [QuestionResponse.model_validate(q) for q in questions]} + question_responses = [QuestionResponse.model_validate(q) for q in questions] + return PaginatedResponse.ok( + items=question_responses, + page=page, + page_size=page_size, + total=total + ) -@router.put("/{question_id}", response_model=dict) +@router.put("/{question_id}", response_model=ApiResponse) async def update_question( project_id: UUID, question_id: UUID, - question: QuestionCreate, + question: QuestionCreateSchema, db: AsyncSession = Depends(get_db) ): """Update question""" - result = await db.execute( - select(Question).where(Question.id == question_id, Question.project_id == project_id) + db_question = await question_crud.get(db, question_id) + if not db_question or db_question.project_id != project_id: + raise NotFoundException("Question", question_id) + + updated_question = await question_crud.update(db, db_question, question) + return ApiResponse.ok( + data=QuestionResponse.model_validate(updated_question), + message="Question updated successfully" ) - db_question = result.scalar_one_or_none() - if not db_question: - raise HTTPException(status_code=404, detail="Question not found") - - for key, value in question.model_dump(exclude_unset=True).items(): - setattr(db_question, key, value) - - await db.commit() - await db.refresh(db_question) - return QuestionResponse.model_validate(db_question) -@router.delete("/{question_id}", response_model=dict) -async def delete_question(project_id: UUID, question_id: UUID, db: AsyncSession = Depends(get_db)): +@router.delete("/{question_id}", response_model=ApiResponse) +async def delete_question( + project_id: UUID, + question_id: UUID, + db: AsyncSession = Depends(get_db) +): """Delete question""" - result = await db.execute( - select(Question).where(Question.id == question_id, Question.project_id == project_id) - ) - question = result.scalar_one_or_none() - if not question: - raise HTTPException(status_code=404, detail="Question not found") + question = await question_crud.get(db, question_id) + if not question or question.project_id != project_id: + raise NotFoundException("Question", question_id) - await db.delete(question) - await db.commit() - return {"message": "Question deleted successfully"} + await question_crud.delete(db, question_id) + return ApiResponse.ok(message="Question deleted successfully")