first-update

This commit is contained in:
2026-03-17 14:36:31 +08:00
parent 72f08aee7c
commit 4eddf05e79
516 changed files with 115270 additions and 1 deletions

View File

@@ -0,0 +1,126 @@
"""
Datasets API Router
"""
from typing import List, Optional
from uuid import UUID
from pydantic import BaseModel
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select, func
from app.core.database import get_db
from app.models.models import Dataset, Question
from app.schemas.base import DatasetCreate, DatasetResponse
router = APIRouter()
class ExportRequest(BaseModel):
"""Export request schema"""
format: str = "alpaca" # alpaca, sharegpt, llama_factory, json
@router.get("/", response_model=dict)
async def list_datasets(project_id: UUID, db: AsyncSession = Depends(get_db)):
"""List datasets for a project"""
result = await db.execute(
select(Dataset).where(Dataset.project_id == project_id).order_by(Dataset.created_at.desc())
)
datasets = result.scalars().all()
# Get question count for each dataset
dataset_list = []
for dataset in datasets:
dataset_data = DatasetResponse.model_validate(dataset)
# TODO: Count questions in dataset
dataset_data.question_count = 0
dataset_list.append(dataset_data)
return {"datasets": dataset_list}
@router.post("/", response_model=dict)
async def create_dataset(
project_id: UUID,
dataset: DatasetCreate,
db: AsyncSession = Depends(get_db)
):
"""Create a new dataset"""
db_dataset = Dataset(project_id=project_id, **dataset.model_dump())
db.add(db_dataset)
await db.commit()
await db.refresh(db_dataset)
return {"id": str(db_dataset.id)}
@router.get("/{dataset_id}", response_model=dict)
async def get_dataset(
project_id: UUID,
dataset_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Get dataset by ID"""
result = await db.execute(
select(Dataset).where(Dataset.id == dataset_id, Dataset.project_id == project_id)
)
dataset = result.scalar_one_or_none()
if not dataset:
raise HTTPException(status_code=404, detail="Dataset not found")
return DatasetResponse.model_validate(dataset)
@router.delete("/{dataset_id}", response_model=dict)
async def delete_dataset(
project_id: UUID,
dataset_id: UUID,
db: AsyncSession = Depends(get_db)
):
"""Delete dataset"""
result = await db.execute(
select(Dataset).where(Dataset.id == dataset_id, Dataset.project_id == project_id)
)
dataset = result.scalar_one_or_none()
if not dataset:
raise HTTPException(status_code=404, detail="Dataset not found")
await db.delete(dataset)
await db.commit()
return {"message": "Dataset deleted successfully"}
@router.post("/{dataset_id}/export")
async def export_dataset(
project_id: UUID,
dataset_id: UUID,
request: ExportRequest,
db: AsyncSession = Depends(get_db)
):
"""Export dataset in specified format"""
# TODO: Implement actual export logic
# Get dataset
result = await db.execute(
select(Dataset).where(Dataset.id == dataset_id, Dataset.project_id == project_id)
)
dataset = result.scalar_one_or_none()
if not dataset:
raise HTTPException(status_code=404, detail="Dataset not found")
# Get questions for this dataset (placeholder)
# In real implementation, would link questions to datasets
# Return sample data based on format
sample_data = [
{
"instruction": "这是一个示例指令",
"input": "",
"output": "这是一个示例输出"
}
]
if request.format == "json":
return sample_data
return {"data": sample_data, "format": request.format}