新增了数据集上传界面
This commit is contained in:
129
src/README.md
Normal file
129
src/README.md
Normal file
@@ -0,0 +1,129 @@
|
||||
# FastAPI 服务器
|
||||
|
||||
## 功能特性
|
||||
|
||||
这个 FastAPI 服务器为大模型微调平台提供了 RESTful API 接口。
|
||||
|
||||
## API 端点
|
||||
|
||||
### 基础信息
|
||||
- `GET /` - 根路径,返回欢迎信息
|
||||
- `GET /api/health` - 健康检查
|
||||
|
||||
### 用户认证
|
||||
- `POST /api/login` - 用户登录
|
||||
```json
|
||||
{
|
||||
"username": "admin",
|
||||
"password": "your_password"
|
||||
}
|
||||
```
|
||||
|
||||
### 数据集管理
|
||||
- `GET /api/datasets` - 获取数据集列表
|
||||
- `POST /api/datasets` - 创建新数据集
|
||||
```json
|
||||
{
|
||||
"name": "新数据集名称",
|
||||
"description": "数据集描述",
|
||||
"size": "数据集大小"
|
||||
}
|
||||
```
|
||||
- `POST /api/datasets/upload` - 上传数据集文件(支持 JSON 和 JSONL 格式)
|
||||
```bash
|
||||
curl -X POST "http://10.10.10.77:8001/api/datasets/upload" \
|
||||
-F "file=@dataset.json" \
|
||||
-F "description=数据集描述"
|
||||
```
|
||||
**支持的文件格式**: .json, .jsonl
|
||||
**文件大小限制**: 100MB
|
||||
- `GET /api/datasets/files` - 获取 `data` 目录中已保存的文件列表
|
||||
- `DELETE /api/datasets/{dataset_id}` - 删除数据集
|
||||
|
||||
### 模型管理
|
||||
- `GET /api/models` - 获取模型列表
|
||||
- `POST /api/models/config` - 配置模型参数
|
||||
```json
|
||||
{
|
||||
"model_name": "GPT-4",
|
||||
"learning_rate": 0.001,
|
||||
"batch_size": 32,
|
||||
"epochs": 100
|
||||
}
|
||||
```
|
||||
|
||||
### 训练管理
|
||||
- `GET /api/training/status` - 获取训练状态
|
||||
- `POST /api/training/start` - 开始训练任务
|
||||
- `POST /api/training/stop/{task_id}` - 停止训练任务
|
||||
- `GET /api/model/{model_id}/metrics` - 获取模型指标
|
||||
|
||||
### 系统监控
|
||||
- `GET /api/system/stats` - 获取系统统计信息
|
||||
|
||||
## 启动服务器
|
||||
|
||||
### 方法 1: 使用启动脚本(推荐)
|
||||
```bash
|
||||
cd src
|
||||
./run.sh
|
||||
```
|
||||
|
||||
### 方法 2: 手动启动
|
||||
```bash
|
||||
# 安装依赖
|
||||
pip3 install -r requirements.txt
|
||||
|
||||
# 启动服务器
|
||||
uvicorn main:app --host 0.0.0.0 --port 8001 --reload
|
||||
```
|
||||
|
||||
## 访问地址
|
||||
|
||||
- **服务器**: http://10.10.10.77:8001
|
||||
- **API 文档**: http://10.10.10.77:8001/docs
|
||||
- **替代文档**: http://10.10.10.77:8001/redoc
|
||||
|
||||
## 示例请求
|
||||
|
||||
### 登录
|
||||
```bash
|
||||
curl -X POST "http://10.10.10.77:8001/api/login" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"username": "admin", "password": "123456"}'
|
||||
```
|
||||
|
||||
### 获取数据集列表
|
||||
```bash
|
||||
curl -X GET "http://10.10.10.77:8001/api/datasets"
|
||||
```
|
||||
|
||||
### 上传数据集文件
|
||||
```bash
|
||||
curl -X POST "http://10.10.10.77:8001/api/datasets/upload" \
|
||||
-F "file=@dataset.json" \
|
||||
-F "description=数据集描述"
|
||||
```
|
||||
|
||||
### 获取 data 目录文件列表
|
||||
```bash
|
||||
curl -X GET "http://10.10.10.77:8001/api/datasets/files"
|
||||
```
|
||||
|
||||
### 获取系统统计
|
||||
```bash
|
||||
curl -X GET "http://10.10.10.77:8001/api/system/stats"
|
||||
```
|
||||
|
||||
## 依赖
|
||||
|
||||
- Python 3.8+(FastAPI 0.104.1 与 Uvicorn 0.24.0 不再支持 Python 3.7)
|
||||
- FastAPI 0.104.1
|
||||
- Uvicorn 0.24.0
|
||||
- Pydantic 2.5.0
|
||||
|
||||
## 注意事项
|
||||
|
||||
- 服务器默认运行在端口 8001
|
||||
- 使用 `--reload` 参数启用热重载
|
||||
- 除根路径 `/` 和 HTTP 错误响应(`{"detail": ...}`)外,API 响应都遵循统一格式:`code`、`message`、`data`
|
||||
381
src/main.py
Normal file
381
src/main.py
Normal file
@@ -0,0 +1,381 @@
|
||||
from fastapi import FastAPI, File, UploadFile, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional
|
||||
import uvicorn
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
|
||||
# Application object that every route below is registered on.
app = FastAPI(
    title="大模型微调平台 API",
    version="1.0.0",
)
|
||||
|
||||
|
||||
# 请求模型
|
||||
class UserModel(BaseModel):
    """Request body accepted by the login endpoint."""

    # Account name sent by the client.
    username: str
    # Password sent by the client.
    password: str
|
||||
|
||||
|
||||
class DatasetModel(BaseModel):
    """Request body accepted by the create-dataset endpoint."""

    # Display name of the dataset.
    name: str
    # Free-text description; may be omitted.
    description: Optional[str] = None
    # Size as a string; required by the schema.
    size: str
|
||||
|
||||
|
||||
class ModelConfigModel(BaseModel):
    """Request body accepted by the model-configuration endpoint."""

    # Pydantic v2 treats the "model_" prefix as a protected namespace and
    # warns about the `model_name` field below.  Clearing the protected
    # namespaces keeps the public field name (and therefore the JSON
    # contract) unchanged while silencing that warning.
    model_config = {"protected_namespaces": ()}

    # Name of the model to configure.
    model_name: str
    # Training hyper-parameters, echoed back by the endpoint.
    learning_rate: float
    batch_size: int
    epochs: int
|
||||
|
||||
|
||||
# 响应模型
|
||||
class ResponseModel(BaseModel):
    """Envelope returned by the API endpoints: code, message and optional data."""

    # Status code carried inside the response body.
    code: int
    # Human-readable result message.
    message: str
    # Endpoint-specific payload; absent when there is nothing to return.
    data: Optional[dict] = None
|
||||
|
||||
|
||||
# 模拟数据存储
|
||||
# In-memory dataset records; mutated by the create/upload/delete endpoints.
datasets = [
    {
        "id": 1,
        "name": "中文对话数据集",
        "size": "1.2GB",
        "status": "已处理",
    },
    {
        "id": 2,
        "name": "英文文本分类数据集",
        "size": "856MB",
        "status": "处理中",
    },
    {
        "id": 3,
        "name": "图像识别数据集",
        "size": "2.5GB",
        "status": "待处理",
    },
]

# In-memory model records returned by the model-list endpoint.
models = [
    {
        "id": 1,
        "name": "GPT-4",
        "status": "训练中",
        "accuracy": "92%",
    },
    {
        "id": 2,
        "name": "BERT",
        "status": "已完成",
        "accuracy": "89%",
    },
    {
        "id": 3,
        "name": "LLaMA",
        "status": "已完成",
        "accuracy": "95%",
    },
]
|
||||
|
||||
|
||||
@app.get("/")
async def root():
    """Return the welcome message served at the root path."""
    welcome = {"message": "大模型微调平台 API 服务"}
    return welcome
|
||||
|
||||
|
||||
@app.get("/api/health")
async def health_check():
    """Health check: always reports a healthy status."""
    payload = {"status": "healthy"}
    return ResponseModel(code=200, message="服务运行正常", data=payload)
|
||||
|
||||
|
||||
@app.post("/api/login", response_model=ResponseModel)
async def login(user: UserModel):
    """Mock user login.

    Succeeds when the username is ``"admin"`` and the password is non-empty;
    the password value itself is not compared against anything.  A failed
    login is reported through ``code=401`` in the response body, not by
    raising an HTTPException.
    """
    accepted = user.username == "admin" and bool(user.password)
    if not accepted:
        return ResponseModel(code=401, message="用户名或密码错误")

    session = {"token": "mock_token_12345", "user": user.username}
    return ResponseModel(code=200, message="登录成功", data=session)
|
||||
|
||||
|
||||
@app.get("/api/datasets", response_model=ResponseModel)
async def get_datasets():
    """Return every record currently held in the in-memory dataset list."""
    payload = {"datasets": datasets}
    return ResponseModel(code=200, message="获取成功", data=payload)
|
||||
|
||||
|
||||
@app.post("/api/datasets", response_model=ResponseModel)
async def create_dataset(dataset: DatasetModel):
    """Create a dataset record and append it to the in-memory list.

    Args:
        dataset: Name and optional description of the new dataset.  The
            ``size`` field of the request is not read; the record is always
            created with size ``"0MB"``.

    Returns:
        ResponseModel with ``code=201`` and the new record under ``dataset``.
    """
    # Derive the id from the largest existing id rather than the list length:
    # after a deletion, ``len(datasets) + 1`` would reuse an id that is still
    # held by another record.
    next_id = max((entry["id"] for entry in datasets), default=0) + 1

    new_dataset = {
        "id": next_id,
        "name": dataset.name,
        "description": dataset.description,
        "size": "0MB",
        "status": "待处理",
    }
    datasets.append(new_dataset)
    return ResponseModel(code=201, message="创建成功", data={"dataset": new_dataset})
|
||||
|
||||
|
||||
@app.post("/api/datasets/upload", response_model=ResponseModel)
async def upload_dataset(file: UploadFile = File(...), description: Optional[str] = None):
    """Upload a dataset file (JSON or JSONL only), validate it and store it.

    The whole upload is read into memory, checked against the 100MB limit,
    decoded as UTF-8 and validated: a ``.json`` file must parse as a single
    JSON document, and every non-blank line of a ``.jsonl`` file must parse
    as JSON.  The raw bytes are then written to the ``data`` directory that
    sits one level above this module's directory, and a record is appended
    to the in-memory ``datasets`` list.

    Args:
        file: The uploaded file; its extension selects the validation mode.
        description: Optional description stored on the record.
            NOTE(review): this parameter is declared without ``Form()``, so
            FastAPI presumably reads it from the query string rather than
            from a multipart form field - confirm against the clients.

    Returns:
        ResponseModel with the new dataset record and a ``file_info`` summary.

    Raises:
        HTTPException: 400 for an unsupported extension, an oversize file,
            invalid JSON/JSONL content or non-UTF-8 bytes; 500 for any other
            failure while processing or saving the file.
    """
    allowed_extensions = ['.json', '.jsonl']

    # The filename comes from the client: tolerate a missing name and drop
    # any directory components so it can never steer the save path.
    client_name = file.filename or ""
    safe_name = os.path.basename(client_name.replace("\\", "/"))
    file_extension = os.path.splitext(safe_name)[1].lower()

    if file_extension not in allowed_extensions:
        raise HTTPException(
            status_code=400,
            detail=f"不支持的文件类型。只能上传 {', '.join(allowed_extensions)} 格式的文件"
        )

    # Enforce the size limit (100MB).
    max_size = 100 * 1024 * 1024
    contents = await file.read()
    file_size = len(contents)

    if file_size > max_size:
        raise HTTPException(
            status_code=400,
            detail=f"文件大小超过限制。最大支持 100MB,当前文件大小: {file_size / (1024*1024):.2f}MB"
        )

    try:
        # Decode once and reuse the text for validation and line counting.
        text = contents.decode('utf-8')
        lines = text.strip().split('\n')

        if file_extension == '.json':
            json.loads(text)
        elif file_extension == '.jsonl':
            for i, line in enumerate(lines):
                if not line.strip():
                    continue
                try:
                    json.loads(line)
                except json.JSONDecodeError as e:
                    raise HTTPException(
                        status_code=400,
                        detail=f"JSONL 文件格式错误:第 {i+1} 行不是有效的 JSON 格式"
                    ) from e

        # Same formatting helper the file-listing endpoint uses.
        size_str = format_size(file_size)

        # Number of lines in the stripped text (0 for an empty upload).
        lines_count = len(lines) if contents else 0

        # Save into the data directory one level above this module's directory.
        data_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'data')
        os.makedirs(data_dir, exist_ok=True)

        # Build the stored name from a sanitised base name plus a timestamp.
        raw_base = os.path.splitext(safe_name)[0]
        base_name = re.sub(r'[^\w.\-]', '_', raw_base).lstrip('.') or "dataset"
        timestamp = int(time.time())
        saved_filename = f"{base_name}_{timestamp}{file_extension}"
        saved_path = os.path.join(data_dir, saved_filename)

        with open(saved_path, 'wb') as f:
            f.write(contents)

        # Use max(id) + 1 so an id is never reused after a deletion.
        next_id = max((entry["id"] for entry in datasets), default=0) + 1

        new_dataset = {
            "id": next_id,
            "name": file.filename,
            "description": description or f"上传的数据集文件,包含 {lines_count} 行数据",
            "size": size_str,
            "status": "已处理",
            "upload_time": "刚刚",
            "file_extension": file_extension,
            "records_count": lines_count,
            "saved_path": saved_path
        }
        datasets.append(new_dataset)

        return ResponseModel(
            code=200,
            message="文件上传成功",
            data={
                "dataset": new_dataset,
                "file_info": {
                    "filename": file.filename,
                    "size": size_str,
                    "extension": file_extension,
                    "records": lines_count
                }
            }
        )

    except HTTPException:
        # Let deliberate 4xx responses through; without this the generic
        # handler below would turn the JSONL 400 into a 500.
        raise
    except json.JSONDecodeError:
        raise HTTPException(
            status_code=400,
            detail="JSON 文件格式错误:文件内容不是有效的 JSON 格式"
        )
    except UnicodeDecodeError:
        raise HTTPException(
            status_code=400,
            detail="文件编码错误:请确保文件使用 UTF-8 编码"
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"文件处理错误:{str(e)}"
        )
|
||||
|
||||
|
||||
@app.get("/api/datasets/files", response_model=ResponseModel)
async def list_dataset_files():
    """List the regular files stored in the data directory.

    The data directory is the ``data`` folder one level above this module's
    directory.  If it does not exist an empty listing is returned.  Entries
    are ordered by modification time, newest first.

    Raises:
        HTTPException: 500 if anything fails while reading the directory.
    """
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        data_dir = os.path.join(os.path.dirname(module_dir), 'data')

        if not os.path.exists(data_dir):
            empty_listing = {"files": [], "total": 0, "directory": data_dir}
            return ResponseModel(code=200, message="获取成功", data=empty_listing)

        entries = []
        for name in os.listdir(data_dir):
            full_path = os.path.join(data_dir, name)
            if not os.path.isfile(full_path):
                continue  # only regular files are listed
            info = os.stat(full_path)
            modified = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(info.st_mtime))
            entries.append({
                "filename": name,
                "size": info.st_size,
                "size_human": format_size(info.st_size),
                "modified_time": modified,
                "path": full_path
            })

        # The timestamp format sorts chronologically as plain text.
        entries.sort(key=lambda item: item["modified_time"], reverse=True)

        listing = {
            "files": entries,
            "total": len(entries),
            "directory": data_dir
        }
        return ResponseModel(code=200, message="获取成功", data=listing)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"获取文件列表失败:{str(e)}"
        )
|
||||
|
||||
|
||||
def format_size(size_bytes):
    """Render a byte count as a short string.

    Values below 1024 are shown as whole bytes (``"512B"``), values below
    1024*1024 as kilobytes with two decimals (``"1.50KB"``), and everything
    else as megabytes with two decimals (``"5.00MB"``).
    """
    kib = 1024
    mib = kib * kib

    if size_bytes >= mib:
        return f"{size_bytes / mib:.2f}MB"
    if size_bytes >= kib:
        return f"{size_bytes / kib:.2f}KB"
    return f"{size_bytes}B"
|
||||
|
||||
|
||||
@app.delete("/api/datasets/{dataset_id}", response_model=ResponseModel)
async def delete_dataset(dataset_id: int):
    """Remove the first record with the given id from the in-memory list.

    Only the list entry is removed; this function does not touch any file
    on disk.

    Raises:
        HTTPException: 404 when no record has the requested id.
    """
    position = next(
        (idx for idx, entry in enumerate(datasets) if entry["id"] == dataset_id),
        None,
    )
    if position is None:
        raise HTTPException(status_code=404, detail="数据集不存在")

    removed = datasets.pop(position)
    return ResponseModel(
        code=200,
        message="删除成功",
        data={"deleted_dataset": removed},
    )
|
||||
|
||||
|
||||
@app.get("/api/models", response_model=ResponseModel)
async def get_models():
    """Return every record in the in-memory model list."""
    payload = {"models": models}
    return ResponseModel(code=200, message="获取成功", data=payload)
|
||||
|
||||
|
||||
@app.post("/api/models/config", response_model=ResponseModel)
async def config_model(config: ModelConfigModel):
    """Echo the submitted model configuration back with a configured status.

    Nothing is stored: the response simply repeats the request fields and
    adds a ``status`` entry.
    """
    echoed = {
        "model_name": config.model_name,
        "learning_rate": config.learning_rate,
        "batch_size": config.batch_size,
        "epochs": config.epochs,
        "status": "已配置",
    }
    return ResponseModel(code=200, message="配置成功", data=echoed)
|
||||
|
||||
|
||||
@app.get("/api/training/status")
async def get_training_status():
    """Return the training status; all values are hard-coded."""
    snapshot = {
        "current_task": "GPT-4微调",
        "progress": 75,
        "eta": "2小时",
        "loss": 0.23,
        "accuracy": 0.89,
    }
    return ResponseModel(code=200, message="获取成功", data=snapshot)
|
||||
|
||||
|
||||
@app.get("/api/system/stats")
async def get_system_stats():
    """Return system statistics.

    CPU, memory and GPU usage are random integers drawn on every call; the
    task counters are fixed values.
    """
    import random

    # Drawn in this order: CPU, memory, GPU.
    cpu = random.randint(30, 80)
    memory = random.randint(40, 70)
    gpu = random.randint(50, 90)

    stats = {
        "cpu_usage": cpu,
        "memory_usage": memory,
        "gpu_usage": gpu,
        "active_tasks": 5,
        "completed_tasks": 158,
    }
    return ResponseModel(code=200, message="获取成功", data=stats)
|
||||
|
||||
|
||||
@app.post("/api/training/start")
async def start_training(model_name: str, dataset_id: int):
    """Report a training task as started.

    No work is launched here: the response carries a random four-digit task
    id together with the submitted model name and dataset id.

    Args:
        model_name: Name of the model to train.
        dataset_id: Id of the dataset to train on.

    Returns:
        ResponseModel whose data holds ``task_id``, ``model_name``,
        ``dataset_id`` and ``status``.
    """
    # `random` is not imported at module level (only locally inside
    # get_system_stats), so import it here to avoid a NameError.
    import random

    task = {
        "task_id": random.randint(1000, 9999),
        "model_name": model_name,
        "dataset_id": dataset_id,
        "status": "running",
    }
    return ResponseModel(code=200, message="训练任务已启动", data=task)
|
||||
|
||||
|
||||
@app.post("/api/training/stop/{task_id}")
async def stop_training(task_id: int):
    """Report the given training task as stopped; no state is changed."""
    result = {"task_id": task_id, "status": "stopped"}
    return ResponseModel(
        code=200,
        message=f"训练任务 {task_id} 已停止",
        data=result,
    )
|
||||
|
||||
|
||||
@app.get("/api/model/{model_id}/metrics")
async def get_model_metrics(model_id: int):
    """Return metrics for a model; every metric is randomly generated.

    Args:
        model_id: Id echoed back in the response; it is not looked up in
            the model list.

    Returns:
        ResponseModel whose data holds the id plus random accuracy,
        precision, recall, F1 score, training time and parameter count.
    """
    # `random` is not imported at module level (only locally inside
    # get_system_stats), so import it here to avoid a NameError.
    import random

    metrics = {
        "model_id": model_id,
        "accuracy": round(random.uniform(0.85, 0.98), 3),
        "precision": round(random.uniform(0.80, 0.95), 3),
        "recall": round(random.uniform(0.82, 0.96), 3),
        "f1_score": round(random.uniform(0.83, 0.97), 3),
        "training_time": f"{random.randint(2, 24)}小时",
        "parameters": random.randint(1000000, 100000000),
    }
    return ResponseModel(code=200, message="获取成功", data=metrics)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When executed as a script, serve the app on all interfaces, port 8001.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8001,
    )
|
||||
4
src/requirements.txt
Normal file
4
src/requirements.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
fastapi==0.104.1
|
||||
uvicorn[standard]==0.24.0
|
||||
pydantic==2.5.0
|
||||
python-multipart==0.0.6
|
||||
43
src/run.sh
Executable file
43
src/run.sh
Executable file
@@ -0,0 +1,43 @@
|
||||
#!/bin/bash
# Install the Python dependencies and start the FastAPI server with uvicorn.

echo "🚀 启动 FastAPI 服务器..."

# Run from the directory that contains this script so that requirements.txt
# and main.py resolve regardless of where the script was invoked from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if ! cd "$SCRIPT_DIR"; then
    echo "❌ 错误: 无法进入目录 $SCRIPT_DIR"
    exit 1
fi

echo "📂 当前目录: $SCRIPT_DIR"

# Check that Python is installed.
if ! command -v python3 &> /dev/null; then
    echo "❌ 错误: Python3 未安装"
    echo "请先安装 Python3"
    exit 1
fi

# Check pip through the same interpreter that will run uvicorn, so the
# packages are installed where the server can import them.
if ! python3 -m pip --version &> /dev/null; then
    echo "❌ 错误: pip3 未安装"
    echo "请先安装 pip3"
    exit 1
fi

# Install dependencies.
echo "📦 安装依赖包..."
if ! python3 -m pip install -r requirements.txt; then
    echo "❌ 依赖安装失败"
    exit 1
fi

echo ""
echo "🌐 服务器地址: http://localhost:8001"
echo "📚 API 文档: http://localhost:8001/docs"
echo "🔍 替代文档: http://localhost:8001/redoc"
echo ""
echo "按 Ctrl+C 停止服务器"
echo ""

# Start the server; exec replaces the shell so signals reach uvicorn directly.
exec python3 -m uvicorn main:app --host 0.0.0.0 --port 8001 --reload
|
||||
47
src/test_api.sh
Executable file
47
src/test_api.sh
Executable file
@@ -0,0 +1,47 @@
|
||||
#!/bin/bash
# Smoke test for the FastAPI server: calls a handful of endpoints and
# pretty-prints each JSON response.  Exits non-zero if any request fails.

# Make a pipeline fail when curl fails, not only when json.tool fails.
set -o pipefail

echo "🧪 测试 FastAPI 服务器"
echo "=================================="
echo ""

# Defaults to the local server; can be overridden from the environment.
BASE_URL="${BASE_URL:-http://localhost:8001}"
FAILED=0

# run_test TITLE CURL_ARGS...
# Prints TITLE, runs curl with the remaining arguments, pretty-prints the
# JSON body and records a failure if the request or the parsing fails.
run_test() {
    local title="$1"
    shift
    echo "$title"
    if ! curl -sS "$@" | python3 -m json.tool; then
        echo "❌ 请求失败"
        FAILED=$((FAILED + 1))
    fi
    echo ""
}

# Test 1: root path
run_test "1. 测试根路径..." "$BASE_URL/"

# Test 2: health check
run_test "2. 测试健康检查..." "$BASE_URL/api/health"

# Test 3: user login
run_test "3. 测试用户登录..." -X POST "$BASE_URL/api/login" \
    -H "Content-Type: application/json" \
    -d '{"username": "admin", "password": "123456"}'

# Test 4: dataset list
run_test "4. 测试获取数据集列表..." "$BASE_URL/api/datasets"

# Test 5: model list
run_test "5. 测试获取模型列表..." "$BASE_URL/api/models"

# Test 6: system statistics
run_test "6. 测试系统统计..." "$BASE_URL/api/system/stats"

# Test 7: training status
run_test "7. 测试训练状态..." "$BASE_URL/api/training/status"

echo "=================================="
if [ "$FAILED" -eq 0 ]; then
    echo "✅ 所有测试完成!"
else
    echo "❌ 有 $FAILED 个测试失败"
    exit 1
fi
|
||||
Reference in New Issue
Block a user