文件上传页面功能基本集成完成

This commit is contained in:
2026-01-19 17:28:58 +08:00
parent 88eaa33db0
commit bfaeb24d9e
10 changed files with 16758 additions and 248 deletions

9
src/api/__init__.py Normal file
View File

@@ -0,0 +1,9 @@
"""
API 路由包
"""
from .datasets import datasets_bp
# 注册所有蓝图
def register_blueprints(app):
    """Register every blueprint exposed by this API package on ``app``.

    Args:
        app: Object providing ``register_blueprint`` (presumably the Flask
            application instance).
    """
    for blueprint in (datasets_bp,):
        app.register_blueprint(blueprint)

430
src/api/datasets.py Normal file
View File

@@ -0,0 +1,430 @@
"""
数据集管理 API 路由
"""
import io
import os
import time
import zipfile
from flask import Blueprint, request, jsonify, send_from_directory, Response
from werkzeug.utils import secure_filename
# Project root: obtained by applying dirname three times to this file's absolute path.
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Directory under the project root where uploaded dataset files are stored.
DATASET_FOLDER = os.path.join(PROJECT_ROOT, 'datasets')
# Lower-case file extensions (without the dot) accepted by allowed_file().
ALLOWED_EXTENSIONS = {'jsonl', 'json', 'xls', 'xlsx'}
# Blueprint for the dataset-management routes, mounted at /api/dataset-manage.
datasets_bp = Blueprint('datasets', __name__, url_prefix='/api/dataset-manage')
def get_db_connection():
    """Open a new MySQL connection using the settings in ``config.yaml``.

    The ``database`` section of ``<PROJECT_ROOT>/config.yaml`` is read on
    every call; nothing is cached or pooled here.

    Returns:
        A ``pymysql`` connection configured with ``DictCursor`` so that rows
        come back as dictionaries.
    """
    import pymysql
    import yaml

    config_path = os.path.join(PROJECT_ROOT, 'config.yaml')
    with open(config_path, 'r', encoding='utf-8') as config_file:
        loaded = yaml.safe_load(config_file)
    settings = loaded['database']

    connect_kwargs = {
        'host': settings['host'],
        'port': settings['port'],
        'user': settings['username'],
        'password': settings['password'],
        'database': settings['name'],
        # charset is optional in the config file.
        'charset': settings.get('charset', 'utf8mb4'),
        'cursorclass': pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**connect_kwargs)
def format_file_size(size_bytes):
    """Render a byte count as a short human-readable string.

    Values below 1024 are shown as whole bytes (``"512 B"``); larger values
    are shown with one decimal in KB, MB or GB (binary multiples of 1024).
    Anything of 1 GB or more stays in GB.

    Args:
        size_bytes: Size in bytes. ``None`` (e.g. a NULL database column) is
            treated as 0 instead of raising ``TypeError``.

    Returns:
        The formatted size string.
    """
    if size_bytes is None:
        size_bytes = 0
    if size_bytes < 1024:
        return f"{size_bytes} B"
    # KB while below 1024**2, MB while below 1024**3, otherwise GB.
    for exponent, unit in enumerate(("KB", "MB"), start=1):
        if size_bytes < 1024 ** (exponent + 1):
            return f"{size_bytes / 1024 ** exponent:.1f} {unit}"
    return f"{size_bytes / 1024 ** 3:.1f} GB"
def allowed_file(filename):
    """Tell whether ``filename`` ends in one of the accepted extensions.

    Only the text after the last dot is considered, compared
    case-insensitively against ``ALLOWED_EXTENSIONS``. A name without any
    dot is never allowed.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
def generic_get_by_id(table_name, id_val):
    """Fetch a single row from ``table_name`` by its ``id`` column.

    Args:
        table_name: Name of the table to query, optionally schema-qualified
            (``schema.table``). It is interpolated into the SQL text, so it
            must be a plain identifier.
        id_val: Value matched against the ``id`` column (bound as a query
            parameter).

    Returns:
        The row returned by ``cursor.fetchone()``, or ``None`` when no row
        matches.

    Raises:
        ValueError: If ``table_name`` is not a plain identifier.
    """
    # Table names cannot be bound as query parameters, so refuse anything
    # that could smuggle extra SQL into the statement.
    if not isinstance(table_name, str) or not all(
        part.isidentifier() for part in table_name.split('.')
    ):
        raise ValueError(f"invalid table name: {table_name!r}")

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(f"SELECT * FROM {table_name} WHERE id = %s", (id_val,))
            return cursor.fetchone()
    finally:
        # Release the connection even when the query raises.
        conn.close()
# ============ 数据集管理 CRUD ============
@datasets_bp.route('/<int:id>', methods=['GET'])
def get_dataset(id):
    """Return one dataset together with its file list.

    Args:
        id: Primary key of the ``dataset_manage`` row (from the URL).

    Returns:
        JSON ``{'code': 0, 'data': dataset}`` where ``dataset['files']`` holds
        the related ``dataset_files`` rows (newest first), each extended with
        ``file_size_formatted``; or ``{'code': 1, ...}`` when the dataset
        does not exist.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM dataset_manage WHERE id = %s", (id,))
            dataset = cursor.fetchone()
            if not dataset:
                return jsonify({'code': 1, 'message': '数据集不存在'})
            # Files attached to this dataset.
            cursor.execute(
                "SELECT id, file_name, file_path, file_size, file_type, create_time FROM dataset_files WHERE dataset_id = %s ORDER BY create_time DESC",
                (id,)
            )
            files = cursor.fetchall()
    finally:
        # Always release the connection, including when a query raises.
        conn.close()

    for f in files:
        # A NULL file_size is reported as 0 bytes rather than crashing.
        f['file_size_formatted'] = format_file_size(f['file_size'] or 0)
    dataset['files'] = files
    return jsonify({'code': 0, 'data': dataset})
@datasets_bp.route('', methods=['GET'])
def get_datasets():
    """Return every row of ``dataset_manage``, newest first.

    Returns:
        JSON ``{'code': 0, 'data': rows}``.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM dataset_manage ORDER BY create_time DESC")
            result = cursor.fetchall()
    finally:
        # Release the connection even if the query fails.
        conn.close()
    return jsonify({'code': 0, 'data': result})
@datasets_bp.route('', methods=['POST'])
def create_dataset():
    """Insert a new ``dataset_manage`` row from the JSON request body.

    Each key of the JSON object is used as a column name and each value is
    bound as a query parameter.

    Returns:
        JSON ``{'code': 0, 'message': ..., 'id': new_id}`` on success, or
        ``{'code': 1, 'message': ...}`` when the body is not a JSON object,
        a key is not a valid column identifier, or the insert fails.
    """
    data = request.json
    if not isinstance(data, dict):
        return jsonify({'code': 1, 'message': '创建失败: 请求体必须是JSON对象'})

    # Column names come from the client and cannot be bound as parameters,
    # so only plain identifiers are accepted before building the SQL text.
    bad_columns = [key for key in data if not key.isidentifier()]
    if bad_columns:
        return jsonify({'code': 1, 'message': f'创建失败: 非法字段名 {bad_columns}'})

    conn = None
    try:
        conn = get_db_connection()
        with conn.cursor() as cursor:
            columns = ', '.join(f"`{key}`" for key in data)
            placeholders = ', '.join(['%s'] * len(data))
            sql = f"INSERT INTO dataset_manage ({columns}) VALUES ({placeholders})"
            cursor.execute(sql, list(data.values()))
            conn.commit()
            new_id = cursor.lastrowid
        return jsonify({'code': 0, 'message': '创建成功', 'id': new_id})
    except Exception as e:
        # Boundary handler: report the failure to the client as JSON.
        return jsonify({'code': 1, 'message': f'创建失败: {str(e)}'})
    finally:
        # Close the connection on both the success and the error path.
        if conn is not None:
            conn.close()
@datasets_bp.route('/<int:id>', methods=['PUT'])
def update_dataset(id):
    """Update columns of one ``dataset_manage`` row from the JSON body.

    Each key of the JSON object is used as a column name and each value is
    bound as a query parameter.

    Args:
        id: Primary key of the row to update (from the URL).

    Returns:
        JSON ``{'code': 0, 'message': ...}`` after the update is committed,
        or ``{'code': 1, 'message': ...}`` when the body is not a non-empty
        JSON object or a key is not a valid column identifier.
    """
    data = request.json
    # An empty SET clause would be invalid SQL, so require at least one field.
    if not isinstance(data, dict) or not data:
        return jsonify({'code': 1, 'message': '更新失败: 请求体必须是非空的JSON对象'})

    # Column names come from the client and cannot be bound as parameters,
    # so only plain identifiers are accepted before building the SQL text.
    bad_columns = [key for key in data if not key.isidentifier()]
    if bad_columns:
        return jsonify({'code': 1, 'message': f'更新失败: 非法字段名 {bad_columns}'})

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            set_clause = ', '.join(f"`{key}` = %s" for key in data)
            sql = f"UPDATE dataset_manage SET {set_clause} WHERE id = %s"
            cursor.execute(sql, list(data.values()) + [id])
        conn.commit()
    finally:
        # Release the connection even when the statement fails.
        conn.close()
    return jsonify({'code': 0, 'message': '更新成功'})
@datasets_bp.route('/<int:id>', methods=['DELETE'])
def delete_dataset(id):
    """Delete a dataset, its file records and the files on disk.

    The database rows are deleted and committed first; the physical files
    are removed only afterwards, so a failed transaction never leaves
    records pointing at files that no longer exist.

    Args:
        id: Primary key of the ``dataset_manage`` row (from the URL).

    Returns:
        JSON ``{'code': 0, 'message': ...}``.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            # Remember the paths before the records disappear.
            cursor.execute("SELECT file_path FROM dataset_files WHERE dataset_id = %s", (id,))
            file_paths = [row.get('file_path') for row in cursor.fetchall()]
            cursor.execute("DELETE FROM dataset_files WHERE dataset_id = %s", (id,))
            cursor.execute("DELETE FROM dataset_manage WHERE id = %s", (id,))
        conn.commit()
    finally:
        conn.close()

    # Best-effort cleanup of the physical files; a failure is only reported.
    for file_path in file_paths:
        if file_path and os.path.exists(file_path):
            try:
                os.remove(file_path)
            except OSError as e:
                print(f"删除文件失败: {file_path}, {e}")
    return jsonify({'code': 0, 'message': '删除成功'})
# ============ 数据集文件上传接口 ============
@datasets_bp.route('/upload/<int:dataset_id>', methods=['POST'])
def upload_dataset_file(dataset_id):
    """Store the uploaded ``files`` for a dataset and record them in the DB.

    Each accepted file is saved in ``DATASET_FOLDER`` as
    ``<timestamp_ms>_<dataset_id>_<secured name>`` and inserted into
    ``dataset_files``. When at least one file was stored, the dataset's
    ``file_count`` and ``size`` columns are recomputed.

    Args:
        dataset_id: Primary key of the target dataset (from the URL).

    Returns:
        JSON with ``code`` 1 when the dataset does not exist or no file was
        submitted; otherwise ``code`` 0 with the uploaded files and, when
        some files were rejected, the list of errors.
    """
    dataset = generic_get_by_id('dataset_manage', dataset_id)
    if not dataset:
        return jsonify({'code': 1, 'message': '数据集不存在'})

    # Make sure the storage directory exists.
    os.makedirs(DATASET_FOLDER, exist_ok=True)

    if 'files' not in request.files:
        return jsonify({'code': 1, 'message': '没有文件被上传'})

    uploaded_files = []
    errors = []
    file_count = 0

    # One connection serves the whole request instead of one per file.
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            for file in request.files.getlist('files'):
                if file.filename == '':
                    continue
                if not (file and allowed_file(file.filename)):
                    errors.append(f"{file.filename}: 文件类型不支持")
                    continue

                # Take the extension from the submitted name: secure_filename
                # can strip a non-ASCII stem entirely ("数据.jsonl" -> "jsonl"),
                # which would leave no dot to split on.
                ext = file.filename.rsplit('.', 1)[1].lower()
                filename = secure_filename(file.filename)
                if '.' not in filename:
                    filename = f"file.{ext}"

                # Timestamp and dataset id are prefixed to avoid name clashes.
                timestamp = int(time.time() * 1000)
                new_filename = f"{timestamp}_{dataset_id}_{filename}"
                file_path = os.path.join(DATASET_FOLDER, new_filename)

                file.save(file_path)
                file_size = os.path.getsize(file_path)

                try:
                    cursor.execute(
                        "INSERT INTO dataset_files (dataset_id, file_name, file_path, file_size, file_type) VALUES (%s, %s, %s, %s, %s)",
                        (dataset_id, filename, file_path, file_size, ext)
                    )
                    conn.commit()
                except Exception:
                    # Do not leave an untracked file behind when the record
                    # could not be written; then let the error propagate.
                    try:
                        os.remove(file_path)
                    except OSError as e:
                        print(f"删除文件失败: {file_path}, {e}")
                    raise

                uploaded_files.append({
                    'name': filename,
                    'size': file_size,
                    'size_formatted': format_file_size(file_size)
                })

            # Refresh the dataset statistics only when something was stored.
            if uploaded_files:
                cursor.execute("SELECT COUNT(*) as count, SUM(file_size) as total_size FROM dataset_files WHERE dataset_id = %s", (dataset_id,))
                result = cursor.fetchone()
                file_count = result['count'] or 0
                total_size = result['total_size'] or 0
                cursor.execute(
                    "UPDATE dataset_manage SET file_count = %s, size = %s WHERE id = %s",
                    (file_count, format_file_size(total_size), dataset_id)
                )
                conn.commit()
    finally:
        conn.close()

    # Every submitted part had an empty filename: nothing was uploaded.
    if not uploaded_files and not errors:
        return jsonify({'code': 1, 'message': '没有文件被上传'})

    if errors:
        return jsonify({
            'code': 0,
            'message': f'部分文件上传成功,{len(errors)}个文件失败',
            'data': {
                'uploaded': uploaded_files,
                'errors': errors
            }
        })
    return jsonify({
        'code': 0,
        'message': f'成功上传 {len(uploaded_files)} 个文件',
        'data': {
            'uploaded': uploaded_files,
            'file_count': file_count
        }
    })
@datasets_bp.route('/<int:dataset_id>/files', methods=['GET'])
def get_dataset_files(dataset_id):
    """List the files recorded for a dataset, newest first.

    Args:
        dataset_id: Primary key of the dataset (from the URL).

    Returns:
        JSON ``{'code': 0, 'data': files}`` where each row also carries
        ``file_size_formatted``.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                "SELECT id, file_name, file_path, file_size, file_type, create_time FROM dataset_files WHERE dataset_id = %s ORDER BY create_time DESC",
                (dataset_id,)
            )
            files = cursor.fetchall()
    finally:
        # Release the connection even if the query fails.
        conn.close()

    for f in files:
        # A NULL file_size is reported as 0 bytes rather than crashing.
        f['file_size_formatted'] = format_file_size(f['file_size'] or 0)
    return jsonify({'code': 0, 'data': files})
@datasets_bp.route('/files/<int:file_id>', methods=['DELETE'])
def delete_dataset_file(file_id):
    """Delete one dataset file and refresh the dataset's statistics.

    The record is deleted and the dataset's ``file_count`` / ``size`` are
    recomputed in one committed transaction; the physical file is removed
    only after that commit succeeds.

    Args:
        file_id: Primary key of the ``dataset_files`` row (from the URL).

    Returns:
        JSON ``{'code': 0, ...}`` on success or ``{'code': 1, ...}`` when the
        file record does not exist.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT dataset_id, file_path FROM dataset_files WHERE id = %s", (file_id,))
            file_info = cursor.fetchone()
            if not file_info:
                return jsonify({'code': 1, 'message': '文件不存在'})

            cursor.execute("DELETE FROM dataset_files WHERE id = %s", (file_id,))

            # Recompute the owning dataset's file count and total size.
            dataset_id = file_info['dataset_id']
            cursor.execute("SELECT COUNT(*) as count, SUM(file_size) as total_size FROM dataset_files WHERE dataset_id = %s", (dataset_id,))
            result = cursor.fetchone()
            file_count = result['count'] or 0
            total_size = result['total_size'] or 0
            cursor.execute(
                "UPDATE dataset_manage SET file_count = %s, size = %s WHERE id = %s",
                (file_count, format_file_size(total_size), dataset_id)
            )
        conn.commit()
    finally:
        conn.close()

    # Best-effort removal of the physical file; a failure is only reported.
    file_path = file_info['file_path']
    if file_path and os.path.exists(file_path):
        try:
            os.remove(file_path)
        except OSError as e:
            print(f"删除文件失败: {file_path}, {e}")
    return jsonify({'code': 0, 'message': '删除成功'})
# ============ 文件下载接口 ============
@datasets_bp.route('/download/<int:dataset_id>/<filename>', methods=['GET'])
def download_dataset_file(dataset_id, filename):
    """Send one stored file from ``DATASET_FOLDER`` as an attachment.

    Args:
        dataset_id: Dataset id from the URL.
        filename: Name of the file inside ``DATASET_FOLDER``.
    """
    # NOTE(review): dataset_id is not used to check that the file belongs to
    # that dataset -- confirm whether this is intended.
    storage_dir = DATASET_FOLDER
    return send_from_directory(storage_dir, filename, as_attachment=True)
@datasets_bp.route('/download/<int:dataset_id>', methods=['GET'])
def download_dataset_all(dataset_id):
    """Download every file of a dataset packed into one ZIP archive.

    Files are stored in the archive under their recorded ``file_name``;
    records whose file is missing on disk are skipped.

    Args:
        dataset_id: Primary key of the dataset (from the URL).

    Returns:
        A ZIP ``Response`` sent as an attachment, or JSON
        ``{'code': 1, ...}`` when the dataset does not exist or has no files.
    """
    from urllib.parse import quote

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT name FROM dataset_manage WHERE id = %s", (dataset_id,))
            dataset = cursor.fetchone()
            if not dataset:
                return jsonify({'code': 1, 'message': '数据集不存在'})
            cursor.execute(
                "SELECT id, file_name, file_path FROM dataset_files WHERE dataset_id = %s ORDER BY create_time DESC",
                (dataset_id,)
            )
            files = cursor.fetchall()
    finally:
        # Release the connection even if a query fails.
        conn.close()

    if not files:
        return jsonify({'code': 1, 'message': '数据集没有文件'})

    memory_file = io.BytesIO()
    used_names = set()
    with zipfile.ZipFile(memory_file, 'w', zipfile.ZIP_DEFLATED) as zf:
        for f in files:
            file_path = f.get('file_path')
            if not (file_path and os.path.exists(file_path)):
                continue
            # Two uploads may share a file name; prefix the record id so the
            # archive never contains duplicate member names.
            arcname = f['file_name']
            if arcname in used_names:
                arcname = f"{f['id']}_{arcname}"
            used_names.add(arcname)
            zf.write(file_path, arcname)

    zip_name = f"{dataset['name'] or 'dataset'}_{dataset_id}.zip"
    # Header values must be ASCII-safe: send an ASCII fallback name plus the
    # percent-encoded UTF-8 name (filename*), so dataset names containing
    # non-ASCII characters still download correctly.
    fallback_name = f"dataset_{dataset_id}.zip"
    disposition = (
        f'attachment; filename="{fallback_name}"; '
        f"filename*=UTF-8''{quote(zip_name, safe='')}"
    )
    # Pass the bytes rather than the BytesIO object, which Response would
    # otherwise iterate line by line.
    return Response(
        memory_file.getvalue(),
        mimetype='application/zip',
        headers={'Content-Disposition': disposition}
    )
# ============ 文件预览接口 ============
@datasets_bp.route('/preview/<int:file_id>', methods=['GET'])
def preview_dataset_file(file_id):
    """Return the beginning of a dataset file as text for preview.

    The file is opened as UTF-8 text and at most 102400 characters are read.

    Args:
        file_id: Primary key of the ``dataset_files`` row (from the URL).

    Returns:
        JSON ``{'code': 0, 'data': {'file_name': ..., 'content': ...}}``, or
        ``{'code': 1, ...}`` when the record or the file is missing or the
        file cannot be read (for example, it is not valid UTF-8 text).
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT id, file_name, file_path, file_type FROM dataset_files WHERE id = %s", (file_id,))
            file_info = cursor.fetchone()
    finally:
        # The connection is only needed for the lookup; release it before
        # touching the file system, and also when the query raises.
        conn.close()

    if not file_info:
        return jsonify({'code': 1, 'message': '文件不存在'})

    file_path = file_info['file_path']
    if not file_path or not os.path.exists(file_path):
        return jsonify({'code': 1, 'message': '文件不存在'})

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read(102400)  # preview limit
    except Exception as e:
        # Boundary handler: unreadable files are reported to the client.
        return jsonify({'code': 1, 'message': f'读取文件失败: {str(e)}'})

    return jsonify({
        'code': 0,
        'data': {
            'file_name': file_info['file_name'],
            'content': content
        }
    })