文件上传页面功能基本集成完成
This commit is contained in:
9
src/api/__init__.py
Normal file
9
src/api/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""
|
||||
API 路由包
|
||||
"""
|
||||
from .datasets import datasets_bp
|
||||
|
||||
# 注册所有蓝图
|
||||
def register_blueprints(app):
    """Attach every API blueprint exposed by this package to ``app``.

    Args:
        app: Object providing ``register_blueprint`` (presumably the Flask
            application instance — confirm against the caller).
    """
    for blueprint in (datasets_bp,):
        app.register_blueprint(blueprint)
|
||||
430
src/api/datasets.py
Normal file
430
src/api/datasets.py
Normal file
@@ -0,0 +1,430 @@
|
||||
"""
|
||||
数据集管理 API 路由
|
||||
"""
|
||||
import io
|
||||
import os
|
||||
import time
|
||||
import zipfile
|
||||
from flask import Blueprint, request, jsonify, send_from_directory, Response
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
# Project root: the directory two levels above the one containing this file.
PROJECT_ROOT = os.path.dirname(
    os.path.dirname(
        os.path.dirname(os.path.abspath(__file__))
    )
)
# Uploaded dataset files are written directly into this folder.
DATASET_FOLDER = os.path.join(PROJECT_ROOT, 'datasets')
# Lower-case file extensions accepted by the upload endpoint.
ALLOWED_EXTENSIONS = {'json', 'jsonl', 'xlsx', 'xls'}

# Blueprint that groups every dataset-management route under one URL prefix.
datasets_bp = Blueprint(
    'datasets',
    __name__,
    url_prefix='/api/dataset-manage',
)
|
||||
|
||||
|
||||
def get_db_connection():
    """Open a new MySQL connection described by ``config.yaml``.

    The ``database`` section of ``<PROJECT_ROOT>/config.yaml`` is re-read on
    every call. The caller owns the returned connection and must close it.

    Returns:
        A ``pymysql`` connection whose cursors are ``DictCursor`` instances.
    """
    import pymysql
    import yaml

    config_file = os.path.join(PROJECT_ROOT, 'config.yaml')
    with open(config_file, 'r', encoding='utf-8') as handle:
        settings = yaml.safe_load(handle)

    db = settings['database']
    connect_kwargs = {
        'host': db['host'],
        'port': db['port'],
        'user': db['username'],
        'password': db['password'],
        'database': db['name'],
        # The charset is optional in the config file.
        'charset': db.get('charset', 'utf8mb4'),
        'cursorclass': pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**connect_kwargs)
|
||||
|
||||
|
||||
def format_file_size(size_bytes):
    """Render a byte count as a short human-readable string.

    Values below 1024 are shown verbatim with a ``B`` suffix; larger values
    are scaled by powers of 1024 to KB, MB or GB with one decimal place.
    GB is the largest unit used.

    Args:
        size_bytes: Size in bytes.

    Returns:
        The formatted size, e.g. ``"512 B"`` or ``"1.5 KB"``.
    """
    kib = 1024
    if size_bytes < kib:
        return f"{size_bytes} B"

    # (exclusive upper bound, divisor, unit label), smallest unit first.
    scaled_units = (
        (kib * kib, kib, "KB"),
        (kib * kib * kib, kib * kib, "MB"),
    )
    for upper_bound, divisor, label in scaled_units:
        if size_bytes < upper_bound:
            return f"{size_bytes / divisor:.1f} {label}"

    return f"{size_bytes / (kib * kib * kib):.1f} GB"
|
||||
|
||||
|
||||
def allowed_file(filename):
    """Report whether ``filename`` ends in an accepted upload extension.

    The text after the last ``.`` is compared case-insensitively against
    ``ALLOWED_EXTENSIONS``; a name without any dot is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS
|
||||
|
||||
|
||||
def generic_get_by_id(table_name, id_val):
    """Fetch one row from ``table_name`` by its ``id`` column.

    Args:
        table_name: Name of the table to query. It is interpolated directly
            into the SQL text, so it must come from trusted code and never
            from request data.
        id_val: Value matched against the ``id`` column (bound as a query
            parameter).

    Returns:
        The matching row as returned by the cursor, or ``None`` when no row
        matches.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(f"SELECT * FROM {table_name} WHERE id = %s", (id_val,))
            return cursor.fetchone()
    finally:
        # Release the connection even when the query raises.
        conn.close()
|
||||
|
||||
|
||||
# ============ 数据集管理 CRUD ============
|
||||
|
||||
@datasets_bp.route('/<int:id>', methods=['GET'])
def get_dataset(id):
    """Return one dataset together with the list of files attached to it.

    Args:
        id: Primary key of the ``dataset_manage`` row (taken from the URL).

    Returns:
        JSON ``{'code': 0, 'data': dataset}`` where ``dataset['files']`` holds
        the file rows (newest first, each with ``file_size_formatted``), or
        ``{'code': 1, 'message': ...}`` when the dataset does not exist.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM dataset_manage WHERE id = %s", (id,))
            dataset = cursor.fetchone()
            if not dataset:
                return jsonify({'code': 1, 'message': '数据集不存在'})

            # Fetch the files that belong to this dataset.
            cursor.execute(
                "SELECT id, file_name, file_path, file_size, file_type, create_time FROM dataset_files WHERE dataset_id = %s ORDER BY create_time DESC",
                (id,)
            )
            files = cursor.fetchall()
    finally:
        # Always release the connection, including on early return or error.
        conn.close()

    for f in files:
        # A NULL size (if the column allows it) would raise TypeError in the
        # formatter's comparison, so treat it as zero bytes.
        f['file_size_formatted'] = format_file_size(f['file_size'] or 0)

    dataset['files'] = files
    return jsonify({'code': 0, 'data': dataset})
|
||||
|
||||
|
||||
@datasets_bp.route('', methods=['GET'])
def get_datasets():
    """Return every row of ``dataset_manage``, newest first.

    Returns:
        JSON ``{'code': 0, 'data': rows}``.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM dataset_manage ORDER BY create_time DESC")
            result = cursor.fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()
    return jsonify({'code': 0, 'data': result})
|
||||
|
||||
|
||||
@datasets_bp.route('', methods=['POST'])
def create_dataset():
    """Insert a new ``dataset_manage`` row from the JSON request body.

    Each key of the JSON object is used as a column name and each value as
    the column value. Because keys come from the client they are validated
    as plain identifiers and backtick-quoted before being placed in the SQL
    text; values are always bound as query parameters.

    Returns:
        JSON ``{'code': 0, 'message': ..., 'id': new_id}`` on success, or
        ``{'code': 1, 'message': ...}`` when the body is not a JSON object,
        a key is not a valid column identifier, or the insert fails.
    """
    data = request.json
    if not isinstance(data, dict):
        return jsonify({'code': 1, 'message': '创建失败: 请求体必须是JSON对象'})

    # Column names cannot be parameterized, so reject anything that is not a
    # bare identifier (this rules out backticks, spaces, quotes and so on).
    invalid = [key for key in data if not key.isidentifier()]
    if invalid:
        return jsonify({'code': 1, 'message': f"创建失败: 非法字段名 {', '.join(invalid)}"})

    columns = ', '.join(f"`{key}`" for key in data)
    placeholders = ', '.join(['%s'] * len(data))
    sql = f"INSERT INTO dataset_manage ({columns}) VALUES ({placeholders})"

    try:
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute(sql, list(data.values()))
                new_id = cursor.lastrowid
            conn.commit()
        finally:
            # Release the connection whether or not the insert succeeded.
            conn.close()
    except Exception as e:
        return jsonify({'code': 1, 'message': f'创建失败: {str(e)}'})

    return jsonify({'code': 0, 'message': '创建成功', 'id': new_id})
|
||||
|
||||
|
||||
@datasets_bp.route('/<int:id>', methods=['PUT'])
def update_dataset(id):
    """Update columns of one ``dataset_manage`` row from the JSON request body.

    Each key of the JSON object is used as a column name and each value as
    the new column value. Keys are validated as plain identifiers and
    backtick-quoted before being placed in the SQL text; values are bound
    as query parameters.

    Args:
        id: Primary key of the row to update (taken from the URL).

    Returns:
        JSON ``{'code': 0, 'message': ...}`` on success, or
        ``{'code': 1, 'message': ...}`` when the body is not a non-empty JSON
        object, a key is not a valid column identifier, or the update fails.
    """
    data = request.json
    # An empty body would produce "SET  WHERE ...", which is invalid SQL.
    if not isinstance(data, dict) or not data:
        return jsonify({'code': 1, 'message': '更新失败: 没有可更新的字段'})

    # Column names cannot be parameterized, so reject anything that is not a
    # bare identifier (this rules out backticks, spaces, quotes and so on).
    invalid = [key for key in data if not key.isidentifier()]
    if invalid:
        return jsonify({'code': 1, 'message': f"更新失败: 非法字段名 {', '.join(invalid)}"})

    set_clause = ', '.join(f"`{key}` = %s" for key in data)
    sql = f"UPDATE dataset_manage SET {set_clause} WHERE id = %s"
    values = list(data.values()) + [id]

    # Report database failures in the same envelope that create_dataset uses.
    try:
        conn = get_db_connection()
        try:
            with conn.cursor() as cursor:
                cursor.execute(sql, values)
            conn.commit()
        finally:
            conn.close()
    except Exception as e:
        return jsonify({'code': 1, 'message': f'更新失败: {str(e)}'})

    return jsonify({'code': 0, 'message': '更新成功'})
|
||||
|
||||
|
||||
@datasets_bp.route('/<int:id>', methods=['DELETE'])
def delete_dataset(id):
    """Delete a dataset, its file records, and the files stored on disk.

    The database rows are deleted and committed first; the files are removed
    afterwards so that a failed database operation never leaves records
    pointing at files that no longer exist. File removal is best effort:
    failures are printed and do not change the response.

    Args:
        id: Primary key of the ``dataset_manage`` row (taken from the URL).

    Returns:
        JSON ``{'code': 0, 'message': ...}``.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            # Remember the stored paths before the rows disappear.
            cursor.execute("SELECT file_path FROM dataset_files WHERE dataset_id = %s", (id,))
            files = cursor.fetchall()

            cursor.execute("DELETE FROM dataset_files WHERE dataset_id = %s", (id,))
            cursor.execute("DELETE FROM dataset_manage WHERE id = %s", (id,))
        conn.commit()
    finally:
        # Release the connection even when a statement raises.
        conn.close()

    for f in files:
        file_path = f.get('file_path')
        if not file_path:
            continue
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone; nothing to clean up.
            pass
        except OSError as e:
            print(f"删除文件失败: {file_path}, {e}")

    return jsonify({'code': 0, 'message': '删除成功'})
|
||||
|
||||
|
||||
# ============ 数据集文件上传接口 ============
|
||||
|
||||
@datasets_bp.route('/upload/<int:dataset_id>', methods=['POST'])
def upload_dataset_file(dataset_id):
    """Store uploaded files for a dataset and record them in the database.

    Files arrive in the multipart field ``files``. Each accepted file is
    saved in ``DATASET_FOLDER`` as ``<timestamp>_<dataset_id>_<name>`` and
    inserted into ``dataset_files``; afterwards the dataset's ``file_count``
    and ``size`` columns are recomputed from the file table.

    Args:
        dataset_id: Primary key of the target dataset (taken from the URL).

    Returns:
        JSON with ``code`` 0 and the uploaded entries (plus ``errors`` when
        some files were rejected, or ``file_count`` when all were accepted),
        or ``code`` 1 when the dataset does not exist or no file was sent.
    """
    # The dataset must exist before anything is written.
    dataset = generic_get_by_id('dataset_manage', dataset_id)
    if not dataset:
        return jsonify({'code': 1, 'message': '数据集不存在'})

    # Make sure the storage folder exists.
    os.makedirs(DATASET_FOLDER, exist_ok=True)

    if 'files' not in request.files:
        return jsonify({'code': 1, 'message': '没有文件被上传'})

    uploaded_files = []
    errors = []
    file_count = 0

    # One connection serves the whole request instead of one per file.
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            for file in request.files.getlist('files'):
                # Browsers send an empty part when no file was chosen.
                if not file.filename:
                    continue

                if not allowed_file(file.filename):
                    errors.append(f"{file.filename}: 文件类型不支持")
                    continue

                # Take the extension from the original name: secure_filename
                # drops non-ASCII characters, so a name such as "数据.json"
                # would collapse to "json" and lose its dot.
                raw_stem, raw_ext = file.filename.rsplit('.', 1)
                ext = raw_ext.lower()
                filename = f"{secure_filename(raw_stem) or 'file'}.{ext}"

                # Prefix a timestamp and the dataset id to avoid collisions.
                timestamp = int(time.time() * 1000)
                new_filename = f"{timestamp}_{dataset_id}_{filename}"
                file_path = os.path.join(DATASET_FOLDER, new_filename)

                file.save(file_path)
                file_size = os.path.getsize(file_path)

                cursor.execute(
                    "INSERT INTO dataset_files (dataset_id, file_name, file_path, file_size, file_type) VALUES (%s, %s, %s, %s, %s)",
                    (dataset_id, filename, file_path, file_size, ext)
                )
                # Commit per file so earlier uploads survive a later failure.
                conn.commit()

                uploaded_files.append({
                    'name': filename,
                    'size': file_size,
                    'size_formatted': format_file_size(file_size)
                })

            # Refresh the dataset totals only when something was stored.
            if uploaded_files:
                cursor.execute("SELECT COUNT(*) as count, SUM(file_size) as total_size FROM dataset_files WHERE dataset_id = %s", (dataset_id,))
                result = cursor.fetchone()
                file_count = result['count'] or 0
                total_size = result['total_size'] or 0

                cursor.execute(
                    "UPDATE dataset_manage SET file_count = %s, size = %s WHERE id = %s",
                    (file_count, format_file_size(total_size), dataset_id)
                )
                conn.commit()
    finally:
        # Release the connection even when saving or a query raises.
        conn.close()

    if errors:
        return jsonify({
            'code': 0,
            'message': f'部分文件上传成功,{len(errors)}个文件失败',
            'data': {
                'uploaded': uploaded_files,
                'errors': errors
            }
        })

    # Every part had an empty filename: nothing was uploaded at all.
    if not uploaded_files:
        return jsonify({'code': 1, 'message': '没有文件被上传'})

    return jsonify({
        'code': 0,
        'message': f'成功上传 {len(uploaded_files)} 个文件',
        'data': {
            'uploaded': uploaded_files,
            'file_count': file_count
        }
    })
|
||||
|
||||
|
||||
@datasets_bp.route('/<int:dataset_id>/files', methods=['GET'])
def get_dataset_files(dataset_id):
    """List the file records of one dataset, newest first.

    Args:
        dataset_id: Primary key of the dataset (taken from the URL).

    Returns:
        JSON ``{'code': 0, 'data': files}`` where each entry also carries a
        human-readable ``file_size_formatted``.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute(
                "SELECT id, file_name, file_path, file_size, file_type, create_time FROM dataset_files WHERE dataset_id = %s ORDER BY create_time DESC",
                (dataset_id,)
            )
            files = cursor.fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    for f in files:
        # A NULL size (if the column allows it) would raise TypeError in the
        # formatter's comparison, so treat it as zero bytes.
        f['file_size_formatted'] = format_file_size(f['file_size'] or 0)

    return jsonify({'code': 0, 'data': files})
|
||||
|
||||
|
||||
@datasets_bp.route('/files/<int:file_id>', methods=['DELETE'])
def delete_dataset_file(file_id):
    """Delete one dataset file and refresh the owning dataset's totals.

    The file record is deleted and the dataset's ``file_count`` / ``size``
    are recomputed in a single transaction. The file on disk is removed only
    after that transaction commits, so a database failure never leaves a
    record pointing at a missing file. File removal is best effort: failures
    are printed and do not change the response.

    Args:
        file_id: Primary key of the ``dataset_files`` row (taken from the URL).

    Returns:
        JSON ``{'code': 0, 'message': ...}`` on success, or
        ``{'code': 1, 'message': ...}`` when the file record does not exist.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT dataset_id, file_path FROM dataset_files WHERE id = %s", (file_id,))
            file_info = cursor.fetchone()
            if not file_info:
                return jsonify({'code': 1, 'message': '文件不存在'})

            cursor.execute("DELETE FROM dataset_files WHERE id = %s", (file_id,))

            # Recompute the dataset totals from the remaining file rows.
            dataset_id = file_info['dataset_id']
            cursor.execute("SELECT COUNT(*) as count, SUM(file_size) as total_size FROM dataset_files WHERE dataset_id = %s", (dataset_id,))
            result = cursor.fetchone()
            file_count = result['count'] or 0
            total_size = result['total_size'] or 0

            cursor.execute(
                "UPDATE dataset_manage SET file_count = %s, size = %s WHERE id = %s",
                (file_count, format_file_size(total_size), dataset_id)
            )
        conn.commit()
    finally:
        # Release the connection on every path, including the early return.
        conn.close()

    file_path = file_info['file_path']
    if file_path:
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone; nothing to clean up.
            pass
        except OSError as e:
            print(f"删除文件失败: {file_path}, {e}")

    return jsonify({'code': 0, 'message': '删除成功'})
|
||||
|
||||
|
||||
# ============ 文件下载接口 ============
|
||||
|
||||
@datasets_bp.route('/download/<int:dataset_id>/<filename>', methods=['GET'])
def download_dataset_file(dataset_id, filename):
    """Send one stored file from ``DATASET_FOLDER`` as an attachment.

    Args:
        dataset_id: Taken from the URL; not used when locating the file.
        filename: Name of the file inside ``DATASET_FOLDER``.
    """
    # All uploads live directly in the storage root, not in per-dataset folders.
    storage_root = DATASET_FOLDER
    return send_from_directory(storage_root, filename, as_attachment=True)
|
||||
|
||||
|
||||
@datasets_bp.route('/download/<int:dataset_id>', methods=['GET'])
def download_dataset_all(dataset_id):
    """Send every file of a dataset bundled into one in-memory ZIP archive.

    Files whose stored path no longer exists on disk are skipped. Entries
    are named after the recorded ``file_name``; when two files share a name
    the later ones get a numeric suffix so no entry is duplicated.

    Args:
        dataset_id: Primary key of the dataset (taken from the URL).

    Returns:
        A ZIP attachment response, or JSON ``{'code': 1, 'message': ...}``
        when the dataset does not exist or has no file records.
    """
    from urllib.parse import quote

    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT name FROM dataset_manage WHERE id = %s", (dataset_id,))
            dataset = cursor.fetchone()
            if not dataset:
                return jsonify({'code': 1, 'message': '数据集不存在'})

            cursor.execute(
                "SELECT id, file_name, file_path FROM dataset_files WHERE dataset_id = %s ORDER BY create_time DESC",
                (dataset_id,)
            )
            files = cursor.fetchall()
    finally:
        # Release the connection before the archive is built.
        conn.close()

    if not files:
        return jsonify({'code': 1, 'message': '数据集没有文件'})

    memory_file = io.BytesIO()
    used_names = set()
    with zipfile.ZipFile(memory_file, 'w', zipfile.ZIP_DEFLATED) as zf:
        for f in files:
            file_path = f.get('file_path')
            if not (file_path and os.path.exists(file_path)):
                continue

            # Keep the recorded name, but never write two entries under the
            # same name (the same file name may have been uploaded twice).
            arcname = f['file_name']
            stem, ext = os.path.splitext(arcname)
            suffix = 1
            while arcname in used_names:
                arcname = f"{stem}_{suffix}{ext}"
                suffix += 1
            used_names.add(arcname)
            zf.write(file_path, arcname)

    zip_name = f"{dataset['name'] or 'dataset'}_{dataset_id}.zip"
    # Header values must be plain ASCII, and the dataset name may contain
    # non-ASCII or separator characters. Send a safe ASCII fallback plus the
    # real name percent-encoded in the RFC 5987 "filename*" form.
    fallback_name = f"dataset_{dataset_id}.zip"
    disposition = (
        f'attachment; filename="{fallback_name}"; '
        f"filename*=UTF-8''{quote(zip_name, safe='')}"
    )
    return Response(
        # Pass the bytes rather than the buffer, which would be iterated
        # line by line.
        memory_file.getvalue(),
        mimetype='application/zip',
        headers={'Content-Disposition': disposition}
    )
|
||||
|
||||
|
||||
# ============ 文件预览接口 ============
|
||||
|
||||
@datasets_bp.route('/preview/<int:file_id>', methods=['GET'])
def preview_dataset_file(file_id):
    """Return the beginning of a dataset file as text for previewing.

    The file is opened as UTF-8 text and at most 102400 characters are read
    (about 100 KB for ASCII content). Files that cannot be decoded as UTF-8
    produce an error response.

    Args:
        file_id: Primary key of the ``dataset_files`` row (taken from the URL).

    Returns:
        JSON ``{'code': 0, 'data': {'file_name': ..., 'content': ...}}`` on
        success, or ``{'code': 1, 'message': ...}`` when the record or the
        file is missing or the file cannot be read.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            cursor.execute("SELECT id, file_name, file_path, file_type FROM dataset_files WHERE id = %s", (file_id,))
            file_info = cursor.fetchone()
    finally:
        # The database is not needed past this point; release the
        # connection before touching the file system.
        conn.close()

    if not file_info:
        return jsonify({'code': 1, 'message': '文件不存在'})

    file_path = file_info['file_path']
    if not file_path or not os.path.exists(file_path):
        return jsonify({'code': 1, 'message': '文件不存在'})

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read(102400)  # character limit, not a byte limit
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 content.
        return jsonify({'code': 1, 'message': f'读取文件失败: {str(e)}'})

    return jsonify({
        'code': 0,
        'data': {
            'file_name': file_info['file_name'],
            'content': content
        }
    })
|
||||
Reference in New Issue
Block a user