<!-- Source: YG_FT_Platform/web/pages/dataset-create.html (1171 lines, 57 KiB, HTML) -->
<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>上传数据集 - 远光软件微调平台</title>
  <script src="../lib/tailwindcss/tailwind.js"></script>
  <link href="../lib/font-awesome/css/font-awesome.min.css" rel="stylesheet">
  <!--
    Page-local styles. Stray version-control timestamps that had leaked into
    this block were removed: as CSS they became part of the next selector and
    silently invalidated the .nav-link:hover and .form-input rules.
  -->
  <style>
    /* Sidebar section headings */
    .sidebar-section-title {
      padding: 0.5rem 1rem;
      font-size: 0.75rem;
      color: rgba(191, 203, 217, 0.7);
      font-weight: 500;
      text-transform: uppercase;
      letter-spacing: 0.05em;
    }
    .nav-link:hover {
      background-color: rgba(0, 21, 41, 0.2);
    }

    /* Text inputs */
    .form-input {
      width: 100%;
      padding: 0.5rem 0.75rem;
      border: 1px solid #d1d5db;
      border-radius: 0.5rem;
      font-size: 0.875rem;
      transition: border-color 0.2s, outline 0.2s;
    }
    .form-input:focus {
      border-color: #1890ff;
      outline: none;
    }

    /* Select boxes with a custom chevron */
    .form-select {
      width: 100%;
      padding: 0.5rem 0.75rem;
      border: 1px solid #d1d5db;
      border-radius: 0.5rem;
      font-size: 0.875rem;
      transition: border-color 0.2s, outline 0.2s;
      appearance: none;
      background-color: white;
      background-image: url("data:image/svg+xml,%3csvg xmlns='http://www.w3.org/2000/svg' fill='none' viewBox='0 0 20 20'%3e%3cpath stroke='%236b7280' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' d='M6 8l4 4 4-4'/%3e%3c/svg%3e");
      background-position: right 0.5rem center;
      background-repeat: no-repeat;
      background-size: 1.5em 1.5em;
      padding-right: 2.5rem;
    }
    .form-select:focus {
      border-color: #1890ff;
      outline: none;
    }

    /* Inner dot of the custom radio buttons; coloured via .bg-primary from script */
    .radio-dot {
      width: 0.5rem;
      height: 0.5rem;
      border-radius: 50%;
      background-color: transparent;
      transition: all 0.2s;
    }

    /* Upload drop zone highlight */
    .upload-area:hover,
    .upload-area.drag-over {
      border-color: #1890ff;
      background-color: rgba(24, 144, 255, 0.05);
    }

    /* Brand colour helpers; must stay after .radio-dot so .bg-primary wins */
    .bg-primary { background-color: #1890ff; }
    .text-primary { color: #1890ff; }
    .border-primary { border-color: #1890ff; }
    :root { --primary: #1890ff; --danger: #f5222d; --success: #52c41a; }
  </style>
</head>
<body class="antialiased bg-gray-50 flex h-screen overflow-hidden">
<!--
  Sidebar navigation.
  Uses `md:flex` (not `md:block`): the column layout, the scrolling nav
  (`flex-1`) and the bottom-pinned footer (`mt-auto`) only work when the
  aside is a flex container at the md breakpoint and above.
-->
<aside class="w-64 text-[#bfcbd9] flex-shrink-0 hidden md:flex flex-col h-full" style="background-color: #001529;">
  <!-- Platform logo -->
  <div class="p-4 border-b border-[#001529]/30 flex items-center">
    <img src="../assets/logo/logo.png" alt="Logo" class="w-6 h-6 object-contain mr-2">
    <span class="text-white font-medium">远光软件微调平台</span>
  </div>
  <!-- Main navigation -->
  <nav class="flex-1 overflow-y-auto py-2">
    <!-- Section 1: model services -->
    <div class="sidebar-section-title">模型服务</div>
    <a href="main.html" data-page="fine-tune" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
      <i class="fa fa-cogs w-5 text-center"></i>
      <span class="ml-2">模型调优</span>
    </a>
    <a href="main.html?page=my-models" data-page="my-models" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
      <i class="fa fa-database w-5 text-center"></i>
      <span class="ml-2">我的模型</span>
    </a>
    <a href="main.html?page=model-eval" data-page="model-eval" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
      <i class="fa fa-line-chart w-5 text-center"></i>
      <span class="ml-2">模型评测</span>
    </a>
    <a href="main.html?page=model-deploy" data-page="model-deploy" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
      <i class="fa fa-server w-5 text-center"></i>
      <span class="ml-2">模型部署</span>
    </a>
    <!-- Section 2: resource management -->
    <div class="sidebar-section-title mt-6">资源管理</div>
    <a href="main.html?page=model-manage" data-page="model-manage" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
      <i class="fa fa-cube w-5 text-center"></i>
      <span class="ml-2">模型管理</span>
    </a>
    <a href="main.html?page=dataset-manage" data-page="dataset-manage" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
      <i class="fa fa-file-text w-5 text-center"></i>
      <span class="ml-2">数据集管理</span>
    </a>
    <a href="main.html?page=data-generate" data-page="data-generate" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
      <i class="fa fa-database w-5 text-center"></i>
      <span class="ml-2">其他工具</span>
    </a>
    <!-- Section 3: system settings -->
    <div class="sidebar-section-title mt-6">系统设置</div>
    <a href="main.html?page=config" data-page="config" class="nav-link flex items-center px-4 py-2.5 hover:bg-[#001529]/20 transition-colors">
      <i class="fa fa-bar-chart w-5 text-center"></i>
      <span class="ml-2">平台性能</span>
    </a>
  </nav>
  <!-- Footer: workspace and version -->
  <div class="p-4 border-t border-[#001529]/30 text-xs mt-auto">
    <div class="mb-2 text-[#bfcbd9]/80">默认业务空间</div>
    <div class="flex items-center justify-between">
      <span class="text-[#bfcbd9]">版本 v1.0.0</span>
      <i class="fa fa-question-circle-o text-[#bfcbd9]/70"></i>
    </div>
  </div>
</aside>
<!-- 主内容区 -->
<div class="flex-1 flex flex-col overflow-hidden">
<!-- Top bar: back action on the left, user menu on the right -->
<header class="bg-white border-b border-gray-200 shadow-sm">
  <div class="flex items-center justify-between px-6 h-14">
    <div class="flex items-center space-x-4">
      <!-- A button, not <a href="#">: this is an in-page action, and "#" would append a hash and scroll to top -->
      <button type="button" onclick="goBack()" class="text-gray-500 hover:text-gray-700 flex items-center">
        <i class="fa fa-arrow-left"></i>
        <span class="ml-1">上一步</span>
      </button>
    </div>
    <div class="flex items-center space-x-4">
      <div class="relative group">
        <!-- NOTE(review): placeholder avatar is loaded from a third-party host; consider a local asset -->
        <img src="https://picsum.photos/id/1005/32/32" class="w-8 h-8 rounded-full cursor-pointer" alt="用户头像" width="32" height="32">
        <div class="absolute right-0 top-full mt-2 bg-white rounded shadow-lg py-1 hidden group-hover:block border border-gray-100 min-w-[140px]">
          <a href="login.html" class="block px-4 py-2 text-sm text-gray-700 hover:bg-gray-50 whitespace-nowrap">
            <i class="fa fa-sign-out mr-1"></i>退出登录
          </a>
        </div>
      </div>
    </div>
  </div>
</header>
<!-- 内容区域 -->
<main class="flex-1 overflow-y-auto p-6 bg-gray-50">
<!-- Page title / breadcrumb. The parent crumb is a real button so it is keyboard reachable; script updates its text by id. -->
<div class="bg-white rounded-lg shadow-sm p-4 border-b border-gray-100 mb-4">
  <div class="flex items-center text-sm">
    <button type="button" id="breadcrumbParent" class="text-primary cursor-pointer hover:underline" onclick="goBack()">数据集管理</button>
    <span class="mx-2 text-gray-300">/</span>
    <span id="breadcrumbChild" class="text-gray-800 font-medium">上传数据集</span>
  </div>
</div>
<!-- 表单内容 -->
<div class="bg-white rounded-lg shadow-sm">
<div class="p-6 max-w-3xl">
<form id="datasetForm">
<!-- 1. Dataset name. The label is bound to the input via for/id and styled like the other field labels (the page's own <style> has no .form-label rule). -->
<div class="mb-6">
  <label for="datasetName" class="form-label block text-sm font-medium text-gray-700 mb-2">
    数据集名称
  </label>
  <div class="relative">
    <input
      type="text"
      id="datasetName"
      name="name"
      placeholder="数据集名称"
      class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:border-primary focus:outline-none"
      maxlength="20"
    >
    <!-- NOTE(review): static counter text; no code in the visible part of this file updates it — confirm -->
    <span class="absolute right-3 top-1/2 -translate-y-1/2 text-gray-400 text-sm">0 / 20</span>
  </div>
</div>
<!-- 1.1 Dataset description (optional) -->
<div class="mb-6">
  <label for="datasetDescription" class="form-label block text-sm font-medium text-gray-700 mb-2">
    数据集描述
  </label>
  <div class="relative">
    <textarea
      id="datasetDescription"
      name="description"
      placeholder="请输入数据集描述(选填)"
      class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:border-primary focus:outline-none resize-none"
      rows="3"
      maxlength="50"
    ></textarea>
    <span class="absolute right-3 bottom-2 text-gray-400 text-sm">0 / 50</span>
  </div>
</div>
<!-- 2. Dataset type (radio group): "train" is preselected; each change calls switchDatasetType(). The native radios are visually hidden (opacity-0) and the ring/dot next to them is restyled from script through the .radio-custom / .radio-dot hooks. -->
<div class="mb-6 pl-4">
<label class="block text-sm font-medium text-gray-700 mb-2">数据集类型</label>
<div class="flex items-center space-x-6">
<label class="flex items-center cursor-pointer">
<input type="radio" name="dataset_type" id="train-set" value="train" checked onchange="switchDatasetType()" class="radio-custom absolute opacity-0">
<div class="flex items-center space-x-1">
<div class="w-4 h-4 rounded-full border-2 border-gray-300 flex items-center justify-center">
<div class="radio-dot"></div>
</div>
<span class="text-sm text-gray-700">训练集</span>
</div>
</label>
<label class="flex items-center cursor-pointer">
<input type="radio" name="dataset_type" id="eval-set" value="eval" onchange="switchDatasetType()" class="radio-custom absolute opacity-0">
<div class="flex items-center space-x-1">
<div class="w-4 h-4 rounded-full border-2 border-gray-300 flex items-center justify-center">
<div class="radio-dot"></div>
</div>
<span class="text-sm text-gray-700">评测集</span>
</div>
</label>
</div>
</div>
<!-- 4. Storage location: local / cloud / MinIO. Every change calls toggleStorageConfig(), which shows the MinIO panel only for "minio". -->
<div class="mb-6 pl-4">
  <label class="block text-sm font-medium text-gray-700 mb-2">存储位置</label>
  <div class="flex items-center space-x-6">
    <label class="flex items-center cursor-pointer">
      <input type="radio" name="storage" value="local" class="radio-custom absolute opacity-0" checked onchange="toggleStorageConfig()">
      <div class="flex items-center space-x-1">
        <div class="w-4 h-4 rounded-full border-2 border-gray-300 flex items-center justify-center">
          <div class="radio-dot"></div>
        </div>
        <span class="text-sm text-gray-700">本地存储</span>
      </div>
    </label>
    <label class="flex items-center cursor-pointer">
      <input type="radio" name="storage" value="cloud" class="radio-custom absolute opacity-0" onchange="toggleStorageConfig()">
      <div class="flex items-center space-x-1">
        <div class="w-4 h-4 rounded-full border-2 border-gray-300 flex items-center justify-center">
          <div class="radio-dot"></div>
        </div>
        <span class="text-sm text-gray-700">云平台存储</span>
      </div>
    </label>
    <label class="flex items-center cursor-pointer">
      <input type="radio" name="storage" value="minio" class="radio-custom absolute opacity-0" onchange="toggleStorageConfig()">
      <div class="flex items-center space-x-1">
        <div class="w-4 h-4 rounded-full border-2 border-gray-300 flex items-center justify-center">
          <div class="radio-dot"></div>
        </div>
        <span class="text-sm text-gray-700">MinIO存储</span>
      </div>
    </label>
  </div>
  <!-- MinIO settings panel (hidden unless MinIO storage is selected). Labels are bound to their inputs via for/id. -->
  <div id="minioConfigPanel" class="hidden mt-4 p-4 bg-gray-50 rounded-lg border border-gray-200">
    <div class="grid grid-cols-2 gap-4">
      <div>
        <label for="minioEndpoint" class="block text-xs text-gray-600 mb-1">Endpoint地址</label>
        <input type="text" id="minioEndpoint" name="minio_endpoint" placeholder="如http://localhost:9000" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:border-primary focus:outline-none">
      </div>
      <div>
        <label for="minioBucket" class="block text-xs text-gray-600 mb-1">Bucket名称</label>
        <input type="text" id="minioBucket" name="minio_bucket" placeholder="如datasets" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:border-primary focus:outline-none">
      </div>
      <div>
        <label for="minioAccessKey" class="block text-xs text-gray-600 mb-1">Access Key</label>
        <input type="text" id="minioAccessKey" name="minio_access_key" placeholder="Access Key" autocomplete="off" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:border-primary focus:outline-none">
      </div>
      <div>
        <label for="minioSecretKey" class="block text-xs text-gray-600 mb-1">Secret Key</label>
        <!-- autocomplete="new-password" keeps browsers from filling the user's saved login password into this field -->
        <input type="password" id="minioSecretKey" name="minio_secret_key" placeholder="Secret Key" autocomplete="new-password" class="w-full px-3 py-2 border border-gray-300 rounded-lg text-sm focus:border-primary focus:outline-none">
      </div>
    </div>
    <div class="mt-3 flex items-center justify-between">
      <div class="flex items-center">
        <input type="checkbox" id="minioSSL" name="minio_ssl" class="mr-2">
        <label for="minioSSL" class="text-xs text-gray-600">使用SSL连接</label>
      </div>
      <button type="button" onclick="testMinioConnection()" class="px-3 py-1 text-xs bg-white border border-primary text-primary rounded hover:bg-primary/5 transition-colors">
        <i class="fa fa-plug mr-1"></i>测试连接
      </button>
    </div>
  </div>
</div>
<!-- 5. File upload: click or drag-and-drop zone, format status box, and the list of selected files (filled from script). -->
<div class="mb-6 pl-4">
  <label class="block text-sm font-medium text-gray-700 mb-1">上传文件</label>
  <p class="text-xs text-gray-500 mb-2">选择文件进行上传,数据格式可下载模板查看</p>
  <div
    id="upload-area"
    class="upload-area border-2 border-dashed border-gray-300 rounded-lg p-8 text-center transition-colors cursor-pointer relative"
  >
    <!-- Hidden native picker; script forwards clicks on the drop zone to it -->
    <input type="file" id="file-upload" class="absolute opacity-0" accept=".jsonl,.json">
    <div class="flex flex-col items-center space-y-2">
      <i class="fa fa-cloud-upload text-2xl text-gray-400"></i>
      <p class="text-sm text-gray-600">点击或将文件拖拽到这里上传</p>
      <p class="text-xs text-gray-500">支持扩展名jsonl, json文件最大200MB</p>
    </div>
  </div>
  <!-- Format check result; visibility is toggled by updateDataStats() -->
  <div id="dataStats" class="mt-4 p-4 bg-blue-50 rounded-lg border border-blue-200 hidden">
    <div class="flex items-center space-x-4">
      <div id="formatCheck" class="flex items-center hidden">
        <i class="fa fa-check-circle text-green-500 mr-1"></i>
        <span class="text-sm text-green-600">符合 Alpaca 格式</span>
      </div>
      <div id="formatError" class="flex items-center hidden">
        <i class="fa fa-exclamation-circle text-red-500 mr-1"></i>
        <span class="text-sm text-red-600">格式异常</span>
      </div>
    </div>
  </div>
  <!-- Selected / already uploaded files; rendered by renderFileList() -->
  <div id="fileList" class="mt-4 space-y-2"></div>
</div>
<!-- 8. Template link. Uses the Font Awesome 4 icon name `fa-file-code-o` (this page loads FA4; `fa-file-code` does not exist there). -->
<!-- NOTE(review): href="#" has no real target yet; point it at the template file once its URL is known. -->
<div class="mb-6 pl-4 space-x-4">
  <a href="#" class="text-primary text-sm hover:underline">
    <i class="fa fa-file-code-o mr-1"></i>JSON数据模板
  </a>
</div>
<!-- Bottom action bar: "保存" runs submitForm() (type="button", so the form is never submitted natively); "取消" is a plain link back to the dataset list. -->
<div class="flex items-center justify-between pt-6 border-t border-gray-100 mt-8">
<div class="flex items-center space-x-3">
<button type="button" onclick="submitForm()" class="px-4 py-2 bg-primary text-white rounded-lg text-sm hover:bg-primary/90">
保存
</button>
<a href="main.html?page=dataset-manage" class="px-4 py-2 bg-gray-200 text-gray-700 rounded-lg text-sm hover:bg-gray-300">
取消
</a>
</div>
</div>
</form>
</div>
</div>
</main>
</div>
<script>
/**
 * Builds the backend API base URL from the page's own protocol and host name,
 * always targeting port 8080 and the /api prefix.
 */
const getApiBase = () => {
  const { protocol, hostname } = window.location;
  return `${protocol}//${hostname}:8080/api`;
};
// Resolved once at script load and reused by every request below.
const API_BASE = getApiBase();
// Page to return to; replaced during initialisation when the URL carries ?from=fine-tune.
let backUrl = 'main.html?page=dataset-manage';
// Entries shown in the file list: freshly parsed local files and, in edit mode, files already on the server.
let selectedFiles = [];
// Dataset id when the page runs in edit mode; null means "create".
let editId = null;
/**
 * Page bootstrap, run once the DOM is ready:
 *  1. reads ?from= / ?id= and adjusts breadcrumb, title and back target;
 *  2. in edit mode loads the existing dataset into the form;
 *  3. wires the upload zone (click, drag-and-drop, native picker);
 *  4. wires the sidebar links and paints the custom radio buttons.
 */
document.addEventListener('DOMContentLoaded', async function() {
  const urlParams = new URLSearchParams(window.location.search);
  const from = urlParams.get('from');
  const id = urlParams.get('id');
  const breadcrumbParent = document.getElementById('breadcrumbParent');
  const breadcrumbChild = document.getElementById('breadcrumbChild');

  if (from === 'fine-tune') {
    backUrl = 'fine-tune-create.html';
    if (breadcrumbParent) {
      breadcrumbParent.textContent = '创建训练任务';
    }
  }

  // Edit mode only for a usable positive integer id. Previously "?id=abc"
  // switched the UI to "edit" while submitForm() (which tests `editId` for
  // truthiness) silently fell back to "create".
  const parsedId = id ? parseInt(id, 10) : NaN;
  if (Number.isInteger(parsedId) && parsedId > 0) {
    editId = parsedId;
    document.title = '编辑数据集 - 远光软件微调平台';
    if (breadcrumbChild) {
      breadcrumbChild.textContent = '编辑数据集';
    }
    await loadDatasetData(editId);
  }

  const uploadArea = document.getElementById('upload-area');
  const fileUpload = document.getElementById('file-upload');

  // Stop the browser from opening a dropped file itself.
  const preventDefaults = (e) => {
    e.preventDefault();
    e.stopPropagation();
  };

  // Clicking anywhere in the drop zone opens the native file picker.
  uploadArea.addEventListener('click', () => fileUpload.click());

  ['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
    uploadArea.addEventListener(eventName, preventDefaults, false);
  });
  ['dragenter', 'dragover'].forEach(eventName => {
    uploadArea.addEventListener(eventName, () => uploadArea.classList.add('drag-over'), false);
  });
  ['dragleave', 'drop'].forEach(eventName => {
    uploadArea.addEventListener(eventName, () => uploadArea.classList.remove('drag-over'), false);
  });

  // Dropped files are all handed to handleFiles(), which validates each one.
  uploadArea.addEventListener('drop', (e) => {
    const files = Array.from(e.dataTransfer.files);
    handleFiles(files);
  });

  // The picker handles one file per selection: a new choice replaces the list.
  fileUpload.addEventListener('change', () => {
    const files = Array.from(fileUpload.files);
    if (files.length > 0) {
      selectedFiles = [];
      handleFiles([files[0]]);
    }
    // Reset so picking the same file again still fires "change".
    fileUpload.value = '';
  });

  // Sidebar links: navigate explicitly unless the link targets this page.
  document.querySelectorAll('.nav-link').forEach(link => {
    link.addEventListener('click', function(e) {
      if (!this.href.includes('dataset-create')) {
        e.preventDefault();
        window.location.href = this.href;
      }
    });
  });

  // Paint the custom radio buttons to match their checked state.
  initRadioStyles();
});
/**
 * Edit mode: fetches dataset `id` from the API and fills the form with it
 * (name, description, type, storage location, MinIO settings, file list).
 * Any failure is reported through showMessage(); nothing is thrown.
 *
 * @param {number} id Dataset id taken from the page URL.
 * @returns {Promise<void>}
 */
async function loadDatasetData(id) {
  try {
    const response = await fetch(`${API_BASE}/dataset-manage/${id}`);
    const result = await response.json();
    if (result.code !== 0) {
      showMessage('错误', result.message || '获取数据集信息失败', 'error');
      return;
    }
    const data = result.data;
    if (!data) {
      showMessage('错误', '数据集不存在', 'error');
      return;
    }

    const form = document.getElementById('datasetForm');

    // Writes a value into the first control matching `selector`, if present.
    const setValue = (selector, value) => {
      const el = form.querySelector(selector);
      if (el) el.value = value || '';
    };
    // Checks the radio of `groupName` whose value equals `value`. The value is
    // compared in script rather than interpolated into a CSS selector, so an
    // unexpected server value (e.g. containing a quote) cannot make
    // querySelector throw.
    const checkRadio = (groupName, value) => {
      const radio = Array.from(form.querySelectorAll(`input[name="${groupName}"]`))
        .find(r => r.value === value);
      if (radio) radio.checked = true;
      return Boolean(radio);
    };
    // Repaints the custom radios only. Calling initRadioStyles() here would
    // attach one more "change" listener to every radio on each call.
    const refreshRadioStyles = () => {
      document.querySelectorAll('.radio-custom').forEach(r => updateRadioStyle(r));
    };

    setValue('input[name="name"]', data.name);
    setValue('textarea[name="description"]', data.description);

    if (checkRadio('dataset_type', data.type || 'train')) {
      refreshRadioStyles();
    }

    const storageValue = data.storage_type || 'local';
    if (checkRadio('storage', storageValue)) {
      refreshRadioStyles();
      toggleStorageConfig();
      if (storageValue === 'minio' && data.minio_config) {
        // The config may arrive as a JSON string or as an object.
        const config = typeof data.minio_config === 'string' ? JSON.parse(data.minio_config) : data.minio_config;
        setValue('input[name="minio_endpoint"]', config.endpoint);
        setValue('input[name="minio_bucket"]', config.bucket);
        setValue('input[name="minio_access_key"]', config.access_key);
        // NOTE(review): the secret key is round-tripped through the browser; confirm this is intended.
        setValue('input[name="minio_secret_key"]', config.secret_key);
        const sslCheckbox = form.querySelector('input[name="minio_ssl"]');
        if (sslCheckbox) sslCheckbox.checked = config.ssl || false;
      }
    }

    // Files already stored on the server are listed as "existing" entries.
    if (data.files && data.files.length > 0) {
      selectedFiles = data.files.map(f => ({
        // previewFile() addresses server files by id.
        // NOTE(review): assumes each file record exposes `id` — confirm against the API.
        id: f.id,
        name: f.file_name,
        size: f.file_size,
        isExisting: true
      }));
      renderFileList();
    }

    document.title = `编辑数据集 - ${data.name || ''} - 远光软件微调平台`;
  } catch (error) {
    showMessage('错误', '加载数据集信息失败: ' + error.message, 'error');
  }
}
/**
 * Paints every custom radio (.radio-custom) according to its checked state
 * and makes sure each one repaints the whole set when it changes.
 *
 * Safe to call repeatedly: the "change" listener is attached at most once per
 * element (tracked with a data attribute). Previously every call stacked
 * another listener on every radio.
 */
function initRadioStyles() {
  document.querySelectorAll('.radio-custom').forEach(radio => {
    updateRadioStyle(radio);
    if (radio.dataset.radioStyleBound === 'true') {
      return;
    }
    radio.dataset.radioStyleBound = 'true';
    radio.addEventListener('change', function() {
      // Checking one radio unchecks its siblings, so repaint all of them.
      document.querySelectorAll('.radio-custom').forEach(r => updateRadioStyle(r));
    });
  });
}
/**
 * Syncs the decorative ring and dot next to a hidden radio input with the
 * input's checked state.
 *
 * @param {HTMLInputElement} radio Radio input located inside a <label>.
 */
function updateRadioStyle(radio) {
  const label = radio.closest('label');
  const ring = label.querySelector('.w-4');
  const dot = label.querySelector('.radio-dot');
  const isOn = !!radio.checked;

  if (ring) {
    // Highlighted ring when selected, neutral grey border otherwise.
    ring.classList.toggle('border-primary', isOn);
    ring.classList.toggle('bg-primary/10', isOn);
    ring.classList.toggle('border-gray-300', !isOn);
  }
  if (dot) {
    // Filled dot when selected, transparent otherwise.
    dot.classList.toggle('bg-primary', isOn);
    dot.classList.toggle('bg-transparent', !isOn);
  }
}
/**
 * Validates the given files (extension, size, duplicate name, total count),
 * parses each accepted one and appends it to `selectedFiles`, then refreshes
 * the format status box and the file list.
 *
 * @param {FileList|File[]} files Files from the picker or a drop.
 * @returns {Promise<void>}
 */
async function handleFiles(files) {
  const validExtensions = ['.jsonl', '.json'];
  const maxFileSize = 200 * 1024 * 1024; // 200MB
  const maxFiles = 10;

  for (const file of Array.from(files)) {
    // Once the list is full, report it a single time and stop looking at the
    // remaining files instead of warning once per surplus file.
    if (selectedFiles.length >= maxFiles) {
      showMessage('提示', '最多只能上传10个文件', 'warning');
      break;
    }

    const ext = '.' + file.name.split('.').pop().toLowerCase();
    if (!validExtensions.includes(ext)) {
      showMessage('提示', `文件 "${file.name}" 扩展名不支持,请上传 jsonl 或 json 格式的文件`, 'warning');
      continue;
    }

    if (file.size > maxFileSize) {
      showMessage('提示', `文件 "${file.name}" 大小超过200MB限制`, 'warning');
      continue;
    }

    // Files are identified by name only.
    if (selectedFiles.some(f => f.name === file.name)) {
      showMessage('提示', `文件 "${file.name}" 已存在`, 'warning');
      continue;
    }

    // Read the file and count its records; a file that cannot be parsed is
    // reported and left out of the list.
    try {
      const fileInfo = await parseFileAndCount(file);
      selectedFiles.push(fileInfo);
    } catch (error) {
      showMessage('错误', `解析文件 "${file.name}" 失败: ${error.message}`, 'error');
    }
  }

  updateDataStats();
  renderFileList();
}
/**
 * Reads a .jsonl / .json file as text, counts its records and checks whether
 * every record carries a non-empty `instruction` field (treated here as the
 * Alpaca format check).
 *
 * - .jsonl: one JSON value per non-blank line; lines that fail to parse are skipped.
 * - .json : an array of records, or a single value treated as one record.
 * - other : returned with zero records without reading the file.
 *
 * @param {File} file File chosen by the user.
 * @returns {Promise<{name: string, size: number, recordCount: number,
 *   isAlpacaFormat: boolean, isExisting: boolean, previewContent: string, file: File}>}
 * @throws {Error} (as a rejection) when the file cannot be read or a .json file is not valid JSON.
 */
async function parseFileAndCount(file) {
  const ext = file.name.split('.').pop().toLowerCase();

  // Builds the entry stored in `selectedFiles`.
  const buildInfo = (recordCount, allHaveInstruction, previewContent) => ({
    name: file.name,
    size: file.size,
    recordCount: recordCount,
    isAlpacaFormat: allHaveInstruction && recordCount > 0,
    isExisting: false,
    previewContent: previewContent,
    file: file // original File object, needed later for the upload
  });

  if (ext !== 'jsonl' && ext !== 'json') {
    return buildInfo(0, false, '');
  }

  // Reject on read errors too; without onerror the promise would never settle.
  const content = await new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = (e) => resolve(e.target.result);
    reader.onerror = () => reject(new Error('文件读取失败'));
    reader.readAsText(file);
  });

  // Only the first 100KB are kept for the preview dialog.
  const previewContent = content.substring(0, 102400);

  // Null-safe: JSON `null` or a primitive is a record without an instruction.
  const hasInstruction = (record) =>
    record !== null && typeof record === 'object' && Boolean(record.instruction);

  let recordCount = 0;
  let allHaveInstruction = true;

  if (ext === 'jsonl') {
    const lines = content.trim().split('\n').filter(line => line.trim());
    for (const line of lines) {
      let record;
      try {
        record = JSON.parse(line);
      } catch (parseError) {
        continue; // unparsable line: skipped on purpose
      }
      recordCount++;
      if (!hasInstruction(record)) {
        allHaveInstruction = false;
      }
    }
  } else {
    let data;
    try {
      data = JSON.parse(content);
    } catch (error) {
      throw new Error('JSON格式解析失败');
    }
    const items = Array.isArray(data) ? data : [data];
    for (const item of items) {
      recordCount++;
      if (!hasInstruction(item)) {
        allHaveInstruction = false;
      }
    }
  }

  return buildInfo(recordCount, allHaveInstruction, previewContent);
}
/**
 * Shows or hides the format status box under the upload zone:
 *  - hidden entirely when no file is selected;
 *  - "format error" when any parsed file has records but fails the check;
 *  - "format OK" when no file fails and at least one passes;
 *  - neither badge otherwise.
 */
function updateDataStats() {
  const statsBox = document.getElementById('dataStats');
  const okBadge = document.getElementById('formatCheck');
  const errorBadge = document.getElementById('formatError');

  if (selectedFiles.length === 0) {
    statsBox.classList.add('hidden');
    return;
  }
  statsBox.classList.remove('hidden');

  const anyValid = selectedFiles.some(f => f.isAlpacaFormat);
  const anyInvalid = selectedFiles.some(f => !f.isAlpacaFormat && f.recordCount > 0);

  // An invalid file takes precedence over valid ones.
  const showOk = !anyInvalid && anyValid;
  okBadge.classList.toggle('hidden', !showOk);
  errorBadge.classList.toggle('hidden', !anyInvalid);
}
/**
 * Empties the selected-file list and refreshes the format status box and
 * the rendered file list.
 */
function clearAllFiles() {
  selectedFiles = [];
  updateDataStats();
  renderFileList();
}
/**
 * Renders `selectedFiles` into #fileList and hides the upload zone while at
 * least one file is listed.
 *
 * File names come from the user's disk or from the server and are written
 * into markup (text and a `title` attribute), so they are HTML-escaped,
 * quotes included, before interpolation.
 */
function renderFileList() {
  const fileListEl = document.getElementById('fileList');
  const uploadArea = document.getElementById('upload-area');

  // Escapes text for both element content and double-quoted attributes.
  const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const esc = (value) => String(value == null ? '' : value).replace(/[&<>"']/g, ch => htmlEntities[ch]);

  if (selectedFiles.length === 0) {
    uploadArea.classList.remove('hidden');
    fileListEl.innerHTML = '';
    return;
  }
  uploadArea.classList.add('hidden');

  fileListEl.innerHTML = selectedFiles.map((file, index) => {
    const size = formatFileSize(file.size);
    const icon = getFileIcon(file.name);
    const isExisting = file.isExisting;
    const recordCount = file.recordCount || 0;
    const safeName = esc(file.name);
    // New files can always be previewed; existing ones only if content is cached.
    const canPreview = !isExisting || file.previewContent;

    return `
      <div class="flex items-center justify-between bg-gray-50 px-4 py-2 rounded-lg border ${isExisting ? 'border-blue-200 bg-blue-50/50' : 'border-gray-200'}">
        <div class="flex items-center space-x-3">
          <i class="fa ${icon} ${isExisting ? 'text-blue-500' : 'text-primary'} text-lg"></i>
          <span class="text-sm ${isExisting ? 'text-blue-700' : 'text-gray-700'} truncate max-w-[150px]" title="${safeName}">${safeName}</span>
          <span class="text-xs ${isExisting ? 'text-blue-400' : 'text-gray-400'}">${esc(size)}</span>
          ${recordCount > 0 ? `<span class="text-xs px-2 py-0.5 bg-green-100 text-green-600 rounded">${esc(recordCount)} 条</span>` : ''}
          ${isExisting ? '<span class="text-xs px-2 py-0.5 bg-blue-100 text-blue-600 rounded">已上传</span>' : ''}
        </div>
        <div class="flex items-center space-x-2">
          ${canPreview ? `<button type="button" onclick="previewFile(${index})" class="${isExisting ? 'text-blue-400 hover:text-blue-600' : 'text-gray-400 hover:text-primary'} transition-colors" title="预览">
            <i class="fa fa-eye text-lg"></i>
          </button>` : ''}
          <button type="button" onclick="removeFile(${index})" class="${isExisting ? 'text-blue-400 hover:text-blue-600' : 'text-gray-400 hover:text-red-500'} transition-colors" title="${isExisting ? '移除文件' : '删除文件'}">
            <i class="fa fa-trash-o text-lg"></i>
          </button>
        </div>
      </div>
    `;
  }).join('');
}
/**
 * Formats a byte count as "N B", "N.N KB" or "N.N MB" (MB is the largest unit).
 * A missing, non-numeric or negative size yields "0 B" instead of text such
 * as "NaN MB" or "null B"; server-side file records may lack a size.
 *
 * @param {number|string|null|undefined} bytes Size in bytes.
 * @returns {string} Human-readable size.
 */
function formatFileSize(bytes) {
  const n = Number(bytes);
  if (!Number.isFinite(n) || n < 0) return '0 B';
  if (n < 1024) return n + ' B';
  if (n < 1024 * 1024) return (n / 1024).toFixed(1) + ' KB';
  return (n / (1024 * 1024)).toFixed(1) + ' MB';
}
/**
 * Opens the preview dialog for the file at `index` in `selectedFiles`.
 * Files already stored on the server are fetched by id; local files show the
 * text captured when they were parsed.
 *
 * @param {number} index Position of the file in `selectedFiles`.
 */
function previewFile(index) {
  const file = selectedFiles[index];
  if (!file) return;

  const modal = document.getElementById('previewModal');
  const modalTitle = document.getElementById('previewModalTitle');
  const modalContent = document.getElementById('previewModalContent');

  const showText = (text) => {
    modalContent.innerHTML = `<pre class="text-xs text-gray-600 whitespace-pre-wrap break-all">${escapeHtml(text || '')}</pre>`;
  };
  const showError = (text) => {
    modalContent.innerHTML = `<div class="text-red-500">${escapeHtml(text)}</div>`;
  };

  modalTitle.textContent = file.name;
  modal.classList.remove('hidden');
  document.body.style.overflow = 'hidden'; // lock page scroll behind the dialog

  // Server-side file with a known id: load its content from the API. Without
  // an id the request would go to ".../undefined", so fall back to any
  // locally held preview text.
  if (file.isExisting && file.id !== undefined && file.id !== null) {
    modalContent.innerHTML = '<div class="flex justify-center items-center h-32"><span class="text-gray-500">加载中...</span></div>';
    // Same API base as every other request in this file.
    fetch(`${API_BASE}/dataset-manage/preview/${encodeURIComponent(file.id)}`)
      .then(res => {
        if (!res.ok) {
          throw new Error('HTTP ' + res.status);
        }
        return res.json();
      })
      .then(data => {
        if (data.code === 0) {
          showText(data.data && data.data.content);
        } else {
          showError(data.message || '读取文件失败');
        }
      })
      .catch(err => {
        showError('读取文件失败: ' + err.message);
      });
    return;
  }

  showText(file.previewContent);
}
/**
 * Hides the preview dialog and restores page scrolling.
 */
function closePreviewModal() {
  document.getElementById('previewModal').classList.add('hidden');
  document.body.style.overflow = '';
}
/**
 * Escapes text for safe insertion into HTML, in element content and in
 * quoted attribute values alike (&, <, >, " and ' are all encoded).
 * null / undefined become an empty string.
 *
 * @param {*} text Value to escape; converted with String().
 * @returns {string} HTML-safe text.
 */
function escapeHtml(text) {
  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  return String(text == null ? '' : text).replace(/[&<>"']/g, ch => entities[ch]);
}
/**
 * Maps a file name to a Font Awesome icon class based on its extension
 * (case-insensitive); unknown extensions get the generic text icon.
 *
 * @param {string} filename File name including extension.
 * @returns {string} Icon class name.
 */
function getFileIcon(filename) {
  const suffix = filename.split('.').pop().toLowerCase();
  switch (suffix) {
    case 'xls':
    case 'xlsx':
      return 'fa-file-excel-o';
    case 'jsonl':
    case 'json':
      return 'fa-file-code-o';
    default:
      return 'fa-file-text-o';
  }
}
/**
 * Removes the file at `index` from `selectedFiles` and refreshes the UI.
 * An index that does not address an existing entry is ignored (a bare
 * splice would still remove an element for values such as undefined or -1).
 *
 * NOTE(review): removing an already uploaded file only changes this list;
 * no request in the visible code deletes it on the server — confirm intent.
 *
 * @param {number} index Position of the file in `selectedFiles`.
 */
function removeFile(index) {
  if (!Number.isInteger(index) || index < 0 || index >= selectedFiles.length) {
    return;
  }
  selectedFiles.splice(index, 1);
  updateDataStats();
  renderFileList();
}
// Handler for the dataset_type radio buttons (wired through their inline onchange attributes).
function switchDatasetType() {
// Intentionally empty: no type-specific behaviour is implemented yet.
}
/**
 * Shows the MinIO settings panel only while the "minio" storage option is
 * selected. Tolerates a missing panel or a radio group with nothing checked
 * (the panel is then hidden) instead of throwing.
 */
function toggleStorageConfig() {
  const checkedStorage = document.querySelector('input[name="storage"]:checked');
  const minioConfigPanel = document.getElementById('minioConfigPanel');
  if (!minioConfigPanel) {
    return;
  }
  const isMinio = Boolean(checkedStorage) && checkedStorage.value === 'minio';
  minioConfigPanel.classList.toggle('hidden', !isMinio);
}
/**
 * "Test connection" button handler for the MinIO panel.
 * Checks that endpoint, bucket, access key and secret key are filled in
 * (whitespace-only values count as empty) and then reports success.
 *
 * NOTE(review): the connection test is simulated. No request is sent, so the
 * success message appears for any non-empty values. Replace it with a real
 * backend check before relying on the result.
 */
function testMinioConnection() {
  const readField = (name) => {
    const input = document.querySelector(`input[name="${name}"]`);
    return input ? input.value.trim() : '';
  };
  const endpoint = readField('minio_endpoint');
  const bucket = readField('minio_bucket');
  const accessKey = readField('minio_access_key');
  const secretKey = readField('minio_secret_key');

  if (!endpoint || !bucket || !accessKey || !secretKey) {
    showMessage('提示', '请填写完整的MinIO配置信息', 'warning');
    return;
  }

  // Simulated result (see note above).
  showMessage('成功', '正在测试连接...\n\n连接成功MinIO服务可用。', 'success');
}
/**
 * Uploads the given file entries to dataset `datasetId`.
 *
 * @param {number|string} datasetId Target dataset id.
 * @param {Array<{file: File}>} fileObjs Entries from `selectedFiles` that hold a File.
 * @returns {Promise<Object>} Parsed JSON reply of the upload endpoint.
 */
async function _uploadDatasetFiles(datasetId, fileObjs) {
  const uploadFormData = new FormData();
  for (const fileObj of fileObjs) {
    uploadFormData.append('files', fileObj.file);
  }
  const uploadResponse = await fetch(`${API_BASE}/dataset-manage/upload/${datasetId}`, {
    method: 'POST',
    body: uploadFormData
  });
  return uploadResponse.json();
}

/**
 * "Save" handler: validates the form, then either updates the dataset being
 * edited (PUT, plus upload of newly added files) or creates a new one (POST,
 * file upload, then a follow-up PUT of the record count). On success the
 * user is sent back to the dataset list; failures go through showMessage().
 *
 * @returns {Promise<void>}
 */
async function submitForm() {
  const listUrl = 'main.html?page=dataset-manage';
  const form = document.getElementById('datasetForm');
  const formData = new FormData(form);
  const storageValue = formData.get('storage');

  // A name made only of whitespace counts as missing.
  const name = String(formData.get('name') || '').trim();
  if (!name) {
    showMessage('提示', '请输入数据集名称', 'warning');
    return;
  }

  // MinIO storage needs a complete connection configuration.
  let minioConfig = null;
  if (storageValue === 'minio') {
    minioConfig = {
      endpoint: formData.get('minio_endpoint'),
      bucket: formData.get('minio_bucket'),
      access_key: formData.get('minio_access_key'),
      secret_key: formData.get('minio_secret_key'),
      ssl: formData.get('minio_ssl') === 'on'
    };
    if (!minioConfig.endpoint || !minioConfig.bucket || !minioConfig.access_key || !minioConfig.secret_key) {
      showMessage('提示', '请填写完整的MinIO配置信息', 'warning');
      return;
    }
  }

  // The dataset record exists but its files did not upload: tell the user,
  // then go back to the list where the dataset can be fixed.
  const reportUploadFailure = (prefix, uploadResult) => {
    showMessage('错误', prefix + (uploadResult.message || '未知错误'), 'error');
    setTimeout(() => {
      window.location.href = listUrl;
    }, 1500);
  };

  try {
    const data = {
      name: name,
      type: formData.get('dataset_type'),
      storage_type: storageValue,
      description: formData.get('description') || ''
    };
    if (minioConfig) {
      data.minio_config = minioConfig;
    }

    if (editId) {
      // ---- Edit mode: update the record, then upload newly added files ----
      const updateResponse = await fetch(`${API_BASE}/dataset-manage/${editId}`, {
        method: 'PUT',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(data)
      });
      const updateResult = await updateResponse.json();
      if (updateResult.code !== 0) {
        showMessage('错误', updateResult.message || '更新数据集失败', 'error');
        return;
      }

      // Entries loaded from the server carry no File object and are skipped.
      const newFiles = selectedFiles.filter(f => !f.isExisting);
      if (newFiles.length > 0) {
        const uploadResult = await _uploadDatasetFiles(editId, newFiles);
        if (uploadResult.code !== 0) {
          reportUploadFailure('数据集更新成功,但新文件上传失败: ', uploadResult);
          return;
        }
      }

      showMessage('成功', '更新成功!', 'success', () => {
        window.location.href = listUrl;
      });
      return;
    }

    // ---- Create mode ----
    if (selectedFiles.length === 0) {
      showMessage('提示', '请选择至少一个文件上传', 'warning');
      return;
    }
    const totalRecords = selectedFiles.reduce((sum, f) => sum + (f.recordCount || 0), 0);
    if (totalRecords === 0) {
      showMessage('提示', '无法解析文件中的数据记录请确保文件格式正确JSON/JSONL格式需要包含 instruction 字段)', 'warning');
      return;
    }
    const totalSize = selectedFiles.reduce((sum, f) => sum + (f.size || 0), 0);
    data.count = totalRecords;
    data.size = formatFileSize(totalSize);

    const response = await fetch(`${API_BASE}/dataset-manage`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(data)
    });
    const result = await response.json();
    if (result.code !== 0) {
      showMessage('错误', result.message || '创建数据集失败', 'error');
      return;
    }

    // The id is read from the top level of the reply, as before.
    // NOTE(review): other endpoints in this file wrap their payload in
    // `data`, so `data.id` is accepted as a fallback — confirm the API shape.
    const datasetId = (result.id !== undefined && result.id !== null)
      ? result.id
      : (result.data ? result.data.id : undefined);
    if (datasetId === undefined || datasetId === null) {
      // Without an id the upload would be sent to ".../upload/undefined".
      showMessage('错误', '创建数据集失败: 未返回数据集ID', 'error');
      return;
    }

    const uploadResult = await _uploadDatasetFiles(datasetId, selectedFiles);
    if (uploadResult.code !== 0) {
      reportUploadFailure('数据集创建成功,但文件上传失败: ', uploadResult);
      return;
    }

    // Re-send the record count after the upload; the reply is not inspected.
    await fetch(`${API_BASE}/dataset-manage/${datasetId}`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ count: totalRecords })
    });

    showMessage('成功', '创建成功!', 'success', () => {
      window.location.href = listUrl;
    });
  } catch (error) {
    showMessage('错误', editId ? '更新失败: ' + error.message : '创建失败: ' + error.message, 'error');
  }
}
</script>
<!-- Preview modal: a click on the dimmed backdrop itself (not on the panel) closes it -->
<div id="previewModal" class="hidden fixed inset-0 bg-black/50 z-50 flex items-center justify-center" role="dialog" aria-labelledby="previewModalTitle" onclick="if(event.target === this) closePreviewModal();">
  <div class="bg-white rounded-xl shadow-xl max-w-4xl w-full mx-4 overflow-hidden transform transition-all max-h-[80vh] flex flex-col">
    <!-- Header: title and icon-only close button -->
    <div class="flex items-center justify-between px-6 py-4 border-b border-gray-100">
      <h3 id="previewModalTitle" class="text-lg font-medium text-gray-800 truncate"></h3>
      <!-- The button shows only an icon, so it needs an explicit accessible name -->
      <button type="button" onclick="closePreviewModal()" class="text-gray-400 hover:text-gray-600 transition-colors" aria-label="关闭">
        <i class="fa fa-times text-xl" aria-hidden="true"></i>
      </button>
    </div>
    <!-- Scrollable content area -->
    <div id="previewModalContent" class="flex-1 overflow-auto p-6 bg-gray-50">
    </div>
    <!-- Footer: size hint and text close button -->
    <div class="px-6 py-3 border-t border-gray-100 bg-gray-50 flex justify-between items-center">
      <span class="text-xs text-gray-400">仅显示前 100KB 内容</span>
      <button type="button" onclick="closePreviewModal()" class="px-4 py-2 bg-gray-200 text-gray-700 rounded-lg text-sm hover:bg-gray-300 transition-colors">关闭</button>
    </div>
  </div>
</div>
<!-- Custom message modal: a click on the dimmed backdrop itself (not on the panel) closes it -->
<div id="customModal" class="hidden fixed inset-0 bg-black/50 z-50 flex items-center justify-center" role="dialog" aria-labelledby="modalTitle" aria-describedby="modalMessage" onclick="if(event.target === this) closeModal();">
  <div class="bg-white rounded-xl shadow-xl max-w-sm w-full mx-4 overflow-hidden transform transition-all">
    <div class="flex flex-col items-center justify-center min-h-[140px] py-4">
      <!-- Icon slot: purely decorative, the title and message carry the meaning -->
      <div id="modalIcon" aria-hidden="true"></div>
      <h3 id="modalTitle" class="text-lg font-medium text-gray-800 mb-2"></h3>
      <p id="modalMessage" class="text-gray-600 text-sm"></p>
    </div>
    <!-- Two-button group (confirm / cancel), hidden by default -->
    <div id="modalBtnGroup" class="hidden px-6 pb-6 flex flex-col space-y-2 mx-4">
      <button type="button" id="modalConfirmBtn" class="px-6 py-2 bg-primary text-white rounded-lg hover:bg-primary/90 transition-colors">确定</button>
      <button type="button" id="modalCancelBtn" class="px-6 py-2 border border-gray-300 text-gray-700 rounded-lg hover:bg-gray-50 transition-colors">取消</button>
    </div>
    <!-- Single-button group (confirm only) -->
    <div id="modalSingleBtnGroup" class="px-6 pb-6 flex justify-center">
      <button type="button" id="modalConfirmBtn2" class="px-6 py-2 bg-primary text-white rounded-lg hover:bg-primary/90 transition-colors max-w-[160px]">确定</button>
    </div>
  </div>
</div>
<script>
/**
 * Show the shared message modal (#customModal) in single-button mode.
 *
 * @param {string} title - Heading text; rendered as plain text.
 * @param {string} message - Body text; rendered as plain text, so any markup
 *     in it (e.g. from a server error message) is displayed literally
 *     instead of being interpreted as HTML.
 * @param {string} [type='info'] - 'success', 'error' or 'warning'; any other
 *     value is displayed with the info styling.
 * @param {Function} [onConfirm] - Invoked after the modal has been closed via
 *     the confirm button.
 */
function showMessage(title, message, type = 'info', onConfirm) {
    // Per-type presentation: badge colour, Font Awesome glyph, confirm-button colour.
    const variants = new Map([
        ['success', { tint: 'green', glyph: 'check', button: 'bg-primary' }],
        ['error', { tint: 'red', glyph: 'times', button: 'bg-red-500' }],
        ['warning', { tint: 'yellow', glyph: 'exclamation', button: 'bg-yellow-500' }],
        ['info', { tint: 'blue', glyph: 'info', button: 'bg-primary' }]
    ]);
    const variant = variants.get(type) || variants.get('info');

    const modal = document.getElementById('customModal');
    const modalTitle = document.getElementById('modalTitle');
    const modalMessage = document.getElementById('modalMessage');
    const modalIcon = document.getElementById('modalIcon');
    const confirmBtn = document.getElementById('modalConfirmBtn2');

    modalTitle.textContent = title;
    modalTitle.className = 'text-lg font-medium text-gray-800 mb-2';

    // Icon markup is assembled only from the fixed table above, never from caller input.
    modalIcon.innerHTML = `<div class="w-10 h-10 mx-auto mb-3 rounded-full bg-${variant.tint}-100 flex items-center justify-center"><i class="fa fa-${variant.glyph} text-lg text-${variant.tint}-600"></i></div>`;

    // Callers pass server-provided and exception text here, so it must not be
    // parsed as HTML: textContent prevents script/markup injection.
    modalMessage.textContent = message;

    // Single-button mode: hide the confirm/cancel pair, show the lone confirm button.
    document.getElementById('modalBtnGroup').classList.add('hidden');
    document.getElementById('modalSingleBtnGroup').classList.remove('hidden');

    confirmBtn.className = `px-6 py-2 ${variant.button} text-white rounded-lg hover:${variant.button}/90 transition-colors max-w-[160px]`;

    // Assigning onclick (rather than adding a listener) replaces the handler
    // left over from any previous call.
    confirmBtn.onclick = () => {
        closeModal();
        if (typeof onConfirm === 'function') onConfirm();
    };

    modal.classList.remove('hidden');
    // Lock page scrolling while the modal is open.
    document.body.style.overflow = 'hidden';
}
// Hide the message modal (#customModal) and re-enable page scrolling.
function closeModal() {
    document.getElementById('customModal').classList.add('hidden');
    // An empty value clears the inline overflow override on <body>.
    document.body.style.overflow = '';
}
// Leave this page by navigating the window to backUrl (defined elsewhere in this file).
function goBack() {
    const destination = backUrl;
    window.location.href = destination;
}
</script>
</body>
</html>