first-update
This commit is contained in:
314
easy-dataset-main/components/datasets/import/FieldMappingStep.js
Normal file
314
easy-dataset-main/components/datasets/import/FieldMappingStep.js
Normal file
@@ -0,0 +1,314 @@
|
||||
'use client';
|
||||
|
||||
import { useState, useEffect } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Typography,
|
||||
FormControl,
|
||||
InputLabel,
|
||||
Select,
|
||||
MenuItem,
|
||||
Paper,
|
||||
Table,
|
||||
TableBody,
|
||||
TableCell,
|
||||
TableContainer,
|
||||
TableHead,
|
||||
TableRow,
|
||||
Alert,
|
||||
Button,
|
||||
Chip
|
||||
} from '@mui/material';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
/**
|
||||
* 字段映射步骤组件
|
||||
*/
|
||||
export default function FieldMappingStep({ previewData, onMappingComplete, onError }) {
|
||||
const { t } = useTranslation();
|
||||
const [fieldMapping, setFieldMapping] = useState({
|
||||
question: '',
|
||||
answer: '',
|
||||
cot: '',
|
||||
tags: ''
|
||||
});
|
||||
const [availableFields, setAvailableFields] = useState([]);
|
||||
const [mappingValid, setMappingValid] = useState(false);
|
||||
|
||||
/**
 * Guess an initial mapping from source column names to the import targets
 * (`question`, `answer`, `cot`, `tags`).
 *
 * Alpaca-style columns are recognised first: `instruction` + `input` are
 * combined into a two-column question and `output` becomes the answer.
 * Any target still unmapped is then filled by case-insensitive keyword
 * matching on the column names.
 *
 * @param {string[]} fields - Column names taken from the first preview record.
 * @returns {{question: string|string[], answer: string, cot: string, tags: string}}
 *   The suggested mapping; an empty string means "no suggestion".
 */
const smartFieldMapping = fields => {
  const mapping = {
    question: '',
    answer: '',
    cot: '',
    tags: ''
  };

  const lower = fields.map(f => f.toLowerCase());
  const instructionIdx = lower.findIndex(f => f.includes('instruction'));
  const inputIdx = lower.findIndex(f => f.includes('input'));
  const outputIdx = lower.findIndex(f => f.includes('output'));

  // Alpaca format takes priority.
  if (instructionIdx !== -1 && inputIdx !== -1 && instructionIdx !== inputIdx) {
    // Both columns exist: the question is built from instruction + input.
    mapping.question = [fields[instructionIdx], fields[inputIdx]];
  } else if (instructionIdx !== -1) {
    // Only an instruction column (e.g. data converted from ShareGPT), or one
    // column matching both names: map it alone so it is never listed twice.
    mapping.question = fields[instructionIdx];
  }

  // Never suggest a column for the answer that is already part of the question.
  const questionColumns = [].concat(mapping.question).filter(Boolean);
  if (outputIdx !== -1 && !questionColumns.includes(fields[outputIdx])) {
    mapping.answer = fields[outputIdx];
  }

  // Targets in priority order, each with the substrings that identify it.
  const keywordTable = [
    ['question', ['question', 'input', 'query', 'prompt', 'instruction', '问题', '输入', '指令']],
    ['answer', ['answer', 'output', 'response', 'completion', 'target', '答案', '输出', '回答']],
    ['cot', ['cot', 'reasoning', 'explanation', 'thinking', 'rationale', '思维链', '推理', '解释']],
    ['tags', ['tag', 'tags', 'label', 'labels', 'category', 'categories', '标签', '类别']]
  ];

  // Columns that already serve a target; a column is suggested at most once
  // because duplicate mappings are rejected when the user confirms.
  const used = new Set([].concat(mapping.question, mapping.answer).filter(Boolean));

  fields.forEach(field => {
    if (used.has(field)) return;
    const fieldLower = field.toLowerCase();

    // Test the column against EVERY target that is still open. Checking only
    // the first open target would make the result depend on column order.
    const hit = keywordTable.find(
      ([target, keywords]) => !mapping[target] && keywords.some(keyword => fieldLower.includes(keyword))
    );
    if (hit) {
      mapping[hit[0]] = field;
      used.add(field);
    }
  });

  return mapping;
};
|
||||
|
||||
useEffect(() => {
|
||||
if (previewData && previewData.length > 0) {
|
||||
const fields = Object.keys(previewData[0]);
|
||||
setAvailableFields(fields);
|
||||
|
||||
// 智能识别字段映射
|
||||
const smartMapping = smartFieldMapping(fields);
|
||||
setFieldMapping(smartMapping);
|
||||
}
|
||||
}, [previewData]);
|
||||
|
||||
useEffect(() => {
|
||||
// 验证映射是否有效(问题和答案字段必须选择)
|
||||
const hasQuestion = Array.isArray(fieldMapping.question)
|
||||
? fieldMapping.question.length > 0
|
||||
: !!fieldMapping.question;
|
||||
const hasAnswer = !!fieldMapping.answer;
|
||||
const isValid = hasQuestion && hasAnswer;
|
||||
setMappingValid(isValid);
|
||||
}, [fieldMapping]);
|
||||
|
||||
/**
 * Store the source column(s) chosen for one import target.
 *
 * The question target is a multi-select, so an array value has its empty
 * entries removed before it is stored; any other value is stored as given.
 *
 * @param {string} targetField - Key of the mapping entry to update.
 * @param {string|string[]} sourceField - Selected source column name(s).
 */
const handleFieldChange = (targetField, sourceField) => {
  const isQuestionList = targetField === 'question' && Array.isArray(sourceField);
  const nextValue = isQuestionList ? sourceField.filter(Boolean) : sourceField;

  setFieldMapping(current => {
    const updated = { ...current };
    updated[targetField] = nextValue;
    return updated;
  });
};
|
||||
|
||||
/**
 * Validate the current mapping and hand it to the parent step.
 *
 * Reports through `onError` when the required targets are not mapped or when
 * one source column is used for more than one target; otherwise calls
 * `onMappingComplete` with the mapping.
 */
const handleConfirmMapping = () => {
  if (!mappingValid) {
    onError(t('import.mappingRequired', '问题和答案字段为必选项'));
    return;
  }

  // Collect every selected source column; array-valued targets contribute
  // each of their non-empty entries.
  const selectedSources = [];
  for (const value of Object.values(fieldMapping)) {
    if (!value) continue;
    if (Array.isArray(value)) {
      selectedSources.push(...value.filter(Boolean));
    } else {
      selectedSources.push(value);
    }
  }

  const hasDuplicate = new Set(selectedSources).size !== selectedSources.length;
  if (hasDuplicate) {
    onError(t('import.duplicateMapping', '不能将多个目标字段映射到同一个源字段'));
    return;
  }

  onMappingComplete(fieldMapping);
};
|
||||
|
||||
/**
 * Return the translated helper text shown under the selector of a target.
 *
 * @param {string} field - Import target key.
 * @returns {string} The description, or an empty string for an unknown key.
 */
const getFieldDescription = field => {
  if (field === 'question') {
    return t('import.questionDesc', '用户的问题或输入内容(必选,可多选)');
  }
  if (field === 'answer') {
    return t('import.answerDesc', 'AI的回答或输出内容(必选)');
  }
  if (field === 'cot') {
    return t('import.cotDesc', '思维链或推理过程(可选)');
  }
  if (field === 'tags') {
    return t('import.tagsDesc', '标签数组,多个标签用逗号分隔(可选)');
  }
  return '';
};
|
||||
|
||||
/**
 * Tell whether an import target must be mapped before the user can continue.
 *
 * @param {string} field - Import target key.
 * @returns {boolean} True only for `question` and `answer`.
 */
const isFieldRequired = field => ['question', 'answer'].includes(field);
|
||||
|
||||
if (!previewData || previewData.length === 0) {
|
||||
return <Alert severity="error">{t('import.noPreviewData', '没有可预览的数据')}</Alert>;
|
||||
}
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<Typography variant="h6" gutterBottom>
|
||||
{t('import.fieldMapping', '字段映射')}
|
||||
</Typography>
|
||||
|
||||
<Typography variant="body2" color="text.secondary" sx={{ mb: 3 }}>
|
||||
{t(
|
||||
'import.mappingDescription',
|
||||
'请将源数据的字段映射到目标字段。系统已自动识别可能的映射关系,您可以根据需要调整。'
|
||||
)}
|
||||
</Typography>
|
||||
|
||||
{/* 字段映射选择 */}
|
||||
<Paper sx={{ p: 3, mb: 3 }}>
|
||||
<Typography variant="subtitle1" gutterBottom>
|
||||
{t('import.selectMapping', '选择字段映射')}
|
||||
</Typography>
|
||||
|
||||
<Box sx={{ display: 'grid', gap: 2, gridTemplateColumns: 'repeat(auto-fit, minmax(300px, 1fr))' }}>
|
||||
{Object.keys(fieldMapping).map(targetField => (
|
||||
<FormControl key={targetField} fullWidth>
|
||||
<InputLabel>
|
||||
{t(`import.${targetField}Field`, targetField)}
|
||||
{isFieldRequired(targetField) && <span style={{ color: 'red' }}>*</span>}
|
||||
</InputLabel>
|
||||
{targetField === 'question' ? (
|
||||
<Select
|
||||
multiple
|
||||
value={
|
||||
Array.isArray(fieldMapping.question)
|
||||
? fieldMapping.question
|
||||
: fieldMapping.question
|
||||
? [fieldMapping.question]
|
||||
: []
|
||||
}
|
||||
label={t(`import.${targetField}Field`, targetField)}
|
||||
onChange={e => handleFieldChange(targetField, e.target.value)}
|
||||
renderValue={selected => (
|
||||
<Box sx={{ display: 'flex', flexWrap: 'wrap', gap: 0.5 }}>
|
||||
{selected.map(value => (
|
||||
<Chip key={value} label={value} size="small" />
|
||||
))}
|
||||
</Box>
|
||||
)}
|
||||
>
|
||||
{availableFields.map(field => (
|
||||
<MenuItem key={field} value={field}>
|
||||
{field}
|
||||
</MenuItem>
|
||||
))}
|
||||
</Select>
|
||||
) : (
|
||||
<Select
|
||||
value={fieldMapping[targetField]}
|
||||
label={t(`import.${targetField}Field`, targetField)}
|
||||
onChange={e => handleFieldChange(targetField, e.target.value)}
|
||||
>
|
||||
<MenuItem value="">
|
||||
<em>{t('import.selectField', '选择字段')}</em>
|
||||
</MenuItem>
|
||||
{availableFields.map(field => (
|
||||
<MenuItem key={field} value={field}>
|
||||
{field}
|
||||
</MenuItem>
|
||||
))}
|
||||
</Select>
|
||||
)}
|
||||
<Typography variant="caption" color="text.secondary" sx={{ mt: 0.5 }}>
|
||||
{getFieldDescription(targetField)}
|
||||
</Typography>
|
||||
</FormControl>
|
||||
))}
|
||||
</Box>
|
||||
</Paper>
|
||||
|
||||
{/* 数据预览 */}
|
||||
<Paper sx={{ mb: 3 }}>
|
||||
<Box sx={{ p: 2, borderBottom: 1, borderColor: 'divider' }}>
|
||||
<Typography variant="subtitle1">{t('import.dataPreview', '数据预览')}</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
{t('import.previewNote', '显示前3条记录,每个字段值最多显示100个字符')}
|
||||
</Typography>
|
||||
</Box>
|
||||
|
||||
<TableContainer sx={{ maxHeight: 400 }}>
|
||||
<Table stickyHeader size="small">
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
{availableFields.map(field => (
|
||||
<TableCell key={field} sx={{ minWidth: 150 }}>
|
||||
<Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<Typography variant="subtitle2">{field}</Typography>
|
||||
{Object.entries(fieldMapping).map(([targetField, sourceField]) => {
|
||||
const match = Array.isArray(sourceField) ? sourceField.includes(field) : sourceField === field;
|
||||
if (match) {
|
||||
return (
|
||||
<Chip
|
||||
key={targetField}
|
||||
label={t(`import.${targetField}Field`, targetField)}
|
||||
size="small"
|
||||
color={isFieldRequired(targetField) ? 'primary' : 'default'}
|
||||
variant="outlined"
|
||||
/>
|
||||
);
|
||||
}
|
||||
return null;
|
||||
})}
|
||||
</Box>
|
||||
</TableCell>
|
||||
))}
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
<TableBody>
|
||||
{previewData.map((row, index) => (
|
||||
<TableRow key={index}>
|
||||
{availableFields.map(field => (
|
||||
<TableCell key={field}>
|
||||
<Typography variant="body2" sx={{ wordBreak: 'break-word' }}>
|
||||
{row[field] || '-'}
|
||||
</Typography>
|
||||
</TableCell>
|
||||
))}
|
||||
</TableRow>
|
||||
))}
|
||||
</TableBody>
|
||||
</Table>
|
||||
</TableContainer>
|
||||
</Paper>
|
||||
|
||||
{/* 确认按钮 */}
|
||||
<Box sx={{ display: 'flex', justifyContent: 'flex-end' }}>
|
||||
<Button variant="contained" onClick={handleConfirmMapping} disabled={!mappingValid}>
|
||||
{t('import.confirmMapping', '确认映射')}
|
||||
</Button>
|
||||
</Box>
|
||||
|
||||
{!mappingValid && (
|
||||
<Alert severity="warning" sx={{ mt: 2 }}>
|
||||
{t('import.requiredFields', '请至少选择问题和答案字段的映射')}
|
||||
</Alert>
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
344
easy-dataset-main/components/datasets/import/FileUploadStep.js
Normal file
344
easy-dataset-main/components/datasets/import/FileUploadStep.js
Normal file
@@ -0,0 +1,344 @@
|
||||
'use client';
|
||||
|
||||
import { useState, useCallback } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Typography,
|
||||
Button,
|
||||
Paper,
|
||||
List,
|
||||
ListItem,
|
||||
ListItemIcon,
|
||||
ListItemText,
|
||||
LinearProgress,
|
||||
Alert
|
||||
} from '@mui/material';
|
||||
import { CloudUpload as UploadIcon, Description as FileIcon, CheckCircle as CheckIcon } from '@mui/icons-material';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
// import { useDropzone } from 'react-dropzone';
|
||||
|
||||
/**
|
||||
* 文件上传步骤组件
|
||||
*/
|
||||
export default function FileUploadStep({ onDataLoaded, onError }) {
|
||||
const { t } = useTranslation();
|
||||
const [uploading, setUploading] = useState(false);
|
||||
const [uploadedFiles, setUploadedFiles] = useState([]);
|
||||
|
||||
/**
 * Parse CSV text into an array of records keyed by the header row.
 *
 * The text is first split into logical records, keeping line breaks that sit
 * inside a quoted field as part of that field. Each record is then split into
 * fields by `parseCSVLine`. Blank records are ignored.
 *
 * @param {string} text - Raw CSV content.
 * @returns {Object[]} One object per data record; missing cells become ''.
 * @throws {Error} When there is no header row plus at least one data record.
 */
const parseCSV = text => {
  // Drop a leading byte-order mark so it does not end up in the first header name.
  const source = text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;

  const lines = [];
  let currentLine = '';
  let inQuotes = false;

  // Walk character by character so that line breaks inside quotes are kept.
  for (let i = 0; i < source.length; i++) {
    const char = source[i];

    if (char === '"') {
      // Quote characters are copied through unchanged: this pass only finds
      // record boundaries, and parseCSVLine still needs the quoting to tell
      // field separators from commas inside a quoted field.
      if (inQuotes && source[i + 1] === '"') {
        // Escaped quote inside a quoted field: keep the pair together.
        currentLine += '""';
        i++;
      } else {
        inQuotes = !inQuotes;
        currentLine += '"';
      }
    } else if (!inQuotes && (char === '\n' || char === '\r')) {
      // End of record (not inside quotes). Treat CRLF as a single break.
      if (char === '\r' && source[i + 1] === '\n') {
        i++;
      }
      if (currentLine.trim()) {
        lines.push(currentLine);
      }
      currentLine = '';
    } else {
      currentLine += char;
    }
  }

  // Flush the final record when the text does not end with a line break.
  if (currentLine.trim()) {
    lines.push(currentLine);
  }

  if (lines.length < 2) {
    throw new Error('CSV文件格式不正确,至少需要标题行和一行数据');
  }

  const [headerLine, ...dataLines] = lines;
  const headers = parseCSVLine(headerLine);

  return dataLines.map(line => {
    const values = parseCSVLine(line);
    const record = {};
    headers.forEach((header, index) => {
      record[header] = values[index] ?? '';
    });
    return record;
  });
};
|
||||
|
||||
/**
 * Split one CSV record into its field values.
 *
 * A comma separates fields unless it appears inside double quotes, and a
 * doubled quote inside a quoted section yields one literal quote. Every field
 * is trimmed before it is returned.
 *
 * @param {string} line - A single CSV record.
 * @returns {string[]} The field values; always contains at least one entry.
 */
const parseCSVLine = line => {
  const fields = [];
  let buffer = '';
  let quoted = false;
  let pos = 0;

  while (pos < line.length) {
    const ch = line[pos];
    const isQuote = ch === '"';

    if (isQuote && quoted && line[pos + 1] === '"') {
      // Escaped quote: emit one quote and consume both characters.
      buffer += '"';
      pos += 2;
      continue;
    }

    if (isQuote) {
      quoted = !quoted;
    } else if (ch === ',' && !quoted) {
      fields.push(buffer.trim());
      buffer = '';
    } else {
      buffer += ch;
    }
    pos += 1;
  }

  // Whatever remains after the last separator is the final field.
  fields.push(buffer.trim());
  return fields;
};
|
||||
|
||||
/**
 * Convert a ShareGPT-style record into an Alpaca-style record.
 *
 * A record is treated as ShareGPT when it has a `conversations` array. Only
 * the first exchange is used: the system text (if any) is prefixed to the
 * user text to form `instruction`, and the first assistant reply becomes
 * `output`. All other properties of the record are carried over unchanged.
 *
 * Each turn may name its speaker with `from` or `role` and its text with
 * `value` or `content`; `human`/`user` and `gpt`/`assistant` are equivalent.
 *
 * @param {*} item - A parsed record.
 * @returns {*} The converted record, or `item` itself when it is not ShareGPT.
 */
const convertShareGPTToAlpaca = item => {
  // Anything without a conversations array (including null) passes through.
  if (!item || !Array.isArray(item.conversations)) {
    return item;
  }

  const { conversations, ...rest } = item;

  let systemMessage = '';
  let instruction = '';
  let output = '';

  for (const turn of conversations) {
    // Skip malformed entries instead of failing the whole file.
    if (!turn) continue;

    const speaker = turn.from ?? turn.role;
    const text = turn.value ?? turn.content;
    if (!text) continue;

    if (speaker === 'system') {
      systemMessage = text;
    } else if (speaker === 'human' || speaker === 'user') {
      instruction = text;
    } else if (speaker === 'gpt' || speaker === 'assistant') {
      output = text;
      break; // only the first exchange is imported
    }
  }

  // A system message becomes the prefix of the instruction, or the whole
  // instruction when there is no user message.
  if (systemMessage && instruction) {
    instruction = `${systemMessage}\n\n${instruction}`;
  } else if (systemMessage) {
    instruction = systemMessage;
  }

  return {
    instruction: instruction || '',
    input: '', // ShareGPT records have no separate input part
    output: output || '',
    // Remaining properties are kept; they win over the generated keys, as before.
    ...rest
  };
};
|
||||
|
||||
/**
 * Read an uploaded file and turn it into import records plus a short preview.
 *
 * The format is chosen from the file extension: `.json` (an array or a single
 * object), `.jsonl` (one JSON value per non-blank line) or `.csv`. Entries
 * that are not plain objects are dropped, and every remaining record is passed
 * through `convertShareGPTToAlpaca`.
 *
 * @param {File} file - Object exposing `name`, `size` and an async `text()`.
 * @returns {Promise<{data: Object[], preview: Object[], source: Object}>}
 *   `preview` holds the first 3 records with every value rendered as a string
 *   of at most 100 characters (followed by '...').
 * @throws {Error} Wrapped parse/validation failure; the original error is
 *   available as `cause`.
 */
const parseFileContent = async file => {
  const text = await file.text();
  const extension = file.name.split('.').pop().toLowerCase();

  // Render one cell for the preview table. `??`-style handling keeps 0 and
  // false visible, and nested values are shown as JSON rather than
  // "[object Object]".
  const formatPreviewValue = raw => {
    if (raw === null || raw === undefined) return '';
    const rendered = typeof raw === 'object' ? JSON.stringify(raw) : String(raw);
    return rendered.length > 100 ? `${rendered.substring(0, 100)}...` : rendered;
  };

  try {
    let data = [];

    if (extension === 'json') {
      const parsed = JSON.parse(text);
      data = Array.isArray(parsed) ? parsed : [parsed];
    } else if (extension === 'jsonl') {
      data = text
        .split('\n')
        .filter(line => line.trim())
        .map(line => JSON.parse(line));
    } else if (extension === 'csv') {
      data = parseCSV(text);
      if (data.length === 0) {
        throw new Error('CSV文件格式不正确或没有数据');
      }
    } else {
      throw new Error('不支持的文件格式');
    }

    // Only plain objects have fields that can be mapped; null, arrays and
    // primitives would otherwise crash the preview or import as empty rows.
    data = data.filter(record => record !== null && typeof record === 'object' && !Array.isArray(record));

    if (data.length === 0) {
      throw new Error('文件中没有找到有效数据');
    }

    // Detect ShareGPT records and convert them to Alpaca shape.
    data = data.map(convertShareGPTToAlpaca);

    const previewData = data.slice(0, 3).map(item => {
      const preview = {};
      Object.keys(item).forEach(key => {
        preview[key] = formatPreviewValue(item[key]);
      });
      return preview;
    });

    return {
      data,
      preview: previewData,
      source: {
        type: 'file',
        fileName: file.name,
        fileSize: file.size,
        totalRecords: data.length
      }
    };
  } catch (error) {
    throw new Error(`解析文件失败: ${error.message}`, { cause: error });
  }
};
|
||||
|
||||
/**
 * Handle the `change` event of the hidden file input.
 *
 * Parses the first selected file, records the outcome in the uploaded-files
 * list and forwards either the parsed data (`onDataLoaded`) or the error
 * message (`onError`) to the parent.
 *
 * @param {{target: {files: ArrayLike<File>, value: string}}} event - Change event of the input.
 * @returns {Promise<void>}
 */
const handleFileSelect = async event => {
  // Keep a reference to the input before awaiting anything.
  const input = event.target;
  const files = input.files;
  if (!files || files.length === 0) return;

  const file = files[0];
  setUploading(true);

  try {
    const result = await parseFileContent(file);
    setUploadedFiles([
      {
        name: file.name,
        size: file.size,
        status: 'success'
      }
    ]);

    onDataLoaded(result.data, result.preview, result.source);
  } catch (error) {
    setUploadedFiles([
      {
        name: file.name,
        size: file.size,
        status: 'error',
        error: error.message
      }
    ]);
    onError(error.message);
  } finally {
    setUploading(false);
    // Clear the selection so that choosing the same file again (for example
    // after correcting it) fires another change event.
    input.value = '';
  }
};
|
||||
|
||||
/**
 * Format a byte count as a short human-readable size using 1024-based units.
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} For example '1.5 KB'. Values that are not positive finite
 *   numbers are reported as '0 Bytes'; sizes beyond the largest unit are
 *   expressed in that unit.
 */
const formatFileSize = bytes => {
  if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes';

  const k = 1024;
  const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];

  // Clamp the unit index so it always points at an existing label, both for
  // sub-byte values (negative logarithm) and for sizes past the last unit.
  const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
  const i = Math.min(sizes.length - 1, Math.max(0, rawIndex));

  // parseFloat drops trailing zeros left by toFixed ("1.50" -> 1.5).
  return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${sizes[i]}`;
};
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<Typography variant="h6" gutterBottom>
|
||||
{t('import.uploadFile', '上传文件')}
|
||||
</Typography>
|
||||
|
||||
<Typography variant="body2" color="text.secondary" sx={{ mb: 3 }}>
|
||||
{t('import.supportedFormats', '支持 JSON、JSONL、CSV 格式文件')}
|
||||
</Typography>
|
||||
|
||||
{/* 文件上传区域 */}
|
||||
<Paper
|
||||
sx={{
|
||||
p: 4,
|
||||
textAlign: 'center',
|
||||
cursor: 'pointer',
|
||||
border: '2px dashed',
|
||||
borderColor: 'divider',
|
||||
backgroundColor: 'background.paper',
|
||||
transition: 'all 0.2s ease',
|
||||
mb: 3,
|
||||
'&:hover': {
|
||||
borderColor: 'primary.main',
|
||||
backgroundColor: 'action.hover'
|
||||
}
|
||||
}}
|
||||
onClick={() => document.getElementById('file-upload-input').click()}
|
||||
>
|
||||
<input
|
||||
id="file-upload-input"
|
||||
type="file"
|
||||
accept=".json,.jsonl,.csv"
|
||||
onChange={handleFileSelect}
|
||||
style={{ display: 'none' }}
|
||||
/>
|
||||
<UploadIcon sx={{ fontSize: 48, color: 'text.secondary', mb: 2 }} />
|
||||
<Typography variant="h6" gutterBottom>
|
||||
{t('import.dragDropFile', '拖拽文件到此处或点击选择文件')}
|
||||
</Typography>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
{t('import.maxFileSize', '最大文件大小: 50MB')}
|
||||
</Typography>
|
||||
</Paper>
|
||||
|
||||
{/* 上传进度 */}
|
||||
{uploading && (
|
||||
<Box sx={{ mb: 3 }}>
|
||||
<Typography variant="body2" gutterBottom>
|
||||
{t('import.processingFile', '正在处理文件...')}
|
||||
</Typography>
|
||||
<LinearProgress />
|
||||
</Box>
|
||||
)}
|
||||
|
||||
{/* 已上传文件列表 */}
|
||||
{uploadedFiles.length > 0 && (
|
||||
<Box>
|
||||
<Typography variant="subtitle2" gutterBottom>
|
||||
{t('import.uploadedFiles', '已上传文件')}
|
||||
</Typography>
|
||||
<List>
|
||||
{uploadedFiles.map((file, index) => (
|
||||
<ListItem key={index} sx={{ px: 0 }}>
|
||||
<ListItemIcon>
|
||||
{file.status === 'success' ? <CheckIcon color="success" /> : <FileIcon color="error" />}
|
||||
</ListItemIcon>
|
||||
<ListItemText
|
||||
primary={file.name}
|
||||
secondary={file.status === 'success' ? `${formatFileSize(file.size)}` : file.error}
|
||||
/>
|
||||
</ListItem>
|
||||
))}
|
||||
</List>
|
||||
|
||||
{uploadedFiles.some(f => f.status === 'error') && (
|
||||
<Alert severity="error" sx={{ mt: 2 }}>
|
||||
{t('import.uploadError', '文件上传失败,请检查文件格式是否正确')}
|
||||
</Alert>
|
||||
)}
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,303 @@
|
||||
'use client';
|
||||
|
||||
import { useState, useEffect, useRef } from 'react';
|
||||
import {
|
||||
Box,
|
||||
Typography,
|
||||
LinearProgress,
|
||||
Alert,
|
||||
Paper,
|
||||
List,
|
||||
ListItem,
|
||||
ListItemIcon,
|
||||
ListItemText,
|
||||
Chip
|
||||
} from '@mui/material';
|
||||
import { CheckCircle as CheckIcon, Error as ErrorIcon, Info as InfoIcon } from '@mui/icons-material';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
/**
|
||||
* 导入进度步骤组件
|
||||
*/
|
||||
export default function ImportProgressStep({ projectId, rawData, fieldMapping, sourceInfo, onComplete, onError }) {
|
||||
const { t } = useTranslation();
|
||||
const [progress, setProgress] = useState(0);
|
||||
const [currentStep, setCurrentStep] = useState('');
|
||||
const [importStats, setImportStats] = useState({
|
||||
total: 0,
|
||||
processed: 0,
|
||||
success: 0,
|
||||
failed: 0,
|
||||
skipped: 0,
|
||||
errors: []
|
||||
});
|
||||
const [completed, setCompleted] = useState(false);
|
||||
const startedRef = useRef(false); // 防止在开发模式下因严格模式导致重复执行
|
||||
|
||||
useEffect(() => {
|
||||
if (!startedRef.current && rawData && fieldMapping && projectId) {
|
||||
startedRef.current = true;
|
||||
startImport();
|
||||
}
|
||||
}, [rawData, fieldMapping, projectId]);
|
||||
|
||||
/**
 * Convert the raw records with the chosen field mapping and upload them to
 * `/api/projects/{projectId}/datasets/import` in batches of 500.
 *
 * Progress and running totals are pushed into component state after every
 * batch. A failed batch is counted as failed in full and the import continues
 * with the next batch. When all batches are done, `onComplete` is called after
 * a 2 second delay so the user can see the final state. Errors raised outside
 * the per-batch handling are reported through `onError`.
 *
 * @returns {Promise<void>}
 */
const startImport = async () => {
  try {
    setCurrentStep(t('import.preparingData', '准备数据...'));
    setImportStats(prev => ({ ...prev, total: rawData.length }));

    // Convert every record to the dataset shape expected by the API.
    const questionFields = fieldMapping.question;
    const convertedData = rawData.map(item => {
      // `??` is used throughout so that legitimate falsy values such as 0 or
      // false are imported instead of being replaced by an empty string.
      // A multi-column question is joined line by line, skipping blank parts.
      const question = Array.isArray(questionFields)
        ? questionFields
            .map(name => item[name] ?? '')
            .filter(value => String(value).trim() !== '')
            .join('\n')
        : (item[questionFields] ?? '');

      // No validation here: the backend validates and reports skipped rows.
      return {
        question,
        answer: item[fieldMapping.answer] ?? '',
        cot: fieldMapping.cot ? (item[fieldMapping.cot] ?? '') : '',
        questionLabel: '', // default label; can be generated later
        chunkName: sourceInfo?.datasetName || sourceInfo?.fileName || 'Imported Data',
        chunkContent: `Imported from ${sourceInfo?.type || 'file'}`,
        model: 'imported',
        confirmed: false,
        score: 0,
        tags: fieldMapping.tags ? JSON.stringify(parseTagsField(item[fieldMapping.tags])) : '[]',
        note: '',
        other: JSON.stringify(getOtherFields(item, fieldMapping))
      };
    });

    setProgress(25);
    setCurrentStep(t('import.uploadingData', '上传数据...'));

    // Upload in batches.
    const batchSize = 500;
    let processed = 0;
    let success = 0;
    let failed = 0;
    let skipped = 0;
    const errors = [];

    for (let i = 0; i < convertedData.length; i += batchSize) {
      const batch = convertedData.slice(i, i + batchSize);

      try {
        const response = await fetch(`/api/projects/${projectId}/datasets/import`, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json'
          },
          body: JSON.stringify({
            datasets: batch,
            sourceInfo
          })
        });

        if (!response.ok) {
          // statusText can be empty; fall back to the numeric status so the
          // message still says what went wrong.
          throw new Error(`批次上传失败: ${response.statusText || `HTTP ${response.status}`}`);
        }

        const result = await response.json();
        success += result.success || 0;
        failed += typeof result.failed === 'number' ? result.failed : (result.errors?.length || 0);
        skipped += result.skipped || 0;
        processed += batch.length;

        if (result.errors && result.errors.length > 0) {
          errors.push(...result.errors);
        }
      } catch (error) {
        failed += batch.length;
        processed += batch.length;
        errors.push(`批次 ${Math.floor(i / batchSize) + 1}: ${error.message}`);
      }

      // Preparation accounts for the first 25%, uploading for the next 70%.
      const progressPercent = 25 + (processed / convertedData.length) * 70;
      setProgress(progressPercent);
      setImportStats({
        total: convertedData.length,
        processed,
        success,
        failed,
        skipped,
        // Store a copy: `errors` keeps growing in later batches and state
        // snapshots must not change after they were set.
        errors: [...errors]
      });

      setCurrentStep(
        t('import.processing', '处理中... {{processed}}/{{total}}', {
          processed,
          total: convertedData.length
        })
      );
    }

    setProgress(100);
    setCurrentStep(t('import.completed', '导入完成'));
    setCompleted(true);

    // Delay the completion callback so the finished state stays visible briefly.
    setTimeout(() => {
      onComplete();
    }, 2000);
  } catch (error) {
    onError(error.message);
    setImportStats(prev => ({
      ...prev,
      errors: [...prev.errors, error.message]
    }));
  }
};
|
||||
|
||||
/**
 * Normalise the raw value of the tags column into an array of tags.
 *
 * Accepted inputs:
 * - an array, returned as is;
 * - a string holding a JSON array (as produced when an array is stored in a
 *   CSV cell), parsed and converted to trimmed strings;
 * - any other string, split on ASCII or full-width commas.
 * Everything else, including empty values, yields an empty array.
 *
 * @param {*} tagsValue - Raw value read from the mapped tags column.
 * @returns {Array} The tags.
 */
const parseTagsField = tagsValue => {
  if (!tagsValue) return [];

  if (Array.isArray(tagsValue)) {
    return tagsValue;
  }

  if (typeof tagsValue !== 'string') {
    return [];
  }

  const trimmed = tagsValue.trim();
  if (trimmed.startsWith('[') && trimmed.endsWith(']')) {
    try {
      const parsed = JSON.parse(trimmed);
      if (Array.isArray(parsed)) {
        return parsed
          .filter(tag => tag !== null && tag !== undefined)
          .map(tag => String(tag).trim())
          .filter(tag => tag);
      }
    } catch {
      // Not valid JSON after all: treat it as a plain delimited string below.
    }
  }

  return tagsValue
    .split(/[,，]/)
    .map(tag => tag.trim())
    .filter(tag => tag);
};
|
||||
|
||||
/**
 * Collect the properties of a record that are not used by the field mapping.
 *
 * A mapping value may be a single column name or an array of column names;
 * empty mapping values are ignored.
 *
 * @param {Object} item - One raw record.
 * @param {Object} mapping - Target-to-source field mapping.
 * @returns {Object} A new object holding only the unmapped properties of `item`.
 */
const getOtherFields = (item, mapping) => {
  // Flatten one level so array-valued targets contribute each column name.
  const consumed = new Set(Object.values(mapping).filter(Boolean).flat());

  const leftovers = {};
  for (const [key, value] of Object.entries(item)) {
    if (consumed.has(key)) continue;
    leftovers[key] = value;
  }
  return leftovers;
};
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<Typography variant="h6" gutterBottom>
|
||||
{t('import.importing', '正在导入数据集')}
|
||||
</Typography>
|
||||
|
||||
{/* 进度条 */}
|
||||
<Paper sx={{ p: 3, mb: 3 }}>
|
||||
<Typography variant="body1" gutterBottom>
|
||||
{currentStep}
|
||||
</Typography>
|
||||
<LinearProgress variant="determinate" value={progress} sx={{ height: 8, borderRadius: 4, mb: 2 }} />
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
{Math.round(progress)}% {t('import.complete', '完成')}
|
||||
</Typography>
|
||||
</Paper>
|
||||
|
||||
{/* 导入统计 */}
|
||||
<Paper sx={{ p: 3, mb: 3 }}>
|
||||
<Typography variant="subtitle1" gutterBottom>
|
||||
{t('import.importStats', '导入统计')}
|
||||
</Typography>
|
||||
|
||||
<Box sx={{ display: 'flex', gap: 2, flexWrap: 'wrap', mb: 2 }}>
|
||||
<Chip
|
||||
icon={<InfoIcon />}
|
||||
label={t('import.total', '总计: {{count}}', { count: importStats.total })}
|
||||
variant="outlined"
|
||||
/>
|
||||
<Chip
|
||||
icon={<CheckIcon />}
|
||||
label={t('import.success', '成功: {{count}}', { count: importStats.success })}
|
||||
color="success"
|
||||
variant="outlined"
|
||||
/>
|
||||
{importStats.skipped > 0 && (
|
||||
<Chip
|
||||
icon={<InfoIcon />}
|
||||
label={t('import.skipped', '跳过: {{count}}', { count: importStats.skipped })}
|
||||
color="warning"
|
||||
variant="outlined"
|
||||
/>
|
||||
)}
|
||||
{importStats.failed > 0 && (
|
||||
<Chip
|
||||
icon={<ErrorIcon />}
|
||||
label={t('import.failed', '失败: {{count}}', { count: importStats.failed })}
|
||||
color="error"
|
||||
variant="outlined"
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
{sourceInfo && (
|
||||
<Box>
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
{t('import.source', '数据源')}:{' '}
|
||||
{sourceInfo.type === 'file' ? sourceInfo.fileName : sourceInfo.datasetName}
|
||||
</Typography>
|
||||
{sourceInfo.description && (
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
{t('import.description', '描述')}: {sourceInfo.description}
|
||||
</Typography>
|
||||
)}
|
||||
</Box>
|
||||
)}
|
||||
</Paper>
|
||||
|
||||
{/* 错误列表 */}
|
||||
{importStats.errors.length > 0 && (
|
||||
<Paper sx={{ p: 3 }}>
|
||||
<Typography variant="subtitle1" gutterBottom color="error">
|
||||
{t('import.errors', '错误信息')}
|
||||
</Typography>
|
||||
<List dense>
|
||||
{importStats.errors.slice(0, 10).map((error, index) => (
|
||||
<ListItem key={index} sx={{ px: 0 }}>
|
||||
<ListItemIcon>
|
||||
<ErrorIcon color="error" fontSize="small" />
|
||||
</ListItemIcon>
|
||||
<ListItemText primary={error} primaryTypographyProps={{ variant: 'body2' }} />
|
||||
</ListItem>
|
||||
))}
|
||||
</List>
|
||||
{importStats.errors.length > 10 && (
|
||||
<Typography variant="body2" color="text.secondary">
|
||||
{t('import.moreErrors', '还有 {{count}} 个错误未显示...', {
|
||||
count: importStats.errors.length - 10
|
||||
})}
|
||||
</Typography>
|
||||
)}
|
||||
</Paper>
|
||||
)}
|
||||
|
||||
{/* 完成提示 */}
|
||||
{completed && (
|
||||
<Alert severity="success" sx={{ mt: 2 }}>
|
||||
{t('import.importSuccess', '数据集导入完成!成功导入 {{success}} 条记录。', {
|
||||
success: importStats.success
|
||||
})}
|
||||
</Alert>
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
Reference in New Issue
Block a user