// LocalExportTab.js 组件 import React, { useState, useEffect } from 'react'; import { useTranslation } from 'react-i18next'; import { Button, FormControl, FormControlLabel, RadioGroup, Radio, TextField, Checkbox, Typography, Box, Paper, useTheme, Grid, Table, TableRow, TableHead, TableBody, TableCell, TableContainer, Dialog, DialogTitle, DialogContent, DialogActions, Chip, Alert, CircularProgress } from '@mui/material'; const LocalExportTab = ({ fileFormat, formatType, systemPrompt, confirmedOnly, includeCOT, customFields, alpacaFieldType, customInstruction, reasoningLanguage, handleFileFormatChange, handleFormatChange, handleSystemPromptChange, handleReasoningLanguageChange, handleConfirmedOnlyChange, handleIncludeCOTChange, handleCustomFieldChange, handleIncludeLabelsChange, handleIncludeChunkChange, handleQuestionOnlyChange, handleAlpacaFieldTypeChange, handleCustomInstructionChange, handleExport, projectId }) => { const theme = useTheme(); const { t } = useTranslation(); // Balance export related state const [balanceDialogOpen, setBalanceDialogOpen] = useState(false); const [tagStats, setTagStats] = useState([]); const [balanceConfig, setBalanceConfig] = useState([]); const [loading, setLoading] = useState(false); const [error, setError] = useState(''); const [totalCount, setTotalCount] = useState(0); // Get label statistics (changed to GET + query parameters) const fetchTagStats = async () => { try { setLoading(true); const url = `/api/projects/${projectId}/datasets/export?confirmed=${confirmedOnly ? 'true' : 'false'}`; const response = await fetch(url, { method: 'GET' }); if (!response.ok) { throw new Error(t('errors.getTagStatsFailed')); } const stats = await response.json(); setTagStats(stats); // 初始化平衡配置 const initialConfig = stats.map(stat => ({ tagLabel: stat.tagLabel, maxCount: Math.min(stat.datasetCount, 100), // 默认最多100条 availableCount: stat.datasetCount })); setBalanceConfig(initialConfig); // 计算总数 const total = initialConfig.reduce((sum, config) => sum + config.maxCount, 0); setTotalCount(total); } catch (err) { setError(err.message); } finally { setLoading(false); } }; // 打开平衡导出对话框 const handleOpenBalanceDialog = () => { setBalanceDialogOpen(true); fetchTagStats(); }; // 更新单个标签的数量配置 const updateBalanceConfig = (tagLabel, newCount) => { const newConfig = balanceConfig.map(config => { if (config.tagLabel === tagLabel) { const count = Math.min(Math.max(0, parseInt(newCount) || 0), config.availableCount); return { ...config, maxCount: count }; } return config; }); setBalanceConfig(newConfig); // 重新计算总数 const total = newConfig.reduce((sum, config) => sum + config.maxCount, 0); setTotalCount(total); }; // 一键设置所有标签为相同数量 const setAllToSameCount = count => { const newConfig = balanceConfig.map(config => ({ ...config, maxCount: Math.min(Math.max(0, parseInt(count) || 0), config.availableCount) })); setBalanceConfig(newConfig); const total = newConfig.reduce((sum, config) => sum + config.maxCount, 0); setTotalCount(total); }; // 处理平衡导出 const handleBalancedExport = () => { // 过滤出数量大于0的配置 const validConfig = balanceConfig.filter(config => config.maxCount > 0); if (validConfig.length === 0) { setError(t('export.balancedExport.atLeastOneTag', '请至少为一个标签设置大于0的数量')); return; } // 调用原有的导出函数,但传递平衡配置 handleExport({ balanceMode: true, balanceConfig: validConfig, formatType, systemPrompt, reasoningLanguage, confirmedOnly, fileFormat, includeCOT, alpacaFieldType, customInstruction, customFields: formatType === 'custom' ? customFields : undefined }); setBalanceDialogOpen(false); }; // 自定义格式的示例 const getCustomFormatExample = () => { const { questionField, answerField, cotField, includeLabels, includeChunk } = customFields; const example = { [questionField]: t('sampleData.questionContent'), [answerField]: t('sampleData.answerContent') }; // 如果包含思维链字段,添加到示例中 if (includeCOT) { example[cotField] = t('sampleData.cotContent'); } if (includeLabels) { example.labels = [t('sampleData.domainLabel')]; } if (includeChunk) { example.chunk = t('sampleData.textChunk'); } return fileFormat === 'json' ? JSON.stringify([example], null, 2) : JSON.stringify(example); }; // CSV 自定义格式化示例 const getPreviewData = () => { if (formatType === 'alpaca') { // 根据选择的字段类型生成不同的示例 if (alpacaFieldType === 'instruction') { return { headers: ['instruction', 'input', 'output', 'system'], rows: [ { instruction: t('export.sampleInstruction', '人类指令(必填)'), input: '', output: t('export.sampleOutput', '模型回答(必填)'), system: t('export.sampleSystem', '系统提示词(选填)') }, { instruction: t('export.sampleInstruction2', '第二个指令'), input: '', output: t('export.sampleOutput2', '第二个回答'), system: t('export.sampleSystemShort', '系统提示词') } ] }; } else { // input return { headers: ['instruction', 'input', 'output', 'system'], rows: [ { instruction: customInstruction || t('export.fixedInstruction', '固定的指令内容'), input: t('export.sampleInput', '人类问题(必填)'), output: t('export.sampleOutput', '模型回答(必填)'), system: t('export.sampleSystem', '系统提示词(选填)') }, { instruction: customInstruction || t('export.fixedInstruction', '固定的指令内容'), input: t('export.sampleInput2', '第二个问题'), output: t('export.sampleOutput2', '第二个回答'), system: t('export.sampleSystemShort', '系统提示词') } ] }; } } else if (formatType === 'sharegpt') { return { headers: ['messages'], rows: [ { messages: JSON.stringify( [ { messages: [ { role: 'system', content: t('export.sampleSystem', '系统提示词(选填)') }, { role: 'user', content: t('export.sampleUserMessage', '人类指令') // 映射到 question 字段 }, { role: 'assistant', content: t('export.sampleAssistantMessage', '模型回答') // 映射到 cot+answer 字段 } ] } ], null, 2 ) } ] }; } else if (formatType === 'multilingualthinking') { return { headers: 'messages', rows: { messages: JSON.stringify( { reasoning_language: 'English', developer: t('export.sampleSystem', '系统提示词(选填)'), user: t('export.sampleUserMessage', '人类指令'), // 映射到 question 字段 analysis: t('export.sampleAnalysis', '模型的思维链内容'), // 映射到 cot 字段 final: t('export.sampleFinal', '模型回答'), // 映射到 answer 字段 messages: [ { role: 'system', content: '系统提示词(选填)', thinking: 'null' }, { role: 'user', content: '人类指令', // 映射到 question 字段 thinking: 'null' }, { role: 'assistant', content: '模型回答', // 映射到 answer 字段 thinking: '模型的思维链内容' // 映射到 cot 字段 } ] }, null, 2 ) } }; } else if (formatType === 'custom') { // 如果选择仅导出问题,只包含问题字段 if (customFields.questionOnly) { const headers = [customFields.questionField]; if (customFields.includeLabels) headers.push('labels'); if (customFields.includeChunk) headers.push('chunk'); const row = { [customFields.questionField]: t('sampleData.questionContent') }; if (customFields.includeLabels) row.labels = t('sampleData.domainLabel'); if (customFields.includeChunk) row.chunk = t('sampleData.textChunk'); return { headers, rows: [row] }; } else { // 正常的自定义格式 const headers = [customFields.questionField, customFields.answerField]; if (includeCOT) headers.push(customFields.cotField); if (customFields.includeLabels) headers.push('labels'); if (customFields.includeChunk) headers.push('chunk'); const row = { [customFields.questionField]: t('sampleData.questionContent'), [customFields.answerField]: t('sampleData.answerContent') }; if (includeCOT) row[customFields.cotField] = t('sampleData.cotContent'); if (customFields.includeLabels) row.labels = t('sampleData.domainLabel'); if (customFields.includeChunk) row.chunk = t('sampleData.textChunk'); return { headers, rows: [row] }; } } }; return ( <> {t('export.fileFormat')} } label="JSON" /> } label="JSONL" /> {/* } label="CSV" /> */} } label="CSV" /> {/* 数据集风格 */} {t('export.format')} } label="Alpaca" /> } label="ShareGPT" /> {/* NEW: Multilingual‑Thinking format */} } label={t('export.multilingualThinkingFormat') || 'Multilingual‑Thinking'} /> } label={t('export.customFormat')} /> {/* Alpaca 格式特有的设置 */} {formatType === 'alpaca' && ( {t('export.alpacaSettings', 'Alpaca 格式设置')} {t('export.questionFieldType', '问题字段类型')} } label={t('export.useInstruction', '使用 instruction 字段')} /> } label={t('export.useInput', '使用 input 字段')} /> {alpacaFieldType === 'input' && ( )} )} {/* 自定义格式选项 */} {formatType === 'custom' && ( {t('export.customFormatSettings')} {/* 添加思维链字段名输入框 */} } label={t('export.includeLabels')} /> } label={t('export.includeChunk')} /> } label={t('export.questionOnly')} /> )} {t('export.example')} {fileFormat === 'csv' ? ( {(() => { const { headers, rows } = getPreviewData(); const tableKey = `${formatType}-${fileFormat}-${JSON.stringify(customFields)}`; return ( {headers.map(header => ( {header} ))} {rows.map((row, index) => ( {headers.map(header => ( {Array.isArray(row[header]) ? row[header].join(', ') : row[header] || ''} ))} ))}
); })()}
) : (
              {formatType === 'custom'
                ? getCustomFormatExample()
                : formatType === 'multilingualthinking'
                  ? fileFormat === 'json'
                    ? JSON.stringify(
                        {
                          reasoning_language: 'English',
                          developer: '系统提示词(选填)',
                          user: '人类指令', // 映射到 question 字段
                          analysis: '模型的思维链内容', // 映射到 cot 字段
                          final: '模型回答', // 映射到 answer 字段
                          messages: [
                            {
                              content: t('export.sampleSystem', '系统提示词(选填)'),
                              role: 'system',
                              thinking: null
                            },
                            {
                              content: t('export.sampleUserMessage', '人类指令'),
                              role: 'user',
                              thinking: null
                            },
                            {
                              content: t('export.sampleAssistantMessage', '模型回答'),
                              role: 'assistant',
                              thinking: t('export.sampleThinking', '模型的思维链内容')
                            }
                          ]
                        },
                        null,
                        2
                      )
                    : '{"reasoning_language": "English","developer": "系统提示词(选填)", "user": "人类指令", "analysis": "模型的思维链内容", "final": "模型回答", "messages": [{"role": "user", "content": "人类指令", "thinking": "null"}, {"role": "assistant", "content": "模型回答", "thinking": "模型的思维链内容"}]}'
                  : formatType === 'alpaca'
                    ? fileFormat === 'json'
                      ? JSON.stringify(
                          [
                            {
                              instruction: t('export.sampleInstruction', '人类指令(必填)'), // 映射到 question 字段
                              input: t('export.sampleInputOptional', '人类输入(选填)'),
                              output: t('export.sampleOutput', '模型回答(必填)'), // 映射到 cot+answer 字段
                              system: t('export.sampleSystem', '系统提示词(选填)')
                            }
                          ],
                          null,
                          2
                        )
                      : '{"instruction": "人类指令(必填)", "input": "人类输入(选填)", "output": "模型回答(必填)", "system": "系统提示词(选填)"}\n{"instruction": "第二个指令", "input": "", "output": "第二个回答", "system": "系统提示词"}'
                    : fileFormat === 'json'
                      ? JSON.stringify(
                          [
                            {
                              messages: [
                                {
                                  role: 'system',
                                  content: t('export.sampleSystem', '系统提示词(选填)')
                                },
                                {
                                  role: 'user',
                                  content: t('export.sampleUserMessage', '人类指令') // 映射到 question 字段
                                },
                                {
                                  role: 'assistant',
                                  content: t('export.sampleAssistantMessage', '模型回答') // 映射到 cot+answer 字段
                                }
                              ]
                            }
                          ],
                          null,
                          2
                        )
                      : '{"messages": [{"role": "system", "content": "系统提示词(选填)"}, {"role": "user", "content": "人类指令"}, {"role": "assistant", "content": "模型回答"}]}\n{"messages": [{"role": "user", "content": "第二个问题"}, {"role": "assistant", "content": "第二个回答"}]}'}
            
)}
{t('export.systemPrompt')} {/* Reasoning language – only for multilingual‑thinking */} {formatType === 'multilingualthinking' && ( {t('export.Reasoninglanguage')} )} } label={t('export.onlyConfirmed')} /> } label={t('export.includeCOT')} /> {/* 平衡导出对话框 */} setBalanceDialogOpen(false)} maxWidth="md" fullWidth PaperProps={{ sx: { borderRadius: 2 } }} > {t('exportDialog.balancedExportTitle')} {t('exportDialog.balancedExportDescription')} {error && ( {error} )} {loading ? ( ) : ( <> {/* 批量设置 */} {t('exportDialog.quickSettings')} { if (e.key === 'Enter') { setAllToSameCount(e.target.value); e.target.value = ''; } }} /> {/* 标签配置表格 */} {t('exportDialog.tagName')} {t('exportDialog.availableCount')} {t('exportDialog.exportCount')} {t('exportDialog.settings')} {balanceConfig.map(config => ( {config.availableCount} {config.maxCount} updateBalanceConfig(config.tagLabel, e.target.value)} inputProps={{ min: 0, max: config.availableCount, style: { textAlign: 'right' } }} sx={{ width: 80 }} /> ))}
{/* 统计信息 */} {t('exportDialog.totalExportCount')}: {totalCount} {' '} | {t('exportDialog.tagCount')}: {balanceConfig.filter(c => c.maxCount > 0).length} /{' '} {balanceConfig.length} )}
); }; export default LocalExportTab;