first-update

This commit is contained in:
2026-03-17 14:36:31 +08:00
parent 72f08aee7c
commit 4eddf05e79
516 changed files with 115270 additions and 1 deletions

View File

@@ -0,0 +1,165 @@
'use client';
import { useState } from 'react';
import { useRouter } from 'next/navigation';
import { useTranslation } from 'react-i18next';
import { toast } from 'sonner';
import { useAtomValue } from 'jotai';
import { selectedModelInfoAtom } from '@/lib/store';
/**
* 数据集评估相关的自定义 Hook
* 封装单个评估和批量评估的逻辑
*/
const useDatasetEvaluation = (projectId, onEvaluationComplete) => {
const router = useRouter();
const { t } = useTranslation();
const model = useAtomValue(selectedModelInfoAtom);
// 评估状态管理
const [evaluatingIds, setEvaluatingIds] = useState([]);
const [batchEvaluating, setBatchEvaluating] = useState(false);
/**
 * Guard used before any evaluation request: confirms that a model with a
 * non-empty `modelName` is currently selected, and shows an error toast when
 * that is not the case.
 * @returns {boolean} true when a model is selected, false otherwise
 */
const checkModelConfiguration = () => {
  const hasUsableModel = Boolean(model?.modelName);
  if (hasUsableModel) {
    return true;
  }
  toast.error(t('datasets.selectModelFirst', '请先选择模型'));
  return false;
};
/**
 * Evaluate one dataset with the currently selected model.
 *
 * The dataset id is added to the "evaluating" list for the duration of the
 * request and always removed again afterwards. On success a toast with the
 * returned score is shown and `onEvaluationComplete` (if provided) is awaited;
 * on failure an error toast is shown instead.
 * @param {Object} dataset - dataset to evaluate; only `dataset.id` is read here
 */
const handleEvaluateDataset = async dataset => {
  const modelReady = checkModelConfiguration();
  if (!modelReady) {
    return;
  }

  try {
    // Mark this dataset as "in progress"
    setEvaluatingIds(current => [...current, dataset.id]);

    const endpoint = `/api/projects/${projectId}/datasets/${dataset.id}/evaluate`;
    const requestInit = {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, language: 'zh-CN' })
    };
    const evaluateResponse = await fetch(endpoint, requestInit);
    const outcome = await evaluateResponse.json();

    if (!outcome.success) {
      toast.error(outcome.message || t('datasets.evaluateFailed', '评估失败'));
      return;
    }

    toast.success(t('datasets.evaluateSuccess', '评估完成!评分:{{score}}/5', { score: outcome.data.score }));

    // Let the caller react to the finished evaluation (e.g. refresh its list)
    if (onEvaluationComplete) {
      await onEvaluationComplete();
    }
  } catch (error) {
    console.error('评估失败:', error);
    toast.error(t('datasets.evaluateError', '评估失败: {{error}}', { error: error.message }));
  } finally {
    // Always clear the "in progress" marker, whatever the outcome
    setEvaluatingIds(current => current.filter(id => id !== dataset.id));
  }
};
/**
 * Start a batch evaluation of the project's datasets with the selected model.
 *
 * While the request is in flight `batchEvaluating` is true. When the server
 * reports success a toast is shown and the router navigates to the project's
 * tasks page; otherwise an error toast is shown.
 */
const handleBatchEvaluate = async () => {
  const modelReady = checkModelConfiguration();
  if (!modelReady) {
    return;
  }

  try {
    setBatchEvaluating(true);

    const endpoint = `/api/projects/${projectId}/datasets/batch-evaluate`;
    const requestInit = {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ model, language: 'zh-CN' })
    };
    const response = await fetch(endpoint, requestInit);
    const outcome = await response.json();

    if (!outcome.success) {
      toast.error(outcome.message || t('datasets.batchEvaluateStartFailed', '启动批量评估失败'));
      return;
    }

    toast.success(t('datasets.batchEvaluateStarted', '批量评估任务已启动,将在后台进行处理'));
    // Progress is shown on the tasks page
    router.push(`/projects/${projectId}/tasks`);
  } catch (error) {
    console.error('批量评估失败:', error);
    toast.error(t('datasets.batchEvaluateFailed', '批量评估失败: {{error}}', { error: error.message }));
  } finally {
    setBatchEvaluating(false);
  }
};
/**
 * Tell whether a single-dataset evaluation is currently running for the given id.
 * @param {string} datasetId - id of the dataset to look up
 * @returns {boolean} true when the id is in the list of datasets being evaluated
 */
const isEvaluating = datasetId => evaluatingIds.includes(datasetId);
/**
 * Number of datasets whose single evaluation is currently in progress.
 * @returns {number} length of the list of datasets being evaluated
 */
const getEvaluatingCount = () => evaluatingIds.length;
return {
// 状态
evaluatingIds,
batchEvaluating,
// 方法
handleEvaluateDataset,
handleBatchEvaluate,
// 工具方法
isEvaluating,
getEvaluatingCount,
// 模型信息(便于组件使用)
model
};
};
export default useDatasetEvaluation;

View File

@@ -0,0 +1,487 @@
'use client';
import { useTranslation } from 'react-i18next';
import { toast } from 'sonner';
import axios from 'axios';
const useDatasetExport = projectId => {
const { t } = useTranslation();
// Streaming export: pulls datasets from the export API batch by batch and writes
// every formatted batch out immediately, so the whole data set is never
// stringified in one go. When `window.showSaveFilePicker` is available the batches
// are written straight to the file the user picks; otherwise they are collected as
// string chunks and handed to `downloadFromChunks` at the end.
//
// exportOptions fields read here: batchSize, fileFormat ('json' | 'jsonl' | 'csv'),
// formatType, balanceMode, balanceConfig, selectedIds, confirmedOnly and
// customFields.includeChunk (the full object is also passed to the format helpers).
// onProgress (optional) is called after each batch with { processed, currentBatch, hasMore }.
// Resolves to true on success and false on failure; a toast is shown in both cases.
const exportDatasetsStreaming = async (exportOptions, onProgress) => {
  // Declared outside the try block so the error handler can abort a half-written file.
  let fileStream = null;

  try {
    const batchSize = exportOptions.batchSize || 1000;
    const fileFormat = exportOptions.fileFormat || 'json';
    const formatType = exportOptions.formatType || 'alpaca';

    // Build the suggested file name
    const formatSuffixMap = {
      alpaca: 'alpaca',
      multilingualthinking: 'multilingual-thinking',
      sharegpt: 'sharegpt',
      custom: 'custom'
    };
    const formatSuffix = formatSuffixMap[formatType] || formatType || 'export';
    const balanceSuffix = exportOptions.balanceMode ? '-balanced' : '';
    const dateStr = new Date().toISOString().slice(0, 10);
    const fileName = `datasets-${projectId}-${formatSuffix}${balanceSuffix}-${dateStr}.${fileFormat}`;

    try {
      if (window.showSaveFilePicker) {
        // Advertise a MIME type that matches the chosen format
        const mimeType = fileFormat === 'csv' ? 'text/csv' : 'application/json';
        const handle = await window.showSaveFilePicker({
          suggestedName: fileName,
          types: [
            {
              description: 'Dataset File',
              accept: {
                [mimeType]: [`.${fileFormat}`]
              }
            }
          ]
        });
        fileStream = await handle.createWritable();
      }
    } catch (err) {
      // Deliberate best-effort: if the picker is cancelled or unavailable, fall
      // back to the in-memory download instead of failing the export.
      if (err?.name !== 'AbortError') {
        console.warn('File picker unavailable, falling back to in-memory download:', err);
      }
      fileStream = null;
    }

    // Fallback buffer used when no writable file stream is available
    const chunks = [];
    const write = async text => {
      if (fileStream) {
        await fileStream.write(text);
      } else {
        chunks.push(text);
      }
    };

    // File header: opening bracket of the JSON array, or the CSV header row
    if (fileFormat === 'json') {
      await write('[\n');
    } else if (fileFormat === 'csv') {
      await write(getCSVHeaders(formatType, exportOptions).join(',') + '\n');
    }

    const apiUrl = `/api/projects/${projectId}/datasets/export`;
    let offset = 0;
    let hasMore = true;
    let totalProcessed = 0;
    // Tracks whether a record was already written; a batch counter is not enough
    // because an empty batch would otherwise leave a dangling comma in the JSON.
    let hasWrittenRecord = false;

    while (hasMore) {
      const requestBody = { batchMode: true, offset, batchSize };

      // An explicit selection wins over the "confirmed only" filter
      if (exportOptions.selectedIds && exportOptions.selectedIds.length > 0) {
        requestBody.selectedIds = exportOptions.selectedIds;
      } else if (exportOptions.confirmedOnly) {
        requestBody.status = 'confirmed';
      }

      if (exportOptions.balanceMode && exportOptions.balanceConfig) {
        requestBody.balanceMode = true;
        requestBody.balanceConfig = exportOptions.balanceConfig;
      }

      const response = await axios.post(apiUrl, requestBody);
      const batchResult = response.data;
      const rows = batchResult.data;

      // Optionally attach the source chunk text to every record of this batch
      if (exportOptions.customFields?.includeChunk && rows.length > 0) {
        const chunkNames = rows.map(item => item.chunkName).filter(name => name);
        if (chunkNames.length > 0) {
          try {
            const chunkResponse = await axios.post(`/api/projects/${projectId}/chunks/batch-content`, {
              chunkNames
            });
            const chunkContentMap = chunkResponse.data;
            rows.forEach(item => {
              if (item.chunkName && chunkContentMap[item.chunkName]) {
                item.chunkContent = chunkContentMap[item.chunkName];
              }
            });
          } catch (chunkError) {
            // Best-effort: the export continues without chunk text
            console.error('获取文本块内容失败:', chunkError);
          }
        }
      }

      const formattedBatch = formatDataBatch(rows, exportOptions);

      // Skip empty batches so they add neither separators nor blank lines
      if (formattedBatch.length > 0) {
        if (fileFormat === 'json') {
          // Each record is pretty-printed on its own and shifted right by two
          // spaces, so the result matches JSON.stringify(all, null, 2)
          // without ever holding the whole array in one string.
          const batchContent = formattedBatch
            .map(item => '  ' + JSON.stringify(item, null, 2).replace(/\n/g, '\n  '))
            .join(',\n');
          await write(hasWrittenRecord ? ',\n' + batchContent : batchContent);
        } else if (fileFormat === 'jsonl') {
          await write(formattedBatch.map(item => JSON.stringify(item)).join('\n') + '\n');
        } else if (fileFormat === 'csv') {
          await write(formatBatchToCSV(formattedBatch, formatType, exportOptions));
        }
        hasWrittenRecord = true;
      }

      hasMore = batchResult.hasMore;
      // Trust the server cursor; if it is missing, advance by what was received
      offset = batchResult.offset ?? offset + rows.length;
      totalProcessed += rows.length;

      if (onProgress) {
        onProgress({
          processed: totalProcessed,
          currentBatch: rows.length,
          hasMore
        });
      }

      // Small pause between requests to avoid hammering the server
      if (hasMore) {
        await new Promise(resolve => setTimeout(resolve, 50));
      }
    }

    // File footer: closing bracket of the JSON array
    if (fileFormat === 'json') {
      await write('\n]\n');
    }

    if (fileStream) {
      await fileStream.close();
    } else {
      downloadFromChunks(chunks, fileName);
    }

    toast.success(t('datasets.exportSuccess'));
    return true;
  } catch (error) {
    console.error('Streaming export failed:', error);
    // Abort the stream so a failed export does not leave it open
    if (fileStream) {
      try {
        await fileStream.abort();
      } catch (abortError) {
        console.warn('Failed to abort export file stream:', abortError);
      }
    }
    toast.error(error.message || t('datasets.exportFailed'));
    return false;
  }
};
// Trigger a browser download for content that was collected in memory.
// `chunks` is the list of parts passed to the Blob constructor and `fileName`
// is the name suggested to the browser for the saved file.
const downloadFromChunks = (chunks, fileName) => {
  const objectUrl = URL.createObjectURL(new Blob(chunks, { type: 'application/octet-stream' }));

  // A temporary <a download> element is attached, clicked and removed again
  const link = Object.assign(document.createElement('a'), { href: objectUrl, download: fileName });
  const { body } = document;
  body.appendChild(link);
  link.click();
  body.removeChild(link);

  // Release the object URL a little later so the download has time to start
  setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
};
// Return the CSV column names for an export format.
//
// formatType: 'alpaca' | 'sharegpt' | 'multilingualthinking' | 'custom'. A missing
//   or empty value is treated as 'alpaca', the same default formatDataBatch applies,
//   so the header row always matches the records that helper produces.
// exportOptions: only consulted for 'custom', where `customFields` supplies the
//   column names and `includeCOT` decides whether the chain-of-thought column exists.
// Returns an array of header names; unknown formats yield an empty array.
const getCSVHeaders = (formatType, exportOptions) => {
  switch (formatType || 'alpaca') {
    case 'alpaca':
      return ['instruction', 'input', 'output', 'system'];
    case 'sharegpt':
      return ['messages'];
    case 'multilingualthinking':
      return ['reasoning_language', 'developer', 'user', 'analysis', 'final', 'messages'];
    case 'custom': {
      const { questionField, answerField, cotField, includeLabels, includeChunk, questionOnly } =
        exportOptions.customFields;
      const headers = [questionField];
      if (!questionOnly) {
        headers.push(answerField);
        if (exportOptions.includeCOT && cotField) {
          headers.push(cotField);
        }
      }
      if (includeLabels) {
        headers.push('label');
      }
      if (includeChunk) {
        headers.push('chunk');
      }
      return headers;
    }
    default:
      return [];
  }
};
// Convert one batch of raw dataset records into the requested export format.
//
// dataBatch: array of records; the fields read are question, answer, cot and, for
//   the custom format, questionLabel and chunkContent.
// exportOptions: formatType (defaults to 'alpaca') selects the output shape;
//   includeCOT, systemPrompt, customInstruction, alpacaFieldType, reasoningLanguage
//   and customFields tune it.
// Returns a new array of formatted records, or dataBatch itself for unknown formats.
const formatDataBatch = (dataBatch, exportOptions) => {
  const formatType = exportOptions.formatType || 'alpaca';
  const systemPrompt = exportOptions.systemPrompt || '';

  // Answer text, prefixed with a <think> block when chain-of-thought is exported
  const answerWithThinking = (answer, cot) =>
    cot && exportOptions.includeCOT ? `<think>${cot}</think>\n${answer}` : answer;
  // Chain-of-thought as a separate field, or null when it is absent or disabled
  const thinkingOrNull = cot => (exportOptions.includeCOT && cot ? cot : null);

  switch (formatType) {
    case 'alpaca': {
      // The question goes either into `instruction` or into `input`
      const questionAsInstruction = exportOptions.alpacaFieldType === 'instruction';
      return dataBatch.map(({ question, answer, cot }) => ({
        instruction: questionAsInstruction ? question : exportOptions.customInstruction || '',
        input: questionAsInstruction ? '' : question,
        output: answerWithThinking(answer, cot),
        system: systemPrompt
      }));
    }

    case 'sharegpt':
      return dataBatch.map(({ question, answer, cot }) => {
        // The system message is only emitted when a system prompt is configured
        const systemMessages = exportOptions.systemPrompt
          ? [{ role: 'system', content: exportOptions.systemPrompt }]
          : [];
        return {
          messages: [
            ...systemMessages,
            { role: 'user', content: question },
            { role: 'assistant', content: answerWithThinking(answer, cot) }
          ]
        };
      });

    case 'multilingualthinking':
      return dataBatch.map(({ question, answer, cot }) => {
        const thinking = thinkingOrNull(cot);
        return {
          reasoning_language: exportOptions.reasoningLanguage || 'English',
          developer: systemPrompt,
          user: question,
          analysis: thinking,
          final: answer,
          messages: [
            { content: systemPrompt, role: 'system', thinking: null },
            { content: question, role: 'user', thinking: null },
            { content: answer, role: 'assistant', thinking }
          ]
        };
      });

    case 'custom': {
      const { questionField, answerField, cotField, includeLabels, includeChunk, questionOnly } =
        exportOptions.customFields;
      return dataBatch.map(({ question, answer, cot, questionLabel, chunkContent }) => {
        const row = { [questionField]: question };
        if (!questionOnly) {
          row[answerField] = answer;
          if (cot && exportOptions.includeCOT && cotField) {
            row[cotField] = cot;
          }
        }
        if (includeLabels && questionLabel && questionLabel.length > 0) {
          // Keeps only the second space-separated token of the label
          row.label = questionLabel.split(' ')[1];
        }
        if (includeChunk && chunkContent) {
          row.chunk = chunkContent;
        }
        return row;
      });
    }

    default:
      return dataBatch;
  }
};
// Serialize one batch of formatted records as CSV rows (no header row).
//
// formattedBatch: array of already formatted records.
// formatType / exportOptions: forwarded to getCSVHeaders to obtain the column order.
// Returns the rows joined by '\n' with a trailing '\n', or '' for an empty batch
// so that appending it to a file never introduces a blank line.
const formatBatchToCSV = (formattedBatch, formatType, exportOptions) => {
  if (formattedBatch.length === 0) {
    return '';
  }

  const headers = getCSVHeaders(formatType, exportOptions);

  const toCell = value => {
    // null and undefined become an empty cell; `typeof null` is 'object', so
    // this must be checked before the object branch or the cell reads "null".
    if (value === null || value === undefined) {
      return '';
    }
    // Arrays and objects are embedded as JSON text
    const text = typeof value === 'object' ? JSON.stringify(value) : String(value);
    // Quote cells containing a delimiter, a quote or a line break (CR or LF),
    // doubling embedded quotes.
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  const rows = formattedBatch.map(item => headers.map(header => toCell(item[header])).join(','));
  return rows.join('\n') + '\n';
};
// Format a complete data set in memory and download it as one file
// (kept for small exports; large exports use the streaming exporter).
//
// dataToExport: array of raw dataset records.
// exportOptions: fileFormat ('json' default | 'jsonl' | 'csv'), formatType,
//   balanceMode, plus whatever formatDataBatch / getCSVHeaders read.
// Returns a promise that resolves once the download has been triggered.
const processAndDownloadData = async (dataToExport, exportOptions) => {
  const formattedData = formatDataBatch(dataToExport, exportOptions);
  const fileFormat = exportOptions.fileFormat || 'json';

  let content;
  let fileExtension;

  if (fileFormat === 'jsonl') {
    content = formattedData.map(item => JSON.stringify(item)).join('\n');
    fileExtension = 'jsonl';
  } else if (fileFormat === 'csv') {
    // Use the same default format as formatDataBatch so the columns match the records
    const headers = getCSVHeaders(exportOptions.formatType || 'alpaca', exportOptions);
    const toCell = value => {
      // null and undefined become an empty cell; `typeof null` is 'object', so
      // this must be checked before the object branch or the cell reads "null".
      if (value === null || value === undefined) {
        return '';
      }
      const text = typeof value === 'object' ? JSON.stringify(value) : String(value);
      // Quote cells containing a delimiter, a quote or a line break (CR or LF)
      return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
    };
    const csvRows = [
      headers.join(','),
      ...formattedData.map(item => headers.map(header => toCell(item[header])).join(','))
    ];
    content = csvRows.join('\n');
    fileExtension = 'csv';
  } else {
    content = JSON.stringify(formattedData, null, 2);
    fileExtension = 'json';
  }

  const formatSuffixMap = {
    alpaca: 'alpaca',
    multilingualthinking: 'multilingual-thinking',
    sharegpt: 'sharegpt',
    custom: 'custom'
  };
  const formatSuffix = formatSuffixMap[exportOptions.formatType] || exportOptions.formatType || 'export';
  const balanceSuffix = exportOptions.balanceMode ? '-balanced' : '';
  const dateStr = new Date().toISOString().slice(0, 10);
  const fileName = `datasets-${projectId}-${formatSuffix}${balanceSuffix}-${dateStr}.${fileExtension}`;

  // Shared download helper: it revokes the object URL only after a delay, so
  // the download is not cut off by an immediate revoke.
  downloadFromChunks([content], fileName);
};
// Export datasets in a single request (the original, non-streaming path, kept
// for backward compatibility).
//
// exportOptions: selectedIds (takes precedence) or confirmedOnly choose the
//   records; balanceMode + balanceConfig request a balanced export; the rest is
//   forwarded to processAndDownloadData.
// Resolves to true on success and false on failure; a toast is shown in both cases.
const exportDatasets = async exportOptions => {
  try {
    const apiUrl = `/api/projects/${projectId}/datasets/export`;
    const requestBody = {};

    // An explicit selection wins over the "confirmed only" filter
    if (exportOptions.selectedIds && exportOptions.selectedIds.length > 0) {
      requestBody.selectedIds = exportOptions.selectedIds;
    } else if (exportOptions.confirmedOnly) {
      requestBody.status = 'confirmed';
    }

    if (exportOptions.balanceMode && exportOptions.balanceConfig) {
      requestBody.balanceMode = true;
      requestBody.balanceConfig = exportOptions.balanceConfig;
    }

    const response = await axios.post(apiUrl, requestBody);
    await processAndDownloadData(response.data, exportOptions);

    toast.success(t('datasets.exportSuccess'));
    return true;
  } catch (error) {
    // Same reporting as the streaming exporter: log the failure and never
    // show an empty toast when the error carries no message.
    console.error('Export failed:', error);
    toast.error(error.message || t('datasets.exportFailed'));
    return false;
  }
};
// Export a balanced data set: the given options are copied, balance mode is
// forced on, and the copy is handed to exportDatasets.
// Resolves to whatever exportDatasets resolves to.
const exportBalancedDataset = async exportOptions => {
  const balancedOptions = Object.assign({}, exportOptions, {
    balanceMode: true,
    balanceConfig: exportOptions.balanceConfig
  });
  return exportDatasets(balancedOptions);
};
return {
exportDatasets,
exportBalancedDataset,
exportDatasetsStreaming
};
};
export default useDatasetExport;
export { useDatasetExport };

View File

@@ -0,0 +1,171 @@
'use client';
import { useState, useEffect } from 'react';
/**
 * Hook that holds the dataset list filters, search and pagination state and
 * persists them per project in localStorage under `datasets-filters-<projectId>`.
 *
 * On mount, and whenever `projectId` changes, the saved state for that project is
 * loaded; anything missing or unusable falls back to its default, so state from a
 * previously shown project never carries over. Every later change is written back.
 *
 * @param {string} projectId - project id used to build the storage key
 * @returns {Object} filter/search/pagination values with their setters,
 *   `isInitialized`, and the helpers `resetFilters`, `clearSavedFilters` and
 *   `getActiveFilterCount`
 */
export function useDatasetFilters(projectId) {
  const [filterConfirmed, setFilterConfirmed] = useState('all');
  const [filterHasCot, setFilterHasCot] = useState('all');
  const [filterIsDistill, setFilterIsDistill] = useState('all');
  const [filterScoreRange, setFilterScoreRange] = useState([0, 5]);
  const [filterCustomTag, setFilterCustomTag] = useState('');
  const [filterNoteKeyword, setFilterNoteKeyword] = useState('');
  const [filterChunkName, setFilterChunkName] = useState('');
  const [searchQuery, setSearchQuery] = useState('');
  const [searchField, setSearchField] = useState('question');
  const [page, setPage] = useState(1);
  const [rowsPerPage, setRowsPerPage] = useState(10);
  // Becomes true once the restore step has run; saving is suppressed before that
  const [isInitialized, setIsInitialized] = useState(false);

  // Restore the saved filters for the current project
  useEffect(() => {
    if (typeof window === 'undefined') {
      return;
    }

    let filters = {};
    try {
      const savedFilters = localStorage.getItem(`datasets-filters-${projectId}`);
      const parsed = savedFilters ? JSON.parse(savedFilters) : null;
      if (parsed && typeof parsed === 'object') {
        filters = parsed;
      }
    } catch (error) {
      console.error('恢复筛选条件失败:', error);
    }

    // Every field is applied, saved or not: with nothing stored for this project
    // the state returns to its defaults instead of keeping earlier values.
    const savedRange = filters.filterScoreRange;
    setFilterConfirmed(filters.filterConfirmed || 'all');
    setFilterHasCot(filters.filterHasCot || 'all');
    setFilterIsDistill(filters.filterIsDistill || 'all');
    // The range is indexed as [min, max] elsewhere, so reject anything else
    setFilterScoreRange(Array.isArray(savedRange) && savedRange.length === 2 ? savedRange : [0, 5]);
    setFilterCustomTag(filters.filterCustomTag || '');
    setFilterNoteKeyword(filters.filterNoteKeyword || '');
    setFilterChunkName(filters.filterChunkName || '');
    setSearchQuery(filters.searchQuery || '');
    setSearchField(filters.searchField || 'question');
    setPage(filters.page || 1);
    setRowsPerPage(filters.rowsPerPage || 10);
    setIsInitialized(true);
  }, [projectId]);

  // Persist the filters whenever one of them changes
  useEffect(() => {
    if (typeof window !== 'undefined' && isInitialized) {
      try {
        const filters = {
          filterConfirmed,
          filterHasCot,
          filterIsDistill,
          filterScoreRange,
          filterCustomTag,
          filterNoteKeyword,
          filterChunkName,
          searchQuery,
          searchField,
          page,
          rowsPerPage
        };
        localStorage.setItem(`datasets-filters-${projectId}`, JSON.stringify(filters));
      } catch (error) {
        console.error('保存筛选条件失败:', error);
      }
    }
  }, [
    projectId,
    filterConfirmed,
    filterHasCot,
    filterIsDistill,
    filterScoreRange,
    filterCustomTag,
    filterNoteKeyword,
    filterChunkName,
    searchQuery,
    searchField,
    page,
    rowsPerPage,
    isInitialized
  ]);

  /**
   * Reset every filter, the search and the pagination to their default values.
   */
  const resetFilters = () => {
    setFilterConfirmed('all');
    setFilterHasCot('all');
    setFilterIsDistill('all');
    setFilterScoreRange([0, 5]);
    setFilterCustomTag('');
    setFilterNoteKeyword('');
    setFilterChunkName('');
    setSearchQuery('');
    setSearchField('question');
    setPage(1);
    setRowsPerPage(10);
  };

  /**
   * Remove the saved filters of the current project from localStorage.
   */
  const clearSavedFilters = () => {
    if (typeof window !== 'undefined') {
      try {
        localStorage.removeItem(`datasets-filters-${projectId}`);
      } catch (error) {
        console.error('清除筛选条件失败:', error);
      }
    }
  };

  /**
   * Count the filters that currently differ from their default value
   * (search text and pagination are not counted).
   * @returns {number} number of active filters
   */
  const getActiveFilterCount = () => {
    let count = 0;
    if (filterConfirmed !== 'all') count++;
    if (filterHasCot !== 'all') count++;
    if (filterIsDistill !== 'all') count++;
    if (filterScoreRange[0] > 0 || filterScoreRange[1] < 5) count++;
    if (filterCustomTag) count++;
    if (filterNoteKeyword) count++;
    if (filterChunkName) count++;
    return count;
  };

  return {
    // Filter state
    filterConfirmed,
    setFilterConfirmed,
    filterHasCot,
    setFilterHasCot,
    filterIsDistill,
    setFilterIsDistill,
    filterScoreRange,
    setFilterScoreRange,
    filterCustomTag,
    setFilterCustomTag,
    filterNoteKeyword,
    setFilterNoteKeyword,
    filterChunkName,
    setFilterChunkName,
    searchQuery,
    setSearchQuery,
    searchField,
    setSearchField,
    // Pagination state
    page,
    setPage,
    rowsPerPage,
    setRowsPerPage,
    // Initialization flag
    isInitialized,
    // Helpers
    resetFilters,
    clearSavedFilters,
    getActiveFilterCount
  };
}
export default useDatasetFilters;