first-update

This commit is contained in:
2026-03-17 14:36:31 +08:00
parent 72f08aee7c
commit 4eddf05e79
516 changed files with 115270 additions and 1 deletions

View File

@@ -0,0 +1,77 @@
import { NextResponse } from 'next/server';
import { db } from '@/lib/db';
/**
 * Copy a dataset record into the evaluation dataset table and mark the source with an 'Eval' tag.
 *
 * Steps:
 *  1. Load the dataset identified by `params.datasetId` within `params.projectId`.
 *  2. If the dataset has a `questionId`, look up that question to obtain its `chunkId`.
 *  3. Create an `open_ended` evaluation record from the dataset's question/answer/note.
 *  4. Append 'Eval' to the source dataset's tags unless it is already present.
 *
 * @param {Request} req - Incoming request (its body is not read).
 * @param {{ params: { projectId: string, datasetId: string } }} context - Route parameters.
 * @returns {Promise<Response>} `{ success: true, evalDataset }`, 404 when the dataset is missing,
 *   or 500 with `{ error: 'Internal Server Error' }` on any failure.
 */
export async function POST(req, { params }) {
  try {
    const { projectId, datasetId } = params;

    // 1. Load the source dataset.
    const dataset = await db.datasets.findUnique({
      where: { id: datasetId, projectId }
    });
    if (!dataset) {
      return NextResponse.json({ error: 'Dataset not found' }, { status: 404 });
    }

    // 2. Try to resolve the related chunkId through the dataset's questionId.
    let chunkId = null;
    if (dataset.questionId) {
      const question = await db.questions.findUnique({
        where: { id: dataset.questionId }
      });
      if (question) {
        chunkId = question.chunkId;
      }
    }

    // Parse the stored tags exactly once. Anything that is not a JSON array
    // (unparseable text, an object, null, a bare string) is treated as "no tags",
    // so the later `.includes` call can never run on a non-array value.
    let sourceTags = [];
    try {
      const parsed = JSON.parse(dataset.tags || '[]');
      if (Array.isArray(parsed)) {
        sourceTags = parsed;
      }
    } catch (e) {
      sourceTags = [];
    }

    // 3. Create the evaluation record. 'open_ended' is the default type because a dataset
    //    is a question/answer pair. The 'Eval' marker tag is dropped and the remaining
    //    tags are stored as a comma-separated string.
    const evalTagsString = sourceTags.filter(tag => tag !== 'Eval').join(',');
    const evalDataset = await db.evalDatasets.create({
      data: {
        projectId,
        question: dataset.question,
        questionType: 'open_ended',
        correctAnswer: dataset.answer,
        tags: evalTagsString,
        note: dataset.note,
        chunkId: chunkId,
        options: '' // open-ended questions have no options
      }
    });

    // 4. Tag the source dataset with 'Eval' (stored as a JSON array string).
    if (!sourceTags.includes('Eval')) {
      await db.datasets.update({
        where: { id: datasetId },
        data: {
          tags: JSON.stringify([...sourceTags, 'Eval'])
        }
      });
    }

    return NextResponse.json({ success: true, evalDataset });
  } catch (error) {
    console.error('Failed to copy dataset to eval:', error);
    return NextResponse.json({ error: 'Internal Server Error' }, { status: 500 });
  }
}

View File

@@ -0,0 +1,36 @@
import { NextResponse } from 'next/server';
import { evaluateDataset } from '@/lib/services/datasets/evaluation';
/**
 * Evaluate the quality of a single dataset record.
 *
 * Reads `model` and an optional `language` (default 'zh-CN') from the JSON body and
 * delegates to `evaluateDataset`. Responses use a `{ success, message, data? }` envelope.
 *
 * @param {Request} request - Request whose JSON body carries `model` and optionally `language`.
 * @param {{ params: { projectId: string, datasetId: string } }} context - Route parameters.
 * @returns {Promise<Response>} 400 when an id or the model is missing, 500 when the
 *   evaluation reports failure or throws, otherwise the evaluation data.
 */
export async function POST(request, { params }) {
  // All failure responses share one shape; only the message and status differ.
  const failure = (message, status) => NextResponse.json({ success: false, message }, { status });

  try {
    const { projectId, datasetId } = params;
    const { model, language = 'zh-CN' } = await request.json();

    const hasBothIds = Boolean(projectId) && Boolean(datasetId);
    if (!hasBothIds) {
      return failure('项目ID和数据集ID不能为空', 400);
    }
    if (!model) {
      return failure('模型配置不能为空', 400);
    }

    // Hand the actual work to the evaluation service.
    const outcome = await evaluateDataset(projectId, datasetId, model, language);
    if (outcome.success) {
      return NextResponse.json({
        success: true,
        message: '数据集评估完成',
        data: outcome.data
      });
    }
    return failure(outcome.error, 500);
  } catch (error) {
    console.error('数据集评估失败:', error);
    return failure(`评估失败: ${error.message}`, 500);
  }
}

View File

@@ -0,0 +1,82 @@
import { NextResponse } from 'next/server';
import { getDatasetsById, getDatasetsCounts, getNavigationItems, updateDatasetMetadata } from '@/lib/db/datasets';
/**
 * Fetch a single dataset record, or navigation data relative to it.
 *
 * - With an `operateType` query parameter: responds with the result of
 *   `getNavigationItems(projectId, datasetId, operateType)`.
 * - Without it: responds with `{ datasets, ...counts }`, where `datasets` is the record
 *   returned by `getDatasetsById` and `counts` comes from `getDatasetsCounts(projectId)`.
 *
 * @param {Request} request - Incoming request; only its URL is read.
 * @param {{ params: { projectId: string, datasetId: string } }} context - Route parameters.
 * @returns {Promise<Response>} 400 when an id is missing, 500 on failure.
 */
export async function GET(request, { params }) {
  try {
    const { projectId, datasetId } = params;

    // Both route parameters are mandatory; the project id is checked first.
    const requiredIds = [
      [projectId, '项目ID不能为空'],
      [datasetId, '数据集ID不能为空']
    ];
    for (const [value, message] of requiredIds) {
      if (!value) {
        return NextResponse.json({ error: message }, { status: 400 });
      }
    }

    const operateType = new URL(request.url).searchParams.get('operateType');
    if (operateType === null) {
      const datasets = await getDatasetsById(datasetId);
      const counts = await getDatasetsCounts(projectId);
      return NextResponse.json({ datasets, ...counts });
    }

    const navigation = await getNavigationItems(projectId, datasetId, operateType);
    return NextResponse.json(navigation);
  } catch (error) {
    console.error('获取数据集详情失败:', String(error));
    const message = error.message || '获取数据集详情失败';
    return NextResponse.json({ error: message }, { status: 500 });
  }
}
/**
 * Update a dataset's metadata (score, tags, note).
 *
 * Validation:
 *  - `score`, when provided and not null, must be a finite number between 0 and 5 inclusive.
 *    Non-numeric values are rejected explicitly, because a plain `<`/`>` range check
 *    silently accepts anything that compares as NaN (e.g. the string 'abc').
 *    `null` is still passed through unchanged, as before.
 *  - `tags`, when provided, must be an array.
 *
 * @param {Request} request - Request whose JSON body may carry `score`, `tags`, `note`.
 * @param {{ params: { projectId: string, datasetId: string } }} context - Route parameters.
 * @returns {Promise<Response>} `{ success: true, dataset }`, 400 on invalid input, 500 on failure.
 */
export async function PATCH(request, { params }) {
  try {
    const { projectId, datasetId } = params;

    // Validate route parameters.
    if (!projectId) {
      return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 });
    }
    if (!datasetId) {
      return NextResponse.json({ error: '数据集ID不能为空' }, { status: 400 });
    }

    const body = await request.json();
    const { score, tags, note } = body;

    // Validate the score: must be a real number within the 0-5 range.
    const scoreProvided = score !== undefined && score !== null;
    const scoreValid = typeof score === 'number' && Number.isFinite(score) && score >= 0 && score <= 5;
    if (scoreProvided && !scoreValid) {
      return NextResponse.json({ error: '评分必须在0-5之间' }, { status: 400 });
    }

    // Validate the tags format.
    if (tags !== undefined && !Array.isArray(tags)) {
      return NextResponse.json({ error: '标签必须是数组格式' }, { status: 400 });
    }

    // Persist the metadata.
    const updatedDataset = await updateDatasetMetadata(datasetId, { score, tags, note });
    return NextResponse.json({
      success: true,
      dataset: updatedDataset
    });
  } catch (error) {
    console.error('更新数据集元数据失败:', String(error));
    return NextResponse.json(
      {
        error: error.message || '更新数据集元数据失败'
      },
      { status: 500 }
    );
  }
}

View File

@@ -0,0 +1,52 @@
import { NextResponse } from 'next/server';
import { getDatasetsById } from '@/lib/db/datasets';
import { getEncoding } from '@langchain/core/utils/tiktoken';
/**
 * Compute token counts for a dataset's answer and chain-of-thought text.
 *
 * Uses the `cl100k_base` encoding (the one used by gpt-3.5-turbo and gpt-4).
 * Empty or missing text counts as 0 tokens, and the encoder is only loaded when
 * at least one of the two fields has content.
 *
 * @param {Request} request - Incoming request (not read).
 * @param {{ params: { datasetId: string } }} context - Route parameters.
 * @returns {Promise<Response>} `{ answerTokens, cotTokens }`; 400 when the id is missing,
 *   404 when the dataset does not exist, 500 when tokenization or loading fails.
 */
export async function GET(request, { params }) {
  try {
    const { datasetId } = params;
    if (!datasetId) {
      return NextResponse.json({ error: '数据集ID不能为空' }, { status: 400 });
    }

    const datasets = await getDatasetsById(datasetId);
    // A missing record is a client error, not a tokenization failure: report it as 404
    // instead of letting the property access below throw and surface as a 500.
    if (!datasets) {
      return NextResponse.json({ error: '数据集不存在' }, { status: 404 });
    }

    const tokenCounts = {
      answerTokens: 0,
      cotTokens: 0
    };

    try {
      if (datasets.answer || datasets.cot) {
        // cl100k_base is the encoding used by gpt-3.5-turbo and gpt-4.
        const encoding = await getEncoding('cl100k_base');
        if (datasets.answer) {
          const tokens = encoding.encode(datasets.answer);
          tokenCounts.answerTokens = tokens.length;
        }
        if (datasets.cot) {
          const tokens = encoding.encode(datasets.cot);
          tokenCounts.cotTokens = tokens.length;
        }
      }
    } catch (error) {
      console.error('计算Token数量失败:', String(error));
      return NextResponse.json({ error: '计算Token数量失败' }, { status: 500 });
    }

    return NextResponse.json(tokenCounts);
  } catch (error) {
    console.error('获取Token计数失败:', String(error));
    return NextResponse.json(
      {
        error: error.message || '获取Token计数失败'
      },
      { status: 500 }
    );
  }
}

View File

@@ -0,0 +1,55 @@
/**
* 批量数据集评估任务API
* 创建批量评估数据集质量的异步任务
*/
import { NextResponse } from 'next/server';
import { db } from '@/lib/db/index';
import { processTask } from '@/lib/services/tasks/index';
/**
 * Create a batch dataset-evaluation task and start processing it in the background.
 *
 * The task row is created with status 0 and zeroed counters; `processTask` is then
 * invoked without being awaited, so the response returns as soon as the row exists.
 *
 * @param {Request} request - Request whose JSON body carries `model` (with a `modelId`)
 *   and optionally `language` (default 'zh-CN').
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} `{ success, message, data: { taskId } }`, 400 on missing
 *   project id or model configuration, 500 on failure.
 */
export async function POST(request, { params }) {
  // Failure responses all use the same envelope.
  const failure = (message, status) => NextResponse.json({ success: false, message }, { status });

  try {
    const { projectId } = params;
    const { model, language = 'zh-CN' } = await request.json();

    if (!projectId) {
      return failure('项目ID不能为空', 400);
    }
    if (!model?.modelId) {
      return failure('模型配置不能为空', 400);
    }

    // Describe the new task row.
    const taskRecord = {
      projectId,
      taskType: 'dataset-evaluation',
      status: 0, // initial state: processing
      modelInfo: JSON.stringify(model),
      language: language || 'zh-CN',
      detail: '',
      totalCount: 0,
      note: '准备开始批量评估数据集质量...',
      completedCount: 0
    };
    const newTask = await db.task.create({ data: taskRecord });
    const taskId = newTask.id;

    // Fire and forget: a failure to start is only logged, the request still succeeds.
    processTask(taskId).catch(err => {
      console.error(`批量评估任务启动失败: ${taskId}`, String(err));
    });

    return NextResponse.json({
      success: true,
      message: '批量评估任务已创建',
      data: { taskId }
    });
  } catch (error) {
    console.error('创建批量评估任务失败:', error);
    return failure(`创建任务失败: ${error.message}`, 500);
  }
}

View File

@@ -0,0 +1,128 @@
import { NextResponse } from 'next/server';
import {
getDatasets,
getBalancedDatasetsByTags,
getTagsWithDatasetCounts,
getDatasetsBatch,
getBalancedDatasetsByTagsBatch,
getDatasetsByIds,
getDatasetsByIdsBatch
} from '@/lib/db/datasets';
/**
 * Get tag statistics (tags with their dataset counts) for a project.
 *
 * The optional `confirmed` query parameter narrows the statistics: when absent no filter
 * is passed (`undefined`); otherwise the filter is `true` only for the exact string 'true'.
 *
 * @param {Request} request - Incoming request; only its URL is read.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} The value returned by `getTagsWithDatasetCounts`,
 *   400 when the project id is missing, 500 on failure.
 */
export async function GET(request, { params }) {
  try {
    const { projectId } = params;
    const { searchParams } = new URL(request.url);

    // Validate the project id.
    if (!projectId) {
      return NextResponse.json({ error: 'Project ID cannot be empty' }, { status: 400 });
    }

    // Leave the filter undefined unless the caller supplied the parameter.
    let confirmed;
    const rawConfirmed = searchParams.get('confirmed');
    if (rawConfirmed !== null) {
      confirmed = rawConfirmed === 'true';
    }

    const tagStats = await getTagsWithDatasetCounts(projectId, confirmed);
    return NextResponse.json(tagStats);
  } catch (error) {
    console.error('Failed to get tag statistics:', String(error));
    const message = error.message || 'Failed to get tag statistics';
    return NextResponse.json({ error: message }, { status: 500 });
  }
}
/**
 * Coerce a client-supplied paging value to an integer.
 *
 * Accepts integers and numeric strings; anything else (missing, fractional, non-numeric,
 * or below `min`) yields `fallback`.
 *
 * @param {unknown} value - Raw value from the request body.
 * @param {number} fallback - Value used when `value` is unusable.
 * @param {number} min - Smallest accepted value (inclusive).
 * @returns {number} A usable integer.
 */
function toPagingInt(value, fallback, min) {
  const candidate = typeof value === 'string' && value.trim() !== '' ? Number(value) : value;
  return Number.isInteger(candidate) && candidate >= min ? candidate : fallback;
}

/**
 * Export datasets of a project.
 *
 * Body options:
 *  - `status`: 'confirmed' / 'unconfirmed' filter; anything else means no filter.
 *  - `selectedIds`: non-empty array → export exactly these datasets (takes priority).
 *  - `balanceMode` + `balanceConfig`: balanced export by tags; `balanceConfig` may be an
 *    object or a JSON string.
 *  - `batchMode`: when truthy, respond with `{ data, hasMore, offset }` pages driven by
 *    `offset` (default 0) and `batchSize` (default 1000); otherwise respond with the full
 *    array in one go (kept for backward compatibility).
 *
 * `offset` and `batchSize` are normalized to integers so that string values cannot turn
 * the returned `offset` into a concatenated string or make `hasMore` permanently false,
 * and a non-positive `batchSize` cannot produce an endless `hasMore: true` loop.
 *
 * @param {Request} request - Request with the JSON body described above.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} Export payload, 400 when the project id is missing, 500 on failure.
 */
export async function POST(request, { params }) {
  try {
    const { projectId } = params;
    const body = await request.json();

    // Validate the project id.
    if (!projectId) {
      return NextResponse.json({ error: 'Project ID cannot be empty' }, { status: 400 });
    }

    let confirmed;
    if (body.status === 'confirmed') confirmed = true;
    if (body.status === 'unconfirmed') confirmed = false;

    const batchMode = Boolean(body.batchMode);
    const offset = toPagingInt(body.offset, 0, 0);
    const batchSize = toPagingInt(body.batchSize, 1000, 1);

    // Explicitly selected ids win over every other mode.
    const selectedIds = Array.isArray(body.selectedIds) && body.selectedIds.length > 0 ? body.selectedIds : null;

    // Balanced export only applies when nothing is selected; parse the config once, lazily.
    const useBalance = !selectedIds && Boolean(body.balanceMode) && Boolean(body.balanceConfig);
    let balanceConfig = null;
    if (useBalance) {
      balanceConfig = typeof body.balanceConfig === 'string' ? JSON.parse(body.balanceConfig) : body.balanceConfig;
    }

    if (batchMode) {
      // Paged export.
      if (useBalance) {
        const result = await getBalancedDatasetsByTagsBatch(projectId, balanceConfig, confirmed, offset, batchSize);
        return NextResponse.json({
          data: result.data,
          hasMore: result.hasMore,
          offset: offset + result.data.length
        });
      }

      const datasets = selectedIds
        ? await getDatasetsByIdsBatch(projectId, selectedIds, offset, batchSize)
        : await getDatasetsBatch(projectId, confirmed, offset, batchSize);
      return NextResponse.json({
        data: datasets,
        // A full page suggests there may be more rows to fetch.
        hasMore: datasets.length === batchSize,
        offset: offset + datasets.length
      });
    }

    // Legacy one-shot export (kept for backward compatibility).
    if (selectedIds) {
      return NextResponse.json(await getDatasetsByIds(projectId, selectedIds));
    }
    if (useBalance) {
      return NextResponse.json(await getBalancedDatasetsByTags(projectId, balanceConfig, confirmed));
    }
    return NextResponse.json(await getDatasets(projectId, confirmed));
  } catch (error) {
    console.error('Failed to get datasets:', String(error));
    return NextResponse.json(
      {
        error: error.message || 'Failed to get datasets'
      },
      { status: 500 }
    );
  }
}

View File

@@ -0,0 +1,44 @@
import { NextResponse } from 'next/server';
import { getDatasetsById } from '@/lib/db/datasets';
import LLMClient from '@/lib/llm/core/index';
import { getEvalQuestionPrompt } from '@/lib/llm/prompts/evalQuestion';
import { extractJsonFromLLMOutput } from '@/lib/llm/common/util';
/**
 * Generate evaluation-question variants from an existing dataset record via an LLM.
 *
 * The dataset's question and answer are merged into one context text, turned into a prompt
 * with `getEvalQuestionPrompt`, sent to the model, and the reply is parsed with
 * `extractJsonFromLLMOutput`. The parsed result must be an array.
 *
 * @param {Request} request - JSON body: `datasetId`, `model`, optional `language`
 *   (falls back to 'zh-CN'), `questionType` (default 'open_ended'), `count` (default 1).
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} `{ success: true, data }`, 400 on missing parameters,
 *   404 when the dataset is not found, 500 on any failure (including unparseable output).
 */
export async function POST(request, { params }) {
  try {
    const { projectId } = params;
    const { datasetId, model, language, questionType = 'open_ended', count = 1 } = await request.json();

    const hasRequired = Boolean(datasetId) && Boolean(model);
    if (!hasRequired) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 });
    }

    // 1. Load the source dataset.
    const dataset = await getDatasetsById(datasetId);
    if (!dataset) {
      return NextResponse.json({ error: 'Dataset not found' }, { status: 404 });
    }

    // 2. Build the prompt; the original question and answer together form the context text.
    const text = [`Question: ${dataset.question}`, `Answer: ${dataset.answer}`].join('\n');
    const promptLanguage = language || 'zh-CN';
    const prompt = await getEvalQuestionPrompt(promptLanguage, questionType, { text, number: count }, projectId);

    // 3. Ask the model and parse its reply.
    const llm = new LLMClient(model);
    const rawReply = await llm.getResponse(prompt);
    const variants = extractJsonFromLLMOutput(rawReply);

    // The reply is expected to be an array (this also rejects null/undefined).
    if (!Array.isArray(variants)) {
      throw new Error('Failed to parse LLM output or output is not an array');
    }

    return NextResponse.json({ success: true, data: variants });
  } catch (error) {
    console.error('Generate eval variant failed:', error);
    const message = error.message || 'Internal Server Error';
    return NextResponse.json({ error: message }, { status: 500 });
  }
}

View File

@@ -0,0 +1,109 @@
import { NextResponse } from 'next/server';
import { createDataset } from '@/lib/db/datasets';
import { nanoid } from 'nanoid';
/**
 * Serialize imported tags to the JSON string stored on a dataset.
 *
 * - Arrays and plain objects are stringified.
 * - A string that already is valid JSON is stored untouched.
 * - Any other string is treated as a comma-separated tag list and converted to a JSON
 *   array, so the stored value stays parseable instead of being discarded by readers
 *   that `JSON.parse` it.
 * - Everything else becomes '[]'.
 *
 * @param {unknown} rawTags - Tags value from the imported record.
 * @returns {string} JSON text.
 */
function serializeImportedTags(rawTags) {
  if (typeof rawTags === 'string') {
    try {
      JSON.parse(rawTags);
      return rawTags;
    } catch {
      const parts = rawTags
        .split(',')
        .map(tag => tag.trim())
        .filter(Boolean);
      return JSON.stringify(parts);
    }
  }
  if (rawTags && typeof rawTags === 'object') {
    try {
      return JSON.stringify(rawTags);
    } catch {
      return '[]';
    }
  }
  return '[]';
}

/**
 * Serialize the imported `other` field: strings pass through, objects are stringified,
 * anything else becomes '{}'.
 *
 * @param {unknown} rawOther - `other` value from the imported record.
 * @returns {string} Text stored on the dataset.
 */
function serializeImportedOther(rawOther) {
  if (typeof rawOther === 'string') {
    return rawOther;
  }
  if (rawOther && typeof rawOther === 'object') {
    try {
      return JSON.stringify(rawOther);
    } catch {
      return '{}';
    }
  }
  return '{}';
}

/**
 * Import externally supplied question/answer records as datasets of a project.
 *
 * Records are processed one by one. A record without a non-empty string `question` and
 * `answer` is skipped; a record whose creation throws is reported in `errors`; neither
 * aborts the import. Optional fields are normalized to safe defaults.
 *
 * @param {Request} request - JSON body: `datasets` (array of records) and `sourceInfo`
 *   (echoed back unchanged).
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} `{ success, total, failed, skipped, errors, sourceInfo }`
 *   where `success` is the number of created records and `failed` is `errors.length`
 *   (which includes skipped records); 400 when `datasets` is not an array; 500 on failure.
 */
export async function POST(request, { params }) {
  try {
    const { projectId } = params;
    const { datasets, sourceInfo } = await request.json();

    if (!Array.isArray(datasets)) {
      return NextResponse.json({ error: 'Invalid datasets data' }, { status: 400 });
    }

    const errors = [];
    let successCount = 0;
    let skippedCount = 0;

    for (let i = 0; i < datasets.length; i++) {
      try {
        const dataset = datasets[i];

        // Read and clean the mandatory fields defensively.
        const q = typeof dataset?.question === 'string' ? dataset.question.trim() : '';
        const a = typeof dataset?.answer === 'string' ? dataset.answer.trim() : '';

        // Records missing a mandatory field are skipped, not failed.
        if (!q || !a) {
          errors.push(`${i + 1} 条记录缺少必填字段(question/answer),已跳过`);
          skippedCount++;
          continue;
        }

        // Create the dataset record with normalized optional fields.
        await createDataset({
          projectId,
          questionId: nanoid(), // generate a unique question id
          question: q,
          answer: a,
          chunkName: dataset?.chunkName || 'Imported Data',
          chunkContent: dataset?.chunkContent || 'Imported from external source',
          model: dataset?.model || 'imported',
          questionLabel: dataset?.questionLabel || '',
          cot: typeof dataset?.cot === 'string' ? dataset.cot : '',
          confirmed: typeof dataset?.confirmed === 'boolean' ? dataset.confirmed : false,
          score: typeof dataset?.score === 'number' ? dataset.score : 0,
          tags: serializeImportedTags(dataset?.tags),
          note: typeof dataset?.note === 'string' ? dataset.note : '',
          other: serializeImportedOther(dataset?.other)
        });
        successCount++;
      } catch (error) {
        errors.push(`${i + 1} 条记录: ${error.message}`);
      }
    }

    return NextResponse.json({
      success: successCount,
      total: datasets.length,
      failed: errors.length,
      skipped: skippedCount,
      errors,
      sourceInfo
    });
  } catch (error) {
    console.error('Import datasets error:', error);
    return NextResponse.json({ error: error.message }, { status: 500 });
  }
}

View File

@@ -0,0 +1,89 @@
import { NextResponse } from 'next/server';
import { getDatasetsById, updateDataset } from '@/lib/db/datasets';
import { getQuestionById } from '@/lib/db/questions';
import { getChunkById } from '@/lib/db/chunks';
import LLMClient from '@/lib/llm/core/index';
import { getNewAnswerPrompt } from '@/lib/llm/prompts/newAnswer';
import { extractJsonFromLLMOutput } from '@/lib/llm/common/util';
/**
 * Optimize a dataset's answer (and chain of thought) with an LLM, guided by user advice.
 *
 * The source text chunk is taken from the dataset's stored `chunkContent`; when that is
 * empty and the dataset has a `questionId`, it is looked up through the question's chunk.
 * A failed lookup is only logged and the optimization continues with empty chunk content.
 *
 * @param {Request} request - JSON body: `datasetId`, `model`, `advice`, optional `language`.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} `{ success: true, dataset }` with the updated record,
 *   400 on missing input, 404 when the dataset does not exist, 500 on failure or when
 *   the model output contains no answer.
 */
export async function POST(request, { params }) {
  const badRequest = message => NextResponse.json({ error: message }, { status: 400 });

  try {
    const { projectId } = params;

    // Validate the project id.
    if (!projectId) {
      return badRequest('Project ID cannot be empty');
    }

    // Read and validate the request body.
    const { datasetId, model, advice, language } = await request.json();
    if (!datasetId) {
      return badRequest('Dataset ID cannot be empty');
    }
    if (!model) {
      return badRequest('Model cannot be empty');
    }
    if (!advice) {
      return badRequest('Please provide optimization suggestions');
    }

    // Load the dataset to optimize.
    const dataset = await getDatasetsById(datasetId);
    if (!dataset) {
      return NextResponse.json({ error: 'Dataset does not exist' }, { status: 404 });
    }

    // Create the LLM client.
    const llmClient = new LLMClient(model);

    const { question, answer, cot, questionId } = dataset;
    let chunkContent = dataset.chunkContent || '';
    const shouldLookUpChunk = !chunkContent && questionId;
    if (shouldLookUpChunk) {
      try {
        const linkedQuestion = await getQuestionById(questionId);
        const linkedChunkId = linkedQuestion?.chunkId;
        if (linkedChunkId) {
          const chunk = await getChunkById(linkedChunkId);
          chunkContent = chunk?.content || '';
        }
      } catch (error) {
        console.error('Failed to load chunk content by questionId:', error);
      }
    }

    // Generate the optimized answer and chain of thought.
    const prompt = await getNewAnswerPrompt(language, { question, answer, cot, advice, chunkContent }, projectId);
    const response = await llmClient.getResponse(prompt);

    // Extract the JSON result from the model output.
    const optimizedResult = extractJsonFromLLMOutput(response);
    if (!optimizedResult?.answer) {
      return NextResponse.json({ error: 'Failed to optimize answer, please try again' }, { status: 500 });
    }

    // A dataset without an existing chain of thought keeps it empty; otherwise prefer
    // the model's new one and fall back to the existing text.
    let nextCot = '';
    if (cot) {
      nextCot = optimizedResult.cot || cot;
    }

    // Persist and return the updated dataset.
    const updatedDataset = { ...dataset, answer: optimizedResult.answer, cot: nextCot };
    await updateDataset(updatedDataset);

    return NextResponse.json({ success: true, dataset: updatedDataset });
  } catch (error) {
    console.error('Failed to optimize answer:', String(error));
    const message = error.message || 'Failed to optimize answer';
    return NextResponse.json({ error: message }, { status: 500 });
  }
}

View File

@@ -0,0 +1,193 @@
import { NextResponse } from 'next/server';
import {
deleteDataset,
getDatasetsByPagination,
getDatasetsIds,
getDatasetsById,
updateDataset
} from '@/lib/db/datasets';
import datasetService from '@/lib/services/datasets';
// 优化思维链函数已移至服务层
/**
 * Generate a dataset entry for a single question.
 *
 * Forwards `questionId`, `model` and `language` from the JSON body to
 * `datasetService.generateDatasetForQuestion` and responds with whatever it returns.
 *
 * @param {Request} request - JSON body: `questionId`, `model`, `language`.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} The service result, or 500 with `{ error }` on failure.
 */
export async function POST(request, { params }) {
  try {
    const { projectId } = params;
    const { questionId, model, language } = await request.json();

    // Delegate to the dataset generation service.
    const generationOptions = { model, language };
    const generated = await datasetService.generateDatasetForQuestion(projectId, questionId, generationOptions);

    return NextResponse.json(generated);
  } catch (error) {
    console.error('Failed to generate dataset:', String(error));
    const message = error.message || 'Failed to generate dataset';
    return NextResponse.json({ error: message }, { status: 500 });
  }
}
/**
 * List a project's datasets with pagination and filters, or return matching ids only.
 *
 * Query parameters: `page` (default 1), `size` (default 10), `input`, `field`
 * (default 'question'), `status` ('confirmed' / 'unconfirmed'), `hasCot`, `isDistill`,
 * `scoreRange`, `customTag`, `noteKeyword`, `chunkName`, and `selectedAll`.
 * When `selectedAll` is present and non-empty, the result of `getDatasetsIds` for the
 * same filters is returned instead of a page.
 *
 * `page` and `size` are parsed as base-10 integers; anything that is not a positive
 * integer (missing, non-numeric, zero, negative) falls back to the default so an invalid
 * value never reaches the query layer.
 *
 * @param {Request} request - Incoming request; only its URL is read.
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} The page (or id list), 400 when the project id is missing,
 *   500 on failure.
 */
export async function GET(request, { params }) {
  try {
    const { projectId } = params;
    const { searchParams } = new URL(request.url);

    // Validate the project id.
    if (!projectId) {
      return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 });
    }

    const readPositiveInt = (name, fallback) => {
      const parsed = Number.parseInt(searchParams.get(name), 10);
      return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
    };
    const page = readPositiveInt('page', 1);
    const size = readPositiveInt('size', 10);

    const input = searchParams.get('input');
    const field = searchParams.get('field') || 'question';
    const status = searchParams.get('status');
    const hasCot = searchParams.get('hasCot');
    const isDistill = searchParams.get('isDistill');
    const scoreRange = searchParams.get('scoreRange');
    const customTag = searchParams.get('customTag');
    const noteKeyword = searchParams.get('noteKeyword');
    const chunkName = searchParams.get('chunkName');

    // undefined means "do not filter by confirmation state".
    let confirmed;
    if (status === 'confirmed') confirmed = true;
    if (status === 'unconfirmed') confirmed = false;

    // Filters shared by both the id listing and the paginated listing, in call order.
    const filters = [input, field, hasCot, isDistill, scoreRange, customTag, noteKeyword, chunkName];

    const selectedAll = searchParams.get('selectedAll');
    if (selectedAll) {
      const data = await getDatasetsIds(projectId, confirmed, ...filters);
      return NextResponse.json(data);
    }

    // Fetch the requested page.
    const datasets = await getDatasetsByPagination(projectId, page, size, confirmed, ...filters);
    return NextResponse.json(datasets);
  } catch (error) {
    console.error('获取数据集失败:', String(error));
    return NextResponse.json(
      {
        error: error.message || '获取数据集失败'
      },
      { status: 500 }
    );
  }
}
/**
 * Delete a dataset identified by the `id` query parameter.
 *
 * @param {Request} request - Incoming request; only its URL is read.
 * @returns {Promise<Response>} `{ success: true, message }`, 400 when `id` is missing
 *   or empty, 500 on failure.
 */
export async function DELETE(request) {
  try {
    const datasetId = new URL(request.url).searchParams.get('id');

    if (datasetId) {
      await deleteDataset(datasetId);
      return NextResponse.json({ success: true, message: 'Dataset deleted successfully' });
    }

    return NextResponse.json({ error: 'Dataset ID cannot be empty' }, { status: 400 });
  } catch (error) {
    console.error('Failed to delete dataset:', error);
    const message = error.message || 'Failed to delete dataset';
    return NextResponse.json({ error: message }, { status: 500 });
  }
}
/**
 * Edit a dataset identified by the `id` query parameter.
 *
 * Only the fields present in the JSON body are written: `confirmed` whenever it is not
 * undefined, and `answer` / `cot` / `question` whenever they are truthy (an empty string
 * therefore leaves the stored value untouched).
 *
 * The response carries the dataset as it looks after the update, i.e. the previously
 * stored record overlaid with the fields that were just written, rather than the stale
 * record that was read before the update.
 *
 * @param {Request} request - Request with the `id` query parameter and a JSON body that
 *   may carry `answer`, `cot`, `question`, `confirmed`.
 * @returns {Promise<Response>} `{ success: true, message, dataset }`, 400 when `id` is
 *   missing, 404 when the dataset does not exist, 500 on failure.
 */
export async function PATCH(request) {
  try {
    const { searchParams } = new URL(request.url);
    const datasetId = searchParams.get('id');
    const { answer, cot, question, confirmed } = await request.json();

    if (!datasetId) {
      return NextResponse.json(
        {
          error: 'Dataset ID cannot be empty'
        },
        { status: 400 }
      );
    }

    // Make sure the dataset exists before writing.
    const dataset = await getDatasetsById(datasetId);
    if (!dataset) {
      return NextResponse.json(
        {
          error: 'Dataset does not exist'
        },
        { status: 404 }
      );
    }

    // Collect only the fields the caller actually supplied.
    const data = { id: datasetId };
    if (confirmed !== undefined) data.confirmed = confirmed;
    if (answer) data.answer = answer;
    if (cot) data.cot = cot;
    if (question) data.question = question;

    // Persist the changes.
    await updateDataset(data);

    return NextResponse.json({
      success: true,
      message: 'Dataset updated successfully',
      dataset: { ...dataset, ...data }
    });
  } catch (error) {
    console.error('Failed to update dataset:', String(error));
    return NextResponse.json(
      {
        error: error.message || 'Failed to update dataset'
      },
      { status: 500 }
    );
  }
}

View File

@@ -0,0 +1,28 @@
import { NextResponse } from 'next/server';
import { getUsedCustomTags } from '@/lib/db/datasets';
/**
 * List the custom tags that have been used within a project.
 *
 * @param {Request} request - Incoming request (not read).
 * @param {{ params: { projectId: string } }} context - Route parameters.
 * @returns {Promise<Response>} `{ tags }` as returned by `getUsedCustomTags`,
 *   400 when the project id is missing, 500 on failure.
 */
export async function GET(request, { params }) {
  try {
    const { projectId } = params;

    // Validate the project id.
    if (projectId) {
      const tags = await getUsedCustomTags(projectId);
      return NextResponse.json({ tags });
    }

    return NextResponse.json({ error: '项目ID不能为空' }, { status: 400 });
  } catch (error) {
    console.error('获取自定义标签失败:', String(error));
    const message = error.message || '获取自定义标签失败';
    return NextResponse.json({ error: message }, { status: 500 });
  }
}