53 lines
1.5 KiB
JavaScript
53 lines
1.5 KiB
JavaScript
import { NextResponse } from 'next/server';
|
|
import { getDatasetsById } from '@/lib/db/datasets';
|
|
import { getEncoding } from '@langchain/core/utils/tiktoken';
|
|
|
|
/**
|
|
* 异步计算数据集文本的Token数量
|
|
*/
|
|
export async function GET(request, { params }) {
|
|
try {
|
|
const { projectId, datasetId } = params;
|
|
|
|
if (!datasetId) {
|
|
return NextResponse.json({ error: '数据集ID不能为空' }, { status: 400 });
|
|
}
|
|
|
|
const datasets = await getDatasetsById(datasetId);
|
|
const tokenCounts = {
|
|
answerTokens: 0,
|
|
cotTokens: 0
|
|
};
|
|
|
|
try {
|
|
if (datasets.answer || datasets.cot) {
|
|
// 使用 cl100k_base 编码,适用于 gpt-3.5-turbo 和 gpt-4
|
|
const encoding = await getEncoding('cl100k_base');
|
|
|
|
if (datasets.answer) {
|
|
const tokens = encoding.encode(datasets.answer);
|
|
tokenCounts.answerTokens = tokens.length;
|
|
}
|
|
|
|
if (datasets.cot) {
|
|
const tokens = encoding.encode(datasets.cot);
|
|
tokenCounts.cotTokens = tokens.length;
|
|
}
|
|
}
|
|
} catch (error) {
|
|
console.error('计算Token数量失败:', String(error));
|
|
return NextResponse.json({ error: '计算Token数量失败' }, { status: 500 });
|
|
}
|
|
|
|
return NextResponse.json(tokenCounts);
|
|
} catch (error) {
|
|
console.error('获取Token计数失败:', String(error));
|
|
return NextResponse.json(
|
|
{
|
|
error: error.message || '获取Token计数失败'
|
|
},
|
|
{ status: 500 }
|
|
);
|
|
}
|
|
}
|