Files
YG-Datasets/easy-dataset-main/app/api/projects/[projectId]/datasets/[datasetId]/token-count/route.js

53 lines
1.5 KiB
JavaScript

import { NextResponse } from 'next/server';
import { getDatasetsById } from '@/lib/db/datasets';
import { getEncoding } from '@langchain/core/utils/tiktoken';
/**
 * Counts how many tokens `text` occupies under the given tokenizer.
 *
 * @param {{ encode: Function }} encoding - Tokenizer returned by `getEncoding`.
 * @param {string} text - Text to tokenize.
 * @returns {number} Number of tokens produced for `text`.
 */
function countTokens(encoding, text) {
  // Empty allowed/disallowed special-token lists make the tokenizer encode
  // sequences such as "<|endoftext|>" as ordinary text instead of throwing,
  // so stored model output that happens to contain them can still be counted.
  return encoding.encode(text, [], []).length;
}

/**
 * GET handler: computes the token counts of a dataset's `answer` and `cot`
 * fields using the `cl100k_base` encoding.
 *
 * Responses:
 *  - 200 `{ answerTokens, cotTokens }` — a missing or empty field counts as 0.
 *  - 400 when `datasetId` is absent from the route params.
 *  - 404 when no dataset is returned for `datasetId`.
 *  - 500 when tokenization or the lookup itself fails.
 *
 * @param {Request} request - Incoming request (unused).
 * @param {{ params: { projectId: string, datasetId: string } }} context - Route context.
 * @returns {Promise<NextResponse>} JSON response as described above.
 */
export async function GET(request, { params }) {
  try {
    // `await` is a no-op on a plain object and resolves `params` when the
    // framework supplies it as a Promise.
    const { datasetId } = await params;
    if (!datasetId) {
      return NextResponse.json({ error: '数据集ID不能为空' }, { status: 400 });
    }

    const dataset = await getDatasetsById(datasetId);
    // NOTE(review): assumes the lookup yields null/undefined for an unknown id —
    // without this guard the property reads below would throw a TypeError that
    // was reported as a tokenization failure.
    if (!dataset) {
      return NextResponse.json({ error: '数据集不存在' }, { status: 404 });
    }

    const tokenCounts = {
      answerTokens: 0,
      cotTokens: 0
    };

    try {
      // Only load the tokenizer when there is something to count.
      if (dataset.answer || dataset.cot) {
        // cl100k_base is the encoding used by gpt-3.5-turbo and gpt-4.
        const encoding = await getEncoding('cl100k_base');
        if (dataset.answer) {
          tokenCounts.answerTokens = countTokens(encoding, dataset.answer);
        }
        if (dataset.cot) {
          tokenCounts.cotTokens = countTokens(encoding, dataset.cot);
        }
      }
    } catch (error) {
      console.error('计算Token数量失败:', String(error));
      return NextResponse.json({ error: '计算Token数量失败' }, { status: 500 });
    }

    return NextResponse.json(tokenCounts);
  } catch (error) {
    console.error('获取Token计数失败:', String(error));
    return NextResponse.json(
      {
        error: error.message || '获取Token计数失败'
      },
      { status: 500 }
    );
  }
}