/**
 * CreateEvalTaskDialog: form component for creating an evaluation task in a project.
 *
 * Form state (model selection, judge model, question filters, score anchors,
 * error text) is supplied by the useEvalTaskForm(projectId, open) hook.
 * On submit the component:
 *   1. validates that at least one model is selected, that the filtered question
 *      total is non-zero, that a judge model is chosen when subjective questions
 *      exist, and that the judge model is not one of the selected models;
 *   2. POSTs the current filters to /api/projects/{projectId}/eval-datasets/sample
 *      and reads the sampled question IDs from data.ids;
 *   3. POSTs the task definition to /api/projects/{projectId}/eval-tasks and, when
 *      the response body reports code 0, calls onSuccess(result.data) and closes.
 * Failures are surfaced through setError using translated messages.
 *
 * Props:
 *   open      - forwarded to useEvalTaskForm and used to gate score-anchor
 *               initialization (presumably the dialog's visibility flag; confirm).
 *   onClose   - function called by handleClose after resetForm().
 *   projectId - interpolated into the API URLs and forwarded to useEvalTaskForm.
 *   onSuccess - optional function; called with result.data after a successful create.
 */
'use client'; import { useState } from 'react'; import { Dialog, DialogTitle, DialogContent, DialogActions, Button, Box, Typography, FormControl, InputLabel, Select, MenuItem, Alert, Divider, CircularProgress, FormHelperText } from '@mui/material'; import { useTranslation } from 'react-i18next'; import ModelSelector from './ModelSelector'; import QuestionFilter from './QuestionFilter'; import ScoreAnchorsForm from './ScoreAnchorsForm'; import { useEvalTaskForm } from '../hooks/useEvalTaskForm'; import { useEffect } from 'react'; export default function CreateEvalTaskDialog({ open, onClose, projectId, onSuccess }) { const { t, i18n } = useTranslation(); const [submitting, setSubmitting] = useState(false); const { models, selectedModels, setSelectedModels, judgeModel, setJudgeModel, evalDatasets, availableTags, questionTypes, setQuestionTypes, selectedTags, setSelectedTags, searchKeyword, setSearchKeyword, questionCount, setQuestionCount, filteredTotal, sampledIds, hasSubjectiveQuestions, hasShortAnswer, hasOpenEnded, shortAnswerScoreAnchors, setShortAnswerScoreAnchors, openEndedScoreAnchors, setOpenEndedScoreAnchors, initScoreAnchors, loading, error, setError, setSampledIds, resetFilters, resetForm } = useEvalTaskForm(projectId, open); // Initialize the scoring rules when subjective questions are present and the dialog is open; the language passed is 'zh-CN' or 'en'. useEffect(() => { if (hasSubjectiveQuestions && open) { initScoreAnchors(i18n.language === 'zh-CN' ? 
'zh-CN' : 'en'); } }, [hasSubjectiveQuestions, open, i18n.language]); // Count the datasets per question type. const typeStats = {}; evalDatasets.forEach(d => { typeStats[d.questionType] = (typeStats[d.questionType] || 0) + 1; }); /* Build the "providerId::modelId" key for a model. */ const getModelKey = model => `${model.providerId}::${model.modelId}`; /* Store the new model selection and clear any displayed error. */ const handleModelSelectionChange = newSelection => { setSelectedModels(newSelection); setError(''); }; /* Validate the form, sample the question IDs, then create the evaluation task. */ const handleSubmit = async () => { // Clear any previous error first. setError(''); // Validate. if (selectedModels.length === 0) { setError(t('evalTasks.errorNoModels')); return; } if (filteredTotal === 0) { setError(t('evalTasks.errorNoQuestions')); return; } if (hasSubjectiveQuestions && !judgeModel) { setError(t('evalTasks.errorNoJudgeModel')); return; } // Ensure the judge model is not also one of the models under test. if (judgeModel && selectedModels.includes(judgeModel)) { setError(t('evalTasks.errorJudgeSameAsTest')); return; } try { setSubmitting(true); setError(''); // Parse the selected model keys by splitting on '::'. NOTE(review): this local `models` shadows the `models` returned by useEvalTaskForm. const models = selectedModels.map(m => { const [providerId, modelId] = m.split('::'); return { modelId, providerId }; // Note the order: modelId comes first. }); // Parse the judge model key into provider and model IDs; both stay null when no judge model is set. let judgeModelId = null; let judgeProviderId = null; if (judgeModel) { const [pId, mId] = judgeModel.split('::'); judgeProviderId = pId; judgeModelId = mId; } // Call the backend sampling endpoint to obtain the question IDs. const sampleBody = { questionTypes: questionTypes, tags: selectedTags, keyword: searchKeyword.trim() || '', limit: questionCount > 0 ? 
questionCount : undefined }; const sampleResponse = await fetch(`/api/projects/${projectId}/eval-datasets/sample`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(sampleBody) }); const sampleResult = await sampleResponse.json(); if (!sampleResponse.ok || sampleResult.code !== 0) { setError(sampleResult.error || t('evalTasks.errorCreateFailed')); return; } const ids = sampleResult?.data?.ids || []; if (ids.length === 0) { setError(t('evalTasks.errorNoQuestions')); return; } setSampledIds(ids); // Build the custom scoring-rules object; a key is added only for question kinds that are present and have anchors. const customScoreAnchors = {}; if (hasShortAnswer && shortAnswerScoreAnchors.length > 0) { customScoreAnchors.short_answer = shortAnswerScoreAnchors; } if (hasOpenEnded && openEndedScoreAnchors.length > 0) { customScoreAnchors.open_ended = openEndedScoreAnchors; } // Create the task. NOTE(review): success is decided from result.code only; response.ok is not checked here, unlike the sampling request above. const response = await fetch(`/api/projects/${projectId}/eval-tasks`, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ models, // Field name expected by the backend. judgeModelId, // Passed separately. judgeProviderId, // Passed separately. evalDatasetIds: ids, language: i18n.language === 'zh-CN' ? 'zh-CN' : 'en', customScoreAnchors: Object.keys(customScoreAnchors).length > 0 ? customScoreAnchors : undefined }) }); const result = await response.json(); if (result.code === 0) { onSuccess && onSuccess(result.data); handleClose(); } else { setError(result.error || t('evalTasks.errorCreateFailed')); } } catch (err) { console.error('创建评估任务失败:', err); setError(t('evalTasks.errorCreateFailed')); } finally { setSubmitting(false); } }; /* Reset the form state, then notify the parent through onClose. */ const handleClose = () => { resetForm(); onClose(); }; /* Store the chosen judge model and clear any displayed error. */ const handleJudgeModelChange = event => { setJudgeModel(event.target.value); setError(''); }; /* NOTE(review): no element tags are visible in the return expression below in this view; confirm the markup against the full file. */ return ( {t('evalTasks.createTitle')} {error && ( setError('')}> {error} )} {/* Select the models to test */} {/* Question filtering */} {/* Final question count */} {t('evalTasks.finalSelection')} {sampledIds.length || (questionCount > 0 ? 
questionCount : filteredTotal)}{' '} {t('evalTasks.questionsSuffix')} {hasSubjectiveQuestions && ( {t('evalTasks.hasSubjectiveHint')} )} {/* Select the judge model (shown only when there are subjective questions) */} {hasSubjectiveQuestions && ( <> {t('evalTasks.selectJudgeModel')} * {t('evalTasks.selectJudgeModelHint')} {/* Short-answer scoring rules */} {hasShortAnswer && ( )} {/* Open-ended scoring rules */} {hasOpenEnded && ( )} )} ); }