1. 修改了一些bug
2. 做了一些调整,比如启动脚本,支持了 TensorBoard
This commit is contained in:
@@ -5,26 +5,30 @@
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>训练日志 / 远光软件微调平台</title>
|
||||
<script src="../lib/tailwindcss/tailwind.js"></script>
|
||||
<script>
|
||||
// 禁用 Tailwind 开发模式警告
|
||||
if (typeof console !== 'undefined' && console.warn) {
|
||||
const originalWarn = console.warn;
|
||||
console.warn = function(...args) {
|
||||
if (args[0] && args[0].includes && args[0].includes('cdn.tailwindcss.com')) {
|
||||
return;
|
||||
}
|
||||
originalWarn.apply(console, args);
|
||||
};
|
||||
}
|
||||
</script>
|
||||
<link href="../lib/font-awesome/css/font-awesome.min.css" rel="stylesheet">
|
||||
<script src="../lib/chart.js/chart.min.js"></script>
|
||||
<script>
|
||||
// 确保 Chart.js 已加载
|
||||
if (typeof Chart === 'undefined') {
|
||||
console.error('Chart.js 未加载,尝试动态加载...');
|
||||
// 备用:尝试动态加载
|
||||
const script = document.createElement('script');
|
||||
script.src = '../lib/chart.js/chart.umd.min.js';
|
||||
script.onload = function() {
|
||||
console.log('Chart.js 动态加载成功');
|
||||
window.chartJsLoaded = true;
|
||||
};
|
||||
script.onerror = function() {
|
||||
console.error('Chart.js 加载失败');
|
||||
};
|
||||
document.head.appendChild(script);
|
||||
} else {
|
||||
console.log('Chart.js 已加载');
|
||||
window.chartJsLoaded = true;
|
||||
}
|
||||
</script>
|
||||
<style>
|
||||
@@ -71,7 +75,7 @@
|
||||
<h2 class="text-lg font-medium text-gray-800" id="taskName">加载中...</h2>
|
||||
<span id="taskStatus" class="px-3 py-1 rounded-full text-sm bg-gray-100 text-gray-600">加载中</span>
|
||||
</div>
|
||||
<div class="grid grid-cols-2 md:grid-cols-5 gap-4 text-sm">
|
||||
<div class="grid grid-cols-2 md:grid-cols-6 gap-4 text-sm">
|
||||
<div>
|
||||
<div class="text-gray-500 text-xs">基础模型</div>
|
||||
<div id="baseModel" class="font-medium text-gray-800">-</div>
|
||||
@@ -88,6 +92,10 @@
|
||||
<div class="text-gray-500 text-xs">进程ID</div>
|
||||
<div id="processId" class="font-medium text-gray-800">-</div>
|
||||
</div>
|
||||
<div>
|
||||
<div class="text-gray-500 text-xs">GPU信息</div>
|
||||
<div id="taskGPU" class="font-medium text-gray-800">-</div>
|
||||
</div>
|
||||
<div>
|
||||
<div class="text-gray-500 text-xs">最后更新</div>
|
||||
<div id="lastUpdate" class="font-medium text-gray-800">-</div>
|
||||
@@ -96,21 +104,63 @@
|
||||
</div>
|
||||
|
||||
<!-- 训练曲线图表 -->
|
||||
<div id="chartsContainer" class="bg-white rounded-lg shadow-sm p-6 mb-6">
|
||||
<h3 class="text-base font-medium text-gray-800 mb-4">训练曲线</h3>
|
||||
<div class="grid grid-cols-1 md:grid-cols-3 gap-4">
|
||||
<div>
|
||||
<div id="chartsContainer" class="bg-white rounded-xl shadow-md p-6 mb-6 border border-gray-100">
|
||||
<div class="flex items-center mb-4">
|
||||
<div class="flex items-center justify-center w-8 h-8 rounded-lg bg-gradient-to-br from-blue-500 to-purple-500 text-white mr-3">
|
||||
<i class="fa fa-line-chart"></i>
|
||||
</div>
|
||||
<h3 class="text-base font-semibold text-gray-800">训练实时曲线</h3>
|
||||
<span id="chartUpdateStatus" class="ml-auto text-xs text-gray-400">自动更新中...</span>
|
||||
</div>
|
||||
<div class="grid grid-cols-1 md:grid-cols-3 gap-6">
|
||||
<div class="bg-gradient-to-br from-red-50 to-white rounded-lg p-4 border border-red-100">
|
||||
<canvas id="lossChart" class="w-full h-48"></canvas>
|
||||
</div>
|
||||
<div>
|
||||
<div class="bg-gradient-to-br from-blue-50 to-white rounded-lg p-4 border border-blue-100">
|
||||
<canvas id="gradNormChart" class="w-full h-48"></canvas>
|
||||
</div>
|
||||
<div>
|
||||
<div class="bg-gradient-to-br from-green-50 to-white rounded-lg p-4 border border-green-100">
|
||||
<canvas id="learningRateChart" class="w-full h-48"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 训练总结 -->
|
||||
<div id="trainSummaryContainer" class="bg-white rounded-xl shadow-md p-6 mb-6 border border-gray-100">
|
||||
<div class="flex items-center mb-4">
|
||||
<div class="flex items-center justify-center w-8 h-8 rounded-lg bg-gradient-to-br from-green-500 to-teal-500 text-white mr-3">
|
||||
<i class="fa fa-check-circle"></i>
|
||||
</div>
|
||||
<h3 class="text-base font-semibold text-gray-800">训练总结</h3>
|
||||
<span id="trainSummaryStatus" class="ml-auto text-xs px-2 py-1 rounded-full bg-gray-100 text-gray-500">训练中</span>
|
||||
</div>
|
||||
<div id="trainSummaryContent" class="grid grid-cols-2 md:grid-cols-5 gap-4">
|
||||
<div class="text-center p-3 bg-gray-50 rounded-lg">
|
||||
<div class="text-xs text-gray-500 mb-1">Epoch</div>
|
||||
<div id="summaryEpoch" class="text-lg font-semibold text-gray-800">-</div>
|
||||
</div>
|
||||
<div class="text-center p-3 bg-gray-50 rounded-lg">
|
||||
<div class="text-xs text-gray-500 mb-1">训练损失</div>
|
||||
<div id="summaryTrainLoss" class="text-lg font-semibold text-gray-800">-</div>
|
||||
</div>
|
||||
<div class="text-center p-3 bg-gray-50 rounded-lg">
|
||||
<div class="text-xs text-gray-500 mb-1">训练时长</div>
|
||||
<div id="summaryTrainRuntime" class="text-lg font-semibold text-gray-800">-</div>
|
||||
</div>
|
||||
<div class="text-center p-3 bg-gray-50 rounded-lg">
|
||||
<div class="text-xs text-gray-500 mb-1">样本/秒</div>
|
||||
<div id="summarySamplesPerSec" class="text-lg font-semibold text-gray-800">-</div>
|
||||
</div>
|
||||
<div class="text-center p-3 bg-gray-50 rounded-lg">
|
||||
<div class="text-xs text-gray-500 mb-1">步/秒</div>
|
||||
<div id="summaryStepsPerSec" class="text-lg font-semibold text-gray-800">-</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="trainSummaryFlos" class="mt-4 text-center text-xs text-gray-400">
|
||||
浮点运算量 (Total FLOPS): <span id="summaryTotalFlos">-</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 日志内容 -->
|
||||
<div class="bg-white rounded-lg shadow-sm">
|
||||
<div class="flex items-center justify-between p-4 border-b border-gray-100">
|
||||
@@ -145,33 +195,36 @@
|
||||
// 初始化图表
|
||||
function initCharts() {
|
||||
if (typeof Chart === 'undefined') {
|
||||
console.error('[Charts] Chart 未定义,无法初始化图表');
|
||||
document.getElementById('chartsContainer').innerHTML = '<div class="text-center p-4 text-red-500"><i class="fa fa-exclamation-triangle mr-2"></i>图表库加载失败,请刷新页面重试</div>';
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('[Charts] 开始初始化图表...');
|
||||
const commonOptions = {
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
animation: false,
|
||||
scales: {
|
||||
x: {
|
||||
title: { display: true, text: 'Step' },
|
||||
grid: { color: 'rgba(0,0,0,0.05)' }
|
||||
},
|
||||
y: {
|
||||
title: { display: true, text: 'Value' },
|
||||
grid: { color: 'rgba(0,0,0,0.05)' }
|
||||
}
|
||||
},
|
||||
plugins: {
|
||||
legend: { display: false }
|
||||
}
|
||||
// 创建渐变填充函数
|
||||
function createGradient(ctx, colorStart, colorEnd) {
|
||||
const gradient = ctx.createLinearGradient(0, 0, 0, 200);
|
||||
gradient.addColorStop(0, colorStart);
|
||||
gradient.addColorStop(1, colorEnd);
|
||||
return gradient;
|
||||
}
|
||||
|
||||
// 通用样式配置
|
||||
const basePointStyle = {
|
||||
pointRadius: 4,
|
||||
pointHoverRadius: 6,
|
||||
pointBackgroundColor: '#fff',
|
||||
pointBorderWidth: 2,
|
||||
tension: 0.4
|
||||
};
|
||||
|
||||
const baseLineStyle = {
|
||||
borderWidth: 2.5,
|
||||
borderCapStyle: 'round',
|
||||
borderJoinStyle: 'round'
|
||||
};
|
||||
|
||||
// Loss 图表
|
||||
const lossCtx = document.getElementById('lossChart').getContext('2d');
|
||||
const lossGradient = createGradient(lossCtx, 'rgba(239, 68, 68, 0.4)', 'rgba(239, 68, 68, 0.02)');
|
||||
lossChart = new Chart(lossCtx, {
|
||||
type: 'line',
|
||||
data: {
|
||||
@@ -180,20 +233,53 @@
|
||||
label: 'Loss',
|
||||
data: lossData.values,
|
||||
borderColor: '#ef4444',
|
||||
backgroundColor: 'rgba(239, 68, 68, 0.1)',
|
||||
backgroundColor: lossGradient,
|
||||
fill: true,
|
||||
tension: 0.3,
|
||||
pointRadius: 3
|
||||
...basePointStyle,
|
||||
...baseLineStyle
|
||||
}]
|
||||
},
|
||||
options: {
|
||||
...commonOptions,
|
||||
plugins: { ...commonOptions.plugins, title: { display: true, text: 'Loss', color: '#ef4444', font: { size: 14 } } }
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
animation: { duration: 500, easing: 'easeOutQuart' },
|
||||
interaction: { intersect: false, mode: 'index' },
|
||||
plugins: {
|
||||
legend: { display: false },
|
||||
title: {
|
||||
display: true,
|
||||
text: '📉 损失值 (Loss)',
|
||||
color: '#ef4444',
|
||||
font: { size: 15, weight: '600' },
|
||||
padding: { bottom: 15 }
|
||||
},
|
||||
tooltip: {
|
||||
backgroundColor: 'rgba(0,0,0,0.8)',
|
||||
titleColor: '#fff',
|
||||
bodyColor: '#fff',
|
||||
padding: 10,
|
||||
cornerRadius: 8,
|
||||
displayColors: false
|
||||
}
|
||||
},
|
||||
scales: {
|
||||
x: {
|
||||
title: { display: true, text: '训练步数 (Step)', color: '#6b7280', font: { size: 12 } },
|
||||
grid: { color: 'rgba(0,0,0,0.05)', drawBorder: false },
|
||||
ticks: { color: '#9ca3af', font: { size: 11 } }
|
||||
},
|
||||
y: {
|
||||
title: { display: true, text: '损失值', color: '#6b7280', font: { size: 12 } },
|
||||
grid: { color: 'rgba(0,0,0,0.05)', drawBorder: false },
|
||||
ticks: { color: '#9ca3af', font: { size: 11 } }
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Grad Norm 图表
|
||||
const gradNormCtx = document.getElementById('gradNormChart').getContext('2d');
|
||||
const gradNormGradient = createGradient(gradNormCtx, 'rgba(59, 130, 246, 0.4)', 'rgba(59, 130, 246, 0.02)');
|
||||
gradNormChart = new Chart(gradNormCtx, {
|
||||
type: 'line',
|
||||
data: {
|
||||
@@ -202,20 +288,53 @@
|
||||
label: 'Grad Norm',
|
||||
data: gradNormData.values,
|
||||
borderColor: '#3b82f6',
|
||||
backgroundColor: 'rgba(59, 130, 246, 0.1)',
|
||||
backgroundColor: gradNormGradient,
|
||||
fill: true,
|
||||
tension: 0.3,
|
||||
pointRadius: 3
|
||||
...basePointStyle,
|
||||
...baseLineStyle
|
||||
}]
|
||||
},
|
||||
options: {
|
||||
...commonOptions,
|
||||
plugins: { ...commonOptions.plugins, title: { display: true, text: 'Grad Norm', color: '#3b82f6', font: { size: 14 } } }
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
animation: { duration: 500, easing: 'easeOutQuart' },
|
||||
interaction: { intersect: false, mode: 'index' },
|
||||
plugins: {
|
||||
legend: { display: false },
|
||||
title: {
|
||||
display: true,
|
||||
text: '📊 梯度范数 (Grad Norm)',
|
||||
color: '#3b82f6',
|
||||
font: { size: 15, weight: '600' },
|
||||
padding: { bottom: 15 }
|
||||
},
|
||||
tooltip: {
|
||||
backgroundColor: 'rgba(0,0,0,0.8)',
|
||||
titleColor: '#fff',
|
||||
bodyColor: '#fff',
|
||||
padding: 10,
|
||||
cornerRadius: 8,
|
||||
displayColors: false
|
||||
}
|
||||
},
|
||||
scales: {
|
||||
x: {
|
||||
title: { display: true, text: '训练步数 (Step)', color: '#6b7280', font: { size: 12 } },
|
||||
grid: { color: 'rgba(0,0,0,0.05)', drawBorder: false },
|
||||
ticks: { color: '#9ca3af', font: { size: 11 } }
|
||||
},
|
||||
y: {
|
||||
title: { display: true, text: '梯度范数', color: '#6b7280', font: { size: 12 } },
|
||||
grid: { color: 'rgba(0,0,0,0.05)', drawBorder: false },
|
||||
ticks: { color: '#9ca3af', font: { size: 11 } }
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Learning Rate 图表
|
||||
const lrCtx = document.getElementById('learningRateChart').getContext('2d');
|
||||
const lrGradient = createGradient(lrCtx, 'rgba(34, 197, 94, 0.4)', 'rgba(34, 197, 94, 0.02)');
|
||||
learningRateChart = new Chart(lrCtx, {
|
||||
type: 'line',
|
||||
data: {
|
||||
@@ -224,23 +343,59 @@
|
||||
label: 'Learning Rate',
|
||||
data: learningRateData.values,
|
||||
borderColor: '#22c55e',
|
||||
backgroundColor: 'rgba(34, 197, 94, 0.1)',
|
||||
backgroundColor: lrGradient,
|
||||
fill: true,
|
||||
tension: 0.3,
|
||||
pointRadius: 3
|
||||
...basePointStyle,
|
||||
...baseLineStyle
|
||||
}]
|
||||
},
|
||||
options: {
|
||||
...commonOptions,
|
||||
scales: {
|
||||
...commonOptions.scales,
|
||||
y: {
|
||||
...commonOptions.scales.y,
|
||||
type: 'logarithmic',
|
||||
title: { display: true, text: 'Learning Rate (log)' }
|
||||
responsive: true,
|
||||
maintainAspectRatio: false,
|
||||
animation: { duration: 500, easing: 'easeOutQuart' },
|
||||
interaction: { intersect: false, mode: 'index' },
|
||||
plugins: {
|
||||
legend: { display: false },
|
||||
title: {
|
||||
display: true,
|
||||
text: '📈 学习率 (Learning Rate)',
|
||||
color: '#22c55e',
|
||||
font: { size: 15, weight: '600' },
|
||||
padding: { bottom: 15 }
|
||||
},
|
||||
tooltip: {
|
||||
backgroundColor: 'rgba(0,0,0,0.8)',
|
||||
titleColor: '#fff',
|
||||
bodyColor: '#fff',
|
||||
padding: 10,
|
||||
cornerRadius: 8,
|
||||
displayColors: false,
|
||||
callbacks: {
|
||||
label: function(context) {
|
||||
return '学习率: ' + context.parsed.y.toExponential(2);
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
plugins: { ...commonOptions.plugins, title: { display: true, text: 'Learning Rate', color: '#22c55e', font: { size: 14 } } }
|
||||
scales: {
|
||||
x: {
|
||||
title: { display: true, text: '训练步数 (Step)', color: '#6b7280', font: { size: 12 } },
|
||||
grid: { color: 'rgba(0,0,0,0.05)', drawBorder: false },
|
||||
ticks: { color: '#9ca3af', font: { size: 11 } }
|
||||
},
|
||||
y: {
|
||||
type: 'logarithmic',
|
||||
title: { display: true, text: '学习率 (对数坐标)', color: '#6b7280', font: { size: 12 } },
|
||||
grid: { color: 'rgba(0,0,0,0.05)', drawBorder: false },
|
||||
ticks: {
|
||||
color: '#9ca3af',
|
||||
font: { size: 11 },
|
||||
callback: function(value) {
|
||||
return value.toExponential(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -288,6 +443,65 @@
|
||||
}
|
||||
}
|
||||
|
||||
// 解析训练总结指标
|
||||
/**
 * Parse the final "train metrics" block out of a training log and render it
 * into the training-summary panel.
 *
 * The function looks for a block introduced by `***** train metrics *****`
 * that lists, in this order, `epoch`, `total_flos`, `train_loss`,
 * `train_runtime`, `train_samples_per_second` and `train_steps_per_second`.
 * Fields that cannot be found are shown as '-'.
 *
 * The status badge follows the same mapping `updateTaskInfo` applies to the
 * `trainSummaryStatus` element, so a periodic log refresh does not relabel a
 * failed/stopped/pending task as "训练中".
 *
 * @param {string} logContent - Full text of the training log. Non-string
 *   values (null/undefined) are treated as an empty log instead of throwing.
 * @param {string} [taskStatus] - Task status reported by the backend
 *   (compared case-insensitively).
 * @returns {{epoch: string, train_loss: string, train_runtime: string,
 *   samples_per_sec: string, steps_per_sec: string, total_flos: string,
 *   completed: boolean}} The parsed summary that was rendered.
 */
function parseTrainSummary(logContent, taskStatus) {
    const summary = {
        epoch: '-',
        train_loss: '-',
        train_runtime: '-',
        samples_per_sec: '-',
        steps_per_sec: '-',
        total_flos: '-',
        completed: false
    };

    // Normalise inputs so a missing log or a non-string status cannot throw.
    const logText = typeof logContent === 'string' ? logContent : '';
    const normalizedStatus = typeof taskStatus === 'string' ? taskStatus.toLowerCase() : '';

    // A task the backend reports as completed counts as completed even if
    // the metrics block has not been written to the log.
    if (normalizedStatus === 'completed') {
        summary.completed = true;
    }

    // Matches the metrics block; the fields must appear in this order.
    const summaryRegex = /\*\*\*\*\* train metrics \*\*\*\*\*\s*[\s\S]*?epoch\s*=\s*([\d.]+)\s*[\s\S]*?total_flos\s*=\s*([\d.]+)([GMT]?)\s*[\s\S]*?train_loss\s*=\s*([\d.]+)\s*[\s\S]*?train_runtime\s*=\s*([\d:.]+)\s*[\s\S]*?train_samples_per_second\s*=\s*([\d.]+)\s*[\s\S]*?train_steps_per_second\s*=\s*([\d.]+)/;

    const match = logText.match(summaryRegex);

    if (match) {
        summary.epoch = match[1];
        summary.total_flos = match[2] + match[3];
        summary.train_loss = match[4];
        summary.train_runtime = match[5];
        summary.samples_per_sec = match[6];
        summary.steps_per_sec = match[7];
        // The metrics block is present in the log, so treat the run as finished.
        summary.completed = true;
    }

    // Update the summary cells; elements missing from the page are skipped.
    const cellValues = {
        summaryEpoch: summary.epoch,
        summaryTrainLoss: summary.train_loss,
        summaryTrainRuntime: summary.train_runtime,
        summarySamplesPerSec: summary.samples_per_sec,
        summaryStepsPerSec: summary.steps_per_sec,
        summaryTotalFlos: summary.total_flos
    };
    for (const [elementId, value] of Object.entries(cellValues)) {
        const cell = document.getElementById(elementId);
        if (cell) cell.textContent = value;
    }

    // Update the status badge.
    const statusElement = document.getElementById('trainSummaryStatus');
    if (statusElement) {
        if (summary.completed) {
            statusElement.textContent = '已完成';
            statusElement.className = 'ml-auto text-xs px-2 py-1 rounded-full bg-green-100 text-green-600';
        } else if (normalizedStatus === 'running') {
            statusElement.textContent = '训练中';
            statusElement.className = 'ml-auto text-xs px-2 py-1 rounded-full bg-blue-100 text-blue-600';
        } else if (normalizedStatus === 'failed' || normalizedStatus === 'stopped') {
            statusElement.textContent = '已停止';
            statusElement.className = 'ml-auto text-xs px-2 py-1 rounded-full bg-gray-100 text-gray-500';
        } else {
            statusElement.textContent = '等待中';
            statusElement.className = 'ml-auto text-xs px-2 py-1 rounded-full bg-gray-100 text-gray-500';
        }
    }

    return summary;
}
|
||||
|
||||
// 带超时的 fetch
|
||||
async function fetchWithTimeout(url, options = {}, timeout = 10000) {
|
||||
const controller = new AbortController();
|
||||
@@ -333,21 +547,17 @@
|
||||
|
||||
// 初始化
|
||||
async function init() {
|
||||
console.log('[Init] 开始初始化...');
|
||||
|
||||
taskId = getTaskId();
|
||||
console.log('[Init] taskId:', taskId);
|
||||
|
||||
if (!taskId) {
|
||||
document.getElementById('taskName').textContent = '未指定任务ID';
|
||||
document.getElementById('logContent').innerHTML = '<span class="text-gray-400">请先从模型调优列表点击查看日志</span>';
|
||||
const taskNameEl = document.getElementById('taskName');
|
||||
const logContentEl = document.getElementById('logContent');
|
||||
if (taskNameEl) taskNameEl.textContent = '未指定任务ID';
|
||||
if (logContentEl) logContentEl.innerHTML = '<span class="text-gray-400">请先从模型调优列表点击查看日志</span>';
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('[Init] 加载任务信息...');
|
||||
await loadTaskInfo();
|
||||
|
||||
console.log('[Init] 加载日志内容...');
|
||||
await loadLogContent();
|
||||
|
||||
// 自动刷新(每5秒)
|
||||
@@ -360,24 +570,26 @@
|
||||
// 加载任务信息
|
||||
async function loadTaskInfo() {
|
||||
try {
|
||||
console.log('[Task] Fetching task info from:', `${API_BASE}/fine-tune/${taskId}`);
|
||||
const response = await fetchWithTimeout(`${API_BASE}/fine-tune/${taskId}`);
|
||||
console.log('[Task] Response status:', response.status);
|
||||
const result = await response.json();
|
||||
console.log('[Task] API result:', result);
|
||||
|
||||
if (result.code === 0 && result.data) {
|
||||
taskInfo = result.data;
|
||||
console.log('[Task] taskInfo:', taskInfo);
|
||||
console.log('[Task] process_id:', taskInfo.process_id);
|
||||
await updateTaskInfo();
|
||||
} else {
|
||||
console.error('[Task] API返回错误:', result.message);
|
||||
const statusElement = document.getElementById('taskStatus');
|
||||
if (statusElement) {
|
||||
statusElement.textContent = '获取失败';
|
||||
statusElement.className = 'px-3 py-1 rounded-full text-sm bg-red-100 text-red-700';
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[Task] 获取任务信息失败:', error);
|
||||
document.getElementById('taskStatus').textContent = '获取失败';
|
||||
document.getElementById('taskStatus').className = 'px-3 py-1 rounded-full text-sm bg-red-100 text-red-700';
|
||||
const statusElement = document.getElementById('taskStatus');
|
||||
if (statusElement) {
|
||||
statusElement.textContent = '获取失败';
|
||||
statusElement.className = 'px-3 py-1 rounded-full text-sm bg-red-100 text-red-700';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -385,10 +597,15 @@
|
||||
async function updateTaskInfo() {
|
||||
if (!taskInfo) return;
|
||||
|
||||
document.getElementById('taskName').textContent = taskInfo.name || '未知任务';
|
||||
const taskNameElement = document.getElementById('taskName');
|
||||
if (taskNameElement) {
|
||||
taskNameElement.textContent = taskInfo.name || '未知任务';
|
||||
}
|
||||
|
||||
// 更新状态
|
||||
const statusElement = document.getElementById('taskStatus');
|
||||
if (!statusElement) return;
|
||||
|
||||
const actualStatus = taskInfo.status ? taskInfo.status.toLowerCase() : 'unknown';
|
||||
const statusMap = {
|
||||
'pending': { text: '等待中', class: 'bg-gray-100 text-gray-600' },
|
||||
@@ -402,6 +619,39 @@
|
||||
statusElement.textContent = statusConfig.text;
|
||||
statusElement.className = `px-3 py-1 rounded-full text-sm ${statusConfig.class}`;
|
||||
|
||||
// 更新训练总结状态
|
||||
const summaryStatusElement = document.getElementById('trainSummaryStatus');
|
||||
if (summaryStatusElement) {
|
||||
if (actualStatus === 'completed') {
|
||||
summaryStatusElement.textContent = '已完成';
|
||||
summaryStatusElement.className = 'ml-auto text-xs px-2 py-1 rounded-full bg-green-100 text-green-600';
|
||||
} else if (actualStatus === 'running') {
|
||||
summaryStatusElement.textContent = '训练中';
|
||||
summaryStatusElement.className = 'ml-auto text-xs px-2 py-1 rounded-full bg-blue-100 text-blue-600';
|
||||
} else if (actualStatus === 'failed' || actualStatus === 'stopped') {
|
||||
summaryStatusElement.textContent = '已停止';
|
||||
summaryStatusElement.className = 'ml-auto text-xs px-2 py-1 rounded-full bg-gray-100 text-gray-500';
|
||||
} else {
|
||||
summaryStatusElement.textContent = '等待中';
|
||||
summaryStatusElement.className = 'ml-auto text-xs px-2 py-1 rounded-full bg-gray-100 text-gray-500';
|
||||
}
|
||||
}
|
||||
|
||||
// 更新图表区域状态显示
|
||||
const chartStatusElement = document.getElementById('chartUpdateStatus');
|
||||
if (chartStatusElement) {
|
||||
if (actualStatus === 'completed') {
|
||||
chartStatusElement.textContent = '已完成';
|
||||
chartStatusElement.className = 'ml-auto text-xs text-green-500';
|
||||
} else if (actualStatus === 'running') {
|
||||
chartStatusElement.textContent = '自动更新中...';
|
||||
chartStatusElement.className = 'ml-auto text-xs text-gray-400';
|
||||
} else {
|
||||
chartStatusElement.textContent = '-';
|
||||
chartStatusElement.className = 'ml-auto text-xs text-gray-400';
|
||||
}
|
||||
}
|
||||
|
||||
// 更新进度
|
||||
const progressElement = document.getElementById('taskProgress');
|
||||
if (progressElement && taskInfo.progress !== undefined) {
|
||||
@@ -419,7 +669,7 @@
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.log('[Task] 获取GPU信息失败:', e);
|
||||
// GPU信息获取失败,静默处理
|
||||
}
|
||||
|
||||
// 更新数据集信息
|
||||
@@ -452,9 +702,15 @@
|
||||
}
|
||||
|
||||
// 其他信息
|
||||
document.getElementById('processId').textContent = taskInfo.process_id || '-';
|
||||
document.getElementById('createTime').textContent = taskInfo.create_time ?
|
||||
new Date(taskInfo.create_time).toLocaleString('zh-CN') : '-';
|
||||
const processIdElement = document.getElementById('processId');
|
||||
if (processIdElement) {
|
||||
processIdElement.textContent = taskInfo.process_id || '-';
|
||||
}
|
||||
const createTimeElement = document.getElementById('createTime');
|
||||
if (createTimeElement) {
|
||||
createTimeElement.textContent = taskInfo.create_time ?
|
||||
new Date(taskInfo.create_time).toLocaleString('zh-CN') : '-';
|
||||
}
|
||||
|
||||
// 获取模型名称
|
||||
if (taskInfo.base_model) {
|
||||
@@ -464,31 +720,33 @@
|
||||
|
||||
// 加载模型名称
|
||||
async function loadModelName(modelId) {
|
||||
const baseModelElement = document.getElementById('baseModel');
|
||||
if (!baseModelElement) return;
|
||||
|
||||
try {
|
||||
const response = await fetchWithTimeout(`${API_BASE}/model-manage`);
|
||||
const result = await response.json();
|
||||
if (result.code === 0 && result.data) {
|
||||
const model = result.data.find(m => m.id == modelId);
|
||||
document.getElementById('baseModel').textContent = model ? model.name : `模型${modelId}`;
|
||||
baseModelElement.textContent = model ? model.name : `模型${modelId}`;
|
||||
}
|
||||
} catch (e) {
|
||||
document.getElementById('baseModel').textContent = `模型${modelId}`;
|
||||
baseModelElement.textContent = `模型${modelId}`;
|
||||
}
|
||||
}
|
||||
|
||||
// 加载日志内容
|
||||
async function loadLogContent() {
|
||||
console.log('[Log] loadLogContent called');
|
||||
console.log('[Log] taskInfo:', taskInfo);
|
||||
console.log('[Log] taskInfo.process_id:', taskInfo ? taskInfo.process_id : 'taskInfo is null');
|
||||
const logContentElement = document.getElementById('logContent');
|
||||
|
||||
// 检查 taskInfo 是否存在
|
||||
if (!taskInfo) {
|
||||
console.log('[Log] taskInfo 为空,等待任务信息加载...');
|
||||
// 尝试重新加载任务信息
|
||||
await loadTaskInfo();
|
||||
if (!taskInfo) {
|
||||
document.getElementById('logContent').innerHTML = '<span class="text-gray-400">无法获取任务信息</span>';
|
||||
if (logContentElement) {
|
||||
logContentElement.innerHTML = '<span class="text-gray-400">无法获取任务信息</span>';
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -499,25 +757,23 @@
|
||||
|
||||
if (!processId && !taskName) {
|
||||
const msg = '<span class="text-gray-400">暂无日志文件 (任务未开始或无进程ID)</span>';
|
||||
document.getElementById('logContent').innerHTML = msg;
|
||||
if (logContentElement) {
|
||||
logContentElement.innerHTML = msg;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
console.log('[Log] Fetching training log files...');
|
||||
const response = await fetchWithTimeout(`${API_BASE}/training-log-files`);
|
||||
const result = await response.json();
|
||||
|
||||
if (result.code === 0 && result.data) {
|
||||
console.log('[Log] Training log files:', result.data);
|
||||
|
||||
// 优先使用进程ID匹配文件名
|
||||
let selectedFile = null;
|
||||
|
||||
if (processId) {
|
||||
const pidStr = processId.toString();
|
||||
for (const file of result.data) {
|
||||
console.log(`[Log] Checking file: ${file.file}, PID: ${file.pid}, Match: ${file.file.startsWith(pidStr + '_') || file.file.includes(pidStr)}`);
|
||||
if (file.file.startsWith(pidStr + '_') || file.file.includes(`_${pidStr}_`) || file.file.endsWith(`_${pidStr}.log`)) {
|
||||
selectedFile = file.file;
|
||||
break;
|
||||
@@ -538,60 +794,73 @@
|
||||
// 如果仍然没有找到,使用第一个文件
|
||||
if (!selectedFile && result.data.length > 0) {
|
||||
selectedFile = result.data[0].file;
|
||||
console.log('[Log] No matching file found, using first available file:', selectedFile);
|
||||
}
|
||||
|
||||
if (selectedFile) {
|
||||
console.log('[Log] Selected log file:', selectedFile);
|
||||
await loadLogFileContent(selectedFile);
|
||||
} else {
|
||||
document.getElementById('logContent').innerHTML = '<span class="text-gray-400">未找到匹配的日志文件</span>';
|
||||
if (logContentElement) {
|
||||
logContentElement.innerHTML = '<span class="text-gray-400">未找到匹配的日志文件</span>';
|
||||
}
|
||||
}
|
||||
} else {
|
||||
document.getElementById('logContent').innerHTML = '<span class="text-gray-400">获取日志列表失败: ' + (result.message || '未知错误') + '</span>';
|
||||
if (logContentElement) {
|
||||
logContentElement.innerHTML = '<span class="text-gray-400">获取日志列表失败: ' + (result.message || '未知错误') + '</span>';
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[Log] 获取日志列表失败:', error);
|
||||
document.getElementById('logContent').innerHTML = '<span class="text-red-500">加载日志失败: ' + error.message + '</span>';
|
||||
if (logContentElement) {
|
||||
logContentElement.innerHTML = '<span class="text-red-500">加载日志失败: ' + error.message + '</span>';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 加载日志文件内容
|
||||
async function loadLogFileContent(fileName) {
|
||||
console.log('[Log] Loading log file:', fileName);
|
||||
const logContentElement = document.getElementById('logContent');
|
||||
try {
|
||||
const response = await fetchWithTimeout(`${API_BASE}/training-log-content?file=${encodeURIComponent(fileName)}`);
|
||||
const result = await response.json();
|
||||
console.log('[Log] Log content API response:', result);
|
||||
|
||||
if (result.code === 0 && result.data) {
|
||||
trainingLogFullContent = result.data.content || '';
|
||||
console.log('[Log] Log content length:', trainingLogFullContent.length);
|
||||
renderLogContent();
|
||||
// 解析并更新图表
|
||||
parseMetricsFromLog(trainingLogFullContent);
|
||||
// 解析并更新训练总结
|
||||
const taskStatus = taskInfo ? taskInfo.status : 'running';
|
||||
parseTrainSummary(trainingLogFullContent, taskStatus);
|
||||
} else if (result.code === 2) {
|
||||
// 文件被锁定,正在训练中
|
||||
document.getElementById('logContent').innerHTML = `
|
||||
<div class="text-orange-500 p-4 text-center">
|
||||
<i class="fa fa-spinner fa-spin fa-2x mb-2"></i>
|
||||
<p class="text-lg">日志文件正在被训练进程占用</p>
|
||||
<p class="text-sm text-gray-500 mt-1">${result.message || '训练结束后可查看完整内容'}</p>
|
||||
<p class="text-xs text-gray-400 mt-2">页面将自动刷新...</p>
|
||||
</div>
|
||||
`;
|
||||
if (logContentElement) {
|
||||
logContentElement.innerHTML = `
|
||||
<div class="text-orange-500 p-4 text-center">
|
||||
<i class="fa fa-spinner fa-spin fa-2x mb-2"></i>
|
||||
<p class="text-lg">日志文件正在被训练进程占用</p>
|
||||
<p class="text-sm text-gray-500 mt-1">${result.message || '训练结束后可查看完整内容'}</p>
|
||||
<p class="text-xs text-gray-400 mt-2">页面将自动刷新...</p>
|
||||
</div>
|
||||
`;
|
||||
}
|
||||
} else {
|
||||
document.getElementById('logContent').innerHTML = '<span class="text-red-500">加载日志失败: ' + (result.message || '未知错误') + '</span>';
|
||||
if (logContentElement) {
|
||||
logContentElement.innerHTML = '<span class="text-red-500">加载日志失败: ' + (result.message || '未知错误') + '</span>';
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[Log] 获取日志内容失败:', error);
|
||||
document.getElementById('logContent').innerHTML = '<span class="text-red-500">加载日志失败: ' + error.message + '</span>';
|
||||
if (logContentElement) {
|
||||
logContentElement.innerHTML = '<span class="text-red-500">加载日志失败: ' + error.message + '</span>';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 渲染日志内容
|
||||
function renderLogContent() {
|
||||
const logContent = document.getElementById('logContent');
|
||||
if (!logContent) return;
|
||||
|
||||
const searchInput = document.getElementById('logSearchInput');
|
||||
const searchText = searchInput ? searchInput.value.toLowerCase() : '';
|
||||
|
||||
@@ -648,9 +917,6 @@
|
||||
|
||||
// 搜索日志
|
||||
function searchLog() {
|
||||
console.log('[Search] 搜索触发,trainingLogFullContent:', trainingLogFullContent ? '已加载' : '未加载');
|
||||
const searchInput = document.getElementById('logSearchInput');
|
||||
console.log('[Search] 搜索文本:', searchInput ? searchInput.value : '输入框未找到');
|
||||
renderLogContent();
|
||||
}
|
||||
|
||||
@@ -663,24 +929,18 @@
|
||||
|
||||
// 页面加载完成后初始化
|
||||
function startApp() {
|
||||
console.log('[App] startApp called');
|
||||
console.log('[App] Chart available:', typeof Chart !== 'undefined');
|
||||
|
||||
// 等待 Chart.js 加载完成(最多等待5秒)
|
||||
let waitCount = 0;
|
||||
const maxWait = 50; // 50 * 100ms = 5秒
|
||||
|
||||
function waitForChart() {
|
||||
if (typeof Chart !== 'undefined') {
|
||||
console.log('[App] Chart.js 已加载,开始初始化');
|
||||
initCharts();
|
||||
init();
|
||||
} else if (waitCount < maxWait) {
|
||||
waitCount++;
|
||||
console.log('[App] 等待 Chart.js 加载... (' + waitCount + ')');
|
||||
setTimeout(waitForChart, 100);
|
||||
} else {
|
||||
console.error('[App] Chart.js 加载超时');
|
||||
document.getElementById('chartsContainer').innerHTML = '<div class="text-center p-4 text-red-500"><i class="fa fa-exclamation-triangle mr-2"></i>图表库加载失败,请检查网络或刷新页面</div>';
|
||||
// 仍然初始化其他功能
|
||||
init();
|
||||
@@ -692,7 +952,6 @@
|
||||
initCharts();
|
||||
init();
|
||||
} else {
|
||||
console.log('[App] Chart.js 尚未加载,开始等待...');
|
||||
setTimeout(waitForChart, 100);
|
||||
}
|
||||
}
|
||||
@@ -709,7 +968,6 @@
|
||||
fetch(`${API_BASE}/fine-tune/tensorboard/start`, { method: 'POST' })
|
||||
.then(res => res.json())
|
||||
.then(result => {
|
||||
console.log('TensorBoard启动结果:', result);
|
||||
if (result.code === 0) {
|
||||
// 跳转到TensorBoard页面
|
||||
window.open(TB_URL, '_blank');
|
||||
@@ -722,7 +980,6 @@
|
||||
}
|
||||
})
|
||||
.catch(err => {
|
||||
console.error('启动TensorBoard失败:', err);
|
||||
alert('提示: 启动失败 - ' + err.message);
|
||||
btn.innerHTML = '<i class="fa fa-bar-chart mr-1"></i>TensorBoard';
|
||||
btn.className = 'bg-purple-500 text-white px-4 py-2 rounded hover:bg-purple-600 transition-colors text-sm';
|
||||
|
||||
Reference in New Issue
Block a user