diff --git a/requirements.txt b/requirements.txt
index 0c1964c..c91f3e3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ cryptography==41.0.7
requests==2.31.0
psutil==5.9.8
werkzeug==3.0.1
+pynvml==11.5.0
diff --git a/src/main.py b/src/main.py
index 8483209..cbd0394 100644
--- a/src/main.py
+++ b/src/main.py
@@ -366,6 +366,156 @@ def health_check():
return jsonify({'status': 'error', 'code': 1, 'message': str(e)})
+# ============ 详细系统监控 ============
+def _collect_gpu_info():
+    """Return one metrics dict per NVIDIA GPU, or [] when NVML is unusable.
+
+    Collection is best-effort: a metric the board or driver cannot report
+    falls back to 0 ('' for the driver version), a device that cannot be
+    queried is skipped, and a missing pynvml/driver yields an empty list.
+    """
+    try:
+        import pynvml
+        pynvml.nvmlInit()
+    except Exception as e:
+        logger.warning(f"获取GPU信息失败: {e}")
+        return []
+
+    def probe(query, *args, default=0):
+        # Optional metrics (fan, power, clocks, ...) can raise on boards that
+        # do not support them; use the default instead of dropping the GPU.
+        try:
+            return query(*args)
+        except Exception as e:
+            logger.debug(f"NVML {getattr(query, '__name__', query)} 不可用: {e}")
+            return default
+
+    def as_text(value):
+        # pynvml returns bytes or str depending on its version.
+        return value.decode() if isinstance(value, bytes) else value
+
+    gib = 1024 ** 3
+    gpu_list = []
+    try:
+        # The driver version is system-wide, so read it once, not per device.
+        driver_version = as_text(
+            probe(pynvml.nvmlSystemGetDriverVersion, default=''))
+        for i in range(pynvml.nvmlDeviceGetCount()):
+            try:
+                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+                name = as_text(pynvml.nvmlDeviceGetName(handle))
+                mem_info = probe(
+                    pynvml.nvmlDeviceGetMemoryInfo, handle, default=None)
+                util = probe(
+                    pynvml.nvmlDeviceGetUtilizationRates, handle, default=None)
+                # NVML reports power draw in milliwatts.
+                power_mw = probe(pynvml.nvmlDeviceGetPowerUsage, handle)
+                has_mem = mem_info is not None
+                has_util = util is not None
+                gpu_list.append({
+                    'name': name,
+                    'memory_used_gb': round(mem_info.used / gib, 1) if has_mem else 0.0,
+                    'memory_total_gb': round(mem_info.total / gib, 1) if has_mem else 0.0,
+                    'gpu_percent': util.gpu if has_util else 0,
+                    'memory_percent': util.memory if has_util else 0,
+                    # The sensor argument is mandatory; NVML_TEMPERATURE_GPU
+                    # selects the GPU die sensor.
+                    'temperature': probe(
+                        pynvml.nvmlDeviceGetTemperature,
+                        handle, pynvml.NVML_TEMPERATURE_GPU),
+                    'power_w': round(power_mw / 1000, 1) if power_mw > 0 else 0,
+                    # Fan speed is a percentage; clocks are in MHz.
+                    'fan_speed': probe(pynvml.nvmlDeviceGetFanSpeed, handle),
+                    'clock_mhz': probe(
+                        pynvml.nvmlDeviceGetClockInfo,
+                        handle, pynvml.NVML_CLOCK_SM),
+                    'memory_clock_mhz': probe(
+                        pynvml.nvmlDeviceGetClockInfo,
+                        handle, pynvml.NVML_CLOCK_MEM),
+                    'driver_version': driver_version,
+                })
+            except Exception as e:
+                logger.debug(f"获取GPU {i} 信息失败: {e}")
+                continue
+    except Exception as e:
+        logger.warning(f"获取GPU信息失败: {e}")
+        gpu_list = []
+    finally:
+        # Always pair nvmlInit() with nvmlShutdown(), even if enumeration failed.
+        try:
+            pynvml.nvmlShutdown()
+        except Exception as e:
+            logger.debug(f"关闭NVML失败: {e}")
+    return gpu_list
+
+
+@app.route('/api/system-info', methods=['GET'])
+def system_info():
+    """Return a JSON snapshot of host CPU, memory, disk, network and GPU metrics.
+
+    Returns:
+        ``{'code': 0, 'data': {...}}`` on success, or
+        ``{'code': 1, 'message': <error text>}`` if a host metric cannot be
+        read. GPU data is best-effort; ``data['gpu']`` is ``[]`` without NVML.
+    """
+    import psutil
+    gib = 1024 ** 3
+    mib = 1024 ** 2
+    try:
+        # interval=None is non-blocking: it reports usage since the previous call.
+        cpu_percent = psutil.cpu_percent(interval=None)
+        cpu_freq = psutil.cpu_freq()
+        memory = psutil.virtual_memory()
+        disk = psutil.disk_usage('/')
+        # psutil returns None for these on hosts without disks / interfaces.
+        disk_io = psutil.disk_io_counters()
+        net_io = psutil.net_io_counters()
+        uptime_seconds = time.time() - psutil.boot_time()
+
+        return jsonify({
+            'code': 0,
+            'data': {
+                'cpu': {
+                    'percent': cpu_percent,
+                    'cores': psutil.cpu_count(),
+                    'frequency_mhz': cpu_freq.current if cpu_freq else 0,
+                },
+                'memory': {
+                    'percent': memory.percent,
+                    'used_gb': round(memory.used / gib, 1),
+                    'total_gb': round(memory.total / gib, 1),
+                    'available_gb': round(memory.available / gib, 1),
+                    # `cached` is not present on every platform.
+                    'cached_gb': round(memory.cached / gib, 1) if hasattr(memory, 'cached') else 0,
+                },
+                'disk': {
+                    'percent': disk.percent,
+                    'used_gb': round(disk.used / gib, 0),
+                    'total_gb': round(disk.total / gib, 0),
+                    'read_mb': round(disk_io.read_bytes / mib, 0) if disk_io else 0,
+                    'write_mb': round(disk_io.write_bytes / mib, 0) if disk_io else 0,
+                },
+                'network': {
+                    'upload_mb': round(net_io.bytes_sent / mib, 1) if net_io else 0,
+                    'download_mb': round(net_io.bytes_recv / mib, 1) if net_io else 0,
+                },
+                'system': {
+                    'uptime_seconds': uptime_seconds,
+                    'process_count': len(psutil.pids()),
+                },
+                'gpu': _collect_gpu_info(),
+            },
+        })
+    except Exception as e:
+        logger.error(f"获取系统信息失败: {e}")
+        return jsonify({'code': 1, 'message': str(e)})
+
+
# ============ 通用 CRUD 操作 ============
def generic_get_all(table_name, order_by='create_time DESC'):
"""通用查询所有"""
diff --git a/web/pages/fine-tune-create.html b/web/pages/fine-tune-create.html
index 879f137..d6f6361 100644
--- a/web/pages/fine-tune-create.html
+++ b/web/pages/fine-tune-create.html
@@ -826,23 +826,45 @@
}
}
- // 获取GPU数据(模拟数据,实际可从API获取)
+ // 获取GPU数据(从真实API获取)
async function fetchGPUs() {
- // 实际项目中可以调用后端API获取GPU信息
- // const response = await fetch(`${API_BASE}/gpus`);
- // return await response.json();
+     // Load the GPU list from /system-info and map it to the card format used by
+     // the selection UI. Any failure resolves to a single "not detected" card.
+     try {
+         const response = await fetch(`${API_BASE}/system-info`);
+         const result = await response.json();
- // 模拟GPU数据
- return [
- { id: 'gpu0', name: 'NVIDIA A100 80GB', memory: '80GB', cuda_cores: 6912, available: true },
- { id: 'gpu1', name: 'NVIDIA A100 80GB', memory: '80GB', cuda_cores: 6912, available: true },
- { id: 'gpu2', name: 'NVIDIA A100 40GB', memory: '40GB', cuda_cores: 6912, available: true },
- { id: 'gpu3', name: 'NVIDIA A100 40GB', memory: '40GB', cuda_cores: 6912, available: false },
- { id: 'gpu4', name: 'NVIDIA V100 32GB', memory: '32GB', cuda_cores: 5120, available: true },
- { id: 'gpu5', name: 'NVIDIA V100 16GB', memory: '16GB', cuda_cores: 5120, available: false },
- { id: 'gpu6', name: 'NVIDIA RTX 3090', memory: '24GB', cuda_cores: 10496, available: true },
- { id: 'gpu7', name: 'NVIDIA RTX 4090', memory: '24GB', cuda_cores: 16384, available: true }
- ];
+
+         // Surface a backend-reported failure explicitly instead of relying on a
+         // TypeError from reading `gpu` off a missing `data` object.
+         if (result.code !== 0) {
+             throw new Error(result.message || '获取系统信息失败');
+         }
+
+         const gpus = result.data?.gpu || [];
+         if (gpus.length === 0) {
+             throw new Error('未检测到GPU设备');
+         }
+
+         // Convert each backend GPU record into the shape the cards expect.
+         return gpus.map((gpu, index) => ({
+             id: `gpu${index}`,
+             name: gpu.name || `GPU ${index}`,
+             memory: `${gpu.memory_total_gb}GB`,
+             cuda_cores: 'N/A',
+             // NOTE(review): the backend defaults gpu_percent to 0, so this is
+             // true for every GPU it returns; it does not reflect real occupancy.
+             available: gpu.power_w > 0 || gpu.gpu_percent >= 0,
+             // Keep the raw record so the card can show live readings.
+             real_data: gpu
+         }));
+     } catch (error) {
+         console.warn('获取GPU信息失败,使用模拟数据:', error);
+         // Fallback placeholder shown when no GPU data could be obtained.
+         return [
+             { id: 'gpu0', name: 'NVIDIA GPU (未检测到)', memory: 'Unknown', cuda_cores: 'N/A', available: false }
+         ];
+     }
}
// 渲染GPU列表(点击卡片选中,无需复选框)
@@ -850,7 +872,18 @@
const container = document.getElementById('gpuSelectionArea');
if (!container) return;
- container.innerHTML = gpus.map(gpu => `
+ container.innerHTML = gpus.map(gpu => {
+ // 从真实数据中提取监控信息
+ const realData = gpu.real_data || {};
+ const memoryUsed = realData.memory_used_gb || 0;
+ const memoryTotal = realData.memory_total_gb || 0;
+ const temp = realData.temperature || 0;
+ const power = realData.power_w || 0;
+ const gpuPercent = realData.gpu_percent || 0;
+ const fanSpeed = realData.fan_speed || 0;
+ const clock = realData.clock_mhz || 0;
+
+ return `
'
: ''}
-
-
${gpu.memory}
-
${gpu.cuda_cores} CUDA
+
+ ${memoryUsed}/${memoryTotal} GB
+ ${temp}°C
+ ${power} W
+ ${clock} MHz
+
+
+
+
+ GPU: ${gpuPercent}%
+ Fan: ${fanSpeed}%
+
+
- `).join('');
+ `}).join('');
}
// 切换GPU选择状态
@@ -884,11 +930,16 @@
if (card.classList.contains('border-primary')) {
// 取消选中
card.classList.remove('border-primary', 'bg-blue-50');
- card.querySelector('.fa-check-circle').classList.replace('text-primary', 'text-green-600');
+ // 恢复图标为可选中状态(绿色勾选圈)
+ const icon = card.querySelector('.fa-check, .fa-check-circle');
+ if (icon) {
+ icon.classList.remove('fa-check', 'text-primary');
+ icon.classList.add('fa-check-circle', 'text-green-600');
+ }
} else {
// 选中
card.classList.add('border-primary', 'bg-blue-50');
- // 移除检查图标,添加选中标记
+ // 切换图标为已选中状态(蓝色勾选)
const icon = card.querySelector('.fa-check-circle');
if (icon) {
icon.classList.remove('fa-check-circle', 'text-green-600');
@@ -900,7 +951,17 @@
// 获取选中的GPU列表
function getSelectedGPUs() {
const cards = document.querySelectorAll('.gpu-card.border-primary');
- return Array.from(cards).map(card => card.dataset.gpuId);
+ // Describe each selected card by its id and the display name rendered on it.
+ return Array.from(cards, (card) => {
+     const { gpuId } = card.dataset;
+     const label = card.querySelector('.text-gray-700');
+     // Fall back to the id when the card has no name element.
+     return { id: gpuId, name: label ? label.textContent : gpuId };
+ });
}
// 提交表单
diff --git a/web/pages/main.html b/web/pages/main.html
index 3e73bb7..a889fb1 100644
--- a/web/pages/main.html
+++ b/web/pages/main.html
@@ -1721,8 +1721,69 @@
let refreshTimer = null;
let currentRefreshInterval = 5000;
- // 刷新硬件信息
- function refreshHardwareInfo() {
+ // 刷新硬件信息(使用真实API)
+ async function refreshHardwareInfo() {
+     // Pull live metrics from /system-info and refresh every hardware panel.
+     // Anything that throws (network error, invalid JSON, missing element)
+     // falls back to useMockData().
+     try {
+         const response = await fetch(`${API_BASE}/system-info`);
+         const result = await response.json();
+
+         if (result.code !== 0 || !result.data) {
+             // The backend answered but reported a failure: keep the current
+             // readings on screen and make the problem visible in the console.
+             console.warn('获取系统信息失败:', result.message);
+             return;
+         }
+         const data = result.data;
+
+         // CPU
+         const cpu = data.cpu || {};
+         const cpuPercent = cpu.percent || 0;
+         document.getElementById('cpuPercent').textContent = cpuPercent + '%';
+         document.getElementById('cpuBar').style.width = cpuPercent + '%';
+         document.getElementById('cpuCores').textContent = (cpu.cores || 0) + ' 核心';
+
+         // Memory
+         const mem = data.memory || {};
+         const memPercent = mem.percent || 0;
+         document.getElementById('memoryPercent').textContent = memPercent + '%';
+         document.getElementById('memoryBar').style.width = memPercent + '%';
+         document.getElementById('memoryUsed').textContent = (mem.used_gb || 0) + ' GB';
+         document.getElementById('memoryAvailable').textContent = (mem.available_gb || 0) + ' GB';
+         document.getElementById('memoryCached').textContent = (mem.cached_gb || 0) + ' GB';
+
+         // Disk
+         const disk = data.disk || {};
+         const diskUsed = disk.used_gb || 0;
+         const diskTotal = disk.total_gb || 0;
+         const diskPercent = disk.percent || 0;
+         document.getElementById('diskPercent').textContent = diskPercent + '%';
+         document.getElementById('diskBar').style.width = diskPercent + '%';
+         document.getElementById('diskUsed').textContent = diskUsed + ' GB';
+         document.getElementById('diskAvailable').textContent = (diskTotal - diskUsed) + ' GB';
+
+         // Network: the API reports cumulative traffic in MB (upload_mb /
+         // download_mb) while these fields are labelled GB, so convert first.
+         const net = data.network || {};
+         const mbToGb = (mb) => ((mb || 0) / 1024).toFixed(2);
+         document.getElementById('totalDownload').textContent = mbToGb(net.download_mb) + ' GB';
+         document.getElementById('totalUpload').textContent = mbToGb(net.upload_mb) + ' GB';
+
+         // System: split the uptime in seconds into days / hours / minutes.
+         const sys = data.system || {};
+         const uptime = sys.uptime_seconds || 0;
+         const days = Math.floor(uptime / 86400);
+         const hours = Math.floor((uptime % 86400) / 3600);
+         const mins = Math.floor((uptime % 3600) / 60);
+         document.getElementById('uptime').textContent = days + ' 天 ' + hours + ' 时 ' + mins + ' 分';
+         document.getElementById('processCount').textContent = sys.process_count || 0;
+
+         // GPU cards are rendered from the same payload.
+         updateGPUInfo(data.gpu || []);
+     } catch (error) {
+         console.error('获取系统信息失败:', error);
+         // The API call failed: show mock data as a fallback.
+         useMockData();
+     }
+ }
+
+ // 使用模拟数据(当API不可用时)
+ function useMockData() {
// 更新CPU
const cpuUsage = Math.floor(Math.random() * 30) + 20;
document.getElementById('cpuPercent').textContent = cpuUsage + '%';
@@ -1768,10 +1829,6 @@
document.getElementById('uptime').textContent = days + ' 天 ' + hours + ' 时 ' + mins + ' 分';
document.getElementById('processCount').textContent = Math.floor(Math.random() * 200 + 100);
document.getElementById('loadAvg').textContent = (Math.random() * 2).toFixed(2) + ', ' + (Math.random() * 1.5).toFixed(2) + ', ' + (Math.random() * 1).toFixed(2);
-
- // 更新时间
- // const now = new Date();
- // document.getElementById('updateTime').textContent = now.toLocaleTimeString('zh-CN');
}
// GPU配置 - 支持模拟1-8块GPU
@@ -1787,95 +1844,167 @@
{ name: 'NVIDIA RTX 4080', memory: 16 }
];
- // 初始化GPU列表
- function initGPUList() {
- const gpuList = document.getElementById('gpuList');
- const gpuCount = Math.min(GPU_COUNT, 8);
- document.getElementById('gpuCount').textContent = `检测到 ${gpuCount} 块 GPU`;
-
- let gpuCardsHTML = '';
- for (let i = 0; i < gpuCount; i++) {
- const config = gpuConfigs[i % gpuConfigs.length];
- gpuCardsHTML += `
-
-
-
-
-
-
-
-
${config.name}
-
PCIe ${Math.floor(Math.random() * 4 + 1)}:00.0
-
-
-
- 0%
-
-
-
-
-
-
显存
-
0/${config.memory}G
-
-
-
-
-
-
- `;
+ // 初始化GPU列表(获取真实数据)
+ async function initGPUList() {
+     // Seed the GPU panel with one /system-info request; any failure shows mock cards.
+     try {
+         const result = await fetch(`${API_BASE}/system-info`).then((res) => res.json());
+         const { data } = result;
+         updateGPUInfo((data && data.gpu) || []);
+     } catch (err) {
+         console.error('初始化GPU列表失败:', err);
+         useMockGPUData();
}
- gpuList.innerHTML = gpuCardsHTML;
}
// 更新GPU信息
- function updateGPUInfo() {
+ function updateGPUInfo(gpuData) {
+     // Render one card per GPU from the /system-info payload and update the
+     // VRAM total. With no GPU data, fall back to useMockGPUData().
+     if (gpuData && gpuData.length > 0) {
+         const gpuCount = gpuData.length;
+         document.getElementById('gpuCount').textContent = `检测到 ${gpuCount} 块 GPU`;
+
+         // Sum VRAM up front so the total is right even when #gpuList is absent.
+         // Number(...) || 0 keeps a missing field from turning the sum into NaN.
+         let totalUsedMemory = 0;
+         let totalMemory = 0;
+         for (const gpu of gpuData) {
+             totalUsedMemory += Number(gpu.memory_used_gb) || 0;
+             totalMemory += Number(gpu.memory_total_gb) || 0;
+         }
+
+         // Rebuild the whole card list on each call.
+         const gpuList = document.getElementById('gpuList');
+         if (gpuList) {
+             let gpuCardsHTML = '';
+             for (let i = 0; i < gpuCount; i++) {
+                 const gpu = gpuData[i];
+
+ gpuCardsHTML += `
+
+
+
+
+ ${gpu.gpu_percent}%
+
+
+
+
+
+
显存
+
${gpu.memory_used_gb}/${gpu.memory_total_gb} GB
+
+
+
温度
+
${gpu.temperature}°C
+
+
+
功耗
+
${gpu.power_w} W
+
+
+
Fan
+
${gpu.fan_speed || 0}%
+
+
+
+
Clock: ${gpu.clock_mhz || 0} MHz
+
Driver: ${gpu.driver_version || '-'}
+
+
+ `;
+             }
+             gpuList.innerHTML = gpuCardsHTML;
+         }
+
+         // Round the float sums to one decimal so they match the per-GPU values.
+         const gpuTotalMem = document.getElementById('gpuTotalMemory');
+         if (gpuTotalMem) {
+             gpuTotalMem.textContent = `${totalUsedMemory.toFixed(1)}/${totalMemory.toFixed(1)} GB`;
+         }
+         return;
+     }
+
+     // No real data: show mock GPU cards instead.
+     useMockGPUData();
+ }
+
+ // 使用模拟GPU数据
+ function useMockGPUData() {
+ // Render placeholder GPU cards with randomized readings (at most 8, cycling
+ // through gpuConfigs) and update the GPU count and total-VRAM labels.
const gpuCount = Math.min(GPU_COUNT, 8);
let totalUsedMemory = 0;
let totalMemory = 0;
- for (let i = 0; i < gpuCount; i++) {
- const config = gpuConfigs[i % gpuConfigs.length];
- const gpuUsage = Math.floor(Math.random() * 60 + 20);
- const memUsed = (Math.random() * config.memory * 0.7 + config.memory * 0.1).toFixed(1);
- const temp = Math.floor(Math.random() * 30 + 40);
- const power = Math.floor(Math.random() * 150 + 100);
- const fan = Math.floor(gpuUsage + Math.random() * 10);
+ // Rebuild the GPU card list from scratch.
+ const gpuList = document.getElementById('gpuList');
+ if (gpuList) {
+ let gpuCardsHTML = '';
+ for (let i = 0; i < gpuCount; i++) {
+ const config = gpuConfigs[i % gpuConfigs.length];
+ const gpuUsage = Math.floor(Math.random() * 60 + 20);
+ const memUsed = (Math.random() * config.memory * 0.7 + config.memory * 0.1).toFixed(1);
+ const temp = Math.floor(Math.random() * 30 + 40);
+ const power = Math.floor(Math.random() * 150 + 100);
+ const fan = Math.floor(gpuUsage + Math.random() * 10);
- totalUsedMemory += parseFloat(memUsed);
- totalMemory += config.memory;
+ // NOTE(review): totals are only accumulated when #gpuList exists; without it the label below shows 0.0/0 GB.
+ totalUsedMemory += parseFloat(memUsed);
+ totalMemory += config.memory;
- document.getElementById(`gpuPercent${i}`).textContent = gpuUsage + '%';
- document.getElementById(`gpuBar${i}`).style.width = gpuUsage + '%';
- document.getElementById(`gpuMem${i}`).textContent = `${parseFloat(memUsed).toFixed(1)}/${config.memory} GB`;
- document.getElementById(`gpuTemp${i}`).textContent = temp + '°C';
- document.getElementById(`gpuPower${i}`).textContent = power + ' W';
- document.getElementById(`gpuFan${i}`).textContent = fan + '%';
-
- // 根据温度改变颜色
- const tempEl = document.getElementById(`gpuTemp${i}`);
- if (temp >= 80) {
- tempEl.className = 'font-medium text-red-600';
- } else if (temp >= 70) {
- tempEl.className = 'font-medium text-yellow-600';
- } else {
- tempEl.className = 'font-medium text-gray-800';
+ gpuCardsHTML += `
+
+
+
+
+
+
+
+
${config.name}
+
PCIe ${Math.floor(Math.random() * 4 + 1)}:00.0
+
+
+
+ ${gpuUsage}%
+
+
+
+
+
+
显存
+
${parseFloat(memUsed).toFixed(1)}/${config.memory} GB
+
+
+
+
+
+
+ `;
}
+ gpuList.innerHTML = gpuCardsHTML;
+ document.getElementById('gpuCount').textContent = `检测到 ${gpuCount} 块 GPU`;
}
// 更新总显存
- document.getElementById('gpuTotalMemory').textContent = `${totalUsedMemory.toFixed(1)}/${totalMemory} GB`;
+ const gpuTotalMem = document.getElementById('gpuTotalMemory');
+ if (gpuTotalMem) {
+ gpuTotalMem.textContent = `${totalUsedMemory.toFixed(1)}/${totalMemory} GB`;
+ }
}
// 启动硬件监控自动刷新