GPU检测修改

This commit is contained in:
2026-01-26 16:18:23 +08:00
parent 18e88601c0
commit b7cd8097ac
4 changed files with 445 additions and 104 deletions

View File

@@ -366,6 +366,156 @@ def health_check():
return jsonify({'status': 'error', 'code': 1, 'message': str(e)})
# ============ 详细系统监控 ============
def _nvml_query(getter, default=0):
    """Run one NVML getter and return *default* if it raises.

    Each metric is read on a best-effort basis so that a single failing
    query does not discard the whole device entry.
    """
    try:
        return getter()
    except Exception:
        return default


def _nvml_temperature(pynvml, handle):
    """Read the GPU temperature for *handle*.

    NVML's temperature query takes a sensor selector in addition to the
    device handle. The handle-only call is kept as a fallback in case the
    installed binding only exposes the one-argument form.
    """
    try:
        return pynvml.nvmlDeviceGetTemperature(
            handle, pynvml.NVML_TEMPERATURE_GPU
        )
    except (TypeError, AttributeError):
        return pynvml.nvmlDeviceGetTemperature(handle)


def _collect_nvml_gpu_info():
    """Return a list with one metrics dict per NVIDIA GPU.

    Returns an empty list when pynvml is unavailable, NVML cannot be
    initialised, or the device enumeration fails. A GPU whose handle or
    name cannot be read is skipped; every other metric falls back to 0
    (or '' for the driver version).
    """
    try:
        import pynvml
        pynvml.nvmlInit()
    except Exception as e:
        logger.warning(f"获取GPU信息失败: {e}")
        return []

    gib = 1024 ** 3
    gpus = []
    try:
        # The driver version is system-wide, so query it once instead of
        # once per device.
        version = _nvml_query(
            lambda: pynvml.nvmlSystemGetDriverVersion(), default=''
        )
        if isinstance(version, bytes):
            version = version.decode()

        for i in range(pynvml.nvmlDeviceGetCount()):
            try:
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                name = pynvml.nvmlDeviceGetName(handle)

                mem_info = _nvml_query(
                    lambda: pynvml.nvmlDeviceGetMemoryInfo(handle),
                    default=None,
                )
                memory_used = mem_info.used if mem_info is not None else 0
                memory_total = mem_info.total if mem_info is not None else 0

                util = _nvml_query(
                    lambda: pynvml.nvmlDeviceGetUtilizationRates(handle),
                    default=None,
                )
                gpu_util = util.gpu if util is not None else 0
                mem_util = util.memory if util is not None else 0

                temp = _nvml_query(lambda: _nvml_temperature(pynvml, handle))
                power = _nvml_query(
                    lambda: pynvml.nvmlDeviceGetPowerUsage(handle)
                )
                fan_speed = _nvml_query(
                    lambda: pynvml.nvmlDeviceGetFanSpeed(handle)
                )
                clock = _nvml_query(
                    lambda: pynvml.nvmlDeviceGetClockInfo(
                        handle, pynvml.NVML_CLOCK_SM
                    )
                )
                mem_clock = _nvml_query(
                    lambda: pynvml.nvmlDeviceGetClockInfo(
                        handle, pynvml.NVML_CLOCK_MEM
                    )
                )

                gpus.append({
                    # Older bindings return bytes, newer ones return str.
                    'name': name.decode() if isinstance(name, bytes) else name,
                    'memory_used_gb': round(memory_used / gib, 1),
                    'memory_total_gb': round(memory_total / gib, 1),
                    'gpu_percent': gpu_util,
                    'memory_percent': mem_util,
                    'temperature': temp,
                    # The raw reading is divided by 1000 for the watt field.
                    'power_w': round(power / 1000, 1) if power > 0 else 0,
                    'fan_speed': fan_speed,
                    'clock_mhz': clock,
                    'memory_clock_mhz': mem_clock,
                    'driver_version': version,
                })
            except Exception as e:
                logger.debug(f"获取GPU {i} 信息失败: {e}")
                continue
        return gpus
    except Exception as e:
        logger.warning(f"获取GPU信息失败: {e}")
        return []
    finally:
        # Always pair a successful nvmlInit() with nvmlShutdown(), even when
        # enumeration fails part-way through.
        try:
            pynvml.nvmlShutdown()
        except Exception as e:
            logger.debug(f"关闭NVML失败: {e}")


@app.route('/api/system-info', methods=['GET'])
def system_info():
    """Return detailed system monitoring information as JSON.

    On success the response is ``{'code': 0, 'data': {...}}`` where ``data``
    holds ``cpu``, ``memory``, ``disk``, ``network``, ``system`` and ``gpu``
    sections. If collecting the metrics raises, the response is
    ``{'code': 1, 'message': <error text>}``.

    GPU collection is best-effort: any failure there yields an empty
    ``gpu`` list rather than an error response.
    """
    import psutil

    gib = 1024 ** 3
    mib = 1024 ** 2
    try:
        # CPU. interval=None is non-blocking: it compares against the
        # previous call instead of sampling for a fixed period.
        cpu_percent = psutil.cpu_percent(interval=None)
        cpu_counts = psutil.cpu_count()
        cpu_freq = psutil.cpu_freq()

        # Memory
        memory = psutil.virtual_memory()

        # Disk. disk_io_counters() may return None (e.g. no disks visible),
        # so the I/O totals fall back to 0 instead of failing the request.
        disk = psutil.disk_usage('/')
        disk_io = psutil.disk_io_counters()

        # Network. net_io_counters() may likewise return None.
        net_io = psutil.net_io_counters()

        # Time elapsed since boot
        uptime_seconds = time.time() - psutil.boot_time()

        # GPU
        gpu_list = _collect_nvml_gpu_info()

        return jsonify({
            'code': 0,
            'data': {
                'cpu': {
                    'percent': cpu_percent,
                    'cores': cpu_counts,
                    'frequency_mhz': cpu_freq.current if cpu_freq else 0
                },
                'memory': {
                    'percent': memory.percent,
                    'used_gb': round(memory.used / gib, 1),
                    'total_gb': round(memory.total / gib, 1),
                    'available_gb': round(memory.available / gib, 1),
                    # The 'cached' field is not present on every platform.
                    'cached_gb': round(memory.cached / gib, 1) if hasattr(memory, 'cached') else 0
                },
                'disk': {
                    'percent': disk.percent,
                    'used_gb': round(disk.used / gib, 0),
                    'total_gb': round(disk.total / gib, 0),
                    'read_mb': round(disk_io.read_bytes / mib, 0) if disk_io else 0,
                    'write_mb': round(disk_io.write_bytes / mib, 0) if disk_io else 0
                },
                'network': {
                    'upload_mb': round(net_io.bytes_sent / mib, 1) if net_io else 0,
                    'download_mb': round(net_io.bytes_recv / mib, 1) if net_io else 0
                },
                'system': {
                    'uptime_seconds': uptime_seconds,
                    'process_count': len(psutil.pids())
                },
                'gpu': gpu_list
            }
        })
    except Exception as e:
        logger.error(f"获取系统信息失败: {e}")
        return jsonify({'code': 1, 'message': str(e)})
# ============ 通用 CRUD 操作 ============
def generic_get_all(table_name, order_by='create_time DESC'):
"""通用查询所有"""