GPU检测修改
This commit is contained in:
150
src/main.py
150
src/main.py
@@ -366,6 +366,156 @@ def health_check():
|
||||
return jsonify({'status': 'error', 'code': 1, 'message': str(e)})
|
||||
|
||||
|
||||
# ============ 详细系统监控 ============
|
||||
_BYTES_PER_MIB = 1024 ** 2
_BYTES_PER_GIB = 1024 ** 3


def _nvml_query(default, func, *args):
    """Call an NVML getter and return *default* if the query fails.

    Each metric is queried independently so that one failing query does
    not discard the remaining data collected for the same GPU.

    Args:
        default: Value returned when ``func`` raises.
        func: The pynvml function to call.
        *args: Positional arguments forwarded to ``func``.

    Returns:
        The result of ``func(*args)``, or ``default`` on any exception.
    """
    try:
        return func(*args)
    except Exception as e:
        logger.debug(f"GPU指标查询失败 {getattr(func, '__name__', func)}: {e}")
        return default


def _to_text(value):
    """Return *value* decoded to ``str`` when it is ``bytes``, else unchanged."""
    return value.decode() if isinstance(value, bytes) else value


def _collect_gpu_info():
    """Collect per-GPU metrics through pynvml.

    Returns:
        A list with one dict per GPU that could be queried. The list is
        empty when pynvml cannot be imported or initialised, or when
        enumerating the devices fails.
    """
    try:
        import pynvml
        pynvml.nvmlInit()
    except Exception as e:
        logger.warning(f"获取GPU信息失败: {e}")
        return []

    gpu_list = []
    try:
        # The driver version is system-wide, so query it once rather than
        # once per device.
        driver_version = _to_text(
            _nvml_query('', pynvml.nvmlSystemGetDriverVersion)
        )

        for i in range(pynvml.nvmlDeviceGetCount()):
            try:
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                name = pynvml.nvmlDeviceGetName(handle)

                mem_info = _nvml_query(
                    None, pynvml.nvmlDeviceGetMemoryInfo, handle
                )
                memory_used = mem_info.used if mem_info is not None else 0
                memory_total = mem_info.total if mem_info is not None else 0

                util = _nvml_query(
                    None, pynvml.nvmlDeviceGetUtilizationRates, handle
                )
                gpu_util = util.gpu if util is not None else 0
                mem_util = util.memory if util is not None else 0

                # nvmlDeviceGetTemperature requires the sensor type as its
                # second argument; calling it with only the handle raises
                # TypeError and the temperature would always read as 0.
                temp = _nvml_query(
                    0,
                    pynvml.nvmlDeviceGetTemperature,
                    handle,
                    pynvml.NVML_TEMPERATURE_GPU,
                )
                # Divided by 1000 below to produce the 'power_w' field.
                power = _nvml_query(0, pynvml.nvmlDeviceGetPowerUsage, handle)
                fan_speed = _nvml_query(0, pynvml.nvmlDeviceGetFanSpeed, handle)
                clock = _nvml_query(
                    0, pynvml.nvmlDeviceGetClockInfo, handle, pynvml.NVML_CLOCK_SM
                )
                mem_clock = _nvml_query(
                    0, pynvml.nvmlDeviceGetClockInfo, handle, pynvml.NVML_CLOCK_MEM
                )

                gpu_list.append({
                    'name': _to_text(name),
                    'memory_used_gb': round(memory_used / _BYTES_PER_GIB, 1),
                    'memory_total_gb': round(memory_total / _BYTES_PER_GIB, 1),
                    'gpu_percent': gpu_util,
                    'memory_percent': mem_util,
                    'temperature': temp,
                    'power_w': round(power / 1000, 1) if power > 0 else 0,
                    'fan_speed': fan_speed,
                    'clock_mhz': clock,
                    'memory_clock_mhz': mem_clock,
                    'driver_version': driver_version,
                })
            except Exception as e:
                # Skip a GPU that cannot be queried; keep the others.
                logger.debug(f"获取GPU {i} 信息失败: {e}")
    except Exception as e:
        logger.warning(f"获取GPU信息失败: {e}")
        gpu_list = []
    finally:
        # Always pair nvmlInit() with nvmlShutdown(), even on failure.
        try:
            pynvml.nvmlShutdown()
        except Exception as e:
            logger.debug(f"NVML关闭失败: {e}")

    return gpu_list


@app.route('/api/system-info', methods=['GET'])
def system_info():
    """Return detailed system monitoring information as JSON.

    Collects CPU, memory, disk, network, uptime, process-count and GPU
    figures. On success the response is ``{'code': 0, 'data': {...}}``;
    on any failure it is ``{'code': 1, 'message': <error text>}``.
    """
    import psutil

    try:
        # CPU. With interval=None psutil does not block; it reports usage
        # since the previous call.
        cpu_percent = psutil.cpu_percent(interval=None)
        cpu_counts = psutil.cpu_count()
        cpu_freq = psutil.cpu_freq()

        # Memory
        memory = psutil.virtual_memory()

        # Disk. disk_io_counters() may return None (e.g. diskless hosts).
        disk = psutil.disk_usage('/')
        disk_io = psutil.disk_io_counters()
        disk_read_bytes = disk_io.read_bytes if disk_io else 0
        disk_write_bytes = disk_io.write_bytes if disk_io else 0

        # Network. net_io_counters() may return None without interfaces.
        net_io = psutil.net_io_counters()
        net_sent_bytes = net_io.bytes_sent if net_io else 0
        net_recv_bytes = net_io.bytes_recv if net_io else 0

        # Seconds elapsed since the system booted.
        uptime_seconds = time.time() - psutil.boot_time()

        # GPU collection never raises; it returns [] when unavailable.
        gpu_list = _collect_gpu_info()

        return jsonify({
            'code': 0,
            'data': {
                'cpu': {
                    'percent': cpu_percent,
                    'cores': cpu_counts,
                    'frequency_mhz': cpu_freq.current if cpu_freq else 0
                },
                'memory': {
                    'percent': memory.percent,
                    'used_gb': round(memory.used / _BYTES_PER_GIB, 1),
                    'total_gb': round(memory.total / _BYTES_PER_GIB, 1),
                    'available_gb': round(memory.available / _BYTES_PER_GIB, 1),
                    # 'cached' is not present on every platform.
                    'cached_gb': round(memory.cached / _BYTES_PER_GIB, 1) if hasattr(memory, 'cached') else 0
                },
                'disk': {
                    'percent': disk.percent,
                    'used_gb': round(disk.used / _BYTES_PER_GIB, 0),
                    'total_gb': round(disk.total / _BYTES_PER_GIB, 0),
                    'read_mb': round(disk_read_bytes / _BYTES_PER_MIB, 0),
                    'write_mb': round(disk_write_bytes / _BYTES_PER_MIB, 0)
                },
                'network': {
                    'upload_mb': round(net_sent_bytes / _BYTES_PER_MIB, 1),
                    'download_mb': round(net_recv_bytes / _BYTES_PER_MIB, 1)
                },
                'system': {
                    'uptime_seconds': uptime_seconds,
                    'process_count': len(psutil.pids())
                },
                'gpu': gpu_list
            }
        })
    except Exception as e:
        logger.error(f"获取系统信息失败: {e}")
        return jsonify({'code': 1, 'message': str(e)})
|
||||
|
||||
|
||||
# ============ 通用 CRUD 操作 ============
|
||||
def generic_get_all(table_name, order_by='create_time DESC'):
|
||||
"""通用查询所有"""
|
||||
|
||||
Reference in New Issue
Block a user