use lazy_static::lazy_static;
use std::collections::VecDeque;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};
lazy_static! {
/// Process-wide GPU state, built with `GpuState::new()` the first time it is accessed.
///
/// `spawn_gpu_monitor` clones this `Arc` both for its background task and for its
/// return value, so every caller observes the same `GpuState` instance.
pub static ref GLOBAL_GPU_STATE: Arc<GpuState> = Arc::new(GpuState::new());
}
/// Latest GPU readings plus a short history, shareable between threads.
///
/// The two memory counters are atomics; the name and the history sit behind `RwLock`s.
/// All fields start out as set by `GpuState::new` and are later overwritten by the task
/// started in `spawn_gpu_monitor`.
#[derive(Debug)]
pub struct GpuState {
/// Memory currently in use (the `memory.used` column of `nvidia-smi`); 0 until the
/// first successful poll.
pub used_mib: AtomicU32,
/// Total memory (the `memory.total` column of `nvidia-smi`); 0 until the first
/// successful poll. `ratio` and `label` treat 0 as "no data".
pub total_mib: AtomicU32,
/// Device name. Starts as the placeholder `"GPU"`; the monitor replaces it with the
/// first non-empty name it reads and leaves it alone afterwards.
pub name: RwLock<String>,
/// Recorded samples, oldest at the front. The monitor appends at the back and drops
/// from the front so that no more than 10 points are kept.
pub history: RwLock<VecDeque<HistoryPoint>>,
}
/// One recorded sample of the GPU metrics, as stored in `GpuState::history`.
///
/// Every field except `timestamp` is copied verbatim from the `GpuMetrics` value that
/// `poll_nvidia_smi` produced for that poll.
#[derive(Debug, Clone)]
pub struct HistoryPoint {
/// Local wall-clock time at which the sample was appended to the history.
pub timestamp: chrono::DateTime<chrono::Local>,
/// `memory.used` column; the field name indicates MiB.
pub used_mib: u32,
/// `temperature.gpu` column. NOTE(review): presumably degrees Celsius — confirm
/// against the nvidia-smi documentation.
pub temperature: u32,
/// `clocks.current.graphics` column. NOTE(review): presumably MHz — confirm.
pub core_clock: u32,
/// `clocks.current.memory` column. NOTE(review): presumably MHz — confirm.
pub mem_clock: u32,
/// `power.draw` column; 0.0 when the column could not be parsed as a number.
/// NOTE(review): presumably watts — confirm.
pub power_draw: f32,
/// `fan.speed` column; 0 when the column could not be parsed as a number.
/// NOTE(review): presumably a percentage — confirm.
pub fan_speed: u32,
/// Raw text of the `clocks_throttle_reasons.active` column, stored without decoding.
pub throttle_reasons: String,
}
impl Default for GpuState {
fn default() -> Self {
Self::new()
}
}
impl GpuState {
    /// Creates an empty state: both memory counters at 0, the placeholder name `"GPU"`,
    /// and an empty history with room for 10 points preallocated.
    pub fn new() -> Self {
        Self {
            used_mib: AtomicU32::new(0),
            total_mib: AtomicU32::new(0),
            name: RwLock::new("GPU".into()),
            history: RwLock::new(VecDeque::with_capacity(10)),
        }
    }

    /// Returns `(used_mib, total_mib)` as currently stored.
    ///
    /// The two values are loaded independently with relaxed ordering, so they are not
    /// guaranteed to come from the same poll.
    pub fn read(&self) -> (u32, u32) {
        let used = self.used_mib.load(Ordering::Relaxed);
        let total = self.total_mib.load(Ordering::Relaxed);
        (used, total)
    }

    /// Returns the used/total memory fraction, clamped to `0.0..=1.0`.
    ///
    /// Yields `0.0` while the total is still 0, which avoids a division by zero.
    pub fn ratio(&self) -> f64 {
        let (used, total) = self.read();
        if total == 0 {
            return 0.0;
        }
        // `u32 -> f64` is lossless, so `f64::from` states the intent better than `as`.
        (f64::from(used) / f64::from(total)).clamp(0.0, 1.0)
    }

    /// Returns a display string such as `"2.0 GB / 8.0 GB"`, or `"N/A"` while the total
    /// is still 0.
    ///
    /// The stored values are divided by 1024 and printed with one decimal place.
    pub fn label(&self) -> String {
        let (used, total) = self.read();
        if total == 0 {
            return "N/A".into();
        }
        format!(
            "{:.1} GB / {:.1} GB",
            f64::from(used) / 1024.0,
            f64::from(total) / 1024.0
        )
    }

    /// Returns a copy of the current device name.
    ///
    /// A poisoned lock is tolerated: the guarded `String` is still a valid value after a
    /// writer panicked, so the guard is recovered from the error instead of panicking in
    /// what is a read-only accessor.
    pub fn gpu_name(&self) -> String {
        self.name
            .read()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .clone()
    }
}
pub fn spawn_gpu_monitor() -> Arc<GpuState> {
let state = GLOBAL_GPU_STATE.clone();
let bg = state.clone();
tokio::spawn(async move {
let mut poll_count = 0u64;
loop {
if let Some(metrics) = poll_nvidia_smi().await {
bg.used_mib.store(metrics.used_mib, Ordering::Relaxed);
bg.total_mib.store(metrics.total_mib, Ordering::Relaxed);
if !metrics.name.is_empty() {
let mut name = bg.name.write().unwrap();
if *name == "GPU" {
*name = metrics.name;
}
}
if poll_count.is_multiple_of(60) {
let mut history = bg.history.write().unwrap();
history.push_back(HistoryPoint {
timestamp: chrono::Local::now(),
used_mib: metrics.used_mib,
temperature: metrics.temperature,
core_clock: metrics.core_clock,
mem_clock: metrics.mem_clock,
power_draw: metrics.power_draw,
fan_speed: metrics.fan_speed,
throttle_reasons: metrics.throttle_reasons,
});
if history.len() > 10 {
history.pop_front();
}
}
}
poll_count += 1;
tokio::time::sleep(std::time::Duration::from_secs(2)).await;
}
});
state
}
/// One parsed row of `nvidia-smi` CSV output, with fields in query-column order.
///
/// Derives `Debug` and `Clone` so it can be logged and copied like [`HistoryPoint`],
/// which carries the same readings.
#[derive(Debug, Clone)]
pub struct GpuMetrics {
    /// `memory.used` column.
    pub used_mib: u32,
    /// `memory.total` column.
    pub total_mib: u32,
    /// `name` column, trimmed; may be empty.
    pub name: String,
    /// `temperature.gpu` column.
    pub temperature: u32,
    /// `clocks.current.graphics` column.
    pub core_clock: u32,
    /// `clocks.current.memory` column.
    pub mem_clock: u32,
    /// `power.draw` column; 0.0 when the column is not a number.
    pub power_draw: f32,
    /// `fan.speed` column; 0 when the column is not a number.
    pub fan_speed: u32,
    /// Raw text of the `clocks_throttle_reasons.active` column.
    pub throttle_reasons: String,
}
/// Runs `nvidia-smi` once and parses the first GPU's row into a [`GpuMetrics`].
///
/// Returns `None` when the command cannot be started, exits with a failure status, prints
/// no non-empty row, or prints a row that `parse_nvidia_smi_row` rejects.
///
/// `nvidia-smi` prints one CSV row per GPU. Only the first row is used; splitting the whole
/// output on commas would otherwise merge the last column of the first row with the first
/// column of the second one on multi-GPU machines.
async fn poll_nvidia_smi() -> Option<GpuMetrics> {
    let output = tokio::process::Command::new("nvidia-smi")
        .args([
            "--query-gpu=memory.used,memory.total,name,temperature.gpu,clocks.current.graphics,clocks.current.memory,power.draw,fan.speed,clocks_throttle_reasons.active",
            "--format=csv,noheader,nounits",
        ])
        .output()
        .await
        .ok()?;
    if !output.status.success() {
        return None;
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    let first_row = stdout.lines().map(str::trim).find(|row| !row.is_empty())?;
    parse_nvidia_smi_row(first_row)
}

/// Parses one comma-separated row in the column order requested by `poll_nvidia_smi`.
///
/// Returns `None` when the row has fewer than nine columns or when the memory,
/// temperature or clock columns are not unsigned integers. Unparsable power-draw and
/// fan-speed columns fall back to 0; columns beyond the ninth are ignored.
fn parse_nvidia_smi_row(row: &str) -> Option<GpuMetrics> {
    let mut columns = row.split(',').map(str::trim);
    // Field initialisers run in source order, which matches the query's column order.
    Some(GpuMetrics {
        used_mib: columns.next()?.parse().ok()?,
        total_mib: columns.next()?.parse().ok()?,
        name: columns.next()?.to_string(),
        temperature: columns.next()?.parse().ok()?,
        core_clock: columns.next()?.parse().ok()?,
        mem_clock: columns.next()?.parse().ok()?,
        power_draw: columns.next()?.parse().unwrap_or(0.0),
        fan_speed: columns.next()?.parse().unwrap_or(0),
        throttle_reasons: columns.next()?.to_string(),
    })
}