vor 0.1.0

Cross-platform performance instrumentation with an in-app egui panel and live system and GPU metrics.
Documentation
//! GPU-system metrics rendered as system rows in the panel.
//!
//! [`ensure_collector`] spawns a background thread that polls the
//! platform GPU backend and stores the latest reading in atomics;
//! [`PanelState::tick`](crate::viz::PanelState::tick) snapshots them
//! into each frame's [`SystemSample`](crate::viz::SystemSample). The
//! panel starts the collector itself, so callers wire up nothing.
//!
//! Backends are feature-gated:
//! - `mac`: IOKit `IOAccelerator` (util / SM) + the private IOReport
//!   framework (power), no `sudo`. Apple Silicon uses unified memory
//!   with no PCIe link to the GPU, so that row stays zero.
//! - `cuda`: NVML (util / power / PCIe / temperature / clock). NVML
//!   has no SM-occupancy counter, so that row is dropped on NVIDIA.
//! - `web` / no platform feature: no API available; no GPU rows.

#[cfg(all(feature = "gpu", feature = "viz"))]
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};

#[cfg(all(feature = "mac", feature = "viz"))]
mod mac;
#[cfg(all(feature = "cuda", not(feature = "mac"), feature = "viz"))]
mod nvml;

// Pick the platform backend. `mac` wins if both are somehow enabled;
// they never coexist on real hardware.
#[cfg(all(feature = "mac", feature = "viz"))]
use mac::Sampler;
#[cfg(all(feature = "cuda", not(feature = "mac"), feature = "viz"))]
use nvml::Sampler;

// Latest reading published by `store` and consumed by the `read_*`
// accessors below. `f32` values are kept as their `to_bits()` pattern
// in an `AtomicU32`; every slot starts at zero, which decodes to `0.0`
// / `0` until the first poll has been stored.

/// GPU utilization (`f32` bit pattern).
#[cfg(all(feature = "gpu", feature = "viz"))]
static GPU_UTIL_BITS: AtomicU32 = AtomicU32::new(0);
/// SM figure reported by the backend (`f32` bit pattern).
#[cfg(all(feature = "gpu", feature = "viz"))]
static GPU_SM_BITS: AtomicU32 = AtomicU32::new(0);
/// PCIe throughput as reported in `GpuReading::pcie_bps`.
#[cfg(all(feature = "gpu", feature = "viz"))]
static PCIE_BPS: AtomicU64 = AtomicU64::new(0);
/// GPU power from `GpuReading::power_w` (`f32` bit pattern).
#[cfg(all(feature = "gpu", feature = "viz"))]
static GPU_POWER_BITS: AtomicU32 = AtomicU32::new(0);
/// GPU memory figure from `GpuReading::mem_bytes`.
#[cfg(all(feature = "gpu", feature = "viz"))]
static GPU_MEM_BYTES: AtomicU64 = AtomicU64::new(0);
/// GPU temperature from `GpuReading::temp_c` (`f32` bit pattern).
#[cfg(all(feature = "gpu", feature = "viz"))]
static GPU_TEMP_BITS: AtomicU32 = AtomicU32::new(0);
/// GPU clock from `GpuReading::clock_mhz` (`f32` bit pattern).
#[cfg(all(feature = "gpu", feature = "viz"))]
static GPU_CLOCK_BITS: AtomicU32 = AtomicU32::new(0);

/// One poll of the platform GPU backend. `sm` is macOS-only; `temp`
/// and `clock` are NVIDIA-only; backends leave fields they lack zero.
///
/// Produced by `Sampler::poll` and handed to `store`, which publishes
/// each field to its matching atomic.
#[cfg(all(feature = "gpu", feature = "viz"))]
#[derive(Clone, Copy)]
struct GpuReading {
    /// GPU utilization; the backend tests expect a value in `0..=100`.
    util: f32,
    /// SM occupancy (macOS only); the macOS test expects `0..=100`.
    sm: f32,
    /// PCIe throughput — presumably bytes per second; TODO confirm the
    /// unit against the NVML backend.
    pcie_bps: u64,
    /// Power draw, in watts going by the field name; the backend tests
    /// expect a finite, non-negative value.
    power_w: f32,
    /// GPU memory figure in bytes — presumably memory in use; confirm
    /// against the backends.
    mem_bytes: u64,
    /// Temperature (NVIDIA only), presumably degrees Celsius.
    temp_c: f32,
    /// Clock (NVIDIA only), presumably MHz.
    clock_mhz: f32,
}

/// Ensure the background GPU poller is running.
///
/// Idempotent and cheap to re-call: the spawn is attempted at most
/// once per process. Called from
/// [`PanelState::new`](crate::viz::PanelState::new) so collection
/// begins with the panel.
///
/// Best-effort: if the OS refuses to create the thread, the failure
/// is reported on stderr and the GPU readings keep their initial zero
/// values; the host application is never panicked from here.
#[cfg(all(feature = "gpu", feature = "viz"))]
pub(crate) fn ensure_collector() {
    use std::sync::Once;
    use std::time::Duration;

    static START: Once = Once::new();
    START.call_once(|| {
        // ~4 Hz tracks load without busy-polling the source. On macOS
        // power is ΔEnergy / Δt, so the interval is also its window.
        const POLL: Duration = Duration::from_millis(250);
        let spawned = std::thread::Builder::new()
            .name("vor-gpu".into())
            .spawn(|| {
                let mut sampler = Sampler::new();
                loop {
                    // Sleep first so the initial poll already spans a
                    // full interval.
                    std::thread::sleep(POLL);
                    store(sampler.poll());
                }
            });
        // Do not unwrap: a panic inside `call_once` poisons `START`,
        // which would make every later call panic as well. Missing GPU
        // rows are preferable to taking down the instrumented app.
        if let Err(err) = spawned {
            eprintln!("vor: failed to spawn GPU collector thread: {err}");
        }
    });
}

/// No-op stand-in for builds with `viz` but without `gpu`: there is no
/// backend to poll, so the function exists only to let callers invoke
/// `ensure_collector` unconditionally.
#[cfg(all(feature = "viz", not(feature = "gpu")))]
pub(crate) const fn ensure_collector() {}

/// Publish one backend reading for the `read_*` accessors.
///
/// Each field is written to its own atomic with `Relaxed` ordering;
/// `f32` values are stored as their raw bit pattern. The parameter
/// pattern names every field, so adding a field to `GpuReading` fails
/// to compile until it is handled here.
#[cfg(all(feature = "gpu", feature = "viz"))]
fn store(
    GpuReading {
        util,
        sm,
        pcie_bps,
        power_w,
        mem_bytes,
        temp_c,
        clock_mhz,
    }: GpuReading,
) {
    let publish_f32 = |slot: &AtomicU32, value: f32| {
        slot.store(value.to_bits(), Ordering::Relaxed);
    };
    let publish_u64 = |slot: &AtomicU64, value: u64| {
        slot.store(value, Ordering::Relaxed);
    };

    publish_f32(&GPU_UTIL_BITS, util);
    publish_f32(&GPU_SM_BITS, sm);
    publish_u64(&PCIE_BPS, pcie_bps);
    publish_f32(&GPU_POWER_BITS, power_w);
    publish_u64(&GPU_MEM_BYTES, mem_bytes);
    publish_f32(&GPU_TEMP_BITS, temp_c);
    publish_f32(&GPU_CLOCK_BITS, clock_mhz);
}

/// Latest GPU utilization published by `store`.
///
/// Decodes the `f32` bit pattern held in `GPU_UTIL_BITS`; reads `0.0`
/// until a poll has been stored.
#[cfg(all(feature = "gpu", feature = "viz"))]
pub(crate) fn read_gpu_util() -> f32 {
    let bits = GPU_UTIL_BITS.load(Ordering::Relaxed);
    f32::from_bits(bits)
}
/// Latest SM reading published by `store` (macOS builds only).
///
/// Decodes the `f32` bit pattern held in `GPU_SM_BITS`; reads `0.0`
/// until a poll has been stored.
#[cfg(all(feature = "mac", feature = "viz"))]
pub(crate) fn read_gpu_sm() -> f32 {
    let bits = GPU_SM_BITS.load(Ordering::Relaxed);
    f32::from_bits(bits)
}
/// Latest PCIe throughput published by `store`; reads `0` until a poll
/// has been stored. The unit is whatever the backend puts in
/// `GpuReading::pcie_bps` (presumably bytes per second — TODO confirm).
// NOTE(review): gated on `cuda` alone, while the NVML backend module
// also requires `not(mac)`; with both features enabled this presumably
// always reads zero — confirm that is intended.
#[cfg(all(feature = "cuda", feature = "viz"))]
pub(crate) fn read_pcie_bps() -> u64 {
    PCIE_BPS.load(Ordering::Relaxed)
}
/// Latest GPU power reading published by `store`.
///
/// Decodes the `f32` bit pattern held in `GPU_POWER_BITS`; reads `0.0`
/// until a poll has been stored.
#[cfg(all(feature = "gpu", feature = "viz"))]
pub(crate) fn read_gpu_power_w() -> f32 {
    let bits = GPU_POWER_BITS.load(Ordering::Relaxed);
    f32::from_bits(bits)
}
/// Latest GPU memory figure, in bytes, published by `store`; reads `0`
/// until a poll has been stored.
#[cfg(all(feature = "gpu", feature = "viz"))]
pub(crate) fn read_gpu_mem_bytes() -> u64 {
    GPU_MEM_BYTES.load(Ordering::Relaxed)
}
/// Latest GPU temperature published by `store` (CUDA builds only).
///
/// Decodes the `f32` bit pattern held in `GPU_TEMP_BITS`; reads `0.0`
/// until a poll has been stored.
// NOTE(review): gated on `cuda` alone, while the NVML backend module
// also requires `not(mac)`; with both features enabled this presumably
// always reads zero — confirm that is intended.
#[cfg(all(feature = "cuda", feature = "viz"))]
pub(crate) fn read_gpu_temp_c() -> f32 {
    let bits = GPU_TEMP_BITS.load(Ordering::Relaxed);
    f32::from_bits(bits)
}
/// Latest GPU clock published by `store` (CUDA builds only).
///
/// Decodes the `f32` bit pattern held in `GPU_CLOCK_BITS`; reads `0.0`
/// until a poll has been stored.
// NOTE(review): gated on `cuda` alone, while the NVML backend module
// also requires `not(mac)`; with both features enabled this presumably
// always reads zero — confirm that is intended.
#[cfg(all(feature = "cuda", feature = "viz"))]
pub(crate) fn read_gpu_clock_mhz() -> f32 {
    let bits = GPU_CLOCK_BITS.load(Ordering::Relaxed);
    f32::from_bits(bits)
}

#[cfg(all(test, feature = "mac", feature = "viz"))]
mod tests {
    use super::mac::Sampler;
    use std::{thread, time::Duration};

    /// Hardware smoke test for the IOKit + IOReport backend: after one
    /// ~300 ms window, utilization and SM must lie in `0..=100` and
    /// power must be a finite, non-negative rate. Guards against FFI
    /// signature or unit drift that a compile check cannot see.
    #[test]
    fn poll_yields_sane_readings() {
        let percent = 0.0..=100.0;

        let mut backend = Sampler::new();
        // Let a measurement window elapse between construction and poll.
        thread::sleep(Duration::from_millis(300));
        let reading = backend.poll();

        assert!(percent.contains(&reading.util));
        assert!(percent.contains(&reading.sm));
        assert!(reading.power_w.is_finite() && reading.power_w >= 0.0);
    }
}

#[cfg(all(test, feature = "cuda", not(feature = "mac"), feature = "viz"))]
mod tests {
    use super::nvml::Sampler;
    use std::{thread, time::Duration};

    /// Hardware smoke test for the NVML backend: after ~300 ms,
    /// utilization must lie in `0..=100` and power must be a finite,
    /// non-negative rate. `Sampler::new` panics when no NVIDIA driver
    /// is present, so this only makes sense on the GPU host.
    #[test]
    fn poll_yields_sane_readings() {
        let percent = 0.0..=100.0;

        let mut backend = Sampler::new();
        thread::sleep(Duration::from_millis(300));
        let reading = backend.poll();

        assert!(percent.contains(&reading.util));
        assert!(reading.power_w.is_finite() && reading.power_w >= 0.0);
    }
}