vor 0.2.1

Cross-platform performance instrumentation with an in-app egui panel and live system and GPU metrics.
Documentation
//! Vor-owned per-frame system metrics, sampled without egui.
//!
//! [`SystemSample`] is one frame's reading of frame time, memory, I/O,
//! and (where a backend supplies them) GPU counters. [`sample_now`]
//! produces one; the live [`viz`](crate::viz) panel and the headless
//! recorder both call it, so the sampling logic lives here in the core
//! rather than behind the `viz` feature.
//!
//! [`SYSTEM_COLUMNS`] is the schema: the ordered list of `(name, unit,
//! getter)` entries that the viewer plots and the recorder serializes. Which
//! columns exist depends on the compiled platform features, so a
//! capture records the live column names in its header rather than
//! relying on the reader's features matching the writer's.

use web_time::Instant;

use crate::IoTick;

/// One frame's worth of vor-owned system metrics.
///
/// Produced by [`sample_now`] once per displayed frame (live panel) or
/// per [`frame_mark`](crate::frame_mark) (recorder).
///
/// The GPU fields are compiled in only when the matching backend
/// feature is enabled, so the set of fields differs between builds.
///
/// `Debug`, `Default` and `PartialEq` are derived so a sample can be
/// logged, zero-initialized and compared in tests; every field is a
/// plain number, so all three derives are field-wise.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct SystemSample {
    /// Wall time since the previous sample, in nanoseconds.
    pub frame_ns: u64,
    /// Resident memory at sample time, in megabytes.
    pub memory_mb: f64,
    /// Wall time spent in I/O since the previous sample, in
    /// nanoseconds, as recorded via [`record_io`](crate::record_io).
    pub io_ns: u64,
    /// Bytes transferred since the previous sample.
    pub io_bytes: u64,
    /// GPU utilization, 0-100. Latest reading from the GPU poller.
    #[cfg(feature = "gpu")]
    pub gpu_util: f32,
    /// SM / shader-core utilization, 0-100. macOS only; NVML exposes
    /// no equivalent counter.
    #[cfg(feature = "mac")]
    pub gpu_sm: f32,
    /// PCIe throughput in bytes per second. NVIDIA only; the macOS
    /// backend does not read PCIe.
    #[cfg(feature = "cuda")]
    pub pcie_bps: u64,
    /// Instantaneous GPU power draw in watts.
    #[cfg(feature = "gpu")]
    pub gpu_power_w: f32,
    /// GPU memory in use, in bytes.
    #[cfg(feature = "gpu")]
    pub gpu_mem_bytes: u64,
    /// GPU temperature in degrees C. NVIDIA only (no clean source on
    /// macOS).
    #[cfg(feature = "cuda")]
    pub gpu_temp_c: f32,
    /// GPU SM clock in MHz. NVIDIA only.
    #[cfg(feature = "cuda")]
    pub gpu_clock_mhz: f32,
}

/// One plotted/recorded system metric: a name, a unit, the scalar
/// getter, and a one-line description for the panel's hover tooltip.
///
/// Every field is either a `'static` string or a plain `fn` pointer,
/// which is what lets the column table be declared as a `const`.
pub(crate) struct SystemColumn {
    /// Column name (e.g. `"frame_ms"`).
    pub name: &'static str,
    /// Unit label for the value `get` returns (e.g. `"ms"`, `"MB"`).
    pub unit: &'static str,
    /// Reads this column's value out of a sample as an `f64`.
    pub get: fn(&SystemSample) -> f64,
    /// Hover-tooltip text; read by the `viz` panel only (the capture
    /// header carries names + units, not descriptions).
    // Without `viz` nothing reads this field, so silence the
    // dead-code lint for those builds only.
    #[cfg_attr(not(feature = "viz"), allow(dead_code))]
    pub description: &'static str,
}

// The column schema: one entry per metric, in the order listed here.
// Each entry pairs a name and unit with the getter that converts the
// raw `SystemSample` field into that unit.
//
// GPU rows appear only where a backend supplies them; without one they
// would plot flat zeros, so they are dropped entirely. `gpu_sm` is
// macOS-only since NVML has no SM-occupancy counter.
//
// Each `#[cfg]` on a row matches the `#[cfg]` on the corresponding
// `SystemSample` field and getter, so a row exists exactly when its
// field does.
pub(crate) const SYSTEM_COLUMNS: &[SystemColumn] = &[
    SystemColumn {
        name: "frame_ms",
        unit: "ms",
        get: frame_ms_of,
        description: "Wall time between displayed frames. 1000 / frame_ms = FPS.",
    },
    SystemColumn {
        name: "memory_mb",
        unit: "MB",
        get: memory_mb_of,
        description: "Resident memory of this process.",
    },
    SystemColumn {
        name: "io_ms",
        unit: "ms",
        get: io_ms_of,
        description: "Wall time spent in I/O this frame, as reported via record_io.",
    },
    SystemColumn {
        name: "io_MB",
        unit: "MB",
        get: io_mb_of,
        description: "Bytes transferred this frame, via record_io.",
    },
    #[cfg(feature = "gpu")]
    SystemColumn {
        name: "gpu_util",
        unit: "%",
        get: gpu_util_of,
        description: "Overall GPU utilization (0-100%).",
    },
    #[cfg(feature = "mac")]
    SystemColumn {
        name: "gpu_sm",
        unit: "%",
        get: gpu_sm_of,
        description: "Shader / renderer-core utilization (0-100%).",
    },
    #[cfg(feature = "cuda")]
    SystemColumn {
        name: "pcie",
        unit: "MB/s",
        get: pcie_mbps_of,
        description: "PCIe throughput between host and GPU (TX + RX).",
    },
    #[cfg(feature = "gpu")]
    SystemColumn {
        name: "gpu_power",
        unit: "W",
        get: gpu_power_of,
        description: "Instantaneous GPU power draw.",
    },
    #[cfg(feature = "gpu")]
    SystemColumn {
        name: "gpu_mem",
        unit: "MB",
        get: gpu_mem_of,
        description: "GPU memory in use (unified memory on macOS).",
    },
    #[cfg(feature = "cuda")]
    SystemColumn {
        name: "gpu_temp",
        // U+00B0 is the degree sign, so the unit renders as "°C".
        unit: "\u{b0}C",
        get: gpu_temp_of,
        description: "GPU core temperature.",
    },
    #[cfg(feature = "cuda")]
    SystemColumn {
        name: "gpu_clock",
        unit: "MHz",
        get: gpu_clock_of,
        description: "GPU SM clock frequency.",
    },
];

/// `frame_ms` column: the sample's frame time, converted from
/// nanoseconds to milliseconds.
const fn frame_ms_of(s: &SystemSample) -> f64 {
    let nanos = s.frame_ns as f64;
    nanos / 1_000_000.0
}
/// `memory_mb` column: the sample's resident-memory reading, passed
/// through unchanged (it is already stored in megabytes).
const fn memory_mb_of(s: &SystemSample) -> f64 {
    let megabytes: f64 = s.memory_mb;
    megabytes
}
/// `io_ms` column: the sample's I/O wall time, converted from
/// nanoseconds to milliseconds.
const fn io_ms_of(s: &SystemSample) -> f64 {
    let nanos = s.io_ns as f64;
    nanos / 1_000_000.0
}
/// `io_MB` column: the sample's transferred byte count, converted to
/// megabytes (10^6 bytes).
const fn io_mb_of(s: &SystemSample) -> f64 {
    let bytes = s.io_bytes as f64;
    bytes / 1_000_000.0
}
/// `gpu_util` column: the sample's GPU utilization percentage,
/// widened from `f32` to `f64`.
#[cfg(feature = "gpu")]
const fn gpu_util_of(s: &SystemSample) -> f64 {
    let percent: f32 = s.gpu_util;
    percent as f64
}
/// `gpu_sm` column: the sample's shader-core utilization percentage,
/// widened from `f32` to `f64`.
#[cfg(feature = "mac")]
const fn gpu_sm_of(s: &SystemSample) -> f64 {
    let percent: f32 = s.gpu_sm;
    percent as f64
}
/// `pcie` column: the sample's PCIe throughput, converted from bytes
/// per second to megabytes (10^6 bytes) per second.
#[cfg(feature = "cuda")]
const fn pcie_mbps_of(s: &SystemSample) -> f64 {
    let bytes_per_sec = s.pcie_bps as f64;
    bytes_per_sec / 1_000_000.0
}
/// `gpu_power` column: the sample's GPU power draw in watts, widened
/// from `f32` to `f64`.
#[cfg(feature = "gpu")]
const fn gpu_power_of(s: &SystemSample) -> f64 {
    let watts: f32 = s.gpu_power_w;
    watts as f64
}
/// `gpu_mem` column: the sample's GPU memory in use, converted from
/// bytes to megabytes (10^6 bytes).
#[cfg(feature = "gpu")]
const fn gpu_mem_of(s: &SystemSample) -> f64 {
    let bytes = s.gpu_mem_bytes as f64;
    bytes / 1_000_000.0
}
/// `gpu_temp` column: the sample's GPU temperature in degrees C,
/// widened from `f32` to `f64`.
#[cfg(feature = "cuda")]
const fn gpu_temp_of(s: &SystemSample) -> f64 {
    let celsius: f32 = s.gpu_temp_c;
    celsius as f64
}
/// `gpu_clock` column: the sample's GPU SM clock in MHz, widened from
/// `f32` to `f64`.
#[cfg(feature = "cuda")]
const fn gpu_clock_of(s: &SystemSample) -> f64 {
    let megahertz: f32 = s.gpu_clock_mhz;
    megahertz as f64
}

/// Take one reading of every vor-owned system metric.
///
/// `last` is the caller-held instant of the previous reading. Every
/// call overwrites it with the current instant, and `frame_ns` is the
/// time elapsed since the value it held on entry; when it held `None`
/// (the first call) `frame_ns` is reported as 0. Each consumer
/// (panel, recorder) passes its own `last`, so their timelines stay
/// independent.
///
/// Resident memory falls back to 0.0 MB when `current_memory_bytes`
/// yields no value. The I/O fields come from the tick returned by
/// `drain_io`, and the feature-gated GPU fields from the matching
/// `crate::gpu::read_*` readers.
pub(crate) fn sample_now(last: &mut Option<Instant>) -> SystemSample {
    let now = Instant::now();
    // `replace` stores `now` and hands back the previous instant in
    // one step, so the gap is measured against the old value.
    let frame_ns = last
        .replace(now)
        .map_or(0, |prev| now.duration_since(prev).as_nanos() as u64);
    let memory_mb = crate::current_memory_bytes().map_or(0.0, |bytes| bytes as f64 / 1e6);
    let IoTick {
        elapsed_ns: io_ns,
        bytes: io_bytes,
    } = crate::drain_io();
    SystemSample {
        frame_ns,
        memory_mb,
        io_ns,
        io_bytes,
        #[cfg(feature = "gpu")]
        gpu_util: crate::gpu::read_gpu_util(),
        #[cfg(feature = "mac")]
        gpu_sm: crate::gpu::read_gpu_sm(),
        #[cfg(feature = "cuda")]
        pcie_bps: crate::gpu::read_pcie_bps(),
        #[cfg(feature = "gpu")]
        gpu_power_w: crate::gpu::read_gpu_power_w(),
        #[cfg(feature = "gpu")]
        gpu_mem_bytes: crate::gpu::read_gpu_mem_bytes(),
        #[cfg(feature = "cuda")]
        gpu_temp_c: crate::gpu::read_gpu_temp_c(),
        #[cfg(feature = "cuda")]
        gpu_clock_mhz: crate::gpu::read_gpu_clock_mhz(),
    }
}