use crate::metrics::MemoryMetrics;
/// Decides whether the GPU and the CPU appear to share a single memory pool.
///
/// Two signals are consulted, in this order:
///
/// 1. **Product name** - if `matches_unified_gpu_name` recognises `gpu_name`,
///    the result is `true` no matter what the byte counts say.
/// 2. **Size comparison** - otherwise, when a GPU total is known and the
///    system total is non-zero, the platform counts as unified if the two
///    totals differ by at most 10% of the system total.
///
/// # Arguments
/// * `gpu_name` - GPU product name, if one is available.
/// * `gpu_total_bytes` - total GPU memory in bytes, if known.
/// * `sys_total_bytes` - total system memory in bytes. A value of `0`
///   disables the size comparison.
///
/// # Returns
/// `true` when either signal indicates a unified-memory platform.
#[cfg_attr(not(target_os = "linux"), allow(dead_code))]
pub fn detect_unified_memory(
    gpu_name: Option<&str>,
    gpu_total_bytes: Option<u64>,
    sys_total_bytes: u64,
) -> bool {
    // A recognised product name wins outright; sizes are not inspected.
    if matches_unified_gpu_name(gpu_name) {
        return true;
    }

    match gpu_total_bytes {
        // The comparison is only meaningful with both totals present.
        Some(gpu_total) if sys_total_bytes != 0 => {
            let allowed_gap = sys_total_bytes as f64 * 0.10;
            let actual_gap = gpu_total.abs_diff(sys_total_bytes) as f64;
            actual_gap <= allowed_gap
        }
        _ => false,
    }
}
/// Reports whether a GPU product name contains one of the known
/// unified-memory family tokens.
///
/// The comparison is ASCII-case-insensitive and substring based: a token
/// matches wherever it occurs inside the name, not only on word boundaries.
///
/// Returns `false` when no name is supplied.
fn matches_unified_gpu_name(name: Option<&str>) -> bool {
    // Tokens identifying product families treated as unified memory.
    const UNIFIED_FAMILIES: &[&str] = &[
        "GB10", "GB200", "GB300", "GH200", "Grace", "Spark", "Jetson", "Tegra", "Orin", "Xavier",
        "Thor", "TX1", "TX2", "Nano", "AGX", "DRIVE",
    ];

    let haystack = match name {
        Some(raw) => raw.to_ascii_lowercase(),
        None => return false,
    };

    for family in UNIFIED_FAMILIES {
        // Both sides are lowercased so the match ignores ASCII case.
        let needle = family.to_ascii_lowercase();
        if haystack.contains(needle.as_str()) {
            return true;
        }
    }
    false
}
/// Rounds a byte count up to the next power-of-two number of GiB.
///
/// The input is first rounded up to whole GiB, then that GiB count is raised
/// to the nearest power of two (values already on a power of two are kept).
/// For example, 56 GiB becomes 64 GiB and 128 GiB stays 128 GiB.
///
/// # Returns
/// The rounded size in bytes. `0` maps to `0`, and a result that would not
/// fit in a `u64` saturates at `u64::MAX`.
#[cfg_attr(not(target_os = "linux"), allow(dead_code))]
pub fn round_up_to_marketed_gib(bytes: u64) -> u64 {
    const GIB: u64 = 1 << 30;

    match bytes {
        // Zero stays zero instead of being rounded up to 1 GiB.
        0 => 0,
        nonzero => nonzero
            .div_ceil(GIB)
            .next_power_of_two()
            .saturating_mul(GIB),
    }
}
/// Chooses the memory total that should be shown to the user.
///
/// Selection rules:
///
/// 1. Not unified: the system total is returned unchanged. The GPU total is
///    never used here, because on a discrete-GPU machine it describes a
///    separate pool (VRAM) that may well be larger than host RAM.
/// 2. Unified and the GPU total is at least the system total: the GPU total
///    is returned.
/// 3. Unified otherwise (GPU total missing or smaller than the system total):
///    the system total is rounded up via `round_up_to_marketed_gib`.
///
/// # Arguments
/// * `is_unified` - whether GPU and CPU share one memory pool.
/// * `gpu_total_bytes` - total GPU memory in bytes, if known.
/// * `sys_total_bytes` - total system memory in bytes.
///
/// # Returns
/// The total, in bytes, to present as the size of the memory pool.
#[cfg_attr(not(target_os = "linux"), allow(dead_code))]
pub fn select_display_total(
    is_unified: bool,
    gpu_total_bytes: Option<u64>,
    sys_total_bytes: u64,
) -> u64 {
    // VRAM on a discrete card is a different pool; it must never replace the
    // system total, even when it happens to be the larger number.
    if !is_unified {
        return sys_total_bytes;
    }

    match gpu_total_bytes {
        Some(nvml_total) if nvml_total >= sys_total_bytes => nvml_total,
        _ => round_up_to_marketed_gib(sys_total_bytes),
    }
}
/// Collects a memory snapshot on Linux from `/proc/meminfo` and, when a GPU
/// handle is supplied, from NVML.
///
/// System side:
/// * total, available and cached sizes are read through `procfs::Meminfo`;
///   a missing `mem_available` value is treated as `0`.
/// * if reading fails, a warning is logged and all three values are `0`.
/// * `used_bytes` is `total - available`, clamped at zero.
///
/// GPU side (each value is `None` when `device` is `None` or when
/// `nvml_optional` yields `None` for the query):
/// * total and used GPU memory from `memory_info()`.
/// * `gpu_estimated_bytes`: the sum of per-process GPU memory over the
///   running compute processes, counting processes whose usage is reported
///   as unavailable as `0`.
///
/// `is_unified` and `display_total_bytes` are derived from these values with
/// `detect_unified_memory` and `select_display_total`.
// NOTE(review): `nvml_optional` is defined elsewhere; presumably it maps an
// NVML error to `None` - confirm against its definition.
#[cfg(target_os = "linux")]
pub fn collect_memory_metrics(device: &Option<nvml_wrapper::Device>) -> MemoryMetrics {
    use crate::metrics::gpu::nvml_optional;
    use nvml_wrapper::enums::device::UsedGpuMemory;
    use procfs::Current;

    // --- system memory ---------------------------------------------------
    let (total_bytes, available_bytes, cached_bytes) = match procfs::Meminfo::current() {
        Ok(info) => (
            info.mem_total,
            info.mem_available.unwrap_or(0),
            info.cached,
        ),
        Err(e) => {
            tracing::warn!("Failed to read /proc/meminfo: {}", e);
            (0, 0, 0)
        }
    };
    let used_bytes = total_bytes.saturating_sub(available_bytes);

    // --- GPU memory --------------------------------------------------------
    // `Option<&Device>` is `Copy`, so the borrowed handle can be reused below.
    let gpu = device.as_ref();

    let memory_info = gpu.and_then(|d| nvml_optional(d.memory_info()));
    let gpu_memory_total_bytes = memory_info.as_ref().map(|info| info.total);
    let gpu_memory_used_bytes = memory_info.as_ref().map(|info| info.used);

    let gpu_name = gpu.and_then(|d| nvml_optional(d.name()));

    let gpu_estimated_bytes = gpu
        .and_then(|d| nvml_optional(d.running_compute_processes()))
        .map(|procs| {
            procs
                .iter()
                .map(|proc_info| match proc_info.used_gpu_memory {
                    UsedGpuMemory::Used(bytes) => bytes,
                    UsedGpuMemory::Unavailable => 0,
                })
                .sum::<u64>()
        });

    // --- derived values ----------------------------------------------------
    let is_unified =
        detect_unified_memory(gpu_name.as_deref(), gpu_memory_total_bytes, total_bytes);
    let display_total_bytes =
        select_display_total(is_unified, gpu_memory_total_bytes, total_bytes);

    MemoryMetrics {
        total_bytes,
        display_total_bytes,
        used_bytes,
        available_bytes,
        cached_bytes,
        gpu_estimated_bytes,
        gpu_memory_total_bytes,
        gpu_memory_used_bytes,
        is_unified,
    }
}
/// Collects a memory snapshot on non-Linux targets from a `sysinfo::System`.
///
/// * `total_bytes`, `used_bytes` and `available_bytes` come straight from the
///   corresponding `sysinfo` getters.
/// * `display_total_bytes` equals `total_bytes`.
/// * `cached_bytes` is what remains of the total after subtracting used and
///   available memory, clamped at zero.
/// * All GPU fields are `None`.
/// * `is_unified` is `true` only when compiled for macOS.
// NOTE(review): this reads whatever `sys` currently holds; presumably the
// caller refreshes memory on `sys` beforehand - confirm at the call sites.
#[cfg(not(target_os = "linux"))]
pub fn collect_memory_metrics(sys: &sysinfo::System) -> MemoryMetrics {
    let total_bytes = sys.total_memory();
    let used_bytes = sys.used_memory();
    let available_bytes = sys.available_memory();

    // Remainder of the total once used and available memory are removed.
    let cached_bytes = total_bytes
        .saturating_sub(used_bytes)
        .saturating_sub(available_bytes);

    MemoryMetrics {
        total_bytes,
        display_total_bytes: total_bytes,
        used_bytes,
        available_bytes,
        cached_bytes,
        gpu_estimated_bytes: None,
        gpu_memory_total_bytes: None,
        gpu_memory_used_bytes: None,
        is_unified: cfg!(target_os = "macos"),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decimal gigabyte (10^9 bytes).
    const GB: u64 = 1_000_000_000;
    /// Binary gibibyte (2^30 bytes).
    const GIB: u64 = 1024 * 1024 * 1024;
    /// System total used by the GB10 / Spark cases; it rounds up to 128 GiB.
    const SPARK_KERNEL_TOTAL: u64 = 130_663_821_312;

    // ---- detect_unified_memory: size heuristic ----------------------------

    #[test]
    fn detect_unified_returns_true_when_gpu_equals_system() {
        let pool = 128 * GB;
        assert!(detect_unified_memory(None, Some(pool), pool));
    }

    #[test]
    fn detect_unified_returns_true_within_tolerance() {
        // 8 GB apart, which is inside 10% of 128 GB.
        let (gpu, system) = (120 * GB, 128 * GB);
        assert!(detect_unified_memory(None, Some(gpu), system));
    }

    #[test]
    fn detect_unified_returns_false_for_discrete_gpu() {
        let unified = detect_unified_memory(
            Some("NVIDIA GeForce RTX 4090"),
            Some(24 * GB),
            128 * GB,
        );
        assert!(!unified);
    }

    #[test]
    fn detect_unified_returns_false_for_80gb_discrete_on_large_host() {
        let unified =
            detect_unified_memory(Some("NVIDIA H100 80GB PCIe"), Some(80 * GB), 512 * GB);
        assert!(!unified);
    }

    #[test]
    fn detect_unified_returns_false_when_gpu_missing() {
        let unified = detect_unified_memory(None, None, 128 * GB);
        assert!(!unified);
    }

    #[test]
    fn detect_unified_returns_false_when_system_total_zero() {
        let unified = detect_unified_memory(None, Some(128 * GB), 0);
        assert!(!unified);
    }

    // ---- detect_unified_memory: name matching -----------------------------

    #[test]
    fn detect_unified_recognises_gb10_by_name_when_nvml_memory_info_missing() {
        let unified = detect_unified_memory(Some("NVIDIA GB10"), None, SPARK_KERNEL_TOTAL);
        assert!(unified);
    }

    #[test]
    fn detect_unified_recognises_grace_hopper_by_name() {
        let unified = detect_unified_memory(Some("NVIDIA GH200 480GB"), None, 500 * GB);
        assert!(unified);
    }

    #[test]
    fn detect_unified_recognises_jetson_by_name() {
        let unified = detect_unified_memory(Some("Jetson AGX Orin"), None, 60 * GB);
        assert!(unified);
    }

    #[test]
    fn detect_unified_recognises_extended_nvidia_unified_families() {
        let unified_names = [
            "NVIDIA GB200",
            "NVIDIA GB300 Ultra",
            "NVIDIA Jetson Orin Nano",
            "NVIDIA Jetson Xavier NX",
            "NVIDIA Jetson Thor",
            "NVIDIA Jetson TX2",
            "NVIDIA Tegra X1",
            "NVIDIA DRIVE AGX Orin",
            "NVIDIA DGX Spark",
        ];
        for name in unified_names {
            let unified = detect_unified_memory(Some(name), None, 32 * GB);
            assert!(unified, "{name} should be recognised as unified memory");
        }
    }

    #[test]
    fn detect_unified_still_rejects_discrete_consumer_and_datacenter_gpus() {
        let discrete_names = [
            "NVIDIA GeForce RTX 3090",
            "NVIDIA GeForce RTX 3090 Ti",
            "NVIDIA GeForce RTX 4090",
            "NVIDIA GeForce RTX 5090",
            "NVIDIA RTX A6000",
            "NVIDIA RTX 6000 Ada Generation",
            "NVIDIA RTX PRO 4000 Blackwell",
            "NVIDIA RTX PRO 6000 Blackwell",
            "Quadro RTX 8000",
            "NVIDIA H100 80GB PCIe",
            "NVIDIA H200",
            "NVIDIA A100-SXM4-80GB",
            "NVIDIA L40S",
            "NVIDIA L4",
        ];
        for name in discrete_names {
            let unified = detect_unified_memory(Some(name), Some(24 * GB), 256 * GB);
            assert!(!unified, "{name} should NOT be flagged as unified memory");
        }
    }

    // ---- round_up_to_marketed_gib -----------------------------------------

    #[test]
    fn round_up_to_marketed_gib_handles_spark() {
        let rounded = round_up_to_marketed_gib(SPARK_KERNEL_TOTAL);
        assert_eq!(rounded, 128 * GIB);
    }

    #[test]
    fn round_up_to_marketed_gib_handles_jetson_orin() {
        let rounded = round_up_to_marketed_gib(56 * GIB);
        assert_eq!(rounded, 64 * GIB);
    }

    #[test]
    fn round_up_to_marketed_gib_passes_through_exact_power_of_two() {
        for exact in [128 * GIB, 32 * GIB] {
            assert_eq!(round_up_to_marketed_gib(exact), exact);
        }
    }

    #[test]
    fn round_up_to_marketed_gib_zero_is_zero() {
        let rounded = round_up_to_marketed_gib(0);
        assert_eq!(rounded, 0);
    }

    // ---- select_display_total ---------------------------------------------

    #[test]
    fn select_display_total_prefers_nvml_when_it_reports_full_pool() {
        let kernel = 131_000_000_000;
        let nvml = 137_438_953_472;
        let shown = select_display_total(true, Some(nvml), kernel);
        assert_eq!(shown, nvml);
    }

    #[test]
    fn select_display_total_rounds_up_when_unified_and_nvml_missing() {
        let shown = select_display_total(true, None, SPARK_KERNEL_TOTAL);
        assert_eq!(shown, 128 * GIB);
    }

    #[test]
    fn select_display_total_ignores_nvml_when_smaller_than_kernel() {
        let nvml_smaller = 100 * GIB;
        let shown = select_display_total(true, Some(nvml_smaller), SPARK_KERNEL_TOTAL);
        assert_eq!(shown, 128 * GIB);
    }

    #[test]
    fn select_display_total_falls_back_when_not_unified() {
        let (kernel, vram) = (128 * GB, 24 * GB);
        let shown = select_display_total(false, Some(vram), kernel);
        assert_eq!(shown, kernel);
    }

    #[test]
    fn select_display_total_falls_back_when_nvml_missing_and_not_unified() {
        let kernel = 16 * GB;
        let shown = select_display_total(false, None, kernel);
        assert_eq!(shown, kernel);
    }

    // ---- collect_memory_metrics -------------------------------------------

    /// Without a GPU handle only the system fields are populated.
    #[cfg(target_os = "linux")]
    #[test]
    fn collect_memory_metrics_none_device_reads_meminfo() {
        let snapshot = collect_memory_metrics(&None);

        assert!(snapshot.total_bytes > 0);
        assert!(snapshot.available_bytes > 0);

        assert!(snapshot.gpu_estimated_bytes.is_none());
        assert!(snapshot.gpu_memory_total_bytes.is_none());
        assert!(snapshot.gpu_memory_used_bytes.is_none());

        assert!(!snapshot.is_unified);
        assert_eq!(snapshot.display_total_bytes, snapshot.total_bytes);
    }

    /// After a memory refresh the snapshot reports non-zero totals.
    #[cfg(not(target_os = "linux"))]
    #[test]
    fn collect_memory_metrics_returns_real_values() {
        let mut system = sysinfo::System::new();
        system.refresh_memory();

        let snapshot = collect_memory_metrics(&system);

        assert!(snapshot.total_bytes > 0);
        assert!(snapshot.available_bytes > 0);
    }
}