Skip to main content

hanzo_engine/utils/
memory_usage.rs

1use hanzo_ml::{Device, Result};
2use sysinfo::System;
3#[cfg(feature = "metal")]
4use tracing::warn;
5
6#[cfg(feature = "metal")]
/// Number of bytes in one mebibyte; used to convert between byte counts and MB figures.
const SIZE_IN_MB: usize = 1 << 20;
8
/// Snapshot of a device's memory, in one of two accounting models.
#[derive(Debug, Clone, Copy)]
pub enum DeviceMemory {
    /// Memory described by a total size and the amount currently free.
    Discrete { total: usize, free: usize },
    /// Memory described by a usable budget and the amount already allocated from it.
    Unified { budget: usize, allocated: usize },
}

impl DeviceMemory {
    /// Capacity of the device: `total` for [`Self::Discrete`], `budget` for [`Self::Unified`].
    pub fn total(&self) -> usize {
        match self {
            Self::Unified { budget: cap, .. } => *cap,
            Self::Discrete { total: cap, .. } => *cap,
        }
    }

    /// Memory still available for use.
    ///
    /// For [`Self::Discrete`] this is `free`. For [`Self::Unified`] it is
    /// `budget - allocated`, clamped at zero when more than the budget is
    /// already allocated.
    pub fn available(&self) -> usize {
        match self {
            Self::Unified {
                budget: cap,
                allocated: used,
            } => cap.saturating_sub(*used),
            Self::Discrete {
                free: remaining, ..
            } => *remaining,
        }
    }

    /// Returns `true` only for the [`Self::Unified`] variant.
    pub fn is_unified(&self) -> bool {
        match self {
            Self::Unified { .. } => true,
            Self::Discrete { .. } => false,
        }
    }
}
34
35pub struct MemoryUsage;
36
37impl MemoryUsage {
38    #[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
39    pub fn query(&self, device: &Device) -> Result<DeviceMemory> {
40        match device {
41            Device::Cpu => {
42                let sys = System::new_all();
43                Ok(DeviceMemory::Discrete {
44                    total: usize::try_from(sys.total_memory())?,
45                    free: usize::try_from(sys.available_memory())?,
46                })
47            }
48            #[cfg(feature = "vulkan")]
49            Device::Vulkan(_) => {
50                // 8060S APU: unified memory shared with system RAM
51                let sys = System::new_all();
52                Ok(DeviceMemory::Discrete {
53                    total: usize::try_from(sys.total_memory())?,
54                    free: usize::try_from(sys.available_memory())?,
55                })
56            }
57            #[cfg(feature = "rocm")]
58            Device::Rocm(_) => {
59                // gfx1151 APU: unified memory shared with system RAM
60                let sys = System::new_all();
61                Ok(DeviceMemory::Discrete {
62                    total: usize::try_from(sys.total_memory())?,
63                    free: usize::try_from(sys.available_memory())?,
64                })
65            }
66            #[cfg(feature = "cuda")]
67            Device::Cuda(dev) => {
68                if super::normal::is_integrated_gpu(device) {
69                    let sys = System::new_all();
70                    let total_bytes = usize::try_from(sys.total_memory())?;
71                    let avail_bytes = usize::try_from(sys.available_memory())?;
72                    let fraction = igpu_memory_fraction();
73                    let budget = (total_bytes as f64 * fraction) as usize;
74                    let free = (avail_bytes as f64 * fraction) as usize;
75                    Ok(DeviceMemory::Unified {
76                        budget,
77                        allocated: budget.saturating_sub(free),
78                    })
79                } else {
80                    use hanzo_ml::cuda::cudarc::driver::result;
81                    use hanzo_ml::cuda_backend::WrapErr;
82
83                    dev.cuda_stream().context().bind_to_thread().w()?;
84                    let (free, total) = result::mem_get_info().w()?;
85                    Ok(DeviceMemory::Discrete { total, free })
86                }
87            }
88            #[cfg(not(feature = "cuda"))]
89            Device::Cuda(_) => {
90                hanzo_ml::bail!("Cannot query memory for CUDA device")
91            }
92            #[cfg(feature = "metal")]
93            Device::Metal(dev) => {
94                let sysctl_floor = metal_sysctl_floor_bytes()?;
95                let device_max = dev.device().recommended_max_working_set_size();
96                let budget = sysctl_floor.max(device_max);
97                let allocated = dev.current_allocated_size();
98
99                // recommendedMaxWorkingSetSize is dynamic and can underreport on small/pressured Apple Silicon.
100                // Dividing by 2 here is a heuristic to indicate that we are now below an expected value.
101                // See: https://github.com/hanzoai/engine/issues/2127
102                if device_max < sysctl_floor / 2 {
103                    warn!(
104                        "Metal recommendedMaxWorkingSetSize ({} MB) is much smaller than the system-RAM floor ({} MB); currentAllocatedSize = {} MB. Using the floor.",
105                        device_max / SIZE_IN_MB,
106                        sysctl_floor / SIZE_IN_MB,
107                        allocated / SIZE_IN_MB,
108                    );
109                }
110
111                Ok(DeviceMemory::Unified { budget, allocated })
112            }
113            #[cfg(not(feature = "metal"))]
114            Device::Metal(_) => {
115                hanzo_ml::bail!("Cannot query memory for Metal device")
116            }
117        }
118    }
119}
120
121#[cfg(feature = "cuda")]
/// Fraction of host RAM that an integrated GPU is budgeted to use.
///
/// Reads the `HANZO_IGPU_MEMORY_FRACTION` environment variable. The value is
/// used only if it parses as an `f64` within `0.0..=1.0`; when the variable is
/// unset, unparsable, or out of range (including NaN), `0.75` is returned.
fn igpu_memory_fraction() -> f64 {
    const FALLBACK: f64 = 0.75;

    match std::env::var("HANZO_IGPU_MEMORY_FRACTION").map(|raw| raw.parse::<f64>()) {
        Ok(Ok(value)) if (0.0..=1.0).contains(&value) => value,
        _ => FALLBACK,
    }
}
135
/// Computes, in bytes, the lower bound used for the Metal memory budget.
///
/// The `iogpu.wired_limit_mb` sysctl is read by running `sysctl -n`. A positive
/// value is used as-is. If the command cannot be run, its output does not
/// parse as an integer, or it reports `0`, a default derived from system RAM
/// is used instead: two thirds of RAM on machines with at most 36 GiB, three
/// quarters on larger ones.
/// NOTE(review): the 2/3 and 3/4 split presumably mirrors the macOS default
/// GPU wired-memory limit — confirm against Apple documentation.
///
/// # Errors
///
/// Fails if total system RAM does not fit in `usize`, or if the resulting
/// limit overflows `usize` when converted from MB to bytes.
#[cfg(feature = "metal")]
fn metal_sysctl_floor_bytes() -> Result<usize> {
    // RAM size at or below which the smaller default fraction applies.
    const SMALL_RAM_THRESHOLD_MB: usize = 36 * 1024;

    // Only total RAM is needed, so refresh just the memory counters instead of
    // loading everything with `System::new_all()`.
    let mut sys = System::new();
    sys.refresh_memory();
    let system_ram_mb = usize::try_from(sys.total_memory())? / SIZE_IN_MB;

    // Best effort: any failure along the way falls through to the default below.
    let sysctl_mb = std::process::Command::new("sysctl")
        .arg("-n")
        .arg("iogpu.wired_limit_mb")
        .output()
        .ok()
        .and_then(|o| String::from_utf8(o.stdout).ok())
        .and_then(|s| s.trim().parse::<usize>().ok())
        // A reported limit of 0 is treated the same as no override.
        .filter(|&mb| mb != 0);

    let default_cap_mb = if system_ram_mb <= SMALL_RAM_THRESHOLD_MB {
        (system_ram_mb * 2) / 3
    } else {
        (system_ram_mb * 3) / 4
    };

    let floor_mb = sysctl_mb.unwrap_or(default_cap_mb);

    // The sysctl value is external input; refuse to wrap around on an absurd limit.
    floor_mb.checked_mul(SIZE_IN_MB).ok_or_else(|| {
        hanzo_ml::Error::Msg(format!(
            "Metal wired limit of {floor_mb} MB overflows usize when converted to bytes."
        ))
    })
}