hanzo_engine/utils/
memory_usage.rs1use hanzo_ml::{Device, Result};
2use sysinfo::System;
3#[cfg(feature = "metal")]
4use tracing::warn;
5
/// Number of bytes in one megabyte (2^20).
///
/// Used in this file both to turn byte counts into MB figures and to turn an
/// MB figure back into bytes.
#[cfg(feature = "metal")]
const SIZE_IN_MB: usize = 1 << 20;
8
/// Snapshot of how much memory a device has and how much of it is in use.
///
/// Two accounting models are supported; see the variants.
#[derive(Debug, Clone, Copy)]
pub enum DeviceMemory {
    /// Memory described by a capacity and the part of it that is still free.
    Discrete {
        /// Total capacity.
        total: usize,
        /// Portion of `total` that is currently free.
        free: usize,
    },
    /// Memory described by a budget and the amount already allocated from it.
    Unified {
        /// Upper bound on what may be used.
        budget: usize,
        /// Amount already allocated; may exceed `budget`.
        allocated: usize,
    },
}
14
15impl DeviceMemory {
16 pub fn total(&self) -> usize {
17 match *self {
18 Self::Discrete { total, .. } => total,
19 Self::Unified { budget, .. } => budget,
20 }
21 }
22
23 pub fn available(&self) -> usize {
24 match *self {
25 Self::Discrete { free, .. } => free,
26 Self::Unified { budget, allocated } => budget.saturating_sub(allocated),
27 }
28 }
29
30 pub fn is_unified(&self) -> bool {
31 matches!(self, Self::Unified { .. })
32 }
33}
34
/// Stateless entry point for device memory queries.
///
/// The type carries no data; its methods take the device to inspect as an
/// argument.
pub struct MemoryUsage;
36
37impl MemoryUsage {
38 #[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
39 pub fn query(&self, device: &Device) -> Result<DeviceMemory> {
40 match device {
41 Device::Cpu => {
42 let sys = System::new_all();
43 Ok(DeviceMemory::Discrete {
44 total: usize::try_from(sys.total_memory())?,
45 free: usize::try_from(sys.available_memory())?,
46 })
47 }
48 #[cfg(feature = "vulkan")]
49 Device::Vulkan(_) => {
50 let sys = System::new_all();
52 Ok(DeviceMemory::Discrete {
53 total: usize::try_from(sys.total_memory())?,
54 free: usize::try_from(sys.available_memory())?,
55 })
56 }
57 #[cfg(feature = "rocm")]
58 Device::Rocm(_) => {
59 let sys = System::new_all();
61 Ok(DeviceMemory::Discrete {
62 total: usize::try_from(sys.total_memory())?,
63 free: usize::try_from(sys.available_memory())?,
64 })
65 }
66 #[cfg(feature = "cuda")]
67 Device::Cuda(dev) => {
68 if super::normal::is_integrated_gpu(device) {
69 let sys = System::new_all();
70 let total_bytes = usize::try_from(sys.total_memory())?;
71 let avail_bytes = usize::try_from(sys.available_memory())?;
72 let fraction = igpu_memory_fraction();
73 let budget = (total_bytes as f64 * fraction) as usize;
74 let free = (avail_bytes as f64 * fraction) as usize;
75 Ok(DeviceMemory::Unified {
76 budget,
77 allocated: budget.saturating_sub(free),
78 })
79 } else {
80 use hanzo_ml::cuda::cudarc::driver::result;
81 use hanzo_ml::cuda_backend::WrapErr;
82
83 dev.cuda_stream().context().bind_to_thread().w()?;
84 let (free, total) = result::mem_get_info().w()?;
85 Ok(DeviceMemory::Discrete { total, free })
86 }
87 }
88 #[cfg(not(feature = "cuda"))]
89 Device::Cuda(_) => {
90 hanzo_ml::bail!("Cannot query memory for CUDA device")
91 }
92 #[cfg(feature = "metal")]
93 Device::Metal(dev) => {
94 let sysctl_floor = metal_sysctl_floor_bytes()?;
95 let device_max = dev.device().recommended_max_working_set_size();
96 let budget = sysctl_floor.max(device_max);
97 let allocated = dev.current_allocated_size();
98
99 if device_max < sysctl_floor / 2 {
103 warn!(
104 "Metal recommendedMaxWorkingSetSize ({} MB) is much smaller than the system-RAM floor ({} MB); currentAllocatedSize = {} MB. Using the floor.",
105 device_max / SIZE_IN_MB,
106 sysctl_floor / SIZE_IN_MB,
107 allocated / SIZE_IN_MB,
108 );
109 }
110
111 Ok(DeviceMemory::Unified { budget, allocated })
112 }
113 #[cfg(not(feature = "metal"))]
114 Device::Metal(_) => {
115 hanzo_ml::bail!("Cannot query memory for Metal device")
116 }
117 }
118 }
119}
120
/// Returns the fraction of host RAM to treat as an integrated GPU's budget.
///
/// The `HANZO_IGPU_MEMORY_FRACTION` environment variable overrides the
/// default when it parses as an `f64` within `0.0..=1.0`. If it is unset,
/// unparsable or out of range, the default of `0.75` is returned.
#[cfg(feature = "cuda")]
fn igpu_memory_fraction() -> f64 {
    const DEFAULT_FRACTION: f64 = 0.75;

    let configured = std::env::var("HANZO_IGPU_MEMORY_FRACTION")
        .ok()
        .and_then(|raw| raw.parse::<f64>().ok())
        // NaN fails the range test as well, so it falls back to the default.
        .filter(|value| (0.0..=1.0).contains(value));

    configured.unwrap_or(DEFAULT_FRACTION)
}
135
/// Computes the lower bound, in bytes, for a Metal device's memory budget.
///
/// The value of the `iogpu.wired_limit_mb` sysctl is used when it can be read
/// and is non-zero. Otherwise a fallback share of system RAM is used: two
/// thirds when the machine has at most 36 GB (counted in MB), three quarters
/// above that.
///
/// # Errors
///
/// Fails only when the total system RAM reported by `sysinfo` does not fit in
/// `usize`. Failures of the `sysctl` invocation are not errors; they select
/// the fallback.
#[cfg(feature = "metal")]
fn metal_sysctl_floor_bytes() -> Result<usize> {
    // RAM size (in MB) at which the fallback share switches from 2/3 to 3/4.
    const LARGE_RAM_THRESHOLD_MB: usize = 36 * 1024;

    // Only the RAM counter is needed, so refresh just memory instead of paying
    // for the full `System::new_all()` scan.
    let mut sys = System::new();
    sys.refresh_memory();
    let system_ram_mb = usize::try_from(sys.total_memory())? / SIZE_IN_MB;

    // Best effort: a spawn failure, non-UTF-8 output or non-numeric output all
    // collapse to `None`.
    let sysctl_mb = std::process::Command::new("sysctl")
        .arg("-n")
        .arg("iogpu.wired_limit_mb")
        .output()
        .ok()
        .and_then(|o| String::from_utf8(o.stdout).ok())
        .and_then(|s| s.trim().parse::<usize>().ok());

    // The two cases are exhaustive, so no error branch is needed here.
    let default_cap_mb = if system_ram_mb <= LARGE_RAM_THRESHOLD_MB {
        (system_ram_mb * 2) / 3
    } else {
        (system_ram_mb * 3) / 4
    };

    // A reported limit of 0 is handled like a missing value (presumably it
    // means no explicit limit is configured).
    let floor_mb = match sysctl_mb {
        Some(0) | None => default_cap_mb,
        Some(limit_mb) => limit_mb,
    };

    // Saturate rather than overflow if the sysctl reports an absurd value.
    Ok(floor_mb.saturating_mul(SIZE_IN_MB))
}