1use crate::trace::{TraceStep, TRACER};
16
17use super::error::GpuError;
18use super::ledger::VramLedger;
19use super::profiler::GpuProfiler;
20use super::wait::{self, WaitConfig};
21
22pub struct VramGuard {
27 ledger: VramLedger,
28 budget_mb: usize,
29}
30
31impl VramGuard {
32 pub fn acquire(budget_mb: usize, task: &str) -> Result<Self, GpuError> {
37 TRACER.span(TraceStep::VramQuery, format!("guard_acquire budget={budget_mb}MB"), || {
38 let mut ledger = super::ledger::auto_ledger();
39 ledger.try_reserve(budget_mb, task)?;
40 Ok(Self { ledger, budget_mb })
41 })
42 }
43
44 pub fn acquire_wait(budget_mb: usize, task: &str, timeout_secs: u64) -> Result<Self, GpuError> {
46 TRACER.span(
47 TraceStep::WaitPoll,
48 format!("guard_wait budget={budget_mb}MB timeout={timeout_secs}s"),
49 || {
50 let mut ledger = super::ledger::auto_ledger();
51 let config = WaitConfig::with_timeout_secs(timeout_secs);
52 let mut profiler = GpuProfiler::disabled();
53 wait::wait_for_vram(&mut ledger, budget_mb, task, &config, &mut profiler)?;
54 Ok(Self { ledger, budget_mb })
55 },
56 )
57 }
58
59 pub fn update_actual(&mut self, actual_mb: usize) -> Result<(), GpuError> {
64 self.ledger.update_actual(actual_mb)
65 }
66
67 pub fn budget_mb(&self) -> usize {
69 self.budget_mb
70 }
71
72 pub fn gpu_uuid(&self) -> &str {
74 &self.ledger.gpu_uuid
75 }
76
77 pub fn status(&self) -> Result<String, GpuError> {
79 super::ledger::gpu_status_display(&self.ledger)
80 }
81 pub fn check_overshoot(&self) -> Option<(usize, usize)> {
87 let actual = self
88 .ledger
89 .read_reservations()
90 .ok()?
91 .iter()
92 .filter(|r| r.pid == std::process::id())
93 .find_map(|r| r.actual_mb)?;
94 if actual > self.budget_mb {
95 Some((actual, self.budget_mb))
96 } else {
97 None
98 }
99 }
100
/// Estimates a VRAM budget in MB from a parameter count.
///
/// The estimate is `param_count * 2` bytes converted to whole MB (rounded down), plus one
/// fifth of that figure as overhead (also rounded down). The factor of two presumably
/// corresponds to 16-bit weights — confirm against the callers.
///
/// The multiplication is carried out in `u128`, so very large parameter counts no longer
/// overflow `usize` (which panicked in debug builds and wrapped silently in release builds).
/// Results are unchanged for every input that did not overflow before.
pub fn auto_estimate_budget(param_count: usize) -> usize {
    const BYTES_PER_PARAM: u128 = 2;
    const BYTES_PER_MB: u128 = 1024 * 1024;

    // `usize -> u128` is a lossless widening (there is no `From` impl for it). The quotient
    // is at most `usize::MAX / 524_288`, so narrowing back to `usize` cannot truncate.
    let weight_mb = (param_count as u128 * BYTES_PER_PARAM / BYTES_PER_MB) as usize;
    let overhead = weight_mb / 5;
    weight_mb + overhead
}
111}
112
113#[cfg(test)]
114mod tests {
115 use super::*;
116
117 use std::sync::atomic::{AtomicU32, Ordering};
118
119 static TEST_COUNTER: AtomicU32 = AtomicU32::new(0);
120
121 fn test_guard_ledger(total_mb: usize) -> VramLedger {
122 let n = TEST_COUNTER.fetch_add(1, Ordering::Relaxed);
123 let dir = std::env::temp_dir().join("entrenar-guard-test");
124 std::fs::create_dir_all(&dir).expect("dir creation should succeed");
125 let path = dir.join(format!("guard-{n}-{}.json", std::process::id()));
126 VramLedger::new("GPU-test-guard".into(), total_mb, 0.85).with_path(path)
127 }
128
/// A reservation made directly on a new test ledger shows up in the reserved total.
#[test]
fn test_guard_direct_acquire() {
    let mut fixture = test_guard_ledger(24000);
    fixture.try_reserve(5000, "guard-test").expect("should succeed");

    let reserved_mb = fixture.total_reserved().expect("should succeed");
    assert_eq!(reserved_mb, 5000);

    drop(fixture);
}
137
/// After `update_actual`, the reserved total equals the reported actual value.
#[test]
fn test_guard_update_actual() {
    let mut fixture = test_guard_ledger(24000);
    fixture.try_reserve(8000, "guard-actual").expect("should succeed");
    fixture.update_actual(7200).expect("should succeed");

    let reserved_mb = fixture.total_reserved().expect("should succeed");
    assert_eq!(reserved_mb, 7200);
}
145
/// A 9000 MB request against a 10000 MB test ledger is refused.
#[test]
fn test_guard_rejects_over_budget() {
    let mut fixture = test_guard_ledger(10000);
    assert!(fixture.try_reserve(9000, "too-big").is_err());
}
152
/// `budget_mb()` returns the value the guard was constructed with.
#[test]
fn test_guard_budget_mb() {
    let budget = 8000;
    let guard = VramGuard {
        ledger: test_guard_ledger(24000),
        budget_mb: budget,
    };
    assert_eq!(guard.budget_mb(), budget);
}
159
/// `gpu_uuid()` exposes the UUID the test ledger was created with.
#[test]
fn test_guard_gpu_uuid() {
    let guard = VramGuard {
        ledger: test_guard_ledger(24000),
        budget_mb: 5000,
    };
    let uuid = guard.gpu_uuid();
    assert_eq!(uuid, "GPU-test-guard");
}
166
/// Calls `status()` on a guard without a reservation; the result itself is not inspected,
/// so this only checks that the call returns.
#[test]
fn test_guard_status() {
    let guard = VramGuard {
        ledger: test_guard_ledger(24000),
        budget_mb: 5000,
    };
    let _ = guard.status();
}
176
/// `update_actual` is expected to succeed even though nothing was reserved on the ledger.
#[test]
fn test_guard_update_actual_without_reservation() {
    let mut guard = VramGuard {
        ledger: test_guard_ledger(24000),
        budget_mb: 5000,
    };
    assert!(guard.update_actual(4000).is_ok());
}
185
/// Reserves 3000 MB on a new ledger, checks the reserved total, then drops the ledger.
#[test]
fn test_guard_multiple_reservations_sequential() {
    let mut first_ledger = test_guard_ledger(24000);
    first_ledger.try_reserve(3000, "task-1").expect("should succeed");

    assert_eq!(first_ledger.total_reserved().expect("should succeed"), 3000);

    drop(first_ledger);
}
196
/// A zero-MB reservation is accepted.
#[test]
fn test_guard_zero_budget() {
    let mut fixture = test_guard_ledger(24000);
    assert!(fixture.try_reserve(0, "zero-budget").is_ok());
}
204
/// An 8000 MB request against a 10000 MB test ledger is accepted.
#[test]
fn test_guard_exact_budget() {
    let mut fixture = test_guard_ledger(10000);
    assert!(fixture.try_reserve(8000, "near-limit").is_ok());
}
213
/// The reserved total drops from the requested 8000 MB to 6000 MB once that actual value
/// is reported.
#[test]
fn test_guard_update_actual_reduces_reserved() {
    let reserved_now = |ledger: &VramLedger| ledger.total_reserved().expect("should succeed");

    let mut fixture = test_guard_ledger(24000);
    fixture.try_reserve(8000, "actual-test").expect("should succeed");
    assert_eq!(reserved_now(&fixture), 8000);

    fixture.update_actual(6000).expect("should succeed");
    assert_eq!(reserved_now(&fixture), 6000);
}
222
/// Both accessors reflect the values the guard was built from.
#[test]
fn test_guard_struct_fields() {
    let guard = VramGuard {
        ledger: test_guard_ledger(16000),
        budget_mb: 4000,
    };

    let (budget, uuid) = (guard.budget_mb(), guard.gpu_uuid());
    assert_eq!(budget, 4000);
    assert_eq!(uuid, "GPU-test-guard");
}
232
/// With an active 5000 MB reservation, `status()` succeeds and its text mentions both the
/// GPU UUID and the budget.
#[test]
fn test_guard_status_returns_string() {
    let mut backing = test_guard_ledger(24000);
    backing.try_reserve(5000, "status-test").expect("should succeed");
    let guard = VramGuard {
        ledger: backing,
        budget_mb: 5000,
    };

    let outcome = guard.status();
    assert!(outcome.is_ok());

    let rendered = outcome.unwrap();
    for needle in ["GPU-test-guard", "5000 MB budget"] {
        assert!(rendered.contains(needle));
    }
}
244
/// With nothing reserved, `status()` still succeeds and its text contains either "none" or
/// "Reservations".
#[test]
fn test_guard_status_empty_ledger() {
    let guard = VramGuard {
        ledger: test_guard_ledger(24000),
        budget_mb: 0,
    };

    let outcome = guard.status();
    assert!(outcome.is_ok());

    let rendered = outcome.unwrap();
    let accepted_markers = ["none", "Reservations"];
    assert!(accepted_markers.iter().any(|&marker| rendered.contains(marker)));
}
254
/// `update_actual` through the guard succeeds when the ledger already holds a reservation.
#[test]
fn test_guard_update_actual_with_active_reservation() {
    let mut backing = test_guard_ledger(24000);
    backing.try_reserve(10000, "update-actual").expect("should succeed");

    let mut guard = VramGuard {
        ledger: backing,
        budget_mb: 10000,
    };
    assert!(guard.update_actual(9500).is_ok());
}
263
/// On a 2048 MB test ledger a 1740 MB request is accepted, after which even 1 MB more is
/// refused.
#[test]
fn test_guard_small_gpu() {
    let mut tiny = test_guard_ledger(2048);

    assert!(tiny.try_reserve(1740, "small-gpu").is_ok());
    assert!(tiny.try_reserve(1, "overflow").is_err());
}
275
/// A 10000 MB test ledger (built with the 0.85 factor) reports a capacity of 8500 MB.
#[test]
fn test_guard_capacity_calculation() {
    let fixture = test_guard_ledger(10000);
    let capacity = fixture.capacity_mb();
    assert_eq!(capacity, 8500);
}
282
/// After reserving 7000 MB on a 20000 MB test ledger, 10000 MB are reported as available.
#[test]
fn test_guard_available_mb_after_reserve() {
    let mut fixture = test_guard_ledger(20000);
    fixture.try_reserve(7000, "test").expect("should succeed");

    let remaining_mb = fixture.available_mb().expect("should succeed");
    assert_eq!(remaining_mb, 10000);
}
291
/// Reserve, release, and reserve again on one ledger; the reserved total tracks each step.
#[test]
fn test_guard_multiple_sequential_reserve_release() {
    let reserved_now = |ledger: &VramLedger| ledger.total_reserved().expect("ok");

    let mut fixture = test_guard_ledger(24000);

    fixture.try_reserve(5000, "first").expect("ok");
    assert_eq!(reserved_now(&fixture), 5000);

    fixture.release().expect("ok");
    assert_eq!(reserved_now(&fixture), 0);

    fixture.try_reserve(8000, "second").expect("ok");
    assert_eq!(reserved_now(&fixture), 8000);
}
303
/// The ledger held by an unused guard reports that no profiler operations were recorded.
#[test]
fn test_guard_profiler_report_accessible() {
    let guard = VramGuard {
        ledger: test_guard_ledger(24000),
        budget_mb: 0,
    };

    let profiler_text = guard.ledger.profiler_report();
    assert!(profiler_text.contains("No operations recorded"));
}
312
/// Dropping a guard whose ledger never held a reservation must not panic.
#[test]
fn test_guard_drop_does_not_panic() {
    let unused_guard = VramGuard {
        ledger: test_guard_ledger(24000),
        budget_mb: 3000,
    };
    drop(unused_guard);
}
320
/// Dropping a guard releases its reservation: a second ledger opened on the same file
/// afterwards sees zero MB reserved.
#[test]
fn test_guard_drop_with_reservation_releases() {
    let n = TEST_COUNTER.fetch_add(1, Ordering::Relaxed);
    let scratch_dir = std::env::temp_dir().join("entrenar-guard-test");
    std::fs::create_dir_all(&scratch_dir).expect("dir creation should succeed");
    let path = scratch_dir.join(format!("guard-drop-{n}-{}.json", std::process::id()));

    // Both ledgers must point at the same backing file so the second one can observe the
    // state left behind by the first.
    let open_ledger = |file: std::path::PathBuf| {
        VramLedger::new("GPU-test-guard".into(), 24000, 0.85).with_path(file)
    };

    let mut reserving = open_ledger(path.clone());
    reserving.try_reserve(5000, "drop-reserve").expect("ok");
    drop(VramGuard {
        ledger: reserving,
        budget_mb: 5000,
    });

    let observer = open_ledger(path);
    assert_eq!(observer.total_reserved().expect("ok"), 0);
}
342}