mlx_native/buffer_pool.rs
1//! [`MlxBufferPool`] — arena-style GPU buffer allocator with reuse.
2//!
3//! Buffers are bucketed by power-of-two sizes. When a buffer is released back
4//! to the pool, it is added to the free list for its size bucket. A subsequent
5//! `alloc` call will reuse a free buffer of compatible (>= requested) size
6//! rather than allocating new Metal memory.
7//!
8//! Two return-path patterns are supported and **must not be mixed within a
9//! single arena cycle**:
10//!
11//! * **Per-buffer** via [`release`](MlxBufferPool::release) — explicit return
12//! of a single buffer to the free list, suitable for ad-hoc patterns where
13//! the caller knows the precise lifetime of each buffer.
14//! * **Arena bulk** via [`reset`](MlxBufferPool::reset) — bulk-return of every
15//! buffer handed out by [`alloc`](MlxBufferPool::alloc) since the previous
16//! reset. Suitable for per-inference / per-decode-token arena patterns
17//! where no individual buffer's lifetime crosses the reset boundary.
18//!
19//! Internally, every `alloc` records an ARC-cloned `metal::Buffer` handle so
20//! that `reset` can bulk-recycle without requiring callers to enumerate every
21//! buffer individually. ARC retain on `metal::Buffer` is cheap (refcount inc).
22
23use std::collections::HashMap;
24
25use crate::buffer::MlxBuffer;
26use crate::device::MlxDevice;
27use crate::dtypes::DType;
28use crate::error::{MlxError, Result};
29
30/// Arena-style buffer pool that reuses Metal buffer allocations.
31///
32/// # Design
33///
34/// * Buffers are bucketed by their allocated size rounded up to the nearest
35/// power of two. This reduces fragmentation at the cost of occasionally
36/// over-allocating by up to 2x.
37/// * `release()` returns a single buffer; `reset()` returns all outstanding
38/// buffers handed out since the last reset.
39/// * The `MlxDevice` is passed in at every [`alloc`] call (rather than stored
40/// in the pool). This keeps the pool free of lifetime parameters so it
41/// can be embedded in any owner struct (e.g. the per-decode-token
42/// `DecodeBuffers` cache in hf2q's qwen35 forward path).
43///
44/// # Why an arena reset matters
45///
46/// In the per-decode-token hot path, each token allocates ~1750 Metal buffers
47/// for scratch / intermediate / parameter storage across attention, FFN, and
48/// linear-attention layers. Direct `MlxDevice::alloc_buffer()` calls hit
49/// Metal's allocator each time (5-30 µs each); pooling reuses the underlying
50/// `metal::Buffer` objects across token boundaries so steady-state allocation
51/// cost amortizes to near zero. See ADR-012 §Optimize / Task #15 for the
52/// MoE dwq46 0.90× parity gap that motivated this work.
53pub struct MlxBufferPool {
54 /// Free buffers keyed by their power-of-two bucket size.
55 free: HashMap<usize, Vec<metal::Buffer>>,
56 /// Buffers handed out by [`alloc`] since the last [`reset`]. Each entry
57 /// holds an ARC-cloned `metal::Buffer` so the pool's reference keeps the
58 /// underlying GPU allocation alive even after the caller's `MlxBuffer`
59 /// goes out of scope. [`reset`] drains this into [`free`].
60 in_use: Vec<(usize, metal::Buffer)>,
61 /// Residency set that owns the allocations registered by this pool.
62 residency_set: Option<crate::residency::ResidencySet>,
63 /// Unique Metal buffers this pool added to the residency set, keyed by
64 /// their stable contents pointer. This avoids double-removing buffers if
65 /// callers mix release/reset despite that pattern being unsupported.
66 resident_buffers: HashMap<usize, metal::Buffer>,
67}
68
69impl Default for MlxBufferPool {
70 fn default() -> Self {
71 Self::new()
72 }
73}
74
75impl MlxBufferPool {
76 /// Create a new empty buffer pool. The Metal device is passed to
77 /// [`alloc`] at every call site, so the pool itself is lifetime-free.
78 pub fn new() -> Self {
79 Self {
80 free: HashMap::new(),
81 in_use: Vec::new(),
82 residency_set: None,
83 resident_buffers: HashMap::new(),
84 }
85 }
86
87 /// Allocate a buffer from the pool.
88 ///
89 /// If a free buffer of compatible size exists in the pool, it is reused
90 /// (with updated dtype/shape metadata). Otherwise a new Metal buffer is
91 /// allocated from `device` at the bucket size so future reuse is
92 /// possible for any request up to that bucket.
93 ///
94 /// Each successful `alloc` registers the buffer in the pool's in-use
95 /// list (ARC clone — cheap), so a subsequent [`reset`] returns it to
96 /// the free list automatically.
97 pub fn alloc(
98 &mut self,
99 device: &MlxDevice,
100 byte_len: usize,
101 dtype: DType,
102 shape: Vec<usize>,
103 ) -> Result<MlxBuffer> {
104 let (buffer, added_residency) = self.alloc_inner(device, byte_len, dtype, shape)?;
105 if added_residency {
106 if let Some(set) = self.residency_set.as_ref() {
107 set.commit();
108 }
109 }
110 Ok(buffer)
111 }
112
113 /// Allocate several buffers and commit residency-set updates once.
114 pub fn alloc_batch<I>(&mut self, device: &MlxDevice, requests: I) -> Result<Vec<MlxBuffer>>
115 where
116 I: IntoIterator<Item = (usize, DType, Vec<usize>)>,
117 {
118 let mut buffers = Vec::new();
119 let mut added_residency = false;
120
121 for (byte_len, dtype, shape) in requests {
122 let (buffer, added) = self.alloc_inner(device, byte_len, dtype, shape)?;
123 added_residency |= added;
124 buffers.push(buffer);
125 }
126
127 if added_residency {
128 if let Some(set) = self.residency_set.as_ref() {
129 set.commit();
130 }
131 }
132
133 Ok(buffers)
134 }
135
136 fn alloc_inner(
137 &mut self,
138 device: &MlxDevice,
139 byte_len: usize,
140 dtype: DType,
141 shape: Vec<usize>,
142 ) -> Result<(MlxBuffer, bool)> {
143 let bucket = bucket_size(byte_len);
144 let mut added_residency = false;
145
146 // Try to reuse a free buffer from this bucket.
147 let metal_buf = self
148 .free
149 .get_mut(&bucket)
150 .and_then(|free_list| free_list.pop());
151
152 let metal_buf = match metal_buf {
153 Some(b) => b,
154 None => {
155 // Fresh allocation at bucket size.
156 let raw = device
157 .metal_device()
158 .new_buffer(bucket as u64, metal::MTLResourceOptions::StorageModeShared);
159 if raw.contents().is_null() {
160 return Err(MlxError::BufferAllocationError { bytes: bucket });
161 }
162 // ADR-015 iter61a-2 (broken-window B-W-1 residual fix): zero-init
163 // every fresh pool allocation. The same MTLResourceOptions::
164 // StorageModeShared recycling that affects MlxDevice::alloc_buffer
165 // (closed in iter61a, src/device.rs) ALSO affects the pool's
166 // fresh-allocation path. iter61a closed device-direct allocations
167 // but the per-decode-token / per-prefill-chunk arena pool grew its
168 // free list via this `new_buffer` call without zero-init — so on
169 // the FIRST cold-process prefill (where the pool is empty and
170 // every alloc takes the fresh path), kernels reading scratch /
171 // intermediate buffers before fully populating them propagated
172 // recycled-page garbage into logits. Empirically: 5/5 cold-run
173 // first-token logit dumps on 27B-dwq46 produced 5 distinct
174 // hashes, with max abs logit diff up to 5.06 across runs and
175 // 248044/248044 logits differing — far above kernel-reduction
176 // ULP noise, consistent with structural memory contamination.
177 //
178 // Cost: one memset per fresh allocation. Reused buffers (the
179 // steady-state hot path after warm-up) skip this entirely
180 // because their bytes are valid producer outputs from prior
181 // pool cycles. Per `feedback_no_broken_windows` + mantra
182 // "No fallback. No stub. Just pure excellence." — fix at the
183 // source.
184 //
185 // Safety: `raw.contents()` is non-null (verified above), points
186 // to exactly `bucket` bytes of `StorageModeShared` memory we
187 // just allocated and have exclusive access to. The buffer is
188 // not yet wrapped in `MlxBuffer` and not yet in `in_use` /
189 // residency set, so no other thread or GPU dispatch references
190 // it. Writing zero bytes is well-defined for any DType.
191 unsafe {
192 std::ptr::write_bytes(raw.contents() as *mut u8, 0, bucket);
193 }
194 added_residency = self.register_residency_allocation(device, &raw)?;
195 raw
196 }
197 };
198
199 // Track the handout so reset() can recycle it. ARC clone is cheap.
200 self.in_use.push((bucket, metal_buf.clone()));
201
202 Ok((MlxBuffer::from_raw(metal_buf, dtype, shape), added_residency))
203 }
204
205 /// Return a single buffer to the pool's free list for future reuse.
206 ///
207 /// The Metal memory is **not** deallocated — it stays resident on the GPU
208 /// for fast reuse. `release` is the per-buffer alternative to [`reset`];
209 /// see the module docs for guidance on which to use.
210 ///
211 /// **Mixing `release` and `reset` within the same arena cycle is not
212 /// supported** — the pool's in-use list does not deduplicate, so a buffer
213 /// returned via `release` and then bulk-returned via `reset` would land in
214 /// the free list twice (each entry holds an ARC clone of the same Metal
215 /// buffer; the duplication wastes a free-list slot but is not a memory
216 /// leak — both clones drop together once popped). Pick one pattern per
217 /// arena cycle.
218 pub fn release(&mut self, buffer: MlxBuffer) {
219 let bucket = bucket_size(buffer.byte_len());
220 let metal_buf = buffer.into_inner();
221 self.free.entry(bucket).or_default().push(metal_buf);
222 }
223
224 /// Bulk-return every buffer handed out by [`alloc`] since the last reset
225 /// to the pool's free list.
226 ///
227 /// # Caller contract
228 ///
229 /// All `MlxBuffer` values returned by `alloc` since the last reset must be
230 /// out-of-scope (dropped) at the time `reset` is called. Reset transfers
231 /// the pool's ARC clones to the free list, where they become available to
232 /// subsequent [`alloc`] calls. If a caller is still holding an `MlxBuffer`
233 /// and a later `alloc` re-issues the underlying buffer, the two callers
234 /// will share GPU memory (aliasing). The Metal ARC keeps the storage
235 /// alive in either case, but writes from the new caller will be visible
236 /// to the stale caller — a correctness bug, not a memory error.
237 ///
238 /// In Rust's ownership model, locally-bound `MlxBuffer` values fall out of
239 /// scope at the end of their lexical block, making the per-decode-token
240 /// arena pattern safe by construction:
241 ///
242 /// ```ignore
243 /// loop {
244 /// pool.reset(); // start of token — recycle previous token's buffers
245 /// forward_pass(&pool); // many alloc(), no explicit release
246 /// } // forward_pass returns; locals dropped
247 /// ```
248 pub fn reset(&mut self) {
249 for (bucket, metal_buf) in self.in_use.drain(..) {
250 self.free.entry(bucket).or_default().push(metal_buf);
251 }
252 }
253
254 /// Register an externally-allocated buffer with this pool's residency set
255 /// without taking ownership.
256 ///
257 /// # Why this exists
258 ///
259 /// [`alloc`](Self::alloc) bucket-rounds requests up to the next power of
260 /// two, which is acceptable for transient per-token scratch (the worst
261 /// case is ~2× over-allocation on a few megabytes) but unacceptable for
262 /// large static weight tensors. hf2q's Qwen3.5-MoE weight set totals
263 /// ~17.26 GB; bucket-rounding would balloon that to ~25.55 GB
264 /// (+8.3 GB / +48% blowup) — unshippable on a 128 GB unified-memory
265 /// M5 Max once KV cache and intermediates are layered on top.
266 ///
267 /// `register_existing` provides a *residency-only* path: the caller
268 /// allocates the buffer at its exact size via
269 /// [`MlxDevice::alloc_buffer`](crate::MlxDevice::alloc_buffer) (or
270 /// loads it via [`GgufFile::load_tensor_into_pool`](crate::GgufFile::load_tensor_into_pool)),
271 /// retains the [`MlxBuffer`] handle, and asks the pool to add the
272 /// underlying Metal allocation to its residency set so it gets the
273 /// MTLResidencySet hint on the next dispatch.
274 ///
275 /// # Ownership semantics
276 ///
277 /// * The pool **does not** take ownership of the buffer. The caller's
278 /// `MlxBuffer` handle remains the canonical owner.
279 /// * The pool **does not** recycle this buffer on [`reset`](Self::reset)
280 /// (it is not added to `in_use`).
281 /// * The pool **does** include this buffer in its residency set so it
282 /// is hinted-resident on the next encoder dispatch.
283 /// * On pool [`Drop`], the residency-set membership is removed but the
284 /// underlying Metal buffer is **not** freed — the caller's `MlxBuffer`
285 /// handle keeps the ARC alive.
286 ///
287 /// # `HF2Q_NO_RESIDENCY=1` escape hatch
288 ///
289 /// When the environment variable `HF2Q_NO_RESIDENCY=1` is set, the
290 /// process boots its [`MlxDevice`](crate::MlxDevice) without any
291 /// residency set (see `device.rs`). In that mode this method returns
292 /// `Ok(())` without touching anything — operators who suspect a
293 /// residency-induced regression can opt out without recompiling.
294 ///
295 /// # Idempotence
296 ///
297 /// Registering the same buffer twice (identified by its
298 /// `metal::Buffer.contents()` pointer) is a no-op on the second call —
299 /// the residency set membership is tracked in a `HashMap` keyed by
300 /// contents pointer.
301 ///
302 /// # Errors
303 ///
304 /// Returns `MlxError::InvalidArgument` if the buffer was allocated on a
305 /// different `MlxDevice` than any previously registered buffer.
306 pub fn register_existing(
307 &mut self,
308 device: &MlxDevice,
309 buffer: &MlxBuffer,
310 ) -> Result<()> {
311 // ADR-015 iter8e (Phase 3b): MlxDevice::alloc_buffer now
312 // auto-registers each new buffer with the device's residency set
313 // via Arc<MlxBufferStorage>. If this caller's buffer already owns
314 // its registration, short-circuit — re-registering would double-add
315 // and the pool's Drop would issue a stray removeAllocation: against
316 // a buffer the storage's RAII path will also remove.
317 if let Some(buffer_set) = buffer.residency_set() {
318 let Some(device_set) = device.residency_set() else {
319 return Err(MlxError::InvalidArgument(
320 "MlxBuffer is registered with a residency set, but device has none".into(),
321 ));
322 };
323 if !buffer_set.same_owner(device_set) {
324 return Err(MlxError::InvalidArgument(
325 "MlxBufferPool cannot register a buffer from a different residency-enabled device"
326 .into(),
327 ));
328 }
329 // Adopt the buffer's residency set so the pool's same_owner
330 // checks downstream agree, but do NOT add the buffer — it's
331 // already in the set via its own Arc<MlxBufferStorage>.
332 match self.residency_set.as_ref() {
333 Some(pool_set) if !pool_set.same_owner(device_set) => {
334 return Err(MlxError::InvalidArgument(
335 "MlxBufferPool cannot mix residency-enabled devices".into(),
336 ));
337 }
338 Some(_) => {}
339 None => {
340 self.residency_set = Some(device_set.clone());
341 }
342 }
343 return Ok(());
344 }
345
346 let added = self.register_residency_allocation(device, buffer.metal_buffer())?;
347 if added {
348 if let Some(set) = self.residency_set.as_ref() {
349 // Batched-add path: explicit commit (counts in the
350 // commit-call counter) preserves the
351 // `commit_called_after_alloc_batch`-style semantics.
352 set.commit();
353 }
354 }
355 Ok(())
356 }
357
358 /// Return all free buffers' count (for diagnostics).
359 pub fn free_count(&self) -> usize {
360 self.free.values().map(|v| v.len()).sum()
361 }
362
363 /// Total number of bytes held in the free list.
364 pub fn free_bytes(&self) -> usize {
365 self.free
366 .iter()
367 .map(|(&bucket, bufs)| bucket * bufs.len())
368 .sum()
369 }
370
371 /// Number of buffers currently in-use (alloc'd but not yet reset).
372 pub fn in_use_count(&self) -> usize {
373 self.in_use.len()
374 }
375
376 /// Clear all free buffers, releasing Metal memory. Does not affect
377 /// in-use tracking.
378 pub fn clear(&mut self) {
379 let mut removed_any = false;
380
381 if let Some(set) = self.residency_set.as_ref() {
382 for metal_buf in self.free.values().flatten() {
383 let key = buffer_key(metal_buf);
384 if let Some(resident_buf) = self.resident_buffers.remove(&key) {
385 set.remove_allocation(&resident_buf);
386 removed_any = true;
387 }
388 }
389
390 if removed_any {
391 set.commit();
392 }
393 }
394
395 self.free.clear();
396 }
397
398 fn register_residency_allocation(
399 &mut self,
400 device: &MlxDevice,
401 buffer: &metal::Buffer,
402 ) -> Result<bool> {
403 let Some(device_set) = device.residency_set() else {
404 return Ok(false);
405 };
406
407 match self.residency_set.as_ref() {
408 Some(pool_set) if !pool_set.same_owner(device_set) => {
409 return Err(MlxError::InvalidArgument(
410 "MlxBufferPool cannot mix residency-enabled devices".into(),
411 ));
412 }
413 Some(_) => {}
414 None => {
415 self.residency_set = Some(device_set.clone());
416 }
417 }
418
419 let key = buffer_key(buffer);
420 if !self.resident_buffers.contains_key(&key) {
421 device_set.add_allocation(buffer);
422 self.resident_buffers.insert(key, buffer.clone());
423 return Ok(true);
424 }
425
426 Ok(false)
427 }
428
429 fn remove_all_residency_allocations(&mut self) {
430 let Some(set) = self.residency_set.as_ref() else {
431 return;
432 };
433
434 if self.resident_buffers.is_empty() {
435 return;
436 }
437
438 for buffer in self.resident_buffers.values() {
439 set.remove_allocation(buffer);
440 }
441 set.commit();
442 self.resident_buffers.clear();
443 }
444}
445
446impl Drop for MlxBufferPool {
447 fn drop(&mut self) {
448 self.remove_all_residency_allocations();
449 }
450}
451
452/// Round `n` up to the nearest power of two.
453///
454/// Returns 1 for n == 0 (though callers should never request 0 bytes).
455fn bucket_size(n: usize) -> usize {
456 if n <= 1 {
457 return 1;
458 }
459 n.next_power_of_two()
460}
461
462#[inline]
463fn buffer_key(buffer: &metal::Buffer) -> usize {
464 buffer.contents() as usize
465}
466
467#[cfg(test)]
468mod tests {
469 use super::*;
470
471 #[test]
472 fn test_bucket_size_powers() {
473 assert_eq!(bucket_size(0), 1);
474 assert_eq!(bucket_size(1), 1);
475 assert_eq!(bucket_size(2), 2);
476 assert_eq!(bucket_size(3), 4);
477 assert_eq!(bucket_size(4), 4);
478 assert_eq!(bucket_size(5), 8);
479 assert_eq!(bucket_size(1023), 1024);
480 assert_eq!(bucket_size(1024), 1024);
481 assert_eq!(bucket_size(1025), 2048);
482 }
483
484 #[test]
485 fn test_pool_arena_reset_recycles_in_use() {
486 // Per-decode-token arena pattern: alloc many, drop locals, reset, alloc again.
487 // Subsequent allocs must reuse the same Metal buffers (verified by ARC-cloned
488 // contents pointer).
489 let device = MlxDevice::new().expect("device");
490 let mut pool = MlxBufferPool::new();
491
492 // Cycle 1: allocate three buffers in different buckets, then drop them
493 // (locals fall out of scope at the end of the block).
494 let (ptr_a, ptr_b, ptr_c) = {
495 let buf_a = pool.alloc(&device, 1024, DType::F32, vec![256]).expect("alloc a");
496 let buf_b = pool.alloc(&device, 2048, DType::F32, vec![512]).expect("alloc b");
497 let buf_c = pool.alloc(&device, 1024, DType::F32, vec![256]).expect("alloc c");
498 (buf_a.contents_ptr(), buf_b.contents_ptr(), buf_c.contents_ptr())
499 };
500 assert_eq!(pool.in_use_count(), 3);
501 assert_eq!(pool.free_count(), 0);
502
503 // Reset returns all three to free.
504 pool.reset();
505 assert_eq!(pool.in_use_count(), 0);
506 assert_eq!(pool.free_count(), 3);
507
508 // Cycle 2: allocate compatible-bucket buffers, must reuse the same
509 // underlying Metal buffers (contents_ptr equal).
510 let buf_d = pool.alloc(&device, 1024, DType::F32, vec![256]).expect("alloc d");
511 let buf_e = pool.alloc(&device, 2048, DType::F32, vec![512]).expect("alloc e");
512 let ptr_d = buf_d.contents_ptr();
513 let ptr_e = buf_e.contents_ptr();
514
515 // Pointers must come from {a, b, c} — bucket 1024 reuse for d (matches a or c),
516 // bucket 2048 reuse for e (matches b).
517 assert!(
518 ptr_d == ptr_a || ptr_d == ptr_c,
519 "buf_d {:?} must reuse one of a {:?} / c {:?}",
520 ptr_d, ptr_a, ptr_c,
521 );
522 assert_eq!(ptr_e, ptr_b, "buf_e must reuse b (only 2048-bucket buffer)");
523
524 // After cycle-2 alloc, free has 1 (the unused 1024-bucket buffer) + in_use 2.
525 assert_eq!(pool.in_use_count(), 2);
526 assert_eq!(pool.free_count(), 1);
527 }
528
529 #[test]
530 fn test_pool_reset_with_no_alloc_is_idempotent() {
531 // Empty reset must be a no-op. No MlxDevice required — pool
532 // operations on an empty pool don't touch the device; the
533 // smoke check used to live here was incidental and triggered
534 // the unused-variable warning since `device` was bound but
535 // never consumed.
536 let mut pool = MlxBufferPool::new();
537 pool.reset();
538 assert_eq!(pool.in_use_count(), 0);
539 assert_eq!(pool.free_count(), 0);
540 // Multiple resets without intervening alloc — still no-op.
541 pool.reset();
542 pool.reset();
543 assert_eq!(pool.in_use_count(), 0);
544 }
545
546 #[test]
547 fn test_register_existing_does_not_recycle_on_reset() {
548 // Externally-allocated buffer registered via register_existing must
549 // NOT be added to the in_use list — reset() should leave the caller's
550 // ownership intact and the buffer must remain valid after the pool
551 // is dropped.
552 let device = MlxDevice::new().expect("device");
553 let mut pool = MlxBufferPool::new();
554
555 // Allocate the buffer EXTERNALLY (via device.alloc_buffer, not
556 // pool.alloc) — this is the no-bucket-rounding path hf2q uses for
557 // static weight tensors.
558 let external = device
559 .alloc_buffer(4096, DType::U8, vec![4096])
560 .expect("alloc external");
561 let external_ptr = external.contents_ptr();
562
563 // Register with the pool's residency set.
564 pool.register_existing(&device, &external)
565 .expect("register_existing");
566
567 // in_use must remain empty (external buffer is not arena-recycled).
568 assert_eq!(pool.in_use_count(), 0);
569
570 // reset() must be a no-op for externally-registered buffers.
571 pool.reset();
572 assert_eq!(pool.in_use_count(), 0);
573 assert_eq!(pool.free_count(), 0);
574
575 // Drop the pool. The external MlxBuffer must still be valid — its
576 // metal::Buffer ARC is held by `external`, not by the pool.
577 drop(pool);
578 assert_eq!(external.contents_ptr(), external_ptr);
579 // Confirm the buffer is still accessible (no UAF).
580 let slice: &[u8] = external.as_slice().expect("slice still valid");
581 assert_eq!(slice.len(), 4096);
582 }
583
584 #[test]
585 fn test_register_existing_idempotent() {
586 // Registering the same buffer twice must not duplicate the residency
587 // membership (resident_buffers HashMap is keyed by contents pointer).
588 let device = MlxDevice::new().expect("device");
589 let mut pool = MlxBufferPool::new();
590
591 let external = device
592 .alloc_buffer(2048, DType::U8, vec![2048])
593 .expect("alloc external");
594
595 pool.register_existing(&device, &external)
596 .expect("register 1");
597 pool.register_existing(&device, &external)
598 .expect("register 2 (idempotent)");
599
600 // Drop the pool (Drop::drop runs remove_all_residency_allocations).
601 // No double-remove panic is the actual assertion here.
602 drop(pool);
603 // Buffer still valid.
604 let _slice: &[u8] = external.as_slice().expect("still valid");
605 }
606
607 #[test]
608 fn test_register_existing_no_residency_env_is_noop() {
609 // With HF2Q_NO_RESIDENCY=1 the device boots without a residency set,
610 // so register_existing has no set to register against and must
611 // return Ok(()) as a no-op without touching anything.
612 //
613 // This test runs serially with other residency-env tests via the
614 // shared TEST_LOCK in tests/test_residency_set.rs — but unit tests
615 // here run in the same process and could race with that integration
616 // test if both are running. We mitigate by:
617 // 1. Reading + restoring the original env value.
618 // 2. Resetting the residency env-cache flag before AND after.
619 //
620 // The unit-test name is uniquely keyed; cargo test by default
621 // single-threads tests within the same binary only when --test-threads=1
622 // is set. We accept that this test could flake under -j > 1 with
623 // the integration tests; in practice cargo test schedules unit and
624 // integration test binaries separately.
625 let prev = std::env::var("HF2Q_NO_RESIDENCY").ok();
626 crate::residency::reset_residency_env_cache_for_test();
627 std::env::set_var("HF2Q_NO_RESIDENCY", "1");
628
629 let device = MlxDevice::new().expect("device");
630 assert!(
631 !device.residency_sets_enabled(),
632 "device should boot without residency under HF2Q_NO_RESIDENCY=1",
633 );
634
635 let mut pool = MlxBufferPool::new();
636 let external = device
637 .alloc_buffer(1024, DType::U8, vec![1024])
638 .expect("alloc external");
639
640 // register_existing must succeed as a no-op.
641 pool.register_existing(&device, &external)
642 .expect("register_existing under HF2Q_NO_RESIDENCY=1 should succeed");
643
644 // Pool's internal residency_set must remain None.
645 assert!(pool.residency_set.is_none());
646 assert!(pool.resident_buffers.is_empty());
647
648 // Cleanup env.
649 match prev {
650 Some(v) => std::env::set_var("HF2Q_NO_RESIDENCY", v),
651 None => std::env::remove_var("HF2Q_NO_RESIDENCY"),
652 }
653 crate::residency::reset_residency_env_cache_for_test();
654 }
655
656 #[test]
657 fn test_pool_release_remains_supported_for_compat() {
658 // The existing per-buffer release() pattern still works. Mixing
659 // release+reset within the same arena cycle is documented as
660 // unsupported but technically lands a duplicate clone in free —
661 // verify the duplicate is harmless (alloc still picks up a buffer).
662 let device = MlxDevice::new().expect("device");
663 let mut pool = MlxBufferPool::new();
664
665 let buf = pool.alloc(&device, 1024, DType::F32, vec![256]).expect("alloc");
666 assert_eq!(pool.in_use_count(), 1);
667 pool.release(buf);
668 // release() does NOT remove from in_use; that's acceptable per the
669 // documented contract (don't mix patterns). Free has the released one.
670 assert_eq!(pool.free_count(), 1);
671 assert_eq!(pool.in_use_count(), 1);
672
673 // Allocating again pulls from free first.
674 let _buf2 = pool.alloc(&device, 1024, DType::F32, vec![256]).expect("alloc 2");
675 assert_eq!(pool.free_count(), 0);
676 assert_eq!(pool.in_use_count(), 2);
677 }
678}