zk_nalloc/bump.rs
//! Core bump allocator for nalloc.
//!
//! A bump allocator is one of the fastest allocation strategies: each
//! allocation simply advances a pointer. This module provides a thread-safe,
//! atomic bump allocator optimized for ZK prover workloads with fallback
//! support.

use std::alloc::{GlobalAlloc, Layout, System};
use std::ptr::NonNull;
use std::sync::atomic::{fence, AtomicBool, AtomicUsize, Ordering};

use crate::config::SECURE_WIPE_PATTERN;

/// A fast, lock-free bump allocator with fallback support.
///
/// Thread-safety is achieved via atomic compare-and-swap on the cursor.
/// This allows multiple threads to allocate concurrently without locks,
/// though there may be occasional retries on contention.
///
/// When the arena is exhausted and the `fallback` feature is enabled,
/// allocations fall back to the system allocator.
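///
/// # Example
///
/// A minimal usage sketch (marked `ignore` because the import path
/// `zk_nalloc::bump::BumpAlloc` is assumed from the file layout rather than
/// taken from the crate's public API):
///
/// ```ignore
/// use zk_nalloc::bump::BumpAlloc;
///
/// // The caller owns the backing buffer and must keep it alive for as long
/// // as the allocator (and any pointers it hands out) are in use.
/// let mut buffer = vec![0u8; 4096];
/// let arena = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };
///
/// let p = arena.alloc(64, 8);
/// assert!(!p.is_null());
/// assert_eq!(p as usize % 8, 0);
///
/// // Reset invalidates everything allocated so far.
/// unsafe { arena.reset() };
/// ```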
pub struct BumpAlloc {
    /// Base pointer of the memory region (never changes after init).
    base: NonNull<u8>,
    /// End pointer of the memory region (never changes after init).
    limit: NonNull<u8>,
    /// Current allocation cursor (atomically updated).
    cursor: AtomicUsize,
    /// Tracks whether the arena has been recycled (reset after use).
    /// Used to optimize zero-initialization in WitnessArena.
    is_recycled: AtomicBool,
    /// Counter for fallback allocations (for monitoring).
    #[cfg(feature = "fallback")]
    fallback_count: AtomicUsize,
    /// Total bytes allocated via fallback.
    #[cfg(feature = "fallback")]
    fallback_bytes: AtomicUsize,
}

impl BumpAlloc {
    /// Create a new bump allocator from a raw memory block.
    ///
    /// # Safety
    /// The memory block `[base, base+size)` must be valid and writable.
    #[inline]
    pub unsafe fn new(base: *mut u8, size: usize) -> Self {
        debug_assert!(!base.is_null());
        debug_assert!(size > 0);

        let base_nn = NonNull::new_unchecked(base);
        let limit_nn = NonNull::new_unchecked(base.add(size));

        Self {
            base: base_nn,
            limit: limit_nn,
            cursor: AtomicUsize::new(base as usize),
            is_recycled: AtomicBool::new(false),
            #[cfg(feature = "fallback")]
            fallback_count: AtomicUsize::new(0),
            #[cfg(feature = "fallback")]
            fallback_bytes: AtomicUsize::new(0),
        }
    }

    /// Get the base pointer of this allocator.
    #[inline]
    pub fn base_ptr(&self) -> *mut u8 {
        self.base.as_ptr()
    }

    /// Allocate memory with the given size and alignment.
    ///
    /// Returns a null pointer if there is not enough space and fallback is disabled.
    /// With the `fallback` feature, exhaustion falls back to the system allocator.
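    ///
    /// # Example
    ///
    /// A small illustrative sketch (marked `ignore`; the import path is
    /// assumed, see the type-level example above):
    ///
    /// ```ignore
    /// let mut buf = vec![0u8; 128];
    /// let arena = unsafe { BumpAlloc::new(buf.as_mut_ptr(), buf.len()) };
    ///
    /// // Returned pointers honor the requested power-of-two alignment.
    /// let p = arena.alloc(32, 16);
    /// assert_eq!(p as usize % 16, 0);
    ///
    /// // Zero-sized or non-power-of-two requests are rejected with null.
    /// assert!(arena.alloc(0, 8).is_null());
    /// assert!(arena.alloc(8, 3).is_null());
    /// ```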
    #[inline(always)]
    pub fn alloc(&self, size: usize, align: usize) -> *mut u8 {
        // Runtime validation (Issue #6): prevent memory corruption from invalid inputs
        if size == 0 || align == 0 || !align.is_power_of_two() {
            return std::ptr::null_mut();
        }

        loop {
            let current = self.cursor.load(Ordering::Relaxed);

            // Issue #7: Use checked arithmetic to prevent integer overflow
            let aligned = match current.checked_add(align - 1) {
                Some(v) => v & !(align - 1),
                None => return self.handle_exhaustion(size, align),
            };
            let next = match aligned.checked_add(size) {
                Some(v) => v,
                None => return self.handle_exhaustion(size, align),
            };

            if next > self.limit.as_ptr() as usize {
                // Arena exhausted
                return self.handle_exhaustion(size, align);
            }

            if self
                .cursor
                .compare_exchange_weak(current, next, Ordering::AcqRel, Ordering::Relaxed)
                .is_ok()
            {
                return aligned as *mut u8;
            }
            // Contention: another thread allocated concurrently. Retry.
        }
    }

    /// Handle arena exhaustion: either fall back to the system allocator or return null.
    #[cold]
    #[inline(never)]
    fn handle_exhaustion(&self, size: usize, align: usize) -> *mut u8 {
        #[cfg(debug_assertions)]
        {
            eprintln!(
                "[nalloc] Arena exhausted: requested {} bytes (align {}), remaining {} bytes",
                size,
                align,
                self.remaining()
            );
        }

        #[cfg(feature = "fallback")]
        {
            // Fall back to the system allocator
            let layout = match Layout::from_size_align(size, align) {
                Ok(l) => l,
                Err(_) => return std::ptr::null_mut(),
            };

            let ptr = unsafe { System.alloc(layout) };

            if !ptr.is_null() {
                self.fallback_count.fetch_add(1, Ordering::Relaxed);
                self.fallback_bytes.fetch_add(size, Ordering::Relaxed);

                #[cfg(debug_assertions)]
                eprintln!("[nalloc] Fallback allocation: {} bytes", size);
            }

            ptr
        }

        #[cfg(not(feature = "fallback"))]
        {
            std::ptr::null_mut()
        }
    }

    /// Check if this arena has been recycled (reset after initial use).
    ///
    /// Uses `Acquire` ordering so that all memory writes performed by the
    /// thread that called `reset()` (in particular the volatile zeroing in
    /// `secure_reset`) are visible to the caller before any subsequent reads
    /// from arena memory. A `Relaxed` load would break the happens-before
    /// chain with the `Release` store in `reset()`.
    #[inline]
    pub fn is_recycled(&self) -> bool {
        self.is_recycled.load(Ordering::Acquire)
    }

    /// Get the number of fallback allocations (only with `fallback` feature).
    #[cfg(feature = "fallback")]
    #[inline]
    pub fn fallback_count(&self) -> usize {
        self.fallback_count.load(Ordering::Relaxed)
    }

    /// Get the total bytes allocated via fallback (only with `fallback` feature).
    ///
    /// **Note (Issue #9)**: This tracks the *requested* allocation size, not the actual
    /// size allocated by the system allocator (which may be larger due to alignment
    /// and internal bookkeeping). Use this for monitoring, not precise accounting.
    #[cfg(feature = "fallback")]
    #[inline]
    pub fn fallback_bytes(&self) -> usize {
        self.fallback_bytes.load(Ordering::Relaxed)
    }

    /// Reset the bump pointer to the base.
    ///
    /// # Safety
    /// All previously allocated memory becomes invalid after this call.
    ///
    /// # Warning (Issue #10)
    /// **Fallback allocations are NOT freed by reset.** When arena exhaustion triggers
    /// fallback to the system allocator (with the `fallback` feature), those allocations
    /// must be individually deallocated via `GlobalAlloc::dealloc`. If using NAlloc
    /// as the global allocator, this happens automatically when the memory is dropped.
    /// However, if using arenas directly, be aware that reset only reclaims arena memory,
    /// not system allocator memory.
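    ///
    /// A hedged sketch of this caveat (marked `ignore`; the `fallback` feature
    /// and the import path are assumed): pointers served by the system
    /// allocator after exhaustion still need an explicit `System.dealloc`.
    ///
    /// ```ignore
    /// use std::alloc::{GlobalAlloc, Layout, System};
    ///
    /// let mut buf = vec![0u8; 64];
    /// let arena = unsafe { BumpAlloc::new(buf.as_mut_ptr(), buf.len()) };
    /// let _fill = arena.alloc(64, 1);  // exhaust the arena
    /// let spill = arena.alloc(32, 8);  // served by the system allocator
    ///
    /// unsafe { arena.reset() };        // reclaims arena memory only
    /// unsafe { System.dealloc(spill, Layout::from_size_align(32, 8).unwrap()) };
    /// ```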
    #[inline]
    pub unsafe fn reset(&self) {
        self.cursor
            .store(self.base.as_ptr() as usize, Ordering::SeqCst);
        self.is_recycled.store(true, Ordering::Release);

        #[cfg(feature = "fallback")]
        {
            // Reset fallback counters
            self.fallback_count.store(0, Ordering::Relaxed);
            self.fallback_bytes.store(0, Ordering::Relaxed);
        }
    }

    /// Zero out all memory in the arena and reset the cursor.
    ///
    /// This is critical for security-sensitive applications like ZK provers,
    /// where witness data must be wiped after use to prevent leakage.
    ///
    /// Uses volatile writes to prevent the compiler from optimizing away
    /// the zeroing operation (dead store elimination).
    ///
    /// # Safety
    /// All previously allocated memory becomes invalid after this call.
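    ///
    /// # Example
    ///
    /// A minimal wipe-after-use sketch (marked `ignore`; the import path is
    /// assumed, and the wipe pattern is assumed to be zero as in the tests
    /// below):
    ///
    /// ```ignore
    /// let mut buf = vec![0u8; 256];
    /// let arena = unsafe { BumpAlloc::new(buf.as_mut_ptr(), buf.len()) };
    ///
    /// let witness = arena.alloc(128, 8);
    /// unsafe { std::ptr::write_bytes(witness, 0xAB, 128) };
    ///
    /// // Wipe the whole arena and rewind the cursor in one call.
    /// unsafe { arena.secure_reset() };
    /// assert!(buf.iter().all(|&b| b == 0));
    /// ```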
    #[inline]
    pub unsafe fn secure_reset(&self) {
        let base = self.base.as_ptr();
        let size = self.limit.as_ptr() as usize - base as usize;

        // Use volatile writes to prevent dead store elimination.
        // This ensures the memory is actually zeroed even if it's never read again.
        Self::volatile_memset(base, SECURE_WIPE_PATTERN, size);

        // Issue #5: Full memory barrier for multi-threaded safety.
        // compiler_fence only prevents compiler reordering, not CPU reordering.
        // Using an atomic fence ensures other threads observe the zeroed memory
        // before seeing the reset cursor.
        fence(Ordering::SeqCst);

        self.reset();
    }

    /// Volatile memset implementation that cannot be optimized away.
    ///
    /// This is critical for cryptographic security - we need to guarantee
    /// that sensitive data is actually erased from memory.
    #[inline(never)]
    #[allow(unreachable_code)] // Platform-specific code paths return early, making fallback unreachable on some platforms
    unsafe fn volatile_memset(ptr: *mut u8, value: u8, len: usize) {
        // Method 1: Use platform-specific secure zeroing where available (for value == 0)
        #[cfg(any(target_os = "linux", target_os = "android"))]
        if value == 0 {
            // explicit_bzero is guaranteed not to be optimized away
            extern "C" {
                fn explicit_bzero(s: *mut libc::c_void, n: libc::size_t);
            }
            explicit_bzero(ptr as *mut libc::c_void, len);
            return;
        }

        #[cfg(target_vendor = "apple")]
        {
            // memset_s is guaranteed not to be optimized away (C11)
            // Note: memset_s supports non-zero values
            extern "C" {
                fn memset_s(
                    s: *mut libc::c_void,
                    smax: libc::size_t,
                    c: libc::c_int,
                    n: libc::size_t,
                ) -> libc::c_int;
            }
            let _ = memset_s(ptr as *mut libc::c_void, len, value as libc::c_int, len);
            return;
        }

        #[cfg(target_os = "windows")]
        if value == 0 {
            // RtlSecureZeroMemory is guaranteed not to be optimized away
            extern "system" {
                fn RtlSecureZeroMemory(ptr: *mut u8, len: usize);
            }
            RtlSecureZeroMemory(ptr, len);
            return;
        }

        // Issue #4: Generic volatile write loop for:
        // - Non-zero values on Linux/Android/Windows (platform APIs only handle zero)
        // - All values on other platforms
        // Using usize-sized writes for better performance
        // Word-sized volatile writes require a usize-aligned base pointer;
        // an unaligned volatile store is undefined behavior. Fall back to
        // byte-wise writes if the arena base is not suitably aligned.
        if (ptr as usize) % std::mem::size_of::<usize>() != 0 {
            for i in 0..len {
                std::ptr::write_volatile(ptr.add(i), value);
            }
            return;
        }

        let ptr_usize = ptr as *mut usize;
        let pattern_usize = if value == 0 {
            0usize
        } else {
            let mut p = 0usize;
            for i in 0..std::mem::size_of::<usize>() {
                p |= (value as usize) << (i * 8);
            }
            p
        };

        let full_words = len / std::mem::size_of::<usize>();
        let remainder = len % std::mem::size_of::<usize>();

        // Write full usize words
        for i in 0..full_words {
            std::ptr::write_volatile(ptr_usize.add(i), pattern_usize);
        }

        // Write remaining bytes
        let remainder_ptr = ptr.add(full_words * std::mem::size_of::<usize>());
        for i in 0..remainder {
            std::ptr::write_volatile(remainder_ptr.add(i), value);
        }
    }

    /// Returns the total capacity in bytes.
    #[inline]
    pub fn capacity(&self) -> usize {
        self.limit.as_ptr() as usize - self.base.as_ptr() as usize
    }

    /// Returns the number of bytes currently allocated.
    #[inline]
    pub fn used(&self) -> usize {
        self.cursor.load(Ordering::Relaxed) - self.base.as_ptr() as usize
    }

    /// Returns the number of bytes remaining.
    #[inline]
    pub fn remaining(&self) -> usize {
        self.capacity() - self.used()
    }
}

// Safety: BumpAlloc can be shared across threads because:
// - `base` and `limit` are never modified after construction
// - `cursor` uses atomic operations for thread-safe updates
// - `is_recycled` uses atomic operations
unsafe impl Send for BumpAlloc {}
unsafe impl Sync for BumpAlloc {}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_nonnull_safety() {
        let mut buffer = vec![0u8; 1024];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        assert_eq!(alloc.capacity(), 1024);
        assert_eq!(alloc.used(), 0);
        assert_eq!(alloc.remaining(), 1024);
        assert!(!alloc.is_recycled());
    }

    #[test]
    fn test_recycled_flag() {
        let mut buffer = vec![0u8; 1024];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        assert!(!alloc.is_recycled());

        let _ = alloc.alloc(64, 8);
        assert!(!alloc.is_recycled());

        unsafe { alloc.reset() };
        assert!(alloc.is_recycled());
    }

    #[test]
    fn test_secure_reset_zeroes_memory() {
        let mut buffer = vec![0xFFu8; 1024];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        // Allocate and write data
        let ptr = alloc.alloc(512, 8);
        assert!(!ptr.is_null());
        unsafe {
            std::ptr::write_bytes(ptr, 0xAB, 512);
        }

        // Secure reset
        unsafe { alloc.secure_reset() };

        // Verify memory is zeroed
        for i in 0..1024 {
            assert_eq!(buffer[i], 0, "Byte {} not zeroed", i);
        }
    }

    #[test]
    fn test_alignment() {
        let mut buffer = vec![0u8; 4096];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        // Test various alignments
        for align_pow in 0..8 {
            let align = 1usize << align_pow;
            let ptr = alloc.alloc(64, align);
            assert!(!ptr.is_null());
            assert_eq!((ptr as usize) % align, 0, "Alignment {} failed", align);
        }
    }

    #[test]
    #[cfg(feature = "fallback")]
    fn test_fallback_allocation() {
        // Create a tiny arena that will exhaust quickly
        let mut buffer = vec![0u8; 256];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        // Fill the arena
        let _ = alloc.alloc(256, 1);

        // This should trigger fallback
        let ptr = alloc.alloc(64, 8);
        assert!(!ptr.is_null(), "Fallback allocation should succeed");

        assert!(alloc.fallback_count() > 0, "Fallback count should increase");
        assert!(alloc.fallback_bytes() >= 64, "Fallback bytes should track");

        // Don't forget to free the fallback allocation
        unsafe {
            System.dealloc(ptr, Layout::from_size_align(64, 8).unwrap());
        }
    }
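
    // A hedged addition: exercises the documented lock-free CAS path by
    // allocating from several threads and checking that no two allocations
    // overlap. Thread and iteration counts here are arbitrary choices, not
    // values taken from the original test suite.
    #[test]
    fn test_concurrent_allocations_do_not_overlap() {
        use std::sync::Arc;

        const THREADS: usize = 4;
        const PER_THREAD: usize = 64;
        const ALLOC_SIZE: usize = 32;

        // A little slack so an unaligned buffer base cannot cause spurious exhaustion.
        let mut buffer = vec![0u8; THREADS * PER_THREAD * ALLOC_SIZE + 64];
        let alloc = Arc::new(unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) });

        let handles: Vec<_> = (0..THREADS)
            .map(|_| {
                let alloc = Arc::clone(&alloc);
                std::thread::spawn(move || {
                    (0..PER_THREAD)
                        .map(|_| alloc.alloc(ALLOC_SIZE, 8) as usize)
                        .collect::<Vec<_>>()
                })
            })
            .collect();

        let mut ptrs: Vec<usize> = handles
            .into_iter()
            .flat_map(|h| h.join().unwrap())
            .collect();

        // Every allocation must succeed and be disjoint from every other.
        assert!(ptrs.iter().all(|&p| p != 0));
        ptrs.sort_unstable();
        for pair in ptrs.windows(2) {
            assert!(pair[0] + ALLOC_SIZE <= pair[1], "allocations overlap");
        }
    }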

    #[test]
    #[cfg(not(feature = "fallback"))]
    fn test_exhaustion_returns_null() {
        let mut buffer = vec![0u8; 256];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        // Fill the arena
        let _ = alloc.alloc(256, 1);

        // This should return null without fallback
        let ptr = alloc.alloc(64, 8);
        assert!(
            ptr.is_null(),
            "Should return null when exhausted without fallback"
        );
    }
}
438}