zk-nalloc 0.2.6

High-performance, deterministic memory allocator optimized for Zero-Knowledge Proof (ZKP) systems and cryptographic provers.
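
A minimal usage sketch, assuming the crate re-exports BumpAlloc at the top level (the import path is an assumption; adjust it to the crate's actual public API). The calls themselves mirror the bump-allocator source below:

    // Hedged sketch: the import path is assumed; BumpAlloc is defined in the
    // bump-allocator module shown below.
    use zk_nalloc::BumpAlloc;

    fn main() {
        // Back the arena with any writable memory region; a heap buffer is
        // used here for simplicity.
        let mut backing = vec![0u8; 1 << 20];
        let arena = unsafe { BumpAlloc::new(backing.as_mut_ptr(), backing.len()) };

        // Allocations are just a pointer bump: 256 bytes, 32-byte alignment.
        let witness = arena.alloc(256, 32);
        assert!(!witness.is_null());

        // ... write witness data, run the prover ...

        // Wipe and recycle the arena so sensitive data does not linger.
        unsafe { arena.secure_reset() };
    }
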
//! Core bump allocator for nalloc.
//!
//! A bump allocator is about as fast as an allocator can get: it simply
//! increments a pointer. This module provides a thread-safe, atomic bump
//! allocator optimized for ZK prover workloads, with optional fallback to the
//! system allocator when the arena is exhausted.
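//!
//! As a concrete illustration of the cursor arithmetic in `alloc`: starting
//! from an 8-byte-aligned base, allocating 5 bytes with align 1 moves the
//! cursor to `base + 5`; a following allocation of 3 bytes with align 8 first
//! rounds the cursor up to `base + 8` and then advances it to `base + 11`.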

#[cfg(feature = "fallback")]
use std::alloc::{GlobalAlloc, Layout, System};
use std::ptr::NonNull;
use std::sync::atomic::{fence, AtomicBool, AtomicUsize, Ordering};

use crate::config::SECURE_WIPE_PATTERN;

/// A fast, lock-free bump allocator with fallback support.
///
/// Thread-safety is achieved via atomic compare-and-swap on the cursor.
/// This allows multiple threads to allocate concurrently without locks,
/// though there may be occasional retries on contention.
///
/// When the arena is exhausted and the `fallback` feature is enabled,
/// allocations fall back to the system allocator.
pub struct BumpAlloc {
    /// Base pointer of the memory region (never changes after init).
    base: NonNull<u8>,
    /// End pointer of the memory region (never changes after init).
    limit: NonNull<u8>,
    /// Current allocation cursor (atomically updated).
    cursor: AtomicUsize,
    /// Tracks whether the arena has been recycled (reset after use).
    /// Used to optimize zero-initialization in WitnessArena.
    is_recycled: AtomicBool,
    /// Counter for fallback allocations (for monitoring).
    #[cfg(feature = "fallback")]
    fallback_count: AtomicUsize,
    /// Total bytes allocated via fallback.
    #[cfg(feature = "fallback")]
    fallback_bytes: AtomicUsize,
}

impl BumpAlloc {
    /// Create a new bump allocator from a raw memory block.
    ///
    /// # Safety
    /// The memory block `[base, base+size)` must be valid and writable.
    #[inline]
    pub unsafe fn new(base: *mut u8, size: usize) -> Self {
        debug_assert!(!base.is_null());
        debug_assert!(size > 0);

        let base_nn = NonNull::new_unchecked(base);
        let limit_nn = NonNull::new_unchecked(base.add(size));

        Self {
            base: base_nn,
            limit: limit_nn,
            cursor: AtomicUsize::new(base as usize),
            is_recycled: AtomicBool::new(false),
            #[cfg(feature = "fallback")]
            fallback_count: AtomicUsize::new(0),
            #[cfg(feature = "fallback")]
            fallback_bytes: AtomicUsize::new(0),
        }
    }

    /// Get the base pointer of this allocator.
    #[inline]
    pub fn base_ptr(&self) -> *mut u8 {
        self.base.as_ptr()
    }

    /// Allocate memory with the given size and alignment.
    ///
    /// Returns a null pointer if there is not enough space and fallback is disabled.
    /// With the `fallback` feature enabled, exhausted requests fall back to the
    /// system allocator instead.
    #[inline(always)]
    pub fn alloc(&self, size: usize, align: usize) -> *mut u8 {
        // Runtime validation (Issue #6): prevent memory corruption from invalid inputs
        if size == 0 || align == 0 || !align.is_power_of_two() {
            return std::ptr::null_mut();
        }

        loop {
            let current = self.cursor.load(Ordering::Relaxed);

            // Issue #7: Use checked arithmetic to prevent integer overflow
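            // Round the cursor up to the next multiple of `align`: adding
            // `align - 1` and clearing the low bits is the standard
            // power-of-two align-up trick (valid because `align` was
            // validated above).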
            let aligned = match current.checked_add(align - 1) {
                Some(v) => v & !(align - 1),
                None => return self.handle_exhaustion(size, align),
            };
            let next = match aligned.checked_add(size) {
                Some(v) => v,
                None => return self.handle_exhaustion(size, align),
            };

            if next > self.limit.as_ptr() as usize {
                // Arena exhausted
                return self.handle_exhaustion(size, align);
            }

            if self
                .cursor
                .compare_exchange_weak(current, next, Ordering::AcqRel, Ordering::Relaxed)
                .is_ok()
            {
                return aligned as *mut u8;
            }
            // Contention: another thread allocated concurrently. Retry.
        }
    }

    /// Handle arena exhaustion: either fall back to the system allocator or return null.
    #[cold]
    #[inline(never)]
    fn handle_exhaustion(&self, size: usize, align: usize) -> *mut u8 {
        #[cfg(debug_assertions)]
        {
            eprintln!(
                "[nalloc] Arena exhausted: requested {} bytes (align {}), remaining {} bytes",
                size,
                align,
                self.remaining()
            );
        }

        #[cfg(feature = "fallback")]
        {
            // Fall back to system allocator
            let layout = match Layout::from_size_align(size, align) {
                Ok(l) => l,
                Err(_) => return std::ptr::null_mut(),
            };

            let ptr = unsafe { System.alloc(layout) };

            if !ptr.is_null() {
                self.fallback_count.fetch_add(1, Ordering::Relaxed);
                self.fallback_bytes.fetch_add(size, Ordering::Relaxed);

                #[cfg(debug_assertions)]
                eprintln!("[nalloc] Fallback allocation: {} bytes", size);
            }

            ptr
        }

        #[cfg(not(feature = "fallback"))]
        {
            std::ptr::null_mut()
        }
    }

    /// Check if this arena has been recycled (reset after initial use).
    ///
    /// Uses `Acquire` ordering so that all memory writes performed by the
    /// thread that called `reset()` (in particular the volatile zeroing in
    /// `secure_reset`) are visible to the caller before any subsequent reads
    /// from arena memory.  A `Relaxed` load would break the happens-before
    /// chain with the `Release` store in `reset()`.
    #[inline]
    pub fn is_recycled(&self) -> bool {
        self.is_recycled.load(Ordering::Acquire)
    }

    /// Get the number of fallback allocations (only with `fallback` feature).
    #[cfg(feature = "fallback")]
    #[inline]
    pub fn fallback_count(&self) -> usize {
        self.fallback_count.load(Ordering::Relaxed)
    }

    /// Get the total bytes allocated via fallback (only with `fallback` feature).
    ///
    /// **Note (Issue #9)**: This tracks the *requested* allocation size, not the actual
    /// size allocated by the system allocator (which may be larger due to alignment
    /// and internal bookkeeping). Use this for monitoring, not precise accounting.
    #[cfg(feature = "fallback")]
    #[inline]
    pub fn fallback_bytes(&self) -> usize {
        self.fallback_bytes.load(Ordering::Relaxed)
    }

    /// Reset the bump pointer to the base.
    ///
    /// # Safety
    /// All previously allocated memory becomes invalid after this call.
    ///
    /// # Warning (Issue #10)
    /// **Fallback allocations are NOT freed by reset.** When arena exhaustion triggers
    /// fallback to the system allocator (with `fallback` feature), those allocations
    /// must be individually deallocated via `GlobalAlloc::dealloc`. If using NAlloc
    /// as the global allocator, this happens automatically when the memory is dropped.
    /// However, if using arenas directly, be aware that reset only reclaims arena memory,
    /// not system allocator memory.
    #[inline]
    pub unsafe fn reset(&self) {
        self.cursor
            .store(self.base.as_ptr() as usize, Ordering::SeqCst);
        self.is_recycled.store(true, Ordering::Release);

        #[cfg(feature = "fallback")]
        {
            // Reset fallback counters
            self.fallback_count.store(0, Ordering::Relaxed);
            self.fallback_bytes.store(0, Ordering::Relaxed);
        }
    }

    /// Zero out all memory in the arena and reset the cursor.
    ///
    /// This is critical for security-sensitive applications like ZK provers,
    /// where witness data must be wiped after use to prevent leakage.
    ///
    /// Uses volatile writes to prevent the compiler from optimizing away
    /// the zeroing operation (dead store elimination).
    ///
    /// # Safety
    /// All previously allocated memory becomes invalid after this call.
    #[inline]
    pub unsafe fn secure_reset(&self) {
        let base = self.base.as_ptr();
        let size = self.limit.as_ptr() as usize - base as usize;

        // Use volatile writes to prevent dead store elimination.
        // This ensures the memory is actually zeroed even if it's never read again.
        Self::volatile_memset(base, SECURE_WIPE_PATTERN, size);

        // Issue #5: Full memory barrier for multi-threaded safety.
        // compiler_fence only prevents compiler reordering, not CPU reordering.
        // Using atomic fence ensures other threads observe the zeroed memory
        // before seeing the reset cursor.
        fence(Ordering::SeqCst);

        self.reset();
    }

    /// Volatile memset implementation that cannot be optimized away.
    ///
    /// This is critical for cryptographic security - we need to guarantee
    /// that sensitive data is actually erased from memory.
    #[inline(never)]
    #[allow(unreachable_code)] // Platform-specific code paths return early, making fallback unreachable on some platforms
    unsafe fn volatile_memset(ptr: *mut u8, value: u8, len: usize) {
        // Method 1: Use platform-specific secure zeroing where available (for value == 0)
        #[cfg(any(target_os = "linux", target_os = "android"))]
        if value == 0 {
            // explicit_bzero is guaranteed not to be optimized away
            extern "C" {
                fn explicit_bzero(s: *mut libc::c_void, n: libc::size_t);
            }
            explicit_bzero(ptr as *mut libc::c_void, len);
            return;
        }

        #[cfg(target_vendor = "apple")]
        {
            // memset_s is guaranteed not to be optimized away (C11)
            // Note: memset_s supports non-zero values
            extern "C" {
                fn memset_s(
                    s: *mut libc::c_void,
                    smax: libc::size_t,
                    c: libc::c_int,
                    n: libc::size_t,
                ) -> libc::c_int;
            }
            let _ = memset_s(ptr as *mut libc::c_void, len, value as libc::c_int, len);
            return;
        }

        #[cfg(target_os = "windows")]
        if value == 0 {
            // RtlSecureZeroMemory is guaranteed not to be optimized away
            extern "system" {
                fn RtlSecureZeroMemory(ptr: *mut u8, len: usize);
            }
            RtlSecureZeroMemory(ptr, len);
            return;
        }

        // Issue #4: Generic volatile write loop for:
        // - Non-zero values on Linux/Android/Windows (platform APIs only handle zero)
        // - All values on other platforms
        // Write single bytes until the pointer is usize-aligned (an unaligned
        // word-sized write_volatile would be undefined behavior), then use
        // usize-sized writes for better performance.
        let word = std::mem::size_of::<usize>();
        let mut head = 0usize;
        while head < len && (ptr.add(head) as usize) % word != 0 {
            std::ptr::write_volatile(ptr.add(head), value);
            head += 1;
        }

        // Broadcast the fill byte into a word-sized pattern, e.g. 0xAB -> 0xABAB..AB.
        let mut pattern_usize = 0usize;
        for i in 0..word {
            pattern_usize |= (value as usize) << (i * 8);
        }

        // Write full usize words.
        let full_words = (len - head) / word;
        let word_ptr = ptr.add(head) as *mut usize;
        for i in 0..full_words {
            std::ptr::write_volatile(word_ptr.add(i), pattern_usize);
        }

        // Write any remaining tail bytes.
        for i in (head + full_words * word)..len {
            std::ptr::write_volatile(ptr.add(i), value);
        }
    }

    /// Returns the total capacity in bytes.
    #[inline]
    pub fn capacity(&self) -> usize {
        self.limit.as_ptr() as usize - self.base.as_ptr() as usize
    }

    /// Returns the number of bytes currently allocated.
    #[inline]
    pub fn used(&self) -> usize {
        self.cursor.load(Ordering::Relaxed) - self.base.as_ptr() as usize
    }

    /// Returns the number of bytes remaining.
    #[inline]
    pub fn remaining(&self) -> usize {
        self.capacity() - self.used()
    }
}

// Safety: BumpAlloc can be shared across threads because:
// - `base` and `limit` are never modified after construction
// - `cursor` uses atomic operations for thread-safe updates
// - `is_recycled` uses atomic operations
unsafe impl Send for BumpAlloc {}
unsafe impl Sync for BumpAlloc {}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_nonnull_safety() {
        let mut buffer = vec![0u8; 1024];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        assert_eq!(alloc.capacity(), 1024);
        assert_eq!(alloc.used(), 0);
        assert_eq!(alloc.remaining(), 1024);
        assert!(!alloc.is_recycled());
    }

    #[test]
    fn test_recycled_flag() {
        let mut buffer = vec![0u8; 1024];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        assert!(!alloc.is_recycled());

        let _ = alloc.alloc(64, 8);
        assert!(!alloc.is_recycled());

        unsafe { alloc.reset() };
        assert!(alloc.is_recycled());
    }

    #[test]
    fn test_secure_reset_zeroes_memory() {
        let mut buffer = vec![0xFFu8; 1024];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        // Allocate and write data
        let ptr = alloc.alloc(512, 8);
        assert!(!ptr.is_null());
        unsafe {
            std::ptr::write_bytes(ptr, 0xAB, 512);
        }

        // Secure reset
        unsafe { alloc.secure_reset() };

        // Verify memory is zeroed
        for i in 0..1024 {
            assert_eq!(buffer[i], 0, "Byte {} not zeroed", i);
        }
    }

    #[test]
    fn test_alignment() {
        let mut buffer = vec![0u8; 4096];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        // Test various alignments
        for align_pow in 0..8 {
            let align = 1usize << align_pow;
            let ptr = alloc.alloc(64, align);
            assert!(!ptr.is_null());
            assert_eq!((ptr as usize) % align, 0, "Alignment {} failed", align);
        }
    }
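
    // Concurrency sketch: exercises the lock-free compare-and-swap path by
    // allocating from several threads at once and checking that the returned
    // blocks are aligned and do not overlap.
    #[test]
    fn test_concurrent_alloc_non_overlapping() {
        let mut buffer = vec![0u8; 4096];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        // Cast pointers to usize inside the threads so the results are Send.
        let ptrs: Vec<usize> = std::thread::scope(|s| {
            let handles: Vec<_> = (0..4)
                .map(|_| s.spawn(|| alloc.alloc(64, 8) as usize))
                .collect();
            handles.into_iter().map(|h| h.join().unwrap()).collect()
        });

        let mut sorted = ptrs.clone();
        sorted.sort_unstable();
        for &p in &sorted {
            assert_ne!(p, 0, "Allocation should not fail inside the arena");
            assert_eq!(p % 8, 0, "Allocation should be 8-byte aligned");
        }
        // 64-byte blocks from the same arena must not overlap.
        for pair in sorted.windows(2) {
            assert!(pair[1] - pair[0] >= 64, "Allocations overlap");
        }
    }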

    #[test]
    #[cfg(feature = "fallback")]
    fn test_fallback_allocation() {
        // Create a tiny arena that will exhaust quickly
        let mut buffer = vec![0u8; 256];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        // Fill the arena
        let _ = alloc.alloc(256, 1);

        // This should trigger fallback
        let ptr = alloc.alloc(64, 8);
        assert!(!ptr.is_null(), "Fallback allocation should succeed");

        assert!(alloc.fallback_count() > 0, "Fallback count should increase");
        assert!(alloc.fallback_bytes() >= 64, "Fallback bytes should track");

        // Don't forget to free the fallback allocation
        unsafe {
            System.dealloc(ptr, Layout::from_size_align(64, 8).unwrap());
        }
    }

    #[test]
    #[cfg(not(feature = "fallback"))]
    fn test_exhaustion_returns_null() {
        let mut buffer = vec![0u8; 256];
        let alloc = unsafe { BumpAlloc::new(buffer.as_mut_ptr(), buffer.len()) };

        // Fill the arena
        let _ = alloc.alloc(256, 1);

        // This should return null without fallback
        let ptr = alloc.alloc(64, 8);
        assert!(
            ptr.is_null(),
            "Should return null when exhausted without fallback"
        );
    }
}