zk-nalloc 0.2.6

High-performance, deterministic memory allocator optimized for Zero-Knowledge Proof (ZKP) systems and cryptographic provers.
//! nalloc: A ZK-Proof optimized memory allocator.
//!
//! This crate provides a high-performance, deterministic memory allocator
//! specifically designed for Zero-Knowledge proof systems. It is framework-agnostic
//! and works with any ZK system: Halo2, Plonky2, Risc0, SP1, Miden, Cairo, Arkworks, etc.
//!
//! # Features
//!
//! - **Arena-based allocation**: Pre-reserved memory pools for different workload types
//! - **Bump allocation**: O(1) allocation via atomic pointer increment (see the sketch after this list)
//! - **Security-first**: Volatile secure wiping for witness data
//! - **Cache-optimized**: 64-byte alignment for FFT/NTT SIMD operations
//! - **Cross-platform**: Linux, macOS, Windows, and Unix support
//! - **Zero ZK dependencies**: Pure memory primitive, no framework lock-in
//! - **Fallback support**: Gracefully falls back to system allocator when arena exhausted
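//!
//! A minimal, illustrative sketch of the bump technique (assumed shape only;
//! the real allocator lives in the `bump` module and differs in detail):
//!
//! ```rust
//! use std::sync::atomic::{AtomicUsize, Ordering};
//!
//! // Allocation is one aligned compare-and-swap over a pre-reserved region.
//! struct Bump { base: usize, cap: usize, cursor: AtomicUsize }
//!
//! impl Bump {
//!     /// `align` must be a power of two.
//!     fn alloc(&self, size: usize, align: usize) -> Option<usize> {
//!         loop {
//!             let cur = self.cursor.load(Ordering::Relaxed);
//!             // Round the current position up to the requested alignment.
//!             let start = (self.base + cur + align - 1) & !(align - 1);
//!             let end = start.checked_add(size)?;
//!             if end > self.base + self.cap {
//!                 return None; // arena exhausted
//!             }
//!             if self
//!                 .cursor
//!                 .compare_exchange_weak(cur, end - self.base, Ordering::AcqRel, Ordering::Relaxed)
//!                 .is_ok()
//!             {
//!                 return Some(start);
//!             }
//!         }
//!     }
//! }
//! ```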
//!
//! # Cargo Features
//!
//! - `fallback` (default): Fall back to system allocator when arena is exhausted
//! - `huge-pages`: Enable Linux 2MB/1GB huge page support
//! - `guard-pages`: Add guard pages at arena boundaries for overflow detection
//! - `mlock`: Lock witness memory to prevent swapping (security)
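//!
//! Enabling optional features in `Cargo.toml` (a sketch; pin the version you
//! actually depend on):
//!
//! ```toml
//! [dependencies]
//! zk-nalloc = { version = "0.2.6", features = ["huge-pages", "mlock"] }
//! ```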
//!
//! # Usage
//!
//! As a global allocator:
//! ```rust,no_run
//! use zk_nalloc::NAlloc;
//!
//! #[global_allocator]
//! static ALLOC: NAlloc = NAlloc::new();
//!
//! fn main() {
//!     let data = vec![0u64; 1000];
//!     println!("Allocated {} elements", data.len());
//! }
//! ```
//!
//! Using specialized arenas directly:
//! ```rust
//! use zk_nalloc::NAlloc;
//!
//! let alloc = NAlloc::new();
//! let witness = alloc.witness();
//! let ptr = witness.alloc(1024, 8);
//! assert!(!ptr.is_null());
//!
//! // Securely wipe when done
//! unsafe { witness.secure_wipe(); }
//! ```

pub mod arena;
pub mod bump;
pub mod config;
pub mod platform;
pub mod polynomial;
pub mod witness;

pub use arena::{ArenaManager, ArenaStats};
pub use bump::BumpAlloc;
pub use config::*;
pub use platform::sys;
#[cfg(feature = "guard-pages")]
pub use platform::GuardedAlloc;
#[cfg(feature = "huge-pages")]
pub use platform::HugePageSize;
pub use platform::{AllocErrorKind, AllocFailed};
pub use polynomial::PolynomialArena;
pub use witness::WitnessArena;

use std::alloc::{GlobalAlloc, Layout, System};
use std::ptr::{copy_nonoverlapping, null_mut};
use std::sync::atomic::{AtomicPtr, AtomicU8, Ordering};

/// Initialization state for NAlloc.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
enum InitState {
    /// Not yet initialized
    Uninitialized = 0,
    /// Currently being initialized by another thread
    Initializing = 1,
    /// Successfully initialized with arenas
    Initialized = 2,
    /// Failed to initialize, using system allocator fallback
    Fallback = 3,
}

/// The global ZK-optimized allocator.
///
/// `NAlloc` provides a drop-in replacement for the standard Rust global allocator,
/// with special optimizations for ZK-Proof workloads.
///
/// # Memory Strategy
///
/// - **Large allocations (>1MB)**: Routed to Polynomial Arena (FFT vectors)
/// - **Small allocations**: Routed to Scratch Arena (temporary buffers)
/// - **Witness data**: Use `NAlloc::witness()` for security-critical allocations
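///
/// A sketch of the routing in action, mirroring this crate's own tests:
///
/// ```rust
/// use std::alloc::{GlobalAlloc, Layout};
/// use zk_nalloc::NAlloc;
///
/// let alloc = NAlloc::new();
/// // >1MB is routed to the polynomial arena.
/// let large = Layout::from_size_align(2 * 1024 * 1024, 64).unwrap();
/// let ptr = unsafe { alloc.alloc(large) };
/// assert!(!ptr.is_null());
/// assert!(alloc.stats().unwrap().polynomial_used >= 2 * 1024 * 1024);
/// ```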
///
/// # Thread Safety
///
/// This allocator uses lock-free atomic operations for initialization and
/// allocation. It's safe to use from multiple threads concurrently.
///
/// # Fallback Behavior
///
/// If arena initialization fails (e.g., out of memory), NAlloc gracefully
/// falls back to the system allocator rather than panicking. This ensures
/// your application continues to function even under memory pressure.
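///
/// Checking for fallback at runtime (mirroring this crate's own tests):
///
/// ```rust
/// use zk_nalloc::NAlloc;
///
/// let alloc = NAlloc::new();
/// let _ = alloc.stats(); // forces initialization
/// if alloc.is_fallback_mode() {
///     eprintln!("arenas unavailable; running on the system allocator");
/// }
/// ```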
///
/// # Security: `static` Usage and Witness Wipe
///
/// When used as a `#[global_allocator]` static, **Rust does not run `Drop`
/// for statics**.  The `impl Drop for NAlloc` therefore only fires for
/// non-static instances (e.g. `NAlloc::try_new()` in tests or scoped provers).
///
/// **For the `static` use-case you must wipe witness memory manually before
/// the prover exits:**
///
/// ```rust,no_run
/// use zk_nalloc::NAlloc;
///
/// #[global_allocator]
/// static ALLOC: NAlloc = NAlloc::new();
///
/// fn shutdown() {
///     // Must be called explicitly — Drop will NOT run for a static.
///     unsafe { ALLOC.witness().secure_wipe(); }
/// }
/// ```
///
/// Failure to do so leaves witness data in RAM until the OS reclaims the
/// pages, which may be observable by other processes on the same host.
#[must_use]
pub struct NAlloc {
    /// Pointer to the ArenaManager (null until initialized)
    arenas: AtomicPtr<ArenaManager>,
    /// Initialization state
    init_state: AtomicU8,
}

impl NAlloc {
    /// Create a new `NAlloc` instance.
    ///
    /// The arenas are lazily initialized on the first allocation.
    pub const fn new() -> Self {
        Self {
            arenas: AtomicPtr::new(null_mut()),
            init_state: AtomicU8::new(InitState::Uninitialized as u8),
        }
    }

    /// Try to create NAlloc and initialize arenas immediately.
    ///
    /// Returns an error if arena allocation fails, allowing the caller
    /// to handle the failure gracefully.
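    ///
    /// # Example
    ///
    /// ```rust
    /// use zk_nalloc::NAlloc;
    ///
    /// match NAlloc::try_new() {
    ///     Ok(alloc) => assert!(alloc.is_initialized()),
    ///     Err(e) => eprintln!("arena init failed: {e}"),
    /// }
    /// ```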
    pub fn try_new() -> Result<Self, AllocFailed> {
        let nalloc = Self::new();
        nalloc.try_init()?;
        Ok(nalloc)
    }

    /// Try to initialize arenas.
    ///
    /// Returns Ok if initialization succeeds or was already done.
    /// Returns Err if initialization fails.
    fn try_init(&self) -> Result<(), AllocFailed> {
        let state = self.init_state.load(Ordering::Acquire);

        match state {
            s if s == InitState::Initialized as u8 => Ok(()),
            s if s == InitState::Fallback as u8 => {
                Err(AllocFailed::with_kind(0, AllocErrorKind::OutOfMemory))
            }
            _ => {
                let ptr = self.init_arenas();
                if ptr.is_null() {
                    Err(AllocFailed::with_kind(0, AllocErrorKind::OutOfMemory))
                } else {
                    Ok(())
                }
            }
        }
    }

    /// Initialize the arenas if not already done.
    ///
    /// This uses a spin-lock pattern with atomic state to prevent
    /// recursive allocation issues and handle initialization failures gracefully.
    #[cold]
    #[inline(never)]
    fn init_arenas(&self) -> *mut ArenaManager {
        // Fast path: already initialized
        let state = self.init_state.load(Ordering::Acquire);
        if state == InitState::Initialized as u8 {
            return self.arenas.load(Ordering::Acquire);
        }
        if state == InitState::Fallback as u8 {
            return null_mut();
        }

        // Try to acquire initialization lock
        if self
            .init_state
            .compare_exchange(
                InitState::Uninitialized as u8,
                InitState::Initializing as u8,
                Ordering::AcqRel,
                Ordering::Relaxed,
            )
            .is_ok()
        {
            // We won the race - initialize
            match ArenaManager::new() {
                Ok(manager) => {
                    // Use system allocator to avoid recursive allocation
                    let layout = Layout::new::<ArenaManager>();
                    let raw = unsafe { System.alloc(layout) as *mut ArenaManager };

                    if raw.is_null() {
                        // Failed to allocate manager struct - enter fallback mode
                        eprintln!("[nalloc] Warning: Failed to allocate ArenaManager struct, using system allocator");
                        self.init_state
                            .store(InitState::Fallback as u8, Ordering::Release);
                        return null_mut();
                    }

                    unsafe {
                        std::ptr::write(raw, manager);
                    }
                    self.arenas.store(raw, Ordering::Release);
                    self.init_state
                        .store(InitState::Initialized as u8, Ordering::Release);
                    return raw;
                }
                Err(e) => {
                    // Arena allocation failed - enter fallback mode
                    eprintln!(
                        "[nalloc] Warning: Arena initialization failed ({}), using system allocator",
                        e
                    );
                    self.init_state
                        .store(InitState::Fallback as u8, Ordering::Release);
                    return null_mut();
                }
            }
        }

        // Another thread is initializing - spin wait with timeout (Issue #2).
        // We mix hint::spin_loop() (PAUSE on x86, YIELD on AArch64) with
        // periodic thread::yield_now() so the OS scheduler can run the thread
        // that is actually performing the initialization. Without the yield,
        // on 2-CPU CI runners all waiting threads can starve the init thread.
        for i in 0..MAX_CAS_RETRIES {
            for _ in 0..SPIN_ITERATIONS {
                std::hint::spin_loop();
            }
            // Every 10 outer iterations, hand control back to the OS scheduler
            // so the initializing thread gets CPU time.
            if i % 10 == 9 {
                std::thread::yield_now();
            }
            let state = self.init_state.load(Ordering::Acquire);

            match state {
                s if s == InitState::Initialized as u8 => {
                    return self.arenas.load(Ordering::Acquire);
                }
                s if s == InitState::Fallback as u8 => {
                    return null_mut();
                }
                _ => continue,
            }
        }

        // Issue #2: Timeout - initialization is stuck or taking too long
        // Fall back to system allocator rather than spinning forever
        #[cfg(debug_assertions)]
        eprintln!("[nalloc] Warning: Arena initialization timed out, using system allocator");
        null_mut()
    }

    /// Check if NAlloc is operating in fallback mode (using system allocator).
    #[must_use]
    #[inline]
    pub fn is_fallback_mode(&self) -> bool {
        self.init_state.load(Ordering::Relaxed) == InitState::Fallback as u8
    }

    /// Check if NAlloc is fully initialized with arenas.
    #[must_use]
    #[inline]
    pub fn is_initialized(&self) -> bool {
        self.init_state.load(Ordering::Relaxed) == InitState::Initialized as u8
    }

    #[inline(always)]
    fn get_arenas(&self) -> Option<&ArenaManager> {
        let state = self.init_state.load(Ordering::Acquire);

        if state == InitState::Initialized as u8 {
            let ptr = self.arenas.load(Ordering::Acquire);
            if !ptr.is_null() {
                return Some(unsafe { &*ptr });
            }
        }

        if state == InitState::Uninitialized as u8 || state == InitState::Initializing as u8 {
            let ptr = self.init_arenas();
            if !ptr.is_null() {
                return Some(unsafe { &*ptr });
            }
        }

        None
    }

    /// Access the witness arena directly.
    ///
    /// Use this for allocating sensitive private inputs that need
    /// zero-initialization and secure wiping.
    ///
    /// # Panics
    ///
    /// Panics if arena initialization failed. Use `try_witness()` for
    /// fallible access.
    ///
    /// # Example
    ///
    /// ```rust
    /// use zk_nalloc::NAlloc;
    ///
    /// let alloc = NAlloc::new();
    /// let witness = alloc.witness();
    /// let secret_ptr = witness.alloc(256, 8);
    /// assert!(!secret_ptr.is_null());
    ///
    /// // Securely wipe when done
    /// unsafe { witness.secure_wipe(); }
    /// ```
    #[inline]
    pub fn witness(&self) -> WitnessArena {
        self.try_witness()
            .expect("Arena initialization failed - use try_witness() for fallible access")
    }

    /// Try to access the witness arena.
    ///
    /// Returns `None` if arena initialization failed.
    #[must_use]
    #[inline]
    pub fn try_witness(&self) -> Option<WitnessArena> {
        self.get_arenas().map(|a| WitnessArena::new(a.witness()))
    }

    /// Access the polynomial arena directly.
    ///
    /// Use this for FFT/NTT-friendly polynomial coefficient vectors.
    /// Provides 64-byte alignment by default for SIMD operations.
    ///
    /// # Panics
    ///
    /// Panics if arena initialization failed. Use `try_polynomial()` for
    /// fallible access.
    ///
    /// # Example
    ///
    /// ```rust
    /// use zk_nalloc::NAlloc;
    ///
    /// let alloc = NAlloc::new();
    /// let poly = alloc.polynomial();
    /// let coeffs = poly.alloc_fft_friendly(1024); // 1K coefficients
    /// assert!(!coeffs.is_null());
    /// assert_eq!((coeffs as usize) % 64, 0); // 64-byte aligned
    /// ```
    #[inline]
    pub fn polynomial(&self) -> PolynomialArena {
        self.try_polynomial()
            .expect("Arena initialization failed - use try_polynomial() for fallible access")
    }

    /// Try to access the polynomial arena.
    ///
    /// Returns `None` if arena initialization failed.
    #[must_use]
    #[inline]
    pub fn try_polynomial(&self) -> Option<PolynomialArena> {
        self.get_arenas()
            .map(|a| PolynomialArena::new(a.polynomial()))
    }

    /// Access the scratch arena directly.
    ///
    /// Use this for temporary computation space.
    ///
    /// # Panics
    ///
    /// Panics if arena initialization failed. Use `try_scratch()` for
    /// fallible access.
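    ///
    /// # Example
    ///
    /// ```rust
    /// use zk_nalloc::NAlloc;
    ///
    /// let alloc = NAlloc::new();
    /// let scratch = alloc.scratch();
    /// let tmp = scratch.alloc(4096, 16);
    /// assert!(!tmp.is_null());
    /// ```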
    #[inline]
    pub fn scratch(&self) -> std::sync::Arc<BumpAlloc> {
        self.try_scratch()
            .expect("Arena initialization failed - use try_scratch() for fallible access")
    }

    /// Try to access the scratch arena.
    ///
    /// Returns `None` if arena initialization failed.
    #[must_use]
    #[inline]
    pub fn try_scratch(&self) -> Option<std::sync::Arc<BumpAlloc>> {
        self.get_arenas().map(|a| a.scratch())
    }

    /// Reset all arenas, freeing all allocated memory.
    ///
    /// The witness arena is securely wiped before reset.
    ///
    /// # Safety
    /// This will invalidate all previously allocated memory.
    ///
    /// # Note
    /// Does nothing if operating in fallback mode.
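    ///
    /// # Example
    ///
    /// ```rust
    /// use zk_nalloc::NAlloc;
    ///
    /// let alloc = NAlloc::new();
    /// let p = alloc.scratch().alloc(64, 8);
    /// assert!(!p.is_null());
    /// // All pointers handed out above are invalid after this call;
    /// // the witness arena is securely wiped first.
    /// unsafe { alloc.reset_all(); }
    /// ```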
    pub unsafe fn reset_all(&self) {
        if let Some(arenas) = self.get_arenas() {
            arenas.reset_all();
        }
    }

    /// Get statistics about arena usage.
    ///
    /// Returns `None` if operating in fallback mode.
    ///
    /// Useful for monitoring memory consumption and tuning arena sizes.
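    ///
    /// # Example
    ///
    /// ```rust
    /// use zk_nalloc::NAlloc;
    ///
    /// let alloc = NAlloc::new();
    /// if let Some(stats) = alloc.stats() {
    ///     println!("scratch: {}/{} bytes", stats.scratch_used, stats.scratch_capacity);
    /// }
    /// ```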
    #[must_use]
    pub fn stats(&self) -> Option<ArenaStats> {
        self.get_arenas().map(|a| a.stats())
    }

    /// Get statistics, returning default stats if in fallback mode.
    #[must_use]
    pub fn stats_or_default(&self) -> ArenaStats {
        self.stats().unwrap_or(ArenaStats {
            witness_used: 0,
            witness_capacity: 0,
            polynomial_used: 0,
            polynomial_capacity: 0,
            scratch_used: 0,
            scratch_capacity: 0,
            #[cfg(feature = "fallback")]
            witness_fallback_bytes: 0,
            #[cfg(feature = "fallback")]
            polynomial_fallback_bytes: 0,
            #[cfg(feature = "fallback")]
            scratch_fallback_bytes: 0,
        })
    }
}

impl Default for NAlloc {
    fn default() -> Self {
        Self::new()
    }
}

impl Drop for NAlloc {
    fn drop(&mut self) {
        // Only clean up if we successfully initialized arenas.
        // Fallback mode never allocated an ArenaManager on the heap.
        if *self.init_state.get_mut() == InitState::Initialized as u8 {
            let ptr = *self.arenas.get_mut();
            if !ptr.is_null() {
                unsafe {
                    // Run ArenaManager's own Drop (securely wipes witness, unmaps arenas).
                    std::ptr::drop_in_place(ptr);
                    // Deallocate the heap slot we allocated in init_arenas().
                    let layout = Layout::new::<ArenaManager>();
                    System.dealloc(ptr as *mut u8, layout);
                }
            }
        }
    }
}

// Safety: NAlloc uses atomic operations for all shared state
unsafe impl Send for NAlloc {}
unsafe impl Sync for NAlloc {}

unsafe impl GlobalAlloc for NAlloc {
    #[inline(always)]
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        debug_assert!(layout.size() > 0);
        debug_assert!(layout.align() > 0);
        debug_assert!(layout.align().is_power_of_two());

        // Try to use arenas
        if let Some(arenas) = self.get_arenas() {
            // Strategy:
            // 1. Large allocations (>threshold) go to Polynomial Arena (likely vectors)
            // 2. Smaller allocations go to Scratch Arena
            // 3. User can explicitly use Witness Arena via NAlloc::witness()

            if layout.size() > LARGE_ALLOC_THRESHOLD {
                arenas.polynomial().alloc(layout.size(), layout.align())
            } else {
                arenas.scratch().alloc(layout.size(), layout.align())
            }
        } else {
            // Fallback to system allocator
            System.alloc(layout)
        }
    }

    #[inline(always)]
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        // In fallback mode, we need to actually deallocate
        if self.is_fallback_mode() {
            System.dealloc(ptr, layout);
            return;
        }

        // Issue #1: Check if this allocation came from fallback
        // Arena allocations are within known address ranges; fallback allocations are not
        if let Some(arenas) = self.get_arenas() {
            let ptr_addr = ptr as usize;
            if !arenas.contains_address(ptr_addr) {
                // This was a fallback allocation - free it via system allocator
                System.dealloc(ptr, layout);
            }
        }

        // For arena allocations, deallocation is a no-op.
        // Memory is reclaimed by calling reset() on the arena.
    }

    #[inline(always)]
    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
        debug_assert!(!ptr.is_null());
        debug_assert!(layout.size() > 0);
        debug_assert!(new_size > 0);

        let old_size = layout.size();

        // If the new size is smaller or equal, arena allocations can keep the
        // same pointer (the bump allocator doesn't shrink). System-allocated
        // blocks (fallback mode, or fallback allocations made while the arena
        // was full) must instead go through System.realloc, so the layout a
        // later dealloc sees stays consistent with the underlying block.
        if new_size <= old_size {
            let from_arena = self
                .get_arenas()
                .map_or(false, |a| a.contains_address(ptr as usize));
            if from_arena {
                return ptr;
            }
            return System.realloc(ptr, layout, new_size);
        }

        // Allocate a new block
        let new_layout = Layout::from_size_align_unchecked(new_size, layout.align());
        let new_ptr = self.alloc(new_layout);

        if new_ptr.is_null() {
            return null_mut();
        }

        // Copy the old data
        copy_nonoverlapping(ptr, new_ptr, old_size);

        // Dealloc the old pointer (no-op for bump allocator, but semantically correct)
        self.dealloc(ptr, layout);

        new_ptr
    }

    #[inline(always)]
    unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 {
        let ptr = self.alloc(layout);
        if !ptr.is_null() {
            // Note: freshly mmap'd arena memory is already zeroed, but memory
            // recycled after reset() or returned by the system fallback may
            // not be, so we zero unconditionally.
            std::ptr::write_bytes(ptr, 0, layout.size());
        }
        ptr
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::alloc::GlobalAlloc;

    #[test]
    fn test_global_alloc_api() {
        let alloc = NAlloc::new();
        let layout = Layout::from_size_align(1024, 8).unwrap();
        unsafe {
            let ptr = alloc.alloc(layout);
            assert!(!ptr.is_null());
            // Check that we can write to it
            ptr.write(42);
            assert_eq!(ptr.read(), 42);
        }
    }

    #[test]
    fn test_try_new() {
        // This should succeed on any reasonable system
        let result = NAlloc::try_new();
        assert!(result.is_ok());

        let alloc = result.unwrap();
        assert!(alloc.is_initialized());
        assert!(!alloc.is_fallback_mode());
    }

    #[test]
    fn test_fallback_mode_detection() {
        let alloc = NAlloc::new();
        // Force initialization
        let _ = alloc.stats();

        // Should be initialized (not fallback) on a normal system
        assert!(alloc.is_initialized() || alloc.is_fallback_mode());
    }

    #[test]
    fn test_try_accessors() {
        let alloc = NAlloc::new();

        // These should return Some on a normal system
        assert!(alloc.try_witness().is_some());
        assert!(alloc.try_polynomial().is_some());
        assert!(alloc.try_scratch().is_some());
    }

    #[test]
    fn test_realloc() {
        let alloc = NAlloc::new();
        let layout = Layout::from_size_align(64, 8).unwrap();
        unsafe {
            let ptr = alloc.alloc(layout);
            assert!(!ptr.is_null());

            // Write some data
            for i in 0..64 {
                ptr.add(i).write(i as u8);
            }

            // Realloc to a larger size
            let new_ptr = alloc.realloc(ptr, layout, 128);
            assert!(!new_ptr.is_null());

            // Verify data was copied
            for i in 0..64 {
                assert_eq!(new_ptr.add(i).read(), i as u8);
            }
        }
    }

    #[test]
    fn test_alloc_zeroed() {
        let alloc = NAlloc::new();
        let layout = Layout::from_size_align(1024, 8).unwrap();
        unsafe {
            let ptr = alloc.alloc_zeroed(layout);
            assert!(!ptr.is_null());

            // Verify memory is zeroed
            for i in 0..1024 {
                assert_eq!(*ptr.add(i), 0);
            }
        }
    }

    #[test]
    fn test_stats() {
        let alloc = NAlloc::new();

        // Trigger arena initialization with an allocation
        let layout = Layout::from_size_align(1024, 8).unwrap();
        unsafe {
            let _ = alloc.alloc(layout);
        }

        let stats = alloc.stats();
        assert!(stats.is_some());

        let stats = stats.unwrap();
        assert!(stats.scratch_used >= 1024);
        assert!(stats.total_capacity() > 0);
    }

    #[test]
    fn test_stats_or_default() {
        let alloc = NAlloc::new();

        // Should work even before initialization
        let stats = alloc.stats_or_default();
        // Just verify it doesn't panic
        let _ = stats.total_capacity();
    }

    #[test]
    fn test_large_allocation_routing() {
        let alloc = NAlloc::new();

        // Small allocation (<1MB) should go to scratch
        let small_layout = Layout::from_size_align(1024, 8).unwrap();
        unsafe {
            let _ = alloc.alloc(small_layout);
        }

        let stats_after_small = alloc.stats().unwrap();
        assert!(stats_after_small.scratch_used >= 1024);

        // Large allocation (>1MB) should go to polynomial
        let large_layout = Layout::from_size_align(2 * 1024 * 1024, 64).unwrap();
        unsafe {
            let _ = alloc.alloc(large_layout);
        }

        let stats_after_large = alloc.stats().unwrap();
        assert!(stats_after_large.polynomial_used >= 2 * 1024 * 1024);
    }

    #[test]
    fn test_drop_deallocates_arena_manager() {
        // Verify that Drop runs without panic and actually frees the ArenaManager.
        // If Drop is missing, valgrind/miri would catch the leak; here we test
        // that drop_in_place + dealloc completes without UB or double-free.
        {
            let alloc = NAlloc::try_new().expect("NAlloc::try_new should succeed");
            assert!(alloc.is_initialized());
            // alloc drops here → Drop impl runs → ArenaManager is freed
        }
        // If we reach here without SIGSEGV / panic, the Drop impl is correct.
        // Run a second init to confirm the heap is still healthy.
        let alloc2 = NAlloc::try_new().expect("heap still healthy after previous drop");
        assert!(alloc2.is_initialized());
    }

    #[test]
    fn test_concurrent_init() {
        use std::sync::Arc;
        use std::thread;

        let alloc = Arc::new(NAlloc::new());
        let mut handles = vec![];

        // Spawn multiple threads that try to initialize simultaneously
        for _ in 0..8 {
            let alloc = Arc::clone(&alloc);
            handles.push(thread::spawn(move || {
                let layout = Layout::from_size_align(64, 8).unwrap();
                unsafe {
                    let ptr = alloc.alloc(layout);
                    assert!(!ptr.is_null());
                }
            }));
        }

        for h in handles {
            h.join().unwrap();
        }

        // After all threads complete, should be in a consistent state
        assert!(alloc.is_initialized() || alloc.is_fallback_mode());
    }
}