Skip to main content

bsql_arena/
lib.rs

1#![forbid(unsafe_code)]
2#![deny(clippy::all)]
3
4//! Bump allocator for row data — one allocation per query result.
5//!
6//! All row data (strings, byte arrays) from a single query is allocated into a
7//! contiguous arena. When the result is dropped, one deallocation frees everything.
8//!
9//! # Thread-local recycling
10//!
//! Arenas are recycled through a thread-local pool (LIFO, at most 4 per thread).
//! On the hot path an arena is popped from that pool instead of being built
//! from scratch; a fresh arena is only constructed when the pool is empty.
13//!
14//! # Chunk growth
15//!
16//! Initial chunk: 8KB. Growth: double the previous chunk size (capped at 1MB).
17//! On `reset()`, chunks larger than 64KB are discarded to prevent long-term bloat.
18
19use std::cell::RefCell;
20
/// One kibibyte; the sizing constants below are expressed in this unit.
const KIB: usize = 1024;

/// Capacity requested for the first chunk of every arena (8 KiB).
const INITIAL_CHUNK_SIZE: usize = 8 * KIB;

/// Ceiling for geometric chunk growth (1 MiB). A single allocation larger
/// than this still gets a chunk big enough to hold it.
const MAX_CHUNK_SIZE: usize = KIB * KIB;

/// How many idle arenas each thread-local pool retains.
const MAX_POOL_SIZE: usize = 4;

/// Chunks whose capacity exceeds this (64 KiB) are dropped on reset.
const SHRINK_THRESHOLD: usize = 64 * KIB;
32
/// A bump allocator for row data.
///
/// Memory is allocated in contiguous chunks. Each `alloc` call bumps a pointer
/// forward. There is no per-allocation deallocation — the entire arena is freed
/// at once via `reset()` or `Drop`.
///
/// Data copied in with `alloc_copy` is addressed by a *global offset*: the
/// summed capacity of all chunks before the one holding the data, plus the
/// position inside that chunk. A single allocation never spans two chunks.
///
/// # Example
///
/// ```
/// use bsql_arena::Arena;
///
/// let mut arena = Arena::new();
/// let offset = arena.alloc_copy(b"hello");
/// assert_eq!(arena.get(offset, 5), b"hello");
/// arena.reset();
/// ```
pub struct Arena {
    /// Backing buffers. A chunk's `len()` is the number of bytes written to it;
    /// its `capacity()` is the room it was created with.
    chunks: Vec<Vec<u8>>,
    /// Cached cumulative chunk capacities.
    /// `prefix_sums[i]` = sum of capacities of chunks 0..i.
    /// Turning (chunk, local offset) into a global offset is a single lookup;
    /// the reverse direction binary-searches this vector.
    prefix_sums: Vec<usize>,
    /// Index of the chunk that new allocations are bumped into.
    current: usize,
    /// Bump position (bytes used) inside `chunks[current]`.
    offset: usize,
}
57
58impl Arena {
59    /// Create a new arena with an 8KB initial chunk.
60    pub fn new() -> Self {
61        let chunk = Vec::with_capacity(INITIAL_CHUNK_SIZE);
62        Self {
63            chunks: vec![chunk],
64            prefix_sums: vec![0],
65            current: 0,
66            offset: 0,
67        }
68    }
69
70    /// Allocate `len` bytes, returning a mutable slice into the arena.
71    ///
72    /// The returned slice is zeroed. For copying data in, prefer `alloc_copy`.
73    pub fn alloc(&mut self, len: usize) -> &mut [u8] {
74        if len == 0 {
75            return &mut [];
76        }
77
78        self.ensure_capacity(len);
79
80        let chunk = &mut self.chunks[self.current];
81        let start = self.offset;
82        let new_len = start + len;
83
84        // Extend the chunk's length (capacity is guaranteed by ensure_capacity)
85        // vec![0u8; N].resize(N, 0) zeroes new bytes. This is the cost of safe
86        // Rust — the kernel already zeroes mmap'd pages, so the real overhead is
87        // only on reused capacity. Cannot avoid without unsafe.
88        if new_len > chunk.len() {
89            chunk.resize(new_len, 0);
90        }
91
92        self.offset = new_len;
93        &mut chunk[start..new_len]
94    }
95
96    /// Copy `data` into the arena and return the global offset.
97    ///
98    /// The offset can be used with `get()` to retrieve the data later.
99    pub fn alloc_copy(&mut self, data: &[u8]) -> usize {
100        if data.is_empty() {
101            return self.global_offset();
102        }
103
104        self.ensure_capacity(data.len());
105
106        let chunk = &mut self.chunks[self.current];
107        let start = self.offset;
108        let new_len = start + data.len();
109
110        // Common case: appending to the end of the chunk. Use extend_from_slice
111        // which copies data directly without zeroing first (unlike resize+copy).
112        if start == chunk.len() {
113            chunk.extend_from_slice(data);
114        } else {
115            if new_len > chunk.len() {
116                chunk.resize(new_len, 0);
117            }
118            chunk[start..new_len].copy_from_slice(data);
119        }
120
121        let global = self.global_offset_at(self.current, start);
122        self.offset = new_len;
123        global
124    }
125
126    /// Retrieve a slice from the arena by global offset and length.
127    ///
128    /// # Panics
129    ///
130    /// Panics if the offset + length exceeds the arena's allocated range.
131    pub fn get(&self, global_offset: usize, len: usize) -> &[u8] {
132        if len == 0 {
133            return &[];
134        }
135
136        let (chunk_idx, local_offset) = self.resolve_offset(global_offset);
137        &self.chunks[chunk_idx][local_offset..local_offset + len]
138    }
139
140    /// Retrieve a str slice from the arena. Returns `None` if not valid UTF-8.
141    ///
142    /// Uses SIMD-accelerated UTF-8 validation via `simdutf8`.
143    pub fn get_str(&self, global_offset: usize, len: usize) -> Option<&str> {
144        if len == 0 {
145            return Some("");
146        }
147        simdutf8::basic::from_utf8(self.get(global_offset, len)).ok()
148    }
149
150    /// Reset the arena for reuse. Keeps allocated memory but resets the bump pointer.
151    ///
152    /// Chunks larger than 64KB are discarded to prevent long-term bloat.
153    pub fn reset(&mut self) {
154        // Discard oversized chunks, keep small ones
155        self.chunks.retain(|c| c.capacity() <= SHRINK_THRESHOLD);
156
157        if self.chunks.is_empty() {
158            self.chunks.push(Vec::with_capacity(INITIAL_CHUNK_SIZE));
159        }
160
161        // Clear all chunks (set len to 0, keep capacity)
162        for chunk in &mut self.chunks {
163            chunk.clear();
164        }
165
166        // Rebuild prefix_sums
167        self.rebuild_prefix_sums();
168
169        self.current = 0;
170        self.offset = 0;
171    }
172
173    /// Total bytes allocated in this arena (across all chunks).
174    pub fn allocated(&self) -> usize {
175        let mut total = 0;
176        for (i, chunk) in self.chunks.iter().enumerate() {
177            if i < self.current {
178                total += chunk.len();
179            } else if i == self.current {
180                total += self.offset;
181            }
182        }
183        total
184    }
185
186    /// Total capacity of all chunks (for diagnostics).
187    pub fn capacity(&self) -> usize {
188        self.chunks.iter().map(|c| c.capacity()).sum()
189    }
190
191    // --- Internal ---
192
193    /// Ensure the current chunk has room for `len` bytes. If not, allocate a new chunk.
194    fn ensure_capacity(&mut self, len: usize) {
195        let chunk = &self.chunks[self.current];
196        let remaining = chunk.capacity().saturating_sub(self.offset);
197
198        if remaining >= len {
199            return;
200        }
201
202        // Need a new chunk. Size = max(double previous capacity, len, INITIAL_CHUNK_SIZE)
203        let prev_cap = chunk.capacity();
204        let new_cap = prev_cap
205            .saturating_mul(2)
206            .max(len)
207            .max(INITIAL_CHUNK_SIZE)
208            .min(MAX_CHUNK_SIZE.max(len)); // allow exceeding MAX for single large allocs
209
210        // Check if the next chunk already exists and has enough capacity
211        let next_idx = self.current + 1;
212        if next_idx < self.chunks.len() && self.chunks[next_idx].capacity() >= len {
213            self.current = next_idx;
214            self.offset = 0;
215            return;
216        }
217
218        // Allocate a new chunk and update prefix_sums
219        let new_chunk = Vec::with_capacity(new_cap);
220        let prefix = self.prefix_sums[self.chunks.len() - 1]
221            + self.chunks.last().map_or(0, |c| c.capacity());
222        if next_idx < self.chunks.len() {
223            self.chunks[next_idx] = new_chunk;
224            // Rebuild prefix_sums since a chunk capacity changed
225            self.rebuild_prefix_sums();
226        } else {
227            self.chunks.push(new_chunk);
228            self.prefix_sums.push(prefix);
229        }
230        self.current = next_idx;
231        self.offset = 0;
232    }
233
234    /// Rebuild the prefix_sums cache from current chunk capacities.
235    fn rebuild_prefix_sums(&mut self) {
236        self.prefix_sums.clear();
237        let mut sum = 0;
238        for chunk in &self.chunks {
239            self.prefix_sums.push(sum);
240            sum += chunk.capacity();
241        }
242    }
243
244    /// Compute the global offset for the current position.
245    pub fn global_offset(&self) -> usize {
246        self.global_offset_at(self.current, self.offset)
247    }
248
249    /// Compute a global offset from chunk index and local offset.
250    /// O(1) using cached prefix_sums.
251    fn global_offset_at(&self, chunk_idx: usize, local_offset: usize) -> usize {
252        self.prefix_sums[chunk_idx] + local_offset
253    }
254
255    /// Resolve a global offset to (chunk_index, local_offset).
256    /// O(log n) using binary search on prefix_sums.
257    fn resolve_offset(&self, global_offset: usize) -> (usize, usize) {
258        // for the common case (most queries fit in one 8KB chunk).
259        if self.chunks.len() == 1 {
260            debug_assert!(
261                global_offset < self.chunks[0].capacity(),
262                "arena offset {global_offset} out of bounds in single chunk (cap={})",
263                self.chunks[0].capacity()
264            );
265            return (0, global_offset);
266        }
267
268        // Binary search: find the last chunk whose prefix_sum <= global_offset
269        let idx = match self.prefix_sums.binary_search(&global_offset) {
270            Ok(i) => i,
271            Err(0) => 0, // guard against underflow when global_offset < prefix_sums[0]
272            Err(i) => i - 1,
273        };
274        let local = global_offset - self.prefix_sums[idx];
275        debug_assert!(
276            local < self.chunks[idx].capacity(),
277            "arena offset {global_offset} out of bounds in chunk {idx} (cap={})",
278            self.chunks[idx].capacity()
279        );
280        (idx, local)
281    }
282}
283
284impl Default for Arena {
285    fn default() -> Self {
286        Self::new()
287    }
288}
289
290// --- Thread-local arena pool ---
291
thread_local! {
    // Per-thread stack of idle arenas. `acquire_arena` pops from it and
    // `release_arena` pushes onto it while it holds fewer than
    // `MAX_POOL_SIZE` entries.
    static ARENA_POOL: RefCell<Vec<Arena>> = const { RefCell::new(Vec::new()) };
}
295
296/// Acquire an arena from the thread-local pool, or create a new one.
297///
298/// LIFO ordering: returns the most recently released arena (warmest cache).
299///
300/// # Example
301///
302/// ```
303/// use bsql_arena::{acquire_arena, release_arena};
304///
305/// let mut arena = acquire_arena();
306/// let offset = arena.alloc_copy(b"data");
307/// // ... use arena ...
308/// release_arena(arena);
309/// ```
310pub fn acquire_arena() -> Arena {
311    ARENA_POOL
312        .with(|pool| pool.borrow_mut().pop())
313        .unwrap_or_default()
314}
315
316/// Return an arena to the thread-local pool for reuse.
317///
318/// The arena is reset (bump pointer zeroed, oversized chunks discarded).
319/// If the pool is full (4 arenas), the arena is dropped instead.
320pub fn release_arena(mut arena: Arena) {
321    arena.reset();
322    ARENA_POOL.with(|pool| {
323        let mut pool = pool.borrow_mut();
324        if pool.len() < MAX_POOL_SIZE {
325            pool.push(arena);
326        }
327        // else: drop the arena (too many in pool)
328    });
329}
330
331// ---------------------------------------------------------------------------
332// ArenaRows — arena-backed row storage with borrowed strings
333// ---------------------------------------------------------------------------
334
/// A collection of decoded rows stored together with the arena that backs them.
///
/// `rows` holds the decoded values and `arena` holds the variable-length bytes
/// belonging to the same query result; bundling them keeps both alive for the
/// same span.
///
/// NOTE(review): an earlier version of this comment described `T` as carrying
/// `&'static str` / `&'static [u8]` references that really point into the
/// arena. This crate is built with `#![forbid(unsafe_code)]`, so nothing in
/// this file can manufacture such references, and the docs on
/// `ArenaRows::new` state that `T` should hold only `Copy` scalars and
/// byte-range indices. Confirm that no caller still relies on the old
/// lifetime-extension scheme before trusting either description.
///
/// # Drop order
///
/// Rust drops struct fields in declaration order (RFC 1857). `rows` is
/// declared before `arena`, so all `T` values are dropped before the arena
/// memory is freed.
pub struct ArenaRows<T> {
    // Decoded rows; exposed through `get`, `iter` and `Deref<Target = [T]>`.
    rows: Vec<T>,
    // Arena owned alongside the rows; released after `rows` on drop.
    arena: Arena,
}
356
357impl<T> ArenaRows<T> {
358    /// Build `ArenaRows` from an arena and a row vector.
359    ///
360    /// `T` should contain only Copy types (integers, floats, bools) and
361    /// byte-range indices into a separately validated text buffer. No
362    /// `&'static str` transmute is involved.
363    pub fn new(rows: Vec<T>, arena: Arena) -> Self {
364        Self { rows, arena }
365    }
366
367    /// Number of rows.
368    #[inline]
369    pub fn len(&self) -> usize {
370        self.rows.len()
371    }
372
373    /// Whether the result set is empty.
374    #[inline]
375    pub fn is_empty(&self) -> bool {
376        self.rows.is_empty()
377    }
378
379    /// Get a row by index.
380    #[inline]
381    pub fn get(&self, idx: usize) -> Option<&T> {
382        self.rows.get(idx)
383    }
384
385    /// Iterate over rows by reference.
386    #[inline]
387    pub fn iter(&self) -> std::slice::Iter<'_, T> {
388        self.rows.iter()
389    }
390
391    /// Consume into the inner `Vec<T>` and arena.
392    ///
393    /// Returns both so the caller can decide what to do with the arena.
394    pub fn into_parts(self) -> (Vec<T>, Arena) {
395        (self.rows, self.arena)
396    }
397
398    /// Total bytes allocated in the backing arena.
399    #[inline]
400    pub fn arena_allocated(&self) -> usize {
401        self.arena.allocated()
402    }
403}
404
405impl<T> std::ops::Deref for ArenaRows<T> {
406    type Target = [T];
407
408    #[inline]
409    fn deref(&self) -> &[T] {
410        &self.rows
411    }
412}
413
414impl<'a, T> IntoIterator for &'a ArenaRows<T> {
415    type Item = &'a T;
416    type IntoIter = std::slice::Iter<'a, T>;
417
418    #[inline]
419    fn into_iter(self) -> Self::IntoIter {
420        self.rows.iter()
421    }
422}
423
424impl<T: std::fmt::Debug> std::fmt::Debug for ArenaRows<T> {
425    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
426        f.debug_struct("ArenaRows")
427            .field("len", &self.rows.len())
428            .field("arena_allocated", &self.arena.allocated())
429            .field("rows", &self.rows)
430            .finish()
431    }
432}
433
434// ---------------------------------------------------------------------------
435// ValidatedRows — batch-validated text, zero unsafe
436// ---------------------------------------------------------------------------
437
/// A collection of decoded rows with batch-validated text data.
///
/// Text columns are stored as byte ranges `(u32, u32)` into a shared
/// `String` buffer. Blob columns are stored as byte ranges into the `Arena`.
/// Scalar columns (i64, f64, bool) are stored directly.
///
/// # Zero unsafe
///
/// The text buffer is handed in as a `String`, so it is valid UTF-8 by
/// construction. No `from_utf8_unchecked`, no `transmute`, no lifetime
/// extension.
///
/// NOTE(review): the buffer is presumably validated once per batch with
/// `String::from_utf8` by the decoder — that happens outside this type;
/// confirm against the caller.
///
/// # Usage pattern
///
/// The codegen generates an "inner" struct with byte ranges and a "view" struct
/// with `&str`. This type only exposes the inner rows (`get_inner`,
/// `iter_inner`, `Deref<Target = [T]>`); a view is built by slicing the text
/// buffer with `text_slice` and the arena with `blob_slice`.
pub struct ValidatedRows<T> {
    // Inner rows holding scalars and byte ranges.
    rows: Vec<T>,
    // Shared text storage that the text byte ranges index into.
    text_buf: String,
    // Blob storage that the blob offsets/lengths index into.
    blob_arena: Arena,
}
460
461impl<T> ValidatedRows<T> {
462    /// Build `ValidatedRows` from a text buffer (already validated as UTF-8),
463    /// a blob arena, and the decoded inner rows.
464    pub fn new(rows: Vec<T>, text_buf: String, blob_arena: Arena) -> Self {
465        Self {
466            rows,
467            text_buf,
468            blob_arena,
469        }
470    }
471
472    /// Get the validated text buffer.
473    #[inline]
474    pub fn text(&self) -> &str {
475        &self.text_buf
476    }
477
478    /// Get a text slice by byte range. Panics if range is out of bounds
479    /// or not on a UTF-8 char boundary (impossible if ranges were recorded
480    /// correctly during the step loop).
481    #[inline]
482    pub fn text_slice(&self, start: u32, end: u32) -> &str {
483        &self.text_buf[start as usize..end as usize]
484    }
485
486    /// Get a blob slice from the arena by global offset and length.
487    #[inline]
488    pub fn blob_slice(&self, offset: u32, len: u32) -> &[u8] {
489        self.blob_arena.get(offset as usize, len as usize)
490    }
491
492    /// Number of rows.
493    #[inline]
494    pub fn len(&self) -> usize {
495        self.rows.len()
496    }
497
498    /// Whether the result set is empty.
499    #[inline]
500    pub fn is_empty(&self) -> bool {
501        self.rows.is_empty()
502    }
503
504    /// Get an inner row by index.
505    #[inline]
506    pub fn get_inner(&self, idx: usize) -> Option<&T> {
507        self.rows.get(idx)
508    }
509
510    /// Iterate over inner rows by reference.
511    #[inline]
512    pub fn iter_inner(&self) -> std::slice::Iter<'_, T> {
513        self.rows.iter()
514    }
515
516    /// Total bytes in the text buffer.
517    #[inline]
518    pub fn text_len(&self) -> usize {
519        self.text_buf.len()
520    }
521
522    /// Total bytes allocated in the blob arena.
523    #[inline]
524    pub fn blob_allocated(&self) -> usize {
525        self.blob_arena.allocated()
526    }
527
528    /// Total bytes allocated (text + blobs).
529    #[inline]
530    pub fn arena_allocated(&self) -> usize {
531        self.text_buf.len() + self.blob_arena.allocated()
532    }
533}
534
535impl<T> std::ops::Deref for ValidatedRows<T> {
536    type Target = [T];
537
538    #[inline]
539    fn deref(&self) -> &[T] {
540        &self.rows
541    }
542}
543
544impl<'a, T> IntoIterator for &'a ValidatedRows<T> {
545    type Item = &'a T;
546    type IntoIter = std::slice::Iter<'a, T>;
547
548    #[inline]
549    fn into_iter(self) -> Self::IntoIter {
550        self.rows.iter()
551    }
552}
553
554impl<T: std::fmt::Debug> std::fmt::Debug for ValidatedRows<T> {
555    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
556        f.debug_struct("ValidatedRows")
557            .field("len", &self.rows.len())
558            .field("text_len", &self.text_buf.len())
559            .field("blob_allocated", &self.blob_arena.allocated())
560            .field("rows", &self.rows)
561            .finish()
562    }
563}
564
565#[cfg(test)]
566mod tests {
567    use super::*;
568
569    #[test]
570    fn basic_alloc_and_get() {
571        let mut arena = Arena::new();
572        let offset = arena.alloc_copy(b"hello");
573        assert_eq!(arena.get(offset, 5), b"hello");
574    }
575
576    #[test]
577    fn multiple_allocs() {
578        let mut arena = Arena::new();
579        let o1 = arena.alloc_copy(b"foo");
580        let o2 = arena.alloc_copy(b"bar");
581        let o3 = arena.alloc_copy(b"baz");
582
583        assert_eq!(arena.get(o1, 3), b"foo");
584        assert_eq!(arena.get(o2, 3), b"bar");
585        assert_eq!(arena.get(o3, 3), b"baz");
586    }
587
588    #[test]
589    fn alloc_str_retrieval() {
590        let mut arena = Arena::new();
591        let offset = arena.alloc_copy(b"hello world");
592        assert_eq!(arena.get_str(offset, 11), Some("hello world"));
593    }
594
595    #[test]
596    fn zero_length_alloc() {
597        let mut arena = Arena::new();
598        let offset = arena.alloc_copy(b"");
599        let data = arena.get(offset, 0);
600        assert!(data.is_empty());
601    }
602
603    #[test]
604    fn alloc_returns_zeroed_slice() {
605        let mut arena = Arena::new();
606        let slice = arena.alloc(16);
607        assert!(slice.iter().all(|&b| b == 0));
608    }
609
610    #[test]
611    fn reset_allows_reuse() {
612        let mut arena = Arena::new();
613        let _o1 = arena.alloc_copy(b"before reset");
614        assert_eq!(arena.allocated(), 12);
615
616        arena.reset();
617        assert_eq!(arena.allocated(), 0);
618
619        let o2 = arena.alloc_copy(b"after reset");
620        assert_eq!(arena.get(o2, 11), b"after reset");
621    }
622
623    #[test]
624    fn chunk_growth() {
625        let mut arena = Arena::new();
626
627        // Fill the initial 8KB chunk
628        let big = vec![0xAA; INITIAL_CHUNK_SIZE + 1];
629        let offset = arena.alloc_copy(&big);
630        assert_eq!(arena.get(offset, big.len())[0], 0xAA);
631        assert!(
632            arena.chunks.len() >= 2,
633            "should have grown to a second chunk"
634        );
635    }
636
637    #[test]
638    fn large_single_alloc() {
639        let mut arena = Arena::new();
640        let data = vec![0x42; 2 * MAX_CHUNK_SIZE];
641        let offset = arena.alloc_copy(&data);
642        let result = arena.get(offset, data.len());
643        assert!(result.iter().all(|&b| b == 0x42));
644    }
645
646    #[test]
647    fn one_hundred_rows_in_one_chunk() {
648        let mut arena = Arena::new();
649        let row_data = b"typical row data, about 50 bytes of text content.";
650
651        let mut offsets = Vec::new();
652        for _ in 0..100 {
653            offsets.push(arena.alloc_copy(row_data));
654        }
655
656        // 100 * 50 = 5000 bytes, fits in 8KB initial chunk
657        assert_eq!(arena.chunks.len(), 1);
658
659        for &offset in &offsets {
660            assert_eq!(arena.get(offset, row_data.len()), row_data);
661        }
662    }
663
664    #[test]
665    fn reset_discards_oversized_chunks() {
666        let mut arena = Arena::new();
667
668        // Allocate a chunk larger than SHRINK_THRESHOLD
669        let big = vec![0xFF; SHRINK_THRESHOLD + 1];
670        arena.alloc_copy(&big);
671
672        let _chunks_before = arena.chunks.len();
673        arena.reset();
674
675        // Oversized chunks should be discarded
676        for chunk in &arena.chunks {
677            assert!(
678                chunk.capacity() <= SHRINK_THRESHOLD,
679                "oversized chunk not discarded: capacity={}",
680                chunk.capacity()
681            );
682        }
683    }
684
685    #[test]
686    fn thread_local_pool_acquire_release() {
687        let mut arena = acquire_arena();
688        arena.alloc_copy(b"test data");
689        release_arena(arena);
690
691        // Second acquire should get the recycled arena
692        let arena2 = acquire_arena();
693        assert_eq!(arena2.allocated(), 0); // should be reset
694        release_arena(arena2);
695    }
696
697    #[test]
698    fn thread_local_pool_max_size() {
699        // Release MAX_POOL_SIZE + 1 arenas, only MAX_POOL_SIZE should be kept
700        for _ in 0..MAX_POOL_SIZE + 2 {
701            let arena = Arena::new();
702            release_arena(arena);
703        }
704
705        ARENA_POOL.with(|pool| {
706            assert!(pool.borrow().len() <= MAX_POOL_SIZE);
707        });
708    }
709
710    #[test]
711    fn capacity_reports_total() {
712        let arena = Arena::new();
713        assert!(arena.capacity() >= INITIAL_CHUNK_SIZE);
714    }
715
716    #[test]
717    fn allocated_tracks_usage() {
718        let mut arena = Arena::new();
719        assert_eq!(arena.allocated(), 0);
720        arena.alloc_copy(b"12345");
721        assert_eq!(arena.allocated(), 5);
722        arena.alloc_copy(b"67890");
723        assert_eq!(arena.allocated(), 10);
724    }
725
726    #[test]
727    fn alloc_at_exact_8kb_boundary() {
728        let mut arena = Arena::new();
729
730        // Fill exactly to the 8KB boundary
731        let filler = vec![0xAA; INITIAL_CHUNK_SIZE];
732        let o1 = arena.alloc_copy(&filler);
733        assert_eq!(arena.get(o1, INITIAL_CHUNK_SIZE)[0], 0xAA);
734        assert_eq!(arena.chunks.len(), 1);
735
736        // Next alloc (even 1 byte) must trigger a new chunk
737        let o2 = arena.alloc_copy(b"x");
738        assert_eq!(arena.get(o2, 1), b"x");
739        assert!(arena.chunks.len() >= 2, "should have grown past 8KB chunk");
740
741        // Data from both chunks must still be accessible
742        assert_eq!(arena.get(o1, INITIAL_CHUNK_SIZE)[0], 0xAA);
743        assert_eq!(
744            arena.get(o1, INITIAL_CHUNK_SIZE)[INITIAL_CHUNK_SIZE - 1],
745            0xAA
746        );
747    }
748
749    #[test]
750    fn prefix_sums_correct_after_multi_chunk() {
751        let mut arena = Arena::new();
752        let mut offsets = Vec::new();
753
754        // Force 4 chunks
755        for i in 0..4 {
756            let data = vec![i as u8; INITIAL_CHUNK_SIZE + 1];
757            offsets.push((arena.alloc_copy(&data), data.len()));
758        }
759
760        // Verify all data is retrievable (exercises prefix_sums-based resolve_offset)
761        for (idx, &(offset, len)) in offsets.iter().enumerate() {
762            let data = arena.get(offset, len);
763            assert!(data.iter().all(|&b| b == idx as u8));
764        }
765    }
766
767    #[test]
768    fn prefix_sums_correct_after_reset() {
769        let mut arena = Arena::new();
770
771        // Force a second chunk
772        let big = vec![0xBB; INITIAL_CHUNK_SIZE + 1];
773        arena.alloc_copy(&big);
774        assert!(arena.chunks.len() >= 2);
775
776        arena.reset();
777
778        // After reset, alloc should work correctly with rebuilt prefix_sums
779        let o = arena.alloc_copy(b"after reset");
780        assert_eq!(arena.get(o, 11), b"after reset");
781    }
782
783    /// T-01: resolve_offset with global_offset=0 must return (0, 0)
784    #[test]
785    fn resolve_offset_zero() {
786        let arena = Arena::new();
787        let (chunk_idx, local) = arena.resolve_offset(0);
788        assert_eq!(chunk_idx, 0);
789        assert_eq!(local, 0);
790    }
791
792    /// Single-chunk fast-path in resolve_offset.
793    #[test]
794    fn resolve_offset_single_chunk_fast_path() {
795        let mut arena = Arena::new();
796        // Stay within one chunk
797        let o1 = arena.alloc_copy(b"hello");
798        let o2 = arena.alloc_copy(b"world");
799        assert_eq!(arena.chunks.len(), 1, "should be single chunk");
800
801        // resolve_offset uses fast-path
802        assert_eq!(arena.get(o1, 5), b"hello");
803        assert_eq!(arena.get(o2, 5), b"world");
804    }
805
806    // --- Audit gap tests ---
807
808    // #56: get_str with invalid UTF-8
809    #[test]
810    fn get_str_invalid_utf8_returns_none() {
811        let mut arena = Arena::new();
812        let offset = arena.alloc_copy(&[0xFF, 0xFE, 0xFD]);
813        assert_eq!(arena.get_str(offset, 3), None);
814    }
815
816    // #56 extra: get_str with valid UTF-8
817    #[test]
818    fn get_str_valid_utf8() {
819        let mut arena = Arena::new();
820        let offset = arena.alloc_copy("hello".as_bytes());
821        assert_eq!(arena.get_str(offset, 5), Some("hello"));
822    }
823
824    // #56 extra: get_str with empty string
825    #[test]
826    fn get_str_empty_returns_some_empty() {
827        let arena = Arena::new();
828        assert_eq!(arena.get_str(0, 0), Some(""));
829    }
830
831    // #57: get() with offset beyond bounds panics
832    #[test]
833    #[should_panic]
834    fn get_out_of_bounds_panics() {
835        let arena = Arena::new();
836        // Try to read beyond the arena (capacity is 8KB but nothing allocated)
837        arena.get(INITIAL_CHUNK_SIZE + 100, 1);
838    }
839
840    // #58: ensure_capacity reusing existing next chunk
841    #[test]
842    fn ensure_capacity_reuses_next_chunk() {
843        let mut arena = Arena::new();
844
845        // Fill first chunk to force a second
846        let big = vec![0xAA; INITIAL_CHUNK_SIZE + 1];
847        arena.alloc_copy(&big);
848        assert!(arena.chunks.len() >= 2);
849
850        // Reset (keeps small chunks)
851        arena.reset();
852        assert_eq!(arena.current, 0);
853        assert_eq!(arena.offset, 0);
854
855        // Now fill first chunk again — second alloc should reuse existing chunk
856        let filler = vec![0xBB; INITIAL_CHUNK_SIZE];
857        arena.alloc_copy(&filler);
858        // Next alloc should reuse the existing second chunk if capacity is sufficient
859        let o = arena.alloc_copy(b"reuse check");
860        assert_eq!(arena.get(o, 11), b"reuse check");
861    }
862
863    // #59: Multi-thread safety: acquire on thread A, release on thread B
864    #[test]
865    fn arena_cross_thread_no_crash() {
866        // Thread-local pools are per-thread, so this just verifies
867        // Arena is Send (can move between threads) without crashing.
868        let mut arena = Arena::new();
869        arena.alloc_copy(b"test data");
870
871        let handle = std::thread::spawn(move || {
872            // Arena moved to another thread — should not crash
873            assert_eq!(arena.get(0, 9), b"test data");
874            arena.reset();
875            arena
876        });
877
878        let arena = handle.join().unwrap();
879        // Release on the original thread's pool
880        release_arena(arena);
881    }
882
883    // --- ArenaRows tests (safe) ---
884
885    #[test]
886    fn arena_rows_basic() {
887        let arena = Arena::new();
888        let ar: ArenaRows<i64> = ArenaRows::new(vec![42], arena);
889        assert_eq!(ar.len(), 1);
890        assert!(!ar.is_empty());
891        assert_eq!(ar[0], 42);
892        assert_eq!(ar.get(0), Some(&42));
893    }
894
895    #[test]
896    fn arena_rows_empty() {
897        let arena = Arena::new();
898        let ar: ArenaRows<i64> = ArenaRows::new(vec![], arena);
899        assert!(ar.is_empty());
900        assert_eq!(ar.len(), 0);
901        assert!(ar.get(0).is_none());
902    }
903
904    #[test]
905    fn arena_rows_iter() {
906        let arena = Arena::new();
907        let ar: ArenaRows<i64> = ArenaRows::new(vec![10, 20, 30], arena);
908        let vals: Vec<&i64> = ar.iter().collect();
909        assert_eq!(vals, vec![&10, &20, &30]);
910    }
911
912    #[test]
913    fn arena_rows_deref() {
914        let arena = Arena::new();
915        let ar: ArenaRows<i64> = ArenaRows::new(vec![1, 2, 3], arena);
916        let slice: &[i64] = &ar;
917        assert_eq!(slice, &[1, 2, 3]);
918    }
919
920    #[test]
921    fn arena_rows_for_loop() {
922        let arena = Arena::new();
923        let ar: ArenaRows<i64> = ArenaRows::new(vec![10, 20], arena);
924        let mut sum = 0;
925        for &val in &ar {
926            sum += val;
927        }
928        assert_eq!(sum, 30);
929    }
930
931    #[test]
932    fn arena_rows_debug() {
933        let arena = Arena::new();
934        let ar: ArenaRows<i64> = ArenaRows::new(vec![42], arena);
935        let dbg = format!("{ar:?}");
936        assert!(dbg.contains("ArenaRows"));
937        assert!(dbg.contains("42"));
938    }
939
940    #[test]
941    fn arena_rows_arena_allocated() {
942        let mut arena = Arena::new();
943        arena.alloc_copy(b"some data");
944        let allocated = arena.allocated();
945        let ar: ArenaRows<i64> = ArenaRows::new(vec![], arena);
946        assert_eq!(ar.arena_allocated(), allocated);
947    }
948
949    #[test]
950    fn arena_rows_into_parts() {
951        let arena = Arena::new();
952        let ar: ArenaRows<i64> = ArenaRows::new(vec![1, 2, 3], arena);
953        let (v, _arena) = ar.into_parts();
954        assert_eq!(v, vec![1, 2, 3]);
955    }
956
957    #[test]
958    fn arena_rows_into_parts_empty() {
959        let arena = Arena::new();
960        let ar: ArenaRows<i64> = ArenaRows::new(vec![], arena);
961        let (v, _arena) = ar.into_parts();
962        assert!(v.is_empty());
963    }
964
965    #[test]
966    fn arena_rows_get_out_of_bounds() {
967        let arena = Arena::new();
968        let ar: ArenaRows<i64> = ArenaRows::new(vec![42], arena);
969        assert_eq!(ar.get(0), Some(&42));
970        assert_eq!(ar.get(1), None);
971        assert_eq!(ar.get(999), None);
972    }
973
974    // --- ValidatedRows tests ---
975
976    #[test]
977    fn validated_rows_basic() {
978        let text_buf = String::from("alicebob");
979        let blob_arena = Arena::new();
980
981        #[derive(Debug)]
982        #[allow(dead_code)]
983        struct Inner {
984            id: i64,
985            name_start: u32,
986            name_end: u32,
987        }
988
989        let rows = vec![
990            Inner {
991                id: 1,
992                name_start: 0,
993                name_end: 5,
994            },
995            Inner {
996                id: 2,
997                name_start: 5,
998                name_end: 8,
999            },
1000        ];
1001        let vr = ValidatedRows::new(rows, text_buf, blob_arena);
1002
1003        assert_eq!(vr.len(), 2);
1004        assert!(!vr.is_empty());
1005        assert_eq!(vr.text_slice(vr[0].name_start, vr[0].name_end), "alice");
1006        assert_eq!(vr.text_slice(vr[1].name_start, vr[1].name_end), "bob");
1007    }
1008
1009    #[test]
1010    fn validated_rows_empty() {
1011        let vr: ValidatedRows<i64> = ValidatedRows::new(vec![], String::new(), Arena::new());
1012        assert!(vr.is_empty());
1013        assert_eq!(vr.len(), 0);
1014        assert_eq!(vr.text_len(), 0);
1015    }
1016
1017    #[test]
1018    fn validated_rows_blob() {
1019        let mut blob_arena = Arena::new();
1020        let off = blob_arena.alloc_copy(&[0xDE, 0xAD]);
1021
1022        #[derive(Debug)]
1023        struct Inner {
1024            blob_off: u32,
1025            blob_len: u32,
1026        }
1027
1028        let rows = vec![Inner {
1029            blob_off: off as u32,
1030            blob_len: 2,
1031        }];
1032        let vr = ValidatedRows::new(rows, String::new(), blob_arena);
1033
1034        assert_eq!(vr.blob_slice(vr[0].blob_off, vr[0].blob_len), &[0xDE, 0xAD]);
1035    }
1036
1037    #[test]
1038    fn validated_rows_arena_allocated() {
1039        let mut blob_arena = Arena::new();
1040        blob_arena.alloc_copy(&[1, 2, 3]);
1041        let text_buf = String::from("hello");
1042
1043        let vr: ValidatedRows<i64> = ValidatedRows::new(vec![], text_buf, blob_arena);
1044        assert_eq!(vr.arena_allocated(), 5 + 3); // text_len + blob_allocated
1045    }
1046
1047    #[test]
1048    fn validated_rows_debug() {
1049        let vr: ValidatedRows<i64> = ValidatedRows::new(vec![42], String::new(), Arena::new());
1050        let dbg = format!("{vr:?}");
1051        assert!(dbg.contains("ValidatedRows"));
1052        assert!(dbg.contains("42"));
1053    }
1054
1055    #[test]
1056    fn validated_rows_deref() {
1057        let vr: ValidatedRows<i64> = ValidatedRows::new(vec![1, 2, 3], String::new(), Arena::new());
1058        let slice: &[i64] = &vr;
1059        assert_eq!(slice, &[1, 2, 3]);
1060    }
1061
1062    #[test]
1063    fn validated_rows_iter() {
1064        let vr: ValidatedRows<i64> = ValidatedRows::new(vec![10, 20], String::new(), Arena::new());
1065        let mut sum = 0;
1066        for &val in &vr {
1067            sum += val;
1068        }
1069        assert_eq!(sum, 30);
1070    }
1071
1072    // --- alloc zero length slice ---
1073
1074    #[test]
1075    fn alloc_zero_returns_empty_slice() {
1076        let mut arena = Arena::new();
1077        let slice = arena.alloc(0);
1078        assert!(slice.is_empty());
1079    }
1080
1081    // --- get_str with zero length ---
1082
1083    #[test]
1084    fn get_str_zero_len_returns_empty() {
1085        let arena = Arena::new();
1086        assert_eq!(arena.get_str(0, 0), Some(""));
1087    }
1088
1089    // ===============================================================
1090    // ValidatedRows — comprehensive tests
1091    // ===============================================================
1092
1093    #[test]
1094    fn validated_rows_empty_text_buf() {
1095        let vr: ValidatedRows<i64> = ValidatedRows::new(vec![1, 2, 3], String::new(), Arena::new());
1096        assert_eq!(vr.text(), "");
1097        assert_eq!(vr.text_len(), 0);
1098        assert_eq!(vr.len(), 3);
1099    }
1100
1101    #[test]
1102    fn validated_rows_blob_only_no_text() {
1103        let mut blob_arena = Arena::new();
1104        let o1 = blob_arena.alloc_copy(&[0x01, 0x02, 0x03]);
1105        let o2 = blob_arena.alloc_copy(&[0xAA, 0xBB]);
1106
1107        #[derive(Debug)]
1108        struct Inner {
1109            off: u32,
1110            len: u32,
1111        }
1112
1113        let rows = vec![
1114            Inner {
1115                off: o1 as u32,
1116                len: 3,
1117            },
1118            Inner {
1119                off: o2 as u32,
1120                len: 2,
1121            },
1122        ];
1123        let vr = ValidatedRows::new(rows, String::new(), blob_arena);
1124        assert_eq!(vr.text_len(), 0);
1125        assert_eq!(vr.blob_slice(vr[0].off, vr[0].len), &[0x01, 0x02, 0x03]);
1126        assert_eq!(vr.blob_slice(vr[1].off, vr[1].len), &[0xAA, 0xBB]);
1127    }
1128
1129    #[test]
1130    #[should_panic]
1131    fn validated_rows_text_slice_out_of_bounds() {
1132        let vr: ValidatedRows<i64> = ValidatedRows::new(vec![], String::from("hi"), Arena::new());
1133        // end is beyond the text buffer
1134        vr.text_slice(0, 100);
1135    }
1136
1137    #[test]
1138    #[should_panic]
1139    fn validated_rows_blob_slice_out_of_bounds() {
1140        let blob_arena = Arena::new();
1141        let vr: ValidatedRows<i64> = ValidatedRows::new(vec![], String::new(), blob_arena);
1142        // nothing allocated in blob arena
1143        vr.blob_slice(0, 100);
1144    }
1145
1146    #[test]
1147    fn validated_rows_large_10k_rows() {
1148        let mut text_buf = String::new();
1149        let blob_arena = Arena::new();
1150
1151        #[derive(Debug)]
1152        struct Inner {
1153            start: u32,
1154            end: u32,
1155        }
1156
1157        let mut rows = Vec::with_capacity(10_000);
1158        for i in 0..10_000u32 {
1159            let start = text_buf.len() as u32;
1160            text_buf.push_str(&format!("row_{i}"));
1161            let end = text_buf.len() as u32;
1162            rows.push(Inner { start, end });
1163        }
1164
1165        let vr = ValidatedRows::new(rows, text_buf, blob_arena);
1166        assert_eq!(vr.len(), 10_000);
1167        assert_eq!(vr.text_slice(vr[0].start, vr[0].end), "row_0");
1168        assert_eq!(vr.text_slice(vr[9999].start, vr[9999].end), "row_9999");
1169    }
1170
1171    #[test]
1172    fn validated_rows_text_slice_empty_range() {
1173        let vr: ValidatedRows<i64> =
1174            ValidatedRows::new(vec![], String::from("hello"), Arena::new());
1175        assert_eq!(vr.text_slice(0, 0), "");
1176        assert_eq!(vr.text_slice(3, 3), "");
1177    }
1178
1179    #[test]
1180    fn validated_rows_get_inner() {
1181        let vr: ValidatedRows<i64> =
1182            ValidatedRows::new(vec![10, 20, 30], String::new(), Arena::new());
1183        assert_eq!(vr.get_inner(0), Some(&10));
1184        assert_eq!(vr.get_inner(1), Some(&20));
1185        assert_eq!(vr.get_inner(2), Some(&30));
1186        assert_eq!(vr.get_inner(3), None);
1187    }
1188
1189    #[test]
1190    fn validated_rows_iter_inner() {
1191        let vr: ValidatedRows<i64> = ValidatedRows::new(vec![5, 10], String::new(), Arena::new());
1192        let vals: Vec<&i64> = vr.iter_inner().collect();
1193        assert_eq!(vals, vec![&5, &10]);
1194    }
1195
1196    #[test]
1197    fn validated_rows_blob_allocated_zero() {
1198        let vr: ValidatedRows<i64> = ValidatedRows::new(vec![], String::new(), Arena::new());
1199        assert_eq!(vr.blob_allocated(), 0);
1200    }
1201
1202    // ===============================================================
1203    // Arena — additional edge cases
1204    // ===============================================================
1205
1206    #[test]
1207    fn arena_get_zero_len() {
1208        let arena = Arena::new();
1209        let data = arena.get(0, 0);
1210        assert!(data.is_empty());
1211    }
1212
1213    #[test]
1214    fn arena_alloc_copy_zero_len() {
1215        let mut arena = Arena::new();
1216        let offset = arena.alloc_copy(b"");
1217        assert_eq!(arena.get(offset, 0), &[]);
1218    }
1219
1220    #[test]
1221    fn arena_global_offset_initial() {
1222        let arena = Arena::new();
1223        assert_eq!(arena.global_offset(), 0);
1224    }
1225
1226    #[test]
1227    fn arena_global_offset_advances() {
1228        let mut arena = Arena::new();
1229        arena.alloc_copy(b"12345");
1230        assert_eq!(arena.global_offset(), 5);
1231        arena.alloc_copy(b"67890");
1232        assert_eq!(arena.global_offset(), 10);
1233    }
1234
1235    #[test]
1236    fn arena_multiple_resets() {
1237        let mut arena = Arena::new();
1238        for _ in 0..10 {
1239            arena.alloc_copy(b"data");
1240            assert_eq!(arena.allocated(), 4);
1241            arena.reset();
1242            assert_eq!(arena.allocated(), 0);
1243        }
1244    }
1245
1246    #[test]
1247    fn arena_get_str_unicode() {
1248        let texts = [
1249            "\u{1F600}\u{1F4A9}",         // emoji
1250            "\u{4e16}\u{754c}",           // CJK
1251            "caf\u{00e9}",                // accented
1252            "\u{1F468}\u{200D}\u{1F469}", // ZWJ
1253        ];
1254        for text in &texts {
1255            let mut arena = Arena::new();
1256            let offset = arena.alloc_copy(text.as_bytes());
1257            assert_eq!(
1258                arena.get_str(offset, text.len()),
1259                Some(*text),
1260                "failed for text: {text}"
1261            );
1262        }
1263    }
1264
1265    #[test]
1266    fn arena_get_str_partial_utf8_returns_none() {
1267        // 0xC3 is the start of a 2-byte UTF-8 sequence, incomplete without the second byte
1268        let mut arena = Arena::new();
1269        let offset = arena.alloc_copy(&[0xC3]);
1270        assert_eq!(arena.get_str(offset, 1), None);
1271    }
1272
1273    #[test]
1274    fn arena_default_is_new() {
1275        let a1 = Arena::new();
1276        let a2 = Arena::default();
1277        assert_eq!(a1.allocated(), a2.allocated());
1278        assert_eq!(a1.capacity(), a2.capacity());
1279    }
1280
1281    // ===============================================================
1282    // ArenaRows — additional edge cases
1283    // ===============================================================
1284
1285    #[test]
1286    fn arena_rows_large() {
1287        let arena = Arena::new();
1288        let rows: Vec<i64> = (0..1000).collect();
1289        let ar = ArenaRows::new(rows, arena);
1290        assert_eq!(ar.len(), 1000);
1291        assert_eq!(ar[0], 0);
1292        assert_eq!(ar[999], 999);
1293    }
1294
1295    #[test]
1296    fn arena_rows_with_arena_data() {
1297        let mut arena = Arena::new();
1298        let offset = arena.alloc_copy(b"stored data");
1299
1300        #[derive(Debug)]
1301        #[allow(dead_code)]
1302        struct Inner {
1303            off: usize,
1304            len: usize,
1305        }
1306
1307        let ar = ArenaRows::new(
1308            vec![Inner {
1309                off: offset,
1310                len: 11,
1311            }],
1312            arena,
1313        );
1314        assert_eq!(ar.len(), 1);
1315    }
1316
1317    // ===============================================================
1318    // Thread-local pool edge cases
1319    // ===============================================================
1320
1321    #[test]
1322    fn thread_local_pool_acquire_fresh() {
1323        // Drain the pool first
1324        ARENA_POOL.with(|pool| pool.borrow_mut().clear());
1325        let arena = acquire_arena();
1326        assert_eq!(arena.allocated(), 0);
1327        release_arena(arena);
1328    }
1329
1330    #[test]
1331    fn thread_local_pool_recycle_resets() {
1332        let mut arena = Arena::new();
1333        arena.alloc_copy(b"something");
1334        assert!(arena.allocated() > 0);
1335        release_arena(arena);
1336
1337        let arena2 = acquire_arena();
1338        assert_eq!(arena2.allocated(), 0, "recycled arena should be reset");
1339        release_arena(arena2);
1340    }
1341
1342    // --- Audit: arena cannot return stale data after reset ---
1343
1344    #[test]
1345    fn arena_reset_clears_data_positions() {
1346        let mut arena = Arena::new();
1347        let o1 = arena.alloc_copy(b"first query data");
1348        assert_eq!(arena.get(o1, 16), b"first query data");
1349
1350        arena.reset();
1351        assert_eq!(arena.allocated(), 0);
1352        assert_eq!(arena.current, 0);
1353        assert_eq!(arena.offset, 0);
1354
1355        // After reset, a new alloc should produce offset 0 (same as o1)
1356        // but the data is different. No stale data leaks.
1357        let o2 = arena.alloc_copy(b"second query dat");
1358        assert_eq!(o2, 0, "first alloc after reset should be at offset 0");
1359        assert_eq!(arena.get(o2, 16), b"second query dat");
1360    }
1361
1362    #[test]
1363    fn arena_reset_discards_oversized_chunks() {
1364        let mut arena = Arena::new();
1365        // Allocate a 128KB blob (> SHRINK_THRESHOLD of 64KB)
1366        let big = vec![0xAA; 128 * 1024];
1367        arena.alloc_copy(&big);
1368        let cap_before = arena.capacity();
1369        assert!(cap_before >= 128 * 1024);
1370
1371        arena.reset();
1372        let cap_after = arena.capacity();
1373        // Oversized chunks should be discarded — capacity should shrink
1374        assert!(
1375            cap_after < cap_before,
1376            "oversized chunks should be discarded on reset: before={cap_before}, after={cap_after}"
1377        );
1378    }
1379
1380    // --- Audit: alloc_copy zero-length returns stable offset ---
1381
1382    #[test]
1383    fn alloc_copy_zero_length_returns_valid_offset() {
1384        let mut arena = Arena::new();
1385        let o1 = arena.alloc_copy(b"");
1386        let o2 = arena.alloc_copy(b"hello");
1387        // Zero-length alloc should return a valid global offset
1388        // without advancing the bump pointer.
1389        assert_eq!(o1, o2, "zero-length alloc should not advance offset");
1390        assert_eq!(arena.get(o2, 5), b"hello");
1391    }
1392
1393    // --- Audit: get with zero length returns empty slice ---
1394
1395    #[test]
1396    fn get_zero_length_returns_empty() {
1397        let arena = Arena::new();
1398        assert_eq!(arena.get(0, 0), &[]);
1399        assert_eq!(arena.get(9999, 0), &[]);
1400    }
1401}