// index_arena/lib.rs

1// Copyright (c) [2024] [Yegor Vaskonyan]
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! A simple, id-based, heterogeneous arena allocator.
5//!
6//! ## Id-based
7//!
8//! Uses small unique identifiers instead of references to represent allocations.
9//! This leverages the type system to statically assign every identifier to the
10//! arena it belongs to, ensuring safety without incurring runtime overhead.
11//!
12//! Accessing individual elements is achieved via various arena methods, 
13//! conceptually similar to indexing a `Vec`.
14//!
15//! ## Heterogeneous
16//!
//! Supports allocating values of all statically sized, non-ZST types as well as slices and string slices.
18//! This is particularly useful for managing tree-like data structures 
19//! with different node types.
20//!
21//! ## Statically guaranteed safety
22//!
//! The implementation leverages the power of Rust's type
24//! system, achieving safety with almost no runtime checks.
25//!
26//! ## No `Drop`
27//!
28//! This design, however, has one downside: the arena does not know about individual objects
29//! it contains, which makes it impossible to run their destructors on `drop`.
30//!
31//! ## Examples
32//!
33//! ```rust
34//! use index_arena::{Id, new_arena};
35//!
36//! struct Even<A> {
37//!     next: Option<Id<Odd<A>, A>>,
38//! }
39//!
40//! struct Odd<A> {
41//!     next: Option<Id<Even<A>, A>>,
42//! }
43//!
44//! let mut arena = new_arena!();
45//!
46//! let three = arena.alloc(Odd { next: None });
47//! let two = arena.alloc(Even { next: Some(three) });
48//! let one = arena.alloc(Odd { next: Some(two) });
49//!
50//! assert_eq!(&arena[one].next, &Some(two));
51//! ```
52
53#![allow(private_bounds)]
54
55use core::alloc::Layout;
56use core::marker::PhantomData;
57use core::mem::MaybeUninit;
58use core::ptr;
59use std::fmt::Debug;
60use std::hash::Hash;
61use std::ops::{Index, IndexMut};
62use std::slice::{from_raw_parts, from_raw_parts_mut};
63use std::str::{from_utf8_unchecked, from_utf8_unchecked_mut};
64use aligned_vec::{AVec, ConstAlign};
65use derive_where::derive_where;
66
67use crate::utils::MaybeUninitExt;
68
69mod utils;
70
/// Asserts `$cond`, requiring the condition to be evaluable in a `const` context.
///
/// The condition is evaluated once inside a `const` block; when it holds, the
/// whole `if` is a constant-`false` branch the optimizer removes, so the check
/// costs nothing at runtime. When it does not hold, the inner `assert!` fires
/// with the same condition (and optional formatted message).
macro_rules! assert_const {
    ($cond:expr, $($arg:tt)+) => {
        if const { !$cond } {
            assert!($cond, $($arg)+);
        }
    };

    ($cond:expr $(,)?) => {
        if const { !$cond } {
            assert!($cond);
        }
    };
}
84
/// Maximum supported alignment for arena allocations; the backing storage is
/// always aligned to this many bytes via `ConstAlign<MAX_ALIGN>`.
const MAX_ALIGN: usize = 128;
86
/// A type-erased identifier for an arena allocation.
///
/// Unlike [`Id`], a `RawId` carries no information about the allocated type or
/// the arena it belongs to; it only stores the byte offset of the allocation
/// within the arena's storage.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[repr(transparent)]
pub struct RawId {
    byte_offset: u32,
}
92
/// `Id` specialization for statically sized types.
///
/// All the guarantees `Id` makes also apply for this type.
#[derive_where(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[repr(transparent)]
struct SizedId<T, A> {
    // Invariant: `byte_offset` always represents a valid location
    // within the arena holding a value of type `T`, provided `size_of::<T>() > 0`.
    byte_offset: u32,
    // Ties the id to the element type `T` and the arena marker `A`
    // without storing either.
    _marker: PhantomData<(T, A)>,
}
104
105impl<T, A> SizedId<T, A> {
106    #[inline]
107    unsafe fn new(byte_offset: usize) -> SizedId<T, A> {
108        let byte_offset: u32 = byte_offset.try_into()
109            .expect("`byte_offset` must not exceed `u32::MAX`");
110
111        SizedId { byte_offset, _marker: PhantomData }
112    }
113}
114
impl<T, A> SizedId<MaybeUninit<T>, A> {
    /// Converts this `SizedId<MaybeUninit<T>, A>` to `SizedId<T, A>`,
    /// assuming the associated value is initialized.
    ///
    /// # Safety
    /// The caller must ensure the value is fully initialized before calling this method.
    #[inline]
    unsafe fn assume_init(self) -> SizedId<T, A> {
        // `MaybeUninit<T>` is guaranteed to have the same size and alignment
        // as `T`, so the byte offset can be reused verbatim.
        SizedId { byte_offset: self.byte_offset, _marker: PhantomData }
    }
}
126
/// `Id` specialization for slices.
///
/// All the guarantees `Id` makes also apply for this type.
#[derive_where(Debug, Copy, Clone, Eq, PartialEq, Hash)]
struct SliceId<T, A> {
    // Invariant (mirrors `SizedId`): `byte_offset` always represents a valid
    // location within the arena holding `len` consecutive values of type `T`,
    // provided `size_of::<T>() > 0`.
    byte_offset: u32,
    // Number of elements (not bytes) in the slice.
    len: u32,
    _marker: PhantomData<(T, A)>,
}
136
137impl<T, A> SliceId<T, A> {
138    #[inline]
139    unsafe fn new(byte_offset: usize, len: usize) -> SliceId<T, A> {
140        let byte_offset: u32 = byte_offset.try_into()
141            .expect("`byte_offset` must not exceed `u32::MAX`");
142
143        let len: u32 = len.try_into()
144            .expect("`len` must not exceed `u32::MAX`");
145
146        SliceId { byte_offset, len, _marker: PhantomData }
147    }
148}
149
impl<T, A> SliceId<MaybeUninit<T>, A> {
    /// Converts this `SliceId<MaybeUninit<T>, A>` to `SliceId<T, A>`,
    /// assuming the associated value is initialized.
    ///
    /// # Safety
    /// The caller must ensure all slice elements are fully
    /// initialized before calling this method.
    #[inline]
    unsafe fn assume_init(self) -> SliceId<T, A> {
        // `MaybeUninit<T>` has the same layout as `T`, so both the offset
        // and the element count carry over unchanged.
        SliceId { byte_offset: self.byte_offset, len: self.len, _marker: PhantomData }
    }
}
162
/// `Id` specialization for string slices.
///
/// The underlying slice always represents a valid UTF-8 encoded string.
/// All the guarantees `Id` makes also apply for this type.
#[derive_where(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[repr(transparent)]
struct StrId<A>(SliceId<u8, A>);
170
impl<A> StrId<A> {
    /// Wraps a byte-slice id as a string id.
    ///
    /// # Safety
    /// The bytes identified by `slice_id` must form valid UTF-8,
    /// since accessors call `from_utf8_unchecked` on them.
    #[inline]
    unsafe fn new(slice_id: SliceId<u8, A>) -> StrId<A> {
        StrId(slice_id)
    }
}
177
/// Describes an `Id` specialization.
///
/// # Safety
/// Implementors must uphold all the guarantees `Id` makes.
unsafe trait SpecId<A> {
    /// Concrete id representation used for `Self`
    /// (e.g. `SizedId`, `SliceId`, `StrId`).
    type Id: Debug + Copy + Clone + Eq + PartialEq + Hash;

    /// Resolves `id` to a shared reference inside `arena`.
    fn get(arena: &Arena<A>, id: Self::Id) -> &Self;

    /// Resolves `id` to a mutable reference inside `arena`.
    fn get_mut(arena: &mut Arena<A>, id: Self::Id) -> &mut Self;

    /// Extracts the type-erased raw id from `id`.
    fn get_raw_id(id: Self::Id) -> RawId;
}
191
unsafe impl<T, A> SpecId<A> for T {
    type Id = SizedId<T, A>;

    #[inline]
    fn get(arena: &Arena<A>, id: Self::Id) -> &Self {
        // Const-folded check: `T` must be non-zero-sized and must not require
        // more alignment than the storage provides.
        assert_const!(size_of::<T>() != 0 && align_of::<T>() <= MAX_ALIGN);

        let byte_offset = id.byte_offset as usize;

        // SAFETY: by the `SizedId` invariant, `byte_offset` is a valid,
        // properly aligned location within the storage holding a `T`.
        let ptr = unsafe {
            let raw_ptr = arena.storage.as_ptr().add(byte_offset);
            raw_ptr.cast()
        };

        // SAFETY: see above; the value lives as long as the arena.
        unsafe { &*ptr }
    }

    #[inline]
    fn get_mut(arena: &mut Arena<A>, id: Self::Id) -> &mut Self {
        // Const-folded check, as in `get`.
        assert_const!(size_of::<T>() != 0 && align_of::<T>() <= MAX_ALIGN);

        let byte_offset = id.byte_offset as usize;

        // SAFETY: same reasoning as in `get`; the `&mut self` borrow of the
        // arena guarantees exclusive access.
        let ptr = unsafe {
            let raw_ptr = arena.storage.as_mut_ptr().add(byte_offset);
            raw_ptr.cast()
        };

        unsafe { &mut *ptr }
    }

    #[inline]
    fn get_raw_id(id: Self::Id) -> RawId {
        RawId { byte_offset: id.byte_offset }
    }
}
228
229unsafe impl<T, A> SpecId<A> for [T] {
230    type Id = SliceId<T, A>;
231
232    fn get(arena: &Arena<A>, id: Self::Id) -> &Self {
233        let byte_offset = id.byte_offset as usize;
234        let len = id.len as usize;
235
236        let ptr = unsafe {
237            let raw_ptr = arena.storage.as_ptr().add(byte_offset);
238            raw_ptr.cast()
239        };
240
241        unsafe { from_raw_parts(ptr, len) }
242    }
243
244    fn get_mut(arena: &mut Arena<A>, id: Self::Id) -> &mut Self {
245        let byte_offset = id.byte_offset as usize;
246        let len = id.len as usize;
247
248        let ptr = unsafe {
249            let raw_ptr = arena.storage.as_mut_ptr().add(byte_offset);
250            raw_ptr.cast()
251        };
252
253        unsafe { from_raw_parts_mut(ptr, len) }
254    }
255
256    fn get_raw_id(id: Self::Id) -> RawId {
257        RawId { byte_offset: id.byte_offset }
258    }
259}
260
261unsafe impl<A> SpecId<A> for str {
262    type Id = StrId<A>;
263
264    fn get(arena: &Arena<A>, id: Self::Id) -> &Self {
265        let bytes = <[u8] as SpecId<A>>::get(arena, id.0);
266        unsafe { from_utf8_unchecked(bytes) }
267    }
268
269    fn get_mut(arena: &mut Arena<A>, id: Self::Id) -> &mut Self {
270        let bytes = <[u8] as SpecId<A>>::get_mut(arena, id.0);
271        unsafe { from_utf8_unchecked_mut(bytes) }
272    }
273
274    fn get_raw_id(id: Self::Id) -> RawId {
275        <[u8] as SpecId<A>>::get_raw_id(id.0)
276    }
277} 
278
/// A unique identifier for an object allocated using `Arena`.
///
/// `Id<T, A>` can only be used with the specific arena from which it was created,
/// thanks to the type parameter `A`, which uniquely identifies the arena.
///
/// An `Id<T, A>` guarantees that calling `Arena::get` with it will always yield
/// a reference to the same object (bitwise identical), unless the object is
/// explicitly mutated via a mutable reference obtained from `Arena::get_mut`.
/// The object associated with this `Id` is guaranteed to have the same lifetime
/// as the arena itself, meaning it remains valid as long as the arena exists.
#[derive_where(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[repr(transparent)]
pub struct Id<T: ?Sized + SpecId<A>, A> {
    // The specialization-specific representation
    // (`SizedId`, `SliceId` or `StrId`).
    spec: T::Id,
}
294
295impl<T: ?Sized + SpecId<A>, A> Id<T, A> {
296    #[inline]
297    fn new(spec: T::Id) -> Id<T, A> {
298        Id { spec }
299    }
300
301    #[inline]
302    fn get(self, arena: &Arena<A>) -> &T {
303        T::get(arena, self.spec)
304    }
305
306    #[inline]
307    fn get_mut(self, arena: &mut Arena<A>) -> &mut T {
308        T::get_mut(arena, self.spec)
309    }
310
311    #[inline]
312    pub fn to_raw_id(&self) -> RawId {
313        T::get_raw_id(self.spec)
314    }
315}
316
317impl<T: SpecId<A>, A> From<Id<T, A>> for RawId {
318    #[inline]
319    fn from(id: Id<T, A>) -> Self {
320        id.to_raw_id()
321    }
322}
323
/// A simple heterogeneous arena allocator inspired by the `id_arena` crate.
///
/// Unlike the `id_arena` crate, this implementation allows any type to be allocated
/// within the arena and ensures that identifiers are only valid within the arena they
/// were created from. This guarantees safety with minimal runtime overhead.
///
/// However, this approach has a downside: the arena does not track individual elements,
/// effectively providing a form of type erasure. As a result, it is not possible to
/// implement proper dropping of individual elements like in `id_arena::Arena`.
#[derive_where(Debug)]
pub struct Arena<A> {
    // Raw byte storage; `ConstAlign<MAX_ALIGN>` keeps the base pointer aligned
    // to `MAX_ALIGN`, so per-allocation padding alone suffices for alignment.
    storage: AVec<MaybeUninit<u8>, ConstAlign<MAX_ALIGN>>,
    // Marker that ties this arena to its unique type-level tag `A`.
    _marker: PhantomData<A>,
}
338
impl<A> Arena<A> {
    /// Creates a new, empty arena.
    ///
    /// # Safety
    /// The caller must ensure that the `A` type parameter is only used for this arena.
    #[inline]
    pub unsafe fn new() -> Arena<A> {
        Arena {
            // Alignment comes from the `ConstAlign<MAX_ALIGN>` type parameter;
            // the runtime argument only adds to it, so `0` is sufficient here.
            storage: AVec::new(0),
            _marker: PhantomData,
        }
    }

    /// Returns a shared reference to the arena-allocated object associated with given `Id`.
    #[inline]
    pub fn get<T: ?Sized + SpecId<A>>(&self, id: Id<T, A>) -> &T {
        id.get(self)
    }

    /// Returns a mutable reference to the arena-allocated object associated with given `Id`.
    #[inline]
    pub fn get_mut<T: ?Sized + SpecId<A>>(&mut self, id: Id<T, A>) -> &mut T {
        id.get_mut(self)
    }

    /// Allocates a new value of type `T` in the arena and returns its `Id`.
    #[inline]
    pub fn alloc<T>(&mut self, item: T) -> Id<T, A> {
        // Allocate a new item without initializing it.
        let id = self.alloc_uninit::<T>();

        // SAFETY: `MaybeUninit::as_mut_ptr` always returns a valid pointer for `ptr::write`.
        unsafe { ptr::write(self.get_mut(id).as_mut_ptr(), item); }

        // SAFETY: we have just initialized the memory associated with `id`.
        unsafe { Id::new(id.spec.assume_init()) }
    }

    /// Allocates a copy of `slice` in the arena (cloning each element)
    /// and returns its `Id`.
    #[inline]
    pub fn alloc_slice<T: Clone>(&mut self, slice: &[T]) -> Id<[T], A> {
        let id = self.alloc_slice_uninit(slice.len());
        <MaybeUninit<T> as MaybeUninitExt<T>>::clone_from_slice(self.get_mut(id), slice);
        // SAFETY: `clone_from_slice` initialized every element.
        unsafe { Id::new(id.spec.assume_init()) }
    }

    /// Allocates a copy of `str` in the arena and returns its `Id`.
    #[inline]
    pub fn alloc_str(&mut self, str: &str) -> Id<str, A> {
        let slice = self.alloc_slice(str.as_bytes());
        // SAFETY: the bytes were copied verbatim from a `&str`,
        // so they are valid UTF-8.
        Id::new(unsafe { StrId::new(slice.spec) })
    }

    /// Allocates uninitialized storage for one `T` and returns its `Id`.
    #[inline]
    fn alloc_uninit<T>(&mut self) -> Id<MaybeUninit<T>, A> {
        assert_const!(align_of::<T>() <= MAX_ALIGN && size_of::<T>() != 0);

        // SAFETY: `align_of::<T>` cannot exceed `MAX_ALIGN`.
        let byte_offset = unsafe { self.alloc_layout(Layout::new::<T>()) };

        // SAFETY: the memory location at `byte_offset` is properly
        // aligned to hold a value of type `T` and `MaybeUninit`
        // does not require initialization.
        unsafe { Id::new(SizedId::new(byte_offset)) }
    }

    /// Allocates uninitialized storage for `len` values of `T` and returns its `Id`.
    #[inline]
    fn alloc_slice_uninit<T>(&mut self, len: usize) -> Id<[MaybeUninit<T>], A> {
        assert_const!(align_of::<T>() <= MAX_ALIGN && size_of::<T>() != 0);

        let layout = Layout::array::<T>(len).unwrap();

        // NOTE(review): when `len == 0` the layout has size 0, which violates
        // `alloc_layout`'s documented "size must not be zero" precondition —
        // confirm whether empty slices should be rejected or the contract relaxed.
        // SAFETY: the alignment of an array is the same as the alignment of
        // its elements and `align_of::<T>` cannot exceed `MAX_ALIGN`.
        let byte_offset = unsafe { self.alloc_layout(layout) };

        // SAFETY: the memory location at `byte_offset` is properly
        // aligned to hold a value of type `[T; len]` and `MaybeUninit`
        // does not require initialization.
        unsafe { Id::new(SliceId::new(byte_offset, len)) }
    }

    /// Allocates uninitialized memory suitable to hold a value with the given layout
    /// and returns the index of the beginning of the allocation.
    ///
    /// # Safety
    /// `layout.size()` must not be zero and `layout.align()` must not exceed `MAX_ALIGN`.
    #[inline]
    unsafe fn alloc_layout(&mut self, layout: Layout) -> usize {
        // Since the backing storage is aligned to `MAX_ALIGN` and
        // `layout.align() <= MAX_ALIGN`, we only need to ensure that
        // the start of the new allocation is aligned to `layout.align()`.
        // SAFETY: `layout.align()` is guaranteed to be a power of two.
        let padding = unsafe { compute_padding(self.storage.len(), layout.align()) };

        // SAFETY: `compute_padding` ensures that `padding < layout.align()`
        // and `Layout` guarantees that both size and alignment do not exceed
        // `isize::MAX`. Therefore, `layout.size() + padding` can be at most
        // `2 * (isize::MAX as usize)`, which is less than `usize::MAX`.
        let padded_size = unsafe { layout.size().unchecked_add(padding) };

        let unaligned_byte_offset = self.alloc_raw(padded_size);

        // SAFETY: `unaligned_byte_offset` is the storage length before growing
        // and `alloc_raw` successfully reserved `padded_size` more bytes, so
        // `unaligned_byte_offset + padded_size` fits in `usize`; since
        // `padding <= padded_size`, this addition cannot overflow.
        unsafe { unaligned_byte_offset.unchecked_add(padding) }
    }

    /// Allocates `size_in_bytes` uninitialized bytes in the arena and
    /// returns the byte offset of the beginning of the allocation.
    #[inline]
    fn alloc_raw(&mut self, size_in_bytes: usize) -> usize {
        self.storage.reserve(size_in_bytes);

        let old_len = self.storage.len();

        // SAFETY: `storage.reserve()` didn't panic, so the capacity is at
        // least `old_len + size_in_bytes`; a capacity cannot exceed
        // `isize::MAX`, hence this addition cannot overflow.
        let new_len = unsafe { old_len.unchecked_add(size_in_bytes) };

        // SAFETY: we have just reserved `size_in_bytes` extra bytes, so
        // `new_len <= capacity`; the elements are `MaybeUninit<u8>` and
        // require no initialization.
        unsafe { self.storage.set_len(new_len); }

        old_len
    }
}
463
/// Computes the smallest possible number of bytes that must
/// be added to `addr` to align it to `align` bytes.
///
/// The returned value is always within the range `[0, align)`.
///
/// # Safety
/// `align` must be a power of two.
#[inline]
const unsafe fn compute_padding(addr: usize, align: usize) -> usize {
    // A simplified version of `core::ptr::align_offset`; its correctness
    // argument carries over directly.

    // SAFETY: `align` is a power of two, so it is at least 1.
    let mask = unsafe { align.unchecked_sub(1) };

    // Round `addr` up to the next multiple of `align`, then measure the gap.
    // Masking by `-align` only clears low bits, so it cannot reduce the value
    // by more than `align - 1`; even though the intermediate sum may wrap,
    // the resulting padding always lands in `[0, align)`.
    let rounded_up = addr.wrapping_add(mask) & 0usize.wrapping_sub(align);
    let padding = rounded_up.wrapping_sub(addr);

    // Range check (see the masking argument above).
    debug_assert!(padding < align);

    // Correctness check: the padded address is a multiple of `align`.
    debug_assert!((addr + padding) % align == 0);

    padding
}
496
497impl<T: ?Sized + SpecId<A>, A> Index<Id<T, A>> for Arena<A> {
498    type Output = T;
499
500    #[inline]
501    fn index(&self, id: Id<T, A>) -> &Self::Output {
502        self.get(id)
503    }
504}
505
506impl<T: ?Sized + SpecId<A>, A> IndexMut<Id<T, A>> for Arena<A> {
507    #[inline]
508    fn index_mut(&mut self, id: Id<T, A>) -> &mut Self::Output {
509        self.get_mut(id)
510    }
511}
512
/// Creates a new [`Arena`] tagged with a freshly declared marker type.
///
/// Each invocation declares a new local `struct` (named `Default` unless an
/// identifier is supplied), so every arena produced by this macro has a
/// distinct `A` parameter — which is exactly the uniqueness `Arena::new`'s
/// safety contract requires.
#[macro_export]
macro_rules! new_arena {
    () => {
        $crate::new_arena!(Default)
    };

    ($name:ident) => {
        {
            struct $name;
            // SAFETY: `$name` is unique for each macro invocation.
            unsafe { $crate::Arena::<$name>::new() }
        }
    };
}
527
#[cfg(test)]
mod test {
    //! Smoke tests covering sized values, slices, strings, mixed
    //! allocations, and mutation through both `get_mut` and indexing.

    use super::*;

    #[test]
    fn arena_alloc_one_u8() {
        let mut arena = new_arena!();
        let id = arena.alloc(123u8);
        assert_eq!(arena.get(id), &123u8);
    }

    #[test]
    fn arena_alloc_one_u64() {
        let mut arena = new_arena!();
        let id = arena.alloc(u64::MAX);
        assert_eq!(arena.get(id), &u64::MAX);
    }

    #[test]
    fn arena_alloc_mut() {
        let mut arena = new_arena!();

        let id = arena.alloc(456u32);
        assert_eq!(arena.get(id), &456u32);

        *arena.get_mut(id) += 234;
        assert_eq!(*arena.get(id), 456u32 + 234);
    }

    // Mixed element sizes/alignments exercise the padding logic.
    #[test]
    fn arena_alloc_multiple_sized() {
        let mut arena = new_arena!();

        let a_id = arena.alloc(12u16);
        let b_id = arena.alloc("hell");
        let c_id = arena.alloc(131u128);

        assert_eq!(arena.get(b_id), &"hell");
        assert_eq!(arena.get(a_id), &12u16);
        assert_eq!(arena.get(c_id), &131u128);
    }

    #[test]
    fn arena_alloc_multiple_sized_mut() {
        let mut arena = new_arena!();

        let a_id = arena.alloc(12u16);
        let b_id = arena.alloc("hell");
        let c_id = arena.alloc(131u128);

        assert_eq!(arena[b_id], "hell");
        assert_eq!(arena[a_id], 12u16);
        assert_eq!(arena[c_id], 131u128);

        // Mutating one allocation must not disturb its neighbors.
        *arena.get_mut(c_id) = 1;
        assert_eq!(arena.get(c_id), &1);

        *arena.get_mut(b_id) = "heaven";
        *arena.get_mut(a_id) *= 3;

        assert_eq!(arena.get(b_id), &"heaven");
        assert_eq!(*arena.get(a_id), 12u16 * 3);
    }

    #[test]
    fn arena_alloc_slice() {
        let mut arena = new_arena!();
        let fruits = ["banana", "orange", "apple"];
        let id = arena.alloc_slice(&fruits);
        assert_eq!(arena[id][1], "orange");
    }

    #[test]
    fn arena_alloc_slice_mut() {
        let mut arena = new_arena!();
        let numbers = [12i64, -451i64, 0i64];
        let id = arena.alloc_slice(&numbers);
        assert_eq!(arena[id][1], -451i64);
        arena[id][1] = 3i64;
        assert_eq!(arena[id][1], 3i64);
    }

    // Sized and slice allocations interleaved in one arena.
    #[test]
    fn arena_alloc_multiple() {
        let mut arena = new_arena!();
        let counter = arena.alloc(123);
        let fruits = arena.alloc_slice(&["banana", "orange", "apple"]);
        assert_eq!(arena[counter], 123);
        assert_eq!(arena[fruits], ["banana", "orange", "apple"]);
    }

    #[test]
    fn arena_alloc_multiple_mut() {
        let mut arena = new_arena!();
        let counter = arena.alloc(123);
        let fruits = arena.alloc_slice(&["banana", "orange", "apple"]);
        assert_eq!(arena[counter], 123);
        assert_eq!(arena[fruits], ["banana", "orange", "apple"]);
        arena[counter] = 43;
        assert_eq!(arena[counter], 43);
        arena[fruits][0] = "pineapple";
        assert_eq!(arena[fruits][0], "pineapple");
    }

    #[test]
    fn arena_alloc_str() {
        let mut arena = new_arena!();
        let hello = arena.alloc_str("Hello!");
        assert_eq!(&arena[hello], "Hello!");
    }
}