1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
//! Module containing the `Arena` and `Uninitialized` structs. For convenience the
//! `Arena` is exported at the root of the crate.

use std::mem::size_of;
use std::ops::Deref;
use std::cell::Cell;
use std::borrow::Cow;
use std::fmt;

/// Size in bytes of every internally preallocated arena page (64KiB).
const ARENA_BLOCK: usize = 64 * 1024;

/// An arena implementation that uses preallocated 64KiB pages for all allocations.
/// If a new allocation were to be pushed over the the boundaries of the page, a
/// new page is internally allocated first, thus this version of the arena can never
/// run out of memory unless the process runs out of heap altogether.
///
/// Allocating a type larger than the page size will result in a new heap allocation
/// just for that type separate from the page mechanism.
pub struct Arena {
    // All pages (and any oversized standalone allocations) ever made. Vecs are
    // kept here so their heap buffers stay alive for the arena's lifetime.
    store: Cell<Vec<Vec<u8>>>,
    // Base pointer of the page currently being bump-allocated from.
    ptr: Cell<*mut u8>,
    // Bump offset (in bytes) into the current page.
    offset: Cell<usize>,
}

/// A pointer to an uninitialized region of memory.
pub struct Uninitialized<'arena, T: Copy> {
    // Borrow of arena memory that may not have been written to yet; reading
    // `value` before `init` is undefined behavior.
    pointer: &'arena mut MaybeUninit<T>,
}

/// Almost a copy of https://github.com/rust-lang/rust/issues/53491 — a local
/// stand-in for `std::mem::MaybeUninit`. The `_uninit` variant lets the union
/// exist without `value` ever having been written.
union MaybeUninit<T: Copy> {
    value: T,
    _uninit: (),
}

impl<'arena, T: Copy> Uninitialized<'arena, T> {
    /// Initialize the memory at the pointer with a given value.
    #[inline]
    pub fn init(self, value: T) -> &'arena mut T {
        unsafe {
            // SAFETY: `T: Copy` implies `T: !Drop`, so this assignment cannot
            // run a destructor on the uninitialized previous contents.
            self.pointer.value = value;
            &mut self.pointer.value
        }
    }

    /// Get a reference to the pointer without writing to it.
    ///
    /// **Calling this method without calling `init` is undefined behavior.**
    #[inline]
    pub unsafe fn as_ref(&self) -> &'arena T {
        // Cast through a raw pointer to detach the returned lifetime from
        // `&self` and bind it to `'arena` instead.
        &*(&self.pointer.value as *const T)
    }

    /// Convert the `Uninitialized` to a regular mutable reference.
    ///
    /// **Calling this method without calling `init` is undefined behavior.**
    #[inline]
    pub unsafe fn as_mut_ref(self) -> &'arena mut T {
        &mut self.pointer.value
    }

    /// Convert a raw pointer to an `Uninitialized`. This method is unsafe since it can
    /// bind to arbitrary lifetimes.
    #[inline]
    pub unsafe fn from_raw(pointer: *mut T) -> Self {
        Uninitialized {
            // `MaybeUninit<T>` wraps a single `T`, so reinterpreting the
            // pointee type is layout-compatible.
            pointer: &mut *(pointer as *mut MaybeUninit<T>),
        }
    }
}

impl<'arena, T: Copy> From<&'arena mut T> for Uninitialized<'arena, T> {
    /// Wrap an existing (already initialized) mutable reference, allowing it
    /// to be overwritten through the `Uninitialized` API.
    #[inline]
    fn from(pointer: &'arena mut T) -> Self {
        // Decay the reference to a raw pointer first; `from_raw` rebinds the
        // `'arena` lifetime, which is sound here because that is exactly the
        // lifetime the incoming borrow carries.
        let raw: *mut T = pointer;
        unsafe { Self::from_raw(raw) }
    }
}

/// A wrapper around a `str` slice that has an extra `0` byte allocated following
/// its contents. The wrapped `&str` itself does NOT include the nul byte; the
/// length reported by the slice is the logical string length.
#[derive(Clone, Copy, PartialEq)]
pub struct NulTermStr<'arena>(&'arena str);

impl<'arena> fmt::Debug for NulTermStr<'arena> {
    /// Format exactly like the underlying `&str` would (quoted, escaped).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:?}", self.0)
    }
}

impl<'arena> fmt::Display for NulTermStr<'arena> {
    /// Format exactly like the underlying `&str` would (verbatim, unquoted).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl<'arena> NulTermStr<'arena> {
    /// Read byte at a given `index`. This does not check for length boundaries,
    /// but is guaranteed to return `0` for `index` equal to the length.
    ///
    /// This can be a very useful optimization when reading a long string one
    /// byte at a time until termination, if checking for `0` can replace what
    /// would otherwise have to be length checks.
    ///
    /// ```rust
    /// # use toolshed::Arena;
    /// # fn main() {
    /// let arena = Arena::new();
    /// let str = arena.alloc_nul_term_str("foo");
    ///
    /// // We can safely get the underlying `&str` at any time.
    /// assert_eq!(&str[..], "foo");
    ///
    /// unsafe {
    ///     // First 3 bytes are known to us
    ///     assert_eq!(str.byte_unchecked(0), b'f');
    ///     assert_eq!(str.byte_unchecked(1), b'o');
    ///     assert_eq!(str.byte_unchecked(2), b'o');
    ///
    ///     // Following is safe and guaranteed to be '0'
    ///     assert_eq!(str.byte_unchecked(3), 0);
    ///
    ///     // Reading index 4 would be undefined behavior!
    /// }
    /// # }
    /// ```
    ///
    /// # Safety
    ///
    /// `index` must be at most the string's length. Index == length is valid
    /// only because `Arena::alloc_nul_term_str` allocates one extra `0` byte
    /// past the end of the slice.
    pub unsafe fn byte_unchecked(&self, index: usize) -> u8 {
        *self.0.as_ptr().add(index)
    }
}

impl<'arena> AsRef<str> for NulTermStr<'arena> {
    fn as_ref(&self) -> &str {
        self.0
    }
}

impl<'arena> Deref for NulTermStr<'arena> {
    // NOTE: the target is `&'arena str` (a reference), not `str`. This makes
    // `*nts` yield the arena-lifetimed slice directly, at the cost of needing
    // `&**nts` to reach a plain `&str`.
    type Target = &'arena str;

    fn deref(&self) -> &&'arena str {
        &self.0
    }
}

impl<'arena> From<NulTermStr<'arena>> for &'arena str {
    fn from(nts: NulTermStr<'arena>) -> &'arena str {
        nts.0
    }
}

impl Arena {
    /// Create a new arena with a single preallocated 64KiB page.
    pub fn new() -> Self {
        let mut store = vec![Vec::with_capacity(ARENA_BLOCK)];
        // The Vec's heap buffer is stable across the move into the Cell below,
        // so grabbing the raw pointer first is fine.
        let ptr = store[0].as_mut_ptr();

        Arena {
            store: Cell::new(store),
            ptr: Cell::new(ptr),
            offset: Cell::new(0),
        }
    }

    /// Put the value onto the page of the arena and return a reference to it.
    #[inline]
    pub fn alloc<'arena, T: Sized + Copy>(&'arena self, value: T) -> &'arena mut T {
        self.alloc_uninitialized().init(value)
    }

    /// Allocate enough bytes for the type `T`, then return an `Uninitialized` pointer to the memory.
    #[inline]
    pub fn alloc_uninitialized<'arena, T: Sized + Copy>(&'arena self) -> Uninitialized<'arena, T> {
        Uninitialized {
            pointer: unsafe { &mut *(self.require(size_of::<T>()) as *mut MaybeUninit<T>) },
        }
    }

    /// Allocate a slice of `T` slice onto the arena and return a reference to it.
    /// This is useful when the original slice has an undefined lifetime.
    ///
    /// Note: static slices (`&'static [T]`) can be safely used in place of arena-bound
    ///       slices without having to go through this method.
    pub fn alloc_slice<'arena, T: Copy>(&'arena self, val: &[T]) -> &'arena [T] {
        let ptr = self.require(val.len() * size_of::<T>()) as *mut T;

        unsafe {
            use std::ptr::copy_nonoverlapping;
            use std::slice::from_raw_parts;

            copy_nonoverlapping(val.as_ptr(), ptr, val.len());
            from_raw_parts(ptr, val.len())
        }
    }

    /// Allocate a statically-sized but lazily-generated slice `[T]` out of an iterator.
    /// This is useful if you're going to make a slice of something and put it on the arena,
    /// but you don't want to make an allocation first just to have something to copy in.
    ///
    /// The slice will be at maximum length `n`, further elements of the iterator ignored and not evaluated.
    /// If the iterator yields less than `n` elements, a shorter slice will simply be returned.
    pub fn alloc_lazy_slice<'arena, T, I: Iterator<Item=T>>(&'arena self, vals: I, n: usize) -> &'arena [T] {
        // Reserve space for `n` elements up front, even though the iterator may
        // yield fewer and the tail will have to be handed back.
        let size = n * size_of::<T>();
        let ptr = self.require(size) as *mut T;
        let mut count: usize = 0;

        unsafe {
            use std::ptr::write;
            use std::slice::from_raw_parts;

            for val in vals.take(n) {
                // `ptr::write` moves the value in WITHOUT reading or dropping the
                // destination. A plain `*ptr = val` assignment would run `Drop` on
                // uninitialized garbage — `T` carries no `Copy` bound here.
                write(ptr.add(count), val);
                count += 1;
            }

            // Hand the unused tail of the reservation back to the arena, but only
            // when the reservation actually came out of the current page: `require`
            // routes sizes above ARENA_BLOCK to a standalone heap allocation that
            // never advances `offset`, so walking `offset` back in that case would
            // corrupt it (or underflow).
            if size <= ARENA_BLOCK {
                let unused = (n - count) * size_of::<T>();
                // Only walk back whole machine words so `offset` stays word-aligned
                // for subsequent allocations.
                let walk_back = unused & !(size_of::<usize>() - 1);
                self.reset_to(self.offset() - walk_back);
            }

            from_raw_parts(ptr, count)
        }
    }

    /// Put a `Vec<T>` on the arena without reallocating.
    pub fn alloc_vec<'arena, T: Copy>(&'arena self, mut val: Vec<T>) -> &'arena [T] {
        use std::{mem, slice};

        let ptr = val.as_mut_ptr();
        let cap = val.capacity();
        let len = val.len();

        // Ownership of the buffer is transferred to the arena's store below;
        // forget the Vec so it is not freed here.
        mem::forget(val);

        let out = self.alloc_byte_vec(unsafe {
            // Reinterpret the buffer as bytes (len 0 — contents are only ever
            // read back through the returned slice, never through this Vec).
            Vec::from_raw_parts(ptr as _, 0, cap * size_of::<T>())
        });

        unsafe { slice::from_raw_parts(out as _, len) }
    }

    /// Allocate many items at once, avoid allocation for owned values.
    #[inline]
    pub fn alloc_cow<'input, 'arena, T>(&'arena self, vals: Cow<'input, [T]>) -> &'arena [T]
    where
        T: Sized + Copy + 'input,
    {
        match vals {
            Cow::Owned(vec)      => self.alloc_vec(vec),
            Cow::Borrowed(slice) => self.alloc_slice(slice),
        }
    }

    /// Allocate an `&str` slice onto the arena and return a reference to it. This is
    /// useful when the original slice has an undefined lifetime.
    ///
    /// Note: static slices (`&'static str`) can be safely used in place of arena-bound
    ///       slices without having to go through this method.
    pub fn alloc_str<'arena>(&'arena self, val: &str) -> &'arena str {
        unsafe {
            use std::str::from_utf8_unchecked;

            // Copying valid UTF-8 bytes preserves validity.
            from_utf8_unchecked(self.alloc_slice(val.as_bytes()))
        }
    }

    /// Allocate an `&str` slice onto the arena as null terminated C-style string.
    /// No checks are performed on the source and whether or not it already contains
    /// any nul bytes. While this does not create any memory issues, it assumes that
    /// the reader of the source can deal with malformed source.
    pub fn alloc_nul_term_str<'arena>(&'arena self, val: &str) -> NulTermStr {
        let len_with_zero = val.len() + 1;
        let ptr = self.require(len_with_zero);

        unsafe {
            use std::ptr::copy_nonoverlapping;
            use std::slice::from_raw_parts;
            use std::str::from_utf8_unchecked;

            copy_nonoverlapping(val.as_ptr(), ptr, val.len());
            // The extra byte reserved above holds the terminator; the returned
            // slice deliberately excludes it.
            *ptr.add(val.len()) = 0;

            NulTermStr(from_utf8_unchecked(from_raw_parts(ptr, val.len())))
        }
    }

    /// Pushes the `String` as it's own page onto the arena and returns a reference to it.
    /// This does not copy or reallocate the original `String`.
    pub fn alloc_string<'arena>(&'arena self, val: String) -> &'arena str {
        let len = val.len();
        let ptr = self.alloc_byte_vec(val.into_bytes());

        unsafe {
            use std::str::from_utf8_unchecked;
            use std::slice::from_raw_parts;

            from_utf8_unchecked(from_raw_parts(ptr, len))
        }
    }

    /// Take ownership of a byte buffer, keeping it alive for the arena's
    /// lifetime, and return a raw pointer to its contents. Does not touch the
    /// bump pointer or offset.
    #[inline]
    fn alloc_byte_vec(&self, mut val: Vec<u8>) -> *mut u8 {
        let ptr = val.as_mut_ptr();

        // Cell gymnastics: temporarily move the store out, push, move it back.
        let mut temp = self.store.replace(Vec::new());
        temp.push(val);
        self.store.replace(temp);

        ptr
    }

    /// Allocate `size` bytes as a standalone heap allocation, bypassing the
    /// page mechanism entirely.
    fn alloc_bytes(&self, size: usize) -> *mut u8 {
        self.alloc_byte_vec(Vec::with_capacity(size))
    }

    /// Reserve `size` bytes and return a pointer to them. Requests larger than
    /// a page get their own heap allocation; smaller requests are rounded up
    /// to a multiple of the word size and bump-allocated from the current page.
    #[inline]
    fn require(&self, size: usize) -> *mut u8 {
        // This should be optimized away for size known at compile time.
        if size > ARENA_BLOCK {
            return self.alloc_bytes(size);
        }

        // Round up to a multiple of the word size to keep allocations aligned.
        let size = match size % size_of::<usize>() {
            0 => size,
            n => size + (size_of::<usize>() - n),
        };

        let offset = self.offset.get();
        let cap = offset + size;

        if cap > ARENA_BLOCK {
            // Current page is full; start a fresh one and allocate from its top.
            self.grow();

            self.offset.set(size);
            self.ptr.get()
        } else {
            self.offset.set(cap);
            unsafe { self.ptr.get().add(offset) }
        }
    }

    /// Add a fresh page and make it the current bump-allocation target.
    fn grow(&self) {
        let ptr = self.alloc_byte_vec(Vec::with_capacity(ARENA_BLOCK));
        self.ptr.set(ptr);
    }

    /// Resets the pointer to the current page of the arena.
    ///
    /// **Using this method is an extremely bad idea!**
    ///
    /// The only case where the use of this method would be justified is
    /// in benchmarks where creation of a structure on the arena is to be
    /// tested without the cost of re-creating the arena itself on every iteration.
    #[doc(hidden)]
    #[inline]
    pub unsafe fn clear(&self) {
        self.reset_to(0)
    }

    /// Current bump offset into the active page. Unsafe because it exposes
    /// internals whose meaning can change between allocations.
    #[doc(hidden)]
    #[inline]
    pub unsafe fn offset(&self) -> usize {
        self.offset.get()
    }

    /// Rewind the bump offset. Any data allocated past `offset` may be
    /// overwritten by subsequent allocations — all previously returned
    /// references into that region become dangling.
    #[doc(hidden)]
    #[inline]
    pub unsafe fn reset_to(&self, offset: usize) {
        self.offset.set(offset)
    }
}

/// Akin to `CopyCell`: `Sync` is unsafe but `Send` is totally fine!
/// SAFETY: the arena's `Cell`s are only a problem under *shared* concurrent
/// access; moving the whole arena to another thread transfers exclusive
/// ownership of its pages along with it.
unsafe impl Send for Arena {}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn allocate_some_stuff() {
        let arena = Arena::new();

        assert_eq!(arena.alloc(0u64), &0);
        assert_eq!(arena.alloc(42u64), &42);
        assert_eq!(arena.alloc(0x8000000u64), &0x8000000u64);

        // Three u64 values bump the offset by 8 bytes each.
        assert_eq!(arena.offset.get(), 8 * 3);

        // For inspecting internals
        let mut arena = arena;

        // Everything fit on the initial page.
        assert_eq!(arena.store.get_mut().len(), 1);
    }

    #[test]
    fn allocate_some_vecs() {
        let arena = Arena::new();

        // Includes a vec larger than a page and an empty vec as edge cases.
        let vecs = vec![vec![1u64, 2, 3, 4], vec![7; ARENA_BLOCK * 2], vec![]];

        for vec in vecs {
            assert_eq!(arena.alloc_vec(vec.clone()), &vec[..]);
        }
    }

    #[test]
    fn allocate_some_cows() {
        let arena = Arena::new();

        let vecs = vec![vec![1u64, 2, 3, 4], vec![7; ARENA_BLOCK * 2], vec![]];

        for vec in vecs {
            // `.into()` produces Cow::Owned, exercising the alloc_vec path.
            assert_eq!(arena.alloc_cow(vec.clone().into()), &vec[..]);
        }
    }

    #[test]
    fn allocate_huge_heap() {
        let arena = Arena::new();

        assert_eq!(arena.alloc(0u64), &0);
        assert_eq!(arena.alloc(42u64), &42);

        // Larger than ARENA_BLOCK: must go through the standalone-allocation path.
        arena.alloc_uninitialized::<[usize; 1024 * 1024]>();

        // Still writes to the first page
        assert_eq!(arena.offset.get(), 8 * 2);
        assert_eq!(arena.alloc(0x8000000u64), &0x8000000u64);
        assert_eq!(arena.offset.get(), 8 * 3);

        // For inspecting internals
        let mut arena = arena;

        // However second page has been added
        assert_eq!(arena.store.get_mut().len(), 2);

        // Second page is appropriately large
        assert_eq!(
            arena.store.get_mut()[1].capacity(),
            size_of::<usize>() * 1024 * 1024
        );
    }

    #[test]
    fn alloc_slice() {
        let arena = Arena::new();

        // 4 bytes of data, rounded up to the 8-byte word size.
        assert_eq!(arena.alloc_slice(&[10u16, 20u16]), &[10u16, 20u16][..]);
        assert_eq!(arena.offset.get(), 8);
    }

    #[test]
    fn alloc_lazy_slices() {
      let arena = Arena::new();
      let nums: [u32; 6] = [1, 2, 3, 4, 5, 1000];
      let big_nums: [u32; 6] = [100, 200, 300, 400, 500, 1050];

      // Put the whole array in the arena
      let all_nums = arena.alloc_lazy_slice(nums.iter().map(|x| *x), 6);
      // Truncate it using the `n` argument
      let trunc_nums = arena.alloc_lazy_slice(big_nums.iter().map(|x| *x), 3);
      // Put a whole array of half the nums in the arena
      let half_nums = arena.alloc_lazy_slice(nums[0..3].iter().map(|x| *x), 6);

      assert!(nums.iter().eq(all_nums.iter()));
      assert!(nums[0..3].iter().eq(half_nums.iter()));
      assert!(big_nums[0..3].iter().eq(trunc_nums.iter()));
    }

    #[test]
    fn aligns_slice_allocs() {
        let arena = Arena::new();

        // 3 bytes rounds up to one word.
        assert_eq!(arena.alloc_slice(b"foo"), b"foo");
        assert_eq!(arena.offset.get(), 8);

        // 17 bytes rounds up to three words (24), for a total of 32.
        assert_eq!(arena.alloc_slice(b"doge to the moon!"), b"doge to the moon!");
        assert_eq!(arena.offset.get(), 32);
    }

    #[test]
    fn aligns_str_allocs() {
        let arena = Arena::new();

        assert_eq!(arena.alloc_str("foo"), "foo");
        assert_eq!(arena.offset.get(), 8);

        assert_eq!(arena.alloc_str("doge to the moon!"), "doge to the moon!");
        assert_eq!(arena.offset.get(), 32);
    }

    #[test]
    fn alloc_nul_term_str() {
        let arena = Arena::new();
        let nts = arena.alloc_nul_term_str("abcdefghijk");
        // Read 12 bytes: the 11 content bytes plus the trailing nul.
        let allocated = unsafe { ::std::slice::from_raw_parts(nts.as_ptr(), 12) };

        // 12 bytes rounds up to two words.
        assert_eq!(arena.offset.get(), 16);
        assert_eq!(
            allocated,
            "abcdefghijk\u{0}".as_bytes(),
        );

        assert_eq!(&**nts, "abcdefghijk");
    }
}
}