//! cowstr 0.6.0
//!
//! Copy-on-Write shared strings.

use core::mem::{align_of, size_of};
use std::alloc;
use std::alloc::Layout;
use std::ptr::NonNull;

#[cfg(feature = "singlethreaded")]
use std::cell::Cell;

#[cfg(not(feature = "singlethreaded"))]
use std::sync::atomic::{AtomicUsize, Ordering};

/// Helper to initialize a field in freshly allocated, uninitialized memory.
macro_rules! POKE {
    ($dest:ident.$member:ident = $val:expr) => {
        // `addr_of_mut!` projects to the field without creating an
        // intermediate `&mut` to uninitialized memory (which would be UB).
        core::ptr::write(core::ptr::addr_of_mut!((*$dest).$member), $val);
    };
}

/// The inner reference-counted heap buffer, a `String` lookalike with the
/// text stored inline directly after the header.
#[derive(Debug)]
#[repr(C, align(32))]
pub(crate) struct RcString {
    #[cfg(not(feature = "singlethreaded"))]
    refcount: AtomicUsize,
    #[cfg(feature = "singlethreaded")]
    refcount: Cell<usize>,
    len: usize,
    capacity: usize,
    data: [u8; 0],
}

const RCSTRING_SIZE: usize = size_of::<RcString>();
const RCSTRING_ALIGN: usize = align_of::<RcString>();
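
// Because of `repr(C, align(32))` the header pads out to exactly one
// alignment unit (the three header fields occupy at most 24 bytes on 64-bit
// targets), so the string bytes always begin at a fixed 32-byte offset. The
// compile-time check below is a sketch making that assumption explicit.
const _: () = assert!(RCSTRING_SIZE == RCSTRING_ALIGN);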

impl RcString {
    /// Allocates an `RcString` with room for `capacity` bytes of string data,
    /// with the reference count initialized to 1. Returns `None` when the
    /// allocator fails.
    pub(crate) fn allocate(capacity: usize) -> Option<NonNull<Self>> {
        unsafe {
            let layout = Layout::from_size_align_unchecked(
                capacity
                    .checked_add(RCSTRING_SIZE)
                    .expect("Capacity overflow"),
                RCSTRING_ALIGN,
            );

            let rcstring = alloc::alloc(layout) as *mut Self;
            if rcstring.is_null() {
                return None;
            }

            #[cfg(not(feature = "singlethreaded"))]
            POKE!(rcstring.refcount = AtomicUsize::new(1usize));
            #[cfg(feature = "singlethreaded")]
            POKE!(rcstring.refcount = Cell::new(1usize));

            POKE!(rcstring.len = 0);
            POKE!(rcstring.capacity = layout.size() - RCSTRING_SIZE);

            Some(NonNull::new_unchecked(rcstring))
        }
    }

    /// Grows the allocation so that at least `reserve` more bytes fit after
    /// the current contents. Must only be called while the reference count
    /// is 1 (the string is uniquely owned). Returns `None` when the
    /// allocator fails.
    pub(crate) fn grow(this: NonNull<Self>, reserve: usize) -> Option<NonNull<Self>> {
        unsafe {
            debug_assert_eq!(this.as_ref().strong_count(), 1);

            let layout = Layout::from_size_align_unchecked(
                this.as_ref().capacity + RCSTRING_SIZE,
                RCSTRING_ALIGN,
            );

            let min_new_size = (this.as_ref().len + RCSTRING_SIZE)
                .checked_add(reserve)
                .expect("Capacity overflow");

            let new_size = DEFAULT_ALLOCATION_STRATEGY
                .grow(layout.size(), min_new_size)
                .expect("Capacity overflow");

            let rcstring = alloc::realloc(this.as_ptr() as *mut u8, layout, new_size) as *mut Self;
            if rcstring.is_null() {
                return None;
            }

            POKE!(rcstring.capacity = new_size - RCSTRING_SIZE);

            Some(NonNull::new_unchecked(rcstring))
        }
    }

    /// Shrinks the allocation towards `new_capacity`, never below the current
    /// length. Must only be called while the reference count is 1.
    pub(crate) fn shrink(this: NonNull<Self>, new_capacity: usize) -> Option<NonNull<Self>> {
        unsafe {
            debug_assert_eq!(this.as_ref().strong_count(), 1);

            let layout = Layout::from_size_align_unchecked(
                this.as_ref().capacity + RCSTRING_SIZE,
                RCSTRING_ALIGN,
            );

            let new_size = DEFAULT_ALLOCATION_STRATEGY
                .align((this.as_ref().len + RCSTRING_SIZE).max(new_capacity + RCSTRING_SIZE));

            if new_size < layout.size() {
                // really shrink
                let rcstring =
                    alloc::realloc(this.as_ptr() as *mut u8, layout, new_size) as *mut Self;
                if rcstring.is_null() {
                    return None;
                }

                POKE!(rcstring.capacity = new_size - RCSTRING_SIZE);

                Some(NonNull::new_unchecked(rcstring))
            } else {
                /* no-op */
                Some(this)
            }
        }
    }

    /// Unconditionally deallocates an `RcString`.
    ///
    /// # Safety
    ///
    /// Must only be called with a valid `RcString` pointer whose refcount is zero.
    pub(crate) unsafe fn dealloc(this: NonNull<Self>) {
        debug_assert_eq!(this.as_ref().strong_count(), 0);
        let layout = Layout::from_size_align_unchecked(
            this.as_ref().capacity + RCSTRING_SIZE,
            RCSTRING_ALIGN,
        );
        alloc::dealloc(this.as_ptr() as *mut u8, layout);
    }

    /// Creates a new `RcString` from a `&[u8]` with some spare capacity.
    ///
    /// # Safety
    ///
    /// `source` must be valid UTF-8.
    unsafe fn from_bytes_unchecked(source: &[u8], reserve: usize) -> Option<NonNull<Self>> {
        let mut rcstring = Self::allocate(
            source
                .len()
                .checked_add(reserve)
                .expect("Capacity overflow"),
        )?;

        // `data` is a zero-sized `[u8; 0]` marker field. Taking its pointer
        // through a reference (`rcstring.as_mut().data.as_mut_ptr()`) would
        // produce a zero-size retag, so miri rejects writes through it.
        // Projecting the field from the raw pointer with `addr_of_mut!`
        // keeps the provenance of the whole allocation instead. The
        // `&self`-based accessors below cannot use the same trick, since a
        // reference only carries provenance for the header itself; eventually
        // `RcString` should become a DST with `data: [u8]` while still being
        // passed around as a thin pointer, but that needs
        // `ptr::from_raw_parts`, which is currently unstable.
        std::ptr::copy_nonoverlapping(
            source.as_ptr(),
            std::ptr::addr_of_mut!((*rcstring.as_ptr()).data).cast::<u8>(),
            source.len(),
        );

        rcstring.as_mut().len = source.len();

        Some(rcstring)
    }

    /// Creates a new `RcString` from a &str with some spare capacity.
    #[inline]
    pub(crate) fn from_str(source: &str, reserve: usize) -> Option<NonNull<Self>> {
        // Safety: `source` is a `&str` and therefore valid UTF-8
        unsafe { Self::from_bytes_unchecked(source.as_bytes(), reserve) }
    }

    /// Appends the given `char` to the end of this `RcString`.
    ///
    /// # Panics
    ///
    /// Panics if there is not enough spare capacity; the caller must grow the
    /// allocation before calling this.
    #[inline]
    pub(crate) fn push(&mut self, ch: char) {
        let mut buf = [0u8; 4];
        let bytes = ch.encode_utf8(&mut buf).as_bytes();

        assert!(self.len + bytes.len() <= self.capacity);

        unsafe {
            std::ptr::copy_nonoverlapping(
                bytes.as_ptr(),
                self.data.as_mut_ptr().add(self.len),
                bytes.len(),
            );
        }
        self.len += bytes.len();
    }

    /// Appends the given `str` to the end of this `RcString`.
    ///
    /// # Panics
    ///
    /// Panics if there is not enough spare capacity; the caller must grow the
    /// allocation before calling this.
    #[inline]
    pub(crate) fn push_str(&mut self, s: &str) {
        assert!(self.len + s.len() <= self.capacity);

        unsafe {
            std::ptr::copy_nonoverlapping(
                s.as_ptr(),
                self.data.as_mut_ptr().add(self.len),
                s.len(),
            );
        }
        self.len += s.len();
    }

    /// Returns self as &str
    #[inline]
    pub(crate) fn as_str(&self) -> &str {
        unsafe {
            std::str::from_utf8_unchecked(std::slice::from_raw_parts(self.data.as_ptr(), self.len))
        }
    }

    /// Returns self as &mut str
    #[inline]
    pub(crate) fn as_mut_str(&mut self) -> &mut str {
        unsafe {
            std::str::from_utf8_unchecked_mut(std::slice::from_raw_parts_mut(
                self.data.as_mut_ptr(),
                self.len,
            ))
        }
    }

    /// Returns the number of bytes that can be pushed without reallocating.
    #[inline]
    pub(crate) fn spare_capacity(&self) -> usize {
        self.capacity - self.len
    }

    /// Returns the capacity of the allocation.
    #[inline]
    pub(crate) fn capacity(&self) -> usize {
        self.capacity
    }

    #[inline]
    #[cfg(not(feature = "singlethreaded"))]
    pub(crate) fn strong_count(&self) -> usize {
        self.refcount.load(Ordering::Relaxed)
    }

    #[inline]
    #[cfg(feature = "singlethreaded")]
    pub(crate) fn strong_count(&self) -> usize {
        self.refcount.get()
    }

    #[cfg(not(feature = "singlethreaded"))]
    pub(crate) fn increment_strong_count(&self) {
        // Relaxed suffices: creating a new reference requires already holding
        // one, so no synchronization with other writes is needed (the same
        // reasoning `std::sync::Arc` uses).
        self.refcount.fetch_add(1, Ordering::Relaxed);
    }

    #[cfg(feature = "singlethreaded")]
    pub(crate) fn increment_strong_count(&self) {
        self.refcount.set(self.refcount.get() + 1);
    }

    #[cfg(feature = "singlethreaded")]
    pub(crate) fn decrement_strong_count(&self) -> usize {
        let count = self.refcount.get() - 1;
        self.refcount.set(count);
        count
    }

    #[cfg(not(feature = "singlethreaded"))]
    pub(crate) fn decrement_strong_count(&self) -> usize {
        // Release publishes all prior writes to the buffer; a caller that
        // observes 0 and deallocates should pair this with an Acquire fence,
        // mirroring the `std::sync::Arc` drop protocol.
        self.refcount.fetch_sub(1, Ordering::Release) - 1
    }
}
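
// A minimal usage sketch (illustrative, not part of the crate's public API):
// allocate via `from_str`, append, share the buffer by bumping the refcount,
// and deallocate once the count drops back to zero. Everything used below is
// defined in this module.
#[cfg(test)]
mod lifecycle_sketch {
    use super::*;

    #[test]
    fn allocate_share_dealloc() {
        let mut rcstring = RcString::from_str("hello", 16).expect("allocation failed");
        unsafe {
            rcstring.as_mut().push_str(" world");
            assert_eq!(rcstring.as_ref().as_str(), "hello world");

            // A second owner appears and disappears: 1 -> 2 -> 1 -> 0.
            rcstring.as_ref().increment_strong_count();
            assert_eq!(rcstring.as_ref().decrement_strong_count(), 1);
            assert_eq!(rcstring.as_ref().decrement_strong_count(), 0);

            // Safety: the pointer is valid and the refcount is zero.
            RcString::dealloc(rcstring);
        }
    }
}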

const KB: usize = 1024;
const MB: usize = KB * 1024;
const GB: usize = MB * 1024;

/// The allocation strategy used when resizing an allocation.
struct AllocationStrategy {
    /// Minimum allocation size returned; all sizes are rounded up to a
    /// multiple of this.
    pub min_allocation: usize,
    /// First threshold: below this, allocations are doubled; from here up to
    /// `grow_const` they are increased by 50%.
    pub grow_half: usize,
    /// Second threshold: above this, allocations grow by a constant
    /// `grow_const / 2` per step.
    pub grow_const: usize,
}

const DEFAULT_ALLOCATION_STRATEGY: AllocationStrategy = AllocationStrategy {
    min_allocation: 32,
    grow_half: 8 * MB,
    grow_const: GB, // 1 GB
};
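
// Worked example with the defaults above: a 64-byte allocation doubles to
// 128 bytes, an 8 MB allocation grows by half to 12 MB, and a 2 GB
// allocation grows by a flat 512 MB per step (all before rounding up to a
// multiple of `min_allocation`).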

impl AllocationStrategy {
    /// Returns the new size to allocate: the strategy's suggested size or,
    /// when that is not sufficient, `minimum_needed`; either way rounded up
    /// to a multiple of `min_allocation`. Returns `None` on arithmetic
    /// overflow.
    #[inline]
    pub fn grow(&self, old_size: usize, minimum_needed: usize) -> Option<usize> {
        Some(
            self.align(
                if old_size < self.min_allocation {
                    self.min_allocation
                } else if old_size < self.grow_half {
                    old_size.checked_mul(2)?
                } else if old_size < self.grow_const {
                    old_size.checked_add(old_size / 2)?
                } else {
                    old_size.checked_add(self.grow_const / 2)?
                }
                .max(minimum_needed),
            ),
        )
    }

    /// Rounds `size` up to the next multiple of `min_allocation` bytes.
    #[inline]
    pub fn align(&self, size: usize) -> usize {
        if size > 0 {
            // `usize::next_multiple_of` has been stable since Rust 1.73.
            size.next_multiple_of(self.min_allocation)
        } else {
            0
        }
    }
}
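
// A small sketch exercising the strategy above (added for illustration; the
// expected values follow directly from the constants in
// `DEFAULT_ALLOCATION_STRATEGY`).
#[cfg(test)]
mod strategy_sketch {
    use super::*;

    #[test]
    fn align_rounds_up_to_min_allocation() {
        let s = &DEFAULT_ALLOCATION_STRATEGY;
        assert_eq!(s.align(0), 0);
        assert_eq!(s.align(1), 32);
        assert_eq!(s.align(32), 32);
        assert_eq!(s.align(33), 64);
    }

    #[test]
    fn grow_follows_the_three_regimes() {
        let s = &DEFAULT_ALLOCATION_STRATEGY;
        // below `grow_half`: double
        assert_eq!(s.grow(64, 65), Some(128));
        // between `grow_half` and `grow_const`: +50%
        assert_eq!(s.grow(8 * MB, 8 * MB + 1), Some(12 * MB));
        // above `grow_const`: a flat `grow_const / 2` per step
        assert_eq!(s.grow(2 * GB, 2 * GB + 1), Some(2 * GB + GB / 2));
    }
}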