// mjb_gc 0.2.0
//
// Garbage collected pointers with immediate dropping.
// (Crate-level documentation follows below.)
#![feature(cell_update)]
#![feature(maybe_uninit_extra)]
#![feature(box_syntax)]
#![feature(allocator_api)]
// #![feature(negative_impls)]
// #![feature(auto_traits)]
// #![feature(min_specialization)]
#![forbid(unsafe_op_in_unsafe_fn)]
#![warn(clippy::missing_const_for_fn)]
#![no_std]

//! Thread-local reference-counted pointers with immediate cycle collection.
//!
//! The `Gc<T>` type provides shared ownership of a value. It is not `Send`,
//! since the cycle collection occurs on a single thread.

#[cfg(feature = "trace")]
extern crate std;

#[cfg(feature = "trace")]
use std::println;

extern crate alloc;

mod trace_impls;

use alloc::{
    alloc::{Allocator, Global},
    boxed::Box,
};
use core::{
    alloc::Layout,
    cell::Cell,
    fmt,
    fmt::Formatter,
    marker::{PhantomData, Unpin},
    mem::MaybeUninit,
    ops::Deref,
    pin::Pin,
    ptr,
    ptr::NonNull,
};

#[cfg(feature = "derive")]
pub use mjb_gc_derive::Trace;

/// A pointer type over a value that provides shared ownership and immediate
/// cycle collection upon `Drop`.
/// A pointer type over a value that provides shared ownership and immediate
/// cycle collection upon `Drop`.
pub struct Gc<T: Trace> {
    // Pointer to the shared, reference-counted allocation.
    ptr: NonNull<GcInner<T>>,
    // Tells drop-check that we logically own a `GcInner<T>`.
    _phantom: PhantomData<GcInner<T>>,
}

// Having a NonNull in the struct means that it isn't Send or Sync.
// It is important that it is neither, since it does not use atomic
// operations.

// Internal functions
impl<T: Trace> Gc<T> {
    fn inner(&self) -> &GcInner<T> {
        // SAFETY: Owning a Gc means that the pointer is valid
        unsafe { self.ptr.as_ref() }
    }

    fn as_inner_ptr(&self) -> *const GcInner<T> {
        self.ptr.as_ptr()
    }

    fn as_inner_ptr_mut(&mut self) -> *mut GcInner<T> {
        self.ptr.as_ptr()
    }

    // SAFETY: This assumes that there are no other references to the value
    unsafe fn as_value_mut(&mut self) -> &mut MaybeUninit<T> {
        unsafe { &mut (*self.as_inner_ptr_mut()).value }
    }

    fn from_inner(ptr: NonNull<GcInner<T>>) -> Self {
        Self {
            ptr,
            _phantom: PhantomData,
        }
    }

    #[track_caller]
    fn assert_undropped(&self) {
        if self.inner().dropped.get() {
            panic!("Gc was already dropped");
        }
    }
}

impl<T: Trace> Gc<T> {
    /// Creates a new `Gc<T>` with the given value.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use mjb_gc::Gc;
    ///
    /// // Why would we ever want to GC an integer?
    /// let five = Gc::new(5);
    /// ```
    #[must_use]
    pub fn new(value: T) -> Self {
        Self::from_inner(
            Box::leak(box GcInner {
                ref_count: Cell::new(1),
                internal_ref_count: Cell::new(0),
                done: Cell::new(false),
                dropped: Cell::new(false),
                value: MaybeUninit::new(value),
            })
            .into(),
        )
    }

    /// Construct a new `Gc<T>` value using a reference to itself.
    /// Attempting to dereference the reference before this function returns
    /// will panic.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use mjb_gc::{Gc, Trace};
    ///
    /// #[derive(Trace)]
    /// struct Gadget {
    ///     self_gc: Gc<Gadget>,
    ///     // ...
    /// }
    ///
    /// impl Gadget {
    ///     fn new() -> Gc<Self> {
    ///         Gc::new_cyclic(|self_gc| Gadget {
    ///             self_gc,
    ///             // ...
    ///         })
    ///     }
    /// }
    /// # let g = Gadget::new(); // Check it works
    /// ```
    ///
    /// ```rust,should_panic
    /// use mjb_gc::Gc;
    ///
    /// let _ = Gc::new_cyclic(|a| {
    ///     let _ = &*a; // Dereference the Gc, and thus panic.
    /// });
    /// ```
    #[must_use]
    pub fn new_cyclic<F>(data_fn: F) -> Self
    where
        F: FnOnce(Self) -> T,
    {
        // This happens to be a lot safer than `Rc::new_cyclic`.
        // Since we have a separate dropped flag, it means that we can
        // just use a full `Gc` type, rather than having to use a `Weak`
        let mut gc = Self::from_inner(
            Box::leak(box GcInner {
                ref_count: Cell::new(1),
                internal_ref_count: Cell::new(0),
                done: Cell::new(false),
                dropped: Cell::new(true),
                value: MaybeUninit::<T>::uninit(),
            })
            .into(),
        );

        // Keep ownership of the gc, since we need ownership to write a
        // value into it.
        let data = data_fn(gc.clone());

        // We now write the value in and set the dropped flag.

        // SAFETY: We are the only reference, since the dropped flag was
        // false
        unsafe { gc.as_value_mut().write(data) };

        gc.inner().dropped.set(false);

        gc
    }

    /// Creates a pinned `Gc<T>` with the given value.
    #[must_use]
    pub fn pin(value: T) -> Pin<Self> {
        // SAFETY: We don't give any way to remove the pin wrapper.
        unsafe { Pin::new_unchecked(Self::new(value)) }
    }

    /// Whether two `Gc`s point to the same allocation.
    #[must_use]
    pub fn ptr_eq(&self, other: &Self) -> bool {
        ptr::eq(self.inner(), other.inner())
    }

    #[must_use]
    pub fn as_ptr(&self) -> *const T {
        self.assert_undropped();

        // SAFETY: The inner_ptr is valid
        unsafe { (*self.as_inner_ptr()).value.as_ptr() }
    }

    #[must_use]
    pub fn as_mut_ptr(&mut self) -> *mut T {
        self.assert_undropped();

        // SAFETY: The inner_ptr is valid
        unsafe { (*self.as_inner_ptr_mut()).value.as_mut_ptr() }
    }

    /// Unwraps the `Gc<T>` if this is the only reference.
    ///
    /// Otherwise, an `Err` is returned with the same `Gc` that was passed in.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use mjb_gc::Gc;
    ///
    /// let x = Gc::new(3);
    /// assert_eq!(Gc::try_unwrap(x), Ok(3));
    ///
    /// let x = Gc::new(4);
    /// let _y = Gc::clone(&x);
    /// assert_eq!(*Gc::try_unwrap(x).unwrap_err(), 4);
    /// ```
    pub fn try_unwrap(this: Self) -> Result<T, Self> {
        if Self::is_unique(&this) {
            this.assert_undropped();

            let inner = this.inner();

            inner.dropped.set(true); // So that Drop doesn't drop the value.

            // SAFETY: We checked the dropped flag and the reference count
            // above.
            Ok(unsafe { inner.value.assume_init_read() })
        } else {
            Err(this)
        }
    }

    /// Gets the amount of references to this `Gc<T>`.
    ///
    /// Note that this includes both external and internal references.
    #[must_use]
    pub fn ref_count(this: &Self) -> usize {
        this.inner().ref_count.get()
    }

    /// Whether this reference is the only reference to the value.
    ///
    /// If the value is self-referential, those references will cause
    /// this never to be true.
    #[must_use]
    pub fn is_unique(this: &Self) -> bool {
        this.inner().ref_count.get() == 1
    }

    /// Get a mutable reference into the `Gc` if there are no other
    /// references to the allocation.
    ///
    /// Returns [`None`] otherwise, since mutability xor aliasing.
    ///
    /// See also [`make_mut`](Gc::make_mut), which
    /// [`clone`](Clone::clone)s the inner value instead of returning
    /// [`None`].
    ///
    /// # Panics
    ///
    /// If the value has already been dropped, this function will panic.
    ///
    /// # Examples
    ///
    /// ```
    /// use mjb_gc::Gc;
    ///
    /// let mut x = Gc::new(3);
    /// *Gc::get_mut(&mut x).unwrap() = 4;
    /// assert_eq!(*x, 4);
    ///
    /// let _y = Gc::clone(&x);
    /// assert!(Gc::get_mut(&mut x).is_none());
    /// ```
    #[must_use]
    pub fn get_mut(this: &mut Self) -> Option<&mut T> {
        if Self::is_unique(this) {
            this.assert_undropped();

            // SAFETY: We checked the dropped flag and the reference count
            // above.
            Some(unsafe { Self::get_unchecked_mut(this) })
        } else {
            None
        }
    }

    /// Get the value as mutable, bypassing any safety checks.
    ///
    /// # Safety
    ///
    /// There needs to be no other references in use while this one is in use.
    #[must_use]
    pub unsafe fn get_unchecked_mut(this: &mut Self) -> &mut T {
        // SAFETY: Caller-given-guarantees
        unsafe { this.as_value_mut().assume_init_mut() }
    }
}

impl<T: Trace + Clone> Gc<T> {
    /// Make a mutable reference into the given `Gc`.
    ///
    /// When other handles share the allocation, the value is first cloned
    /// into a fresh allocation so the returned reference is unique.
    ///
    /// # Panics
    ///
    /// Panics if the contained value has already been dropped.
    #[must_use]
    pub fn make_mut(this: &mut Self) -> &mut T {
        this.assert_undropped();

        let unique = Self::is_unique(this);
        if !unique {
            // Shared: move a clone of the data into a new allocation and
            // point this handle at it instead.
            let fresh = Self::new(T::clone(this));
            *this = fresh;
        }

        // SAFETY: The handle is now unique (either checked above or
        // freshly allocated) and the value is initialized.
        unsafe { Self::get_unchecked_mut(this) }
    }
}

impl<T: Trace> Deref for Gc<T> {
    type Target = T;

    /// Dereference a `Gc<T>` into `T`.
    ///
    /// Panics if the value has already been dropped (for example from
    /// inside a `Drop` impl that is part of a collected cycle).
    ///
    /// # Examples
    ///
    /// ```rust,should_panic
    /// use mjb_gc::{Gc, Trace};
    ///
    /// #[derive(Trace)]
    /// struct Cyclic(Gc<Cyclic>);
    ///
    /// impl Drop for Cyclic {
    ///     fn drop(&mut self) {
    ///         let _a = &*self.0; // Dereference the Gc and panic.
    ///     }
    /// }
    ///
    /// let a = Gc::new_cyclic(|v| Cyclic(v));
    /// ```
    fn deref(&self) -> &T {
        self.assert_undropped();

        let inner = self.inner();
        // SAFETY: `assert_undropped` verified the value is still live.
        unsafe { inner.value.assume_init_ref() }
    }
}

impl<T: Trace> Clone for Gc<T> {
    /// Makes another handle to the same allocation.
    ///
    /// The inner data is not cloned; only the reference count changes.
    ///
    /// This call will panic if the reference count would overflow.
    fn clone(&self) -> Self {
        let count = &self.inner().ref_count;
        let bumped = count.get().checked_add(1).expect("Gc ref-count overflow");
        count.set(bumped);

        Self::from_inner(self.ptr)
    }

    fn clone_from(&mut self, other: &Self) {
        // Nothing to do when both handles already share an allocation.
        if ptr::eq(self.inner(), other.inner()) {
            return;
        }
        *self = other.clone();
    }
}

impl<T: Trace + fmt::Debug> fmt::Debug for Gc<T> {
    /// Formats the pointed-to value (panics if it was already dropped).
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let value: &T = self;
        value.fmt(f)
    }
}

impl<T: Trace + fmt::Display> fmt::Display for Gc<T> {
    /// Displays the pointed-to value (panics if it was already dropped).
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let value: &T = self;
        value.fmt(f)
    }
}

impl<T: Trace + PartialEq> PartialEq for Gc<T> {
    /// Compares the pointed-to values, not the pointers.
    fn eq(&self, other: &Self) -> bool {
        let lhs: &T = self;
        let rhs: &T = other;
        lhs == rhs
    }
}

// Equality is fully delegated to `T`, so `Eq` follows from `T: Eq`.
impl<T: Trace + Eq> Eq for Gc<T> {}

// The handle itself holds no pinned state (the data lives on the heap),
// so moving a `Gc` is always fine.
impl<T: Trace> Unpin for Gc<T> {}

// The heap allocation shared by every `Gc` handle to one value.
struct GcInner<T: Trace> {
    // Total number of `Gc` handles (external and internal) pointing here.
    ref_count: Cell<usize>,

    // These are used internally when detecting cycles
    // Visited marker; stops the trace passes from recursing forever.
    done: Cell<bool>,
    // References found by the most recent trace pass.
    internal_ref_count: Cell<usize>,
    // Whether `value` has been destroyed (i.e. the slot is uninitialized).
    dropped: Cell<bool>,
    // The value itself; only initialized while `dropped` is false.
    value: MaybeUninit<T>,
}

impl<T: Trace> GcInner<T> {
    /// Borrows the value, or `None` once it has been destroyed.
    fn value(&self) -> Option<&T> {
        if self.dropped.get() {
            return None;
        }
        // SAFETY: `dropped` is false, so the slot holds a live value.
        Some(unsafe { self.value.assume_init_ref() })
    }
}

impl<T: Trace> fmt::Debug for GcInner<T> {
    /// Debug output of the bookkeeping fields; the value itself is elided.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let mut builder = f.debug_struct("GcInner");
        builder.field("ref_count", &self.ref_count.get());
        builder.field("done", &self.done.get());
        builder.field("internal_ref_count", &self.internal_ref_count.get());
        builder.field("dropped", &self.dropped.get());
        builder.finish_non_exhaustive()
    }
}

impl<T: Trace> Drop for Gc<T> {
    /// Drops the `Gc`.
    ///
    /// This will decrement the reference count. If there are no external
    /// references, this will drop the contained value.
    ///
    /// Cycle detection works in passes over the object graph reachable from
    /// the value: first all per-node `internal_ref_count`s are zeroed
    /// (`untrace`), then every reachable `Gc` edge is counted (`trace`),
    /// and finally the counts are compared to the real reference counts
    /// (`counts_match`). If every node's references are accounted for by
    /// internal edges, the set is an unreachable cycle and the value is
    /// dropped immediately.
    ///
    /// # Examples
    ///
    /// ```
    /// use mjb_gc::{Gc, Trace};
    ///
    /// #[derive(Trace)]
    /// struct Cyclic(Gc<Cyclic>);
    ///
    /// impl Drop for Cyclic {
    ///     fn drop(&mut self) {
    ///         println!("dropped!");
    ///     }
    /// }
    ///
    /// let foo = Gc::new_cyclic(|g| Cyclic(g));
    /// let foo2 = Gc::clone(&foo);
    ///
    /// drop(foo); // Doesn't print anything
    /// drop(foo2); // Prints "dropped!"
    /// ```
    fn drop(&mut self) {
        let inner = self.inner();
        inner.ref_count.update(|v| v - 1);
        if inner.ref_count.get() == 0 {
            // There are no references left: drop the value and free the
            // allocation.
            if !inner.dropped.get() {
                // SAFETY: We have checked the dropped flag.
                unsafe { self.as_value_mut().assume_init_drop() };
            }

            // SAFETY: We are the only reference to inner
            unsafe { ptr::drop_in_place(self.as_inner_ptr_mut()) };

            // SAFETY: We owned this memory
            unsafe { Global.deallocate(self.ptr.cast(), Layout::new::<GcInner<T>>()) };
        } else if let Some(v_ref) = inner.value() {
            if !T::could_contain_cycles() {
                return;
            }

            // We don't assume a dropped value is initialized.

            // There are other references. We check to see if there is a
            // cycle, and if so, drop this value, which will break the cycle.

            // First, we zero all the internal counts reachable from here.
            // We have the `done` variable so that we don't have a recursive loop.

            // INVARIANT: all the `done`s reachable from v_ref are set to `false`.

            #[cfg(feature = "trace")]
            println!("\n>>>> untrace");

            inner.internal_ref_count.set(0); // So that we don't have a loop.
            inner.done.set(true);

            unsafe { v_ref.untrace() };

            inner.done.set(false);

            unsafe { v_ref.set_undone() };

            // We then trace through all the Gcs and add all the counts up
            // into `internal_ref_count`.

            // INVARIANT: all the `done`s reachable from v_ref are set to `false`.

            #[cfg(feature = "trace")]
            println!("\n>>>> trace");

            inner.done.set(true);

            unsafe { v_ref.trace() };

            inner.done.set(false);

            unsafe { v_ref.set_undone() };

            // If our internal_ref_count was incremented, we have a loop,
            // so we check if all the counts match, and if they do, there
            // are no external references, so we can drop our value.
            // (Our own handle was already subtracted from ref_count above.)
            if inner.internal_ref_count.get() == inner.ref_count.get() {
                #[cfg(feature = "trace")]
                println!("\n>>>> counts_match");

                inner.done.set(true);

                let matched = v_ref.counts_match();

                #[cfg(feature = "trace")]
                if matched {
                    println!("\n>>>> all_matched");
                }

                inner.done.set(false);

                unsafe { v_ref.set_undone() };

                if matched {
                    // It is important that `dropped` is set before the actual value
                    // is dropped, since its Drop impl might try to access it, since
                    // we have a loop.
                    inner.dropped.set(true);

                    // NOTE: dropping the value drops the internal `Gc`s in
                    // the cycle; the last of those decrements reaches zero
                    // and frees this allocation via the branch above.
                    unsafe { self.as_value_mut().assume_init_drop() };
                }
            }
        }
    }
}

// This trait indicates Gc values that are acyclic.
// This means that they don't contain recursive values, thus
// removing the branch above for if the ref-count wasn't decreased
// all the way to 0.

// We can then specialise the drop (through another trait) so that it just acts
// like Rc.

// This is simply `Freeze`.

// pub unsafe auto trait GcAcyclic {}

// impl<T> !GcAcyclic for UnsafeCell<T> {}

/// A trait for tracing through `Gc`ed values.
///
/// # Safety
///
/// All accessible contained `Gc`s must be traced.
///
/// `could_contain_cycles()` must return `true` if the type could
/// contain cycles.
pub unsafe trait Trace {
    /// Resets all inner `Gc`s for another trace cycle.
    ///
    /// Internally, this resets the recorded reference count from inside the
    /// loop.
    ///
    /// # Safety
    ///
    /// This function should not be used outside of `Trace`'s implementation.
    unsafe fn untrace(&self);

    /// This function traces through each inner `Gc`.
    ///
    /// Internally, records a reference count from inside the loop.
    ///
    /// # Safety
    ///
    /// This function should not be used outside of `Trace`'s implementation.
    unsafe fn trace(&self);

    /// This function resets the `done` flag on a `Gc`.
    ///
    /// Called between passes so that each pass visits every node again.
    ///
    /// # Safety
    ///
    /// This function should not be used outside of `Trace`'s implementation.
    unsafe fn set_undone(&self);

    /// Whether all the counts match, meaning there is no external references
    /// to the set.
    ///
    /// If there are no children, this function should return `true`.
    ///
    /// This must still be called on all inner `Gc`s, even once one returns
    /// false. This can still be done in a single expression for most types
    /// by using `&` rather than `&&`.
    fn counts_match(&self) -> bool;

    /// Whether the type could contain cycles.
    ///
    /// Returning `false` lets `Drop` skip the tracing passes entirely.
    /// The conservative default is `true`.
    fn could_contain_cycles() -> bool {
        true
    }
}

// This trait indicates values that don't contain a Gc.

// pub auto trait NotGc {}

// impl<T: Trace> !NotGc for Gc<T> {}

// FIXME: this can be used once we have intersection impls
//
// unsafe impl<T: NotGc> Trace for T {
//     default unsafe fn untrace(&self) {}
//     default unsafe fn trace(&self) {}
//     default unsafe fn set_undone(&self) {}
//     default fn counts_match(&self) -> bool {
//         true
//     }
// }

unsafe impl<T: Trace> Trace for Gc<T> {
    /// Resets the `Gc` for another trace cycle.
    ///
    /// Zeroes the recorded internal count and recurses into the value once;
    /// the `done` flag stops infinite recursion through cycles.
    unsafe fn untrace(&self) {
        let inner = self.inner();
        inner.internal_ref_count.set(0);

        if !inner.done.get() {
            #[cfg(feature = "trace")]
            println!("not done yet");

            inner.done.set(true);

            if let Some(v) = inner.value() {
                unsafe { v.untrace() };
            }
        }
    }

    /// Runs a trace cycle on the `Gc`.
    ///
    /// Counts this edge into `internal_ref_count` and recurses into the
    /// value the first time the node is visited.
    unsafe fn trace(&self) {
        let inner = self.inner();

        // This can't overflow, since it cannot be larger than the ref_count.
        inner.internal_ref_count.update(|v| v + 1);

        if inner.internal_ref_count.get() > inner.ref_count.get() {
            unreachable!("more references than ref_count");
        }

        if !inner.done.get() {
            #[cfg(feature = "trace")]
            println!("not done yet");

            inner.done.set(true);

            if let Some(v) = inner.value() {
                unsafe { v.trace() };
            }
        }
    }

    /// Sets the done flags to false.
    unsafe fn set_undone(&self) {
        let inner = self.inner();
        if inner.done.get() {
            // BUGFIX: this branch runs when the node IS marked done, so the
            // old "not done yet" trace message was inverted.
            #[cfg(feature = "trace")]
            println!("setting undone");

            inner.done.set(false);

            if let Some(v) = inner.value() {
                unsafe { v.set_undone() };
            }
        }
    }

    /// Checks whether the reference counts match.
    ///
    /// Returns `true` only if every reachable node's references are fully
    /// accounted for by internal edges.
    fn counts_match(&self) -> bool {
        let inner = self.inner();

        #[cfg(feature = "trace")]
        println!(
            " int_rc => {}, rc => {}",
            inner.internal_ref_count.get(),
            inner.ref_count.get()
        );

        let mut v = inner.internal_ref_count.get() == inner.ref_count.get();

        if !inner.done.get() {
            #[cfg(feature = "trace")]
            println!("not done yet");

            inner.done.set(true);

            if let Some(value) = inner.value() {
                v &= value.counts_match();
            }
        }
        v
    }

    fn could_contain_cycles() -> bool {
        T::could_contain_cycles()
    }
}

/// Implement an empty trace on a type.
///
/// This is used inside the `impl` of `Trace` for a type that does not own any
/// [`Gc`] pointers:
///
/// ```rust
/// use mjb_gc::{unsafe_empty_trace, Trace};
///
/// struct A;
///
/// unsafe impl Trace for A {
///     unsafe_empty_trace! {}
/// }
/// ```
///
/// The recommended (and shorter) way is to instead derive [`Trace`] onto the
/// type.
///
/// ```rust
/// use mjb_gc::Trace;
///
/// #[derive(Trace)]
/// struct B;
/// ```
#[macro_export]
macro_rules! unsafe_empty_trace {
    () => {
        // Delegates to `unsafe_field_trace` with zero fields, which yields
        // no-op trace methods and `could_contain_cycles() == false`.
        $crate::unsafe_field_trace! {}
    };
}

/// Implement [`Trace`] by delegating to the named fields of a type.
///
/// Two forms are accepted:
/// - `field: Type` pairs, which also statically dispatch
///   `could_contain_cycles` through each field's type;
/// - bare `field` names, where `could_contain_cycles` conservatively
///   returns `true` because the field types are unknown.
///
/// # Safety
///
/// Every field that can own a [`Gc`] must be listed, otherwise cycles
/// through the omitted fields will not be collected.
#[macro_export]
macro_rules! unsafe_field_trace {
    ($($f:tt: $T:ty),*) => {
        unsafe fn untrace(&self) {
            // Make sure that the field is the expected type to verify
            // the static calls for could_contain_cycles
            $(
                unsafe { <$T as $crate::Trace>::untrace(&self.$f) };
            )*
        }
        unsafe fn trace(&self) {
            $(
                unsafe { $crate::Trace::trace(&self.$f) };
            )*
        }
        unsafe fn set_undone(&self) {
            $(
                unsafe { $crate::Trace::set_undone(&self.$f) };
            )*
        }
        fn counts_match(&self) -> bool {
            // `&` (not `&&`) so every field is visited even after a `false`.
            true $(& $crate::Trace::counts_match(&self.$f))*
        }
        fn could_contain_cycles() -> bool {
            false $(|| <$T as $crate::Trace>::could_contain_cycles())*
        }
    };
    ($($f:tt),*) => {
        unsafe fn untrace(&self) {
            $(
                unsafe { $crate::Trace::untrace(&self.$f) };
            )*
        }
        unsafe fn trace(&self) {
            $(
                unsafe { $crate::Trace::trace(&self.$f) };
            )*
        }
        unsafe fn set_undone(&self) {
            $(
                unsafe { $crate::Trace::set_undone(&self.$f) };
            )*
        }
        fn counts_match(&self) -> bool {
            // `&` (not `&&`) so every field is visited even after a `false`.
            true $(& $crate::Trace::counts_match(&self.$f))*
        }
        fn could_contain_cycles() -> bool {
            // Field types are unknown in this form, so be conservative.
            true
        }
    };
    ($($f:tt: $T:ty,)*) => { $crate::unsafe_field_trace! {$($f: $T),*} };
    ($($f:tt,)*) => { $crate::unsafe_field_trace! {$($f),*} };
}

#[cfg(test)]
mod tests;