Skip to main content

jrsonnet_gcmodule/
collect.rs

1// The main idea comes from cpython 3.8's `gcmodule.c` [1].
2//
3// [1]: https://github.com/python/cpython/blob/v3.8.0/Modules/gcmodule.c
4
5// NOTE: Consider adding generation support if necessary. It won't be too hard.
6
7use crate::Cc;
8use crate::Trace;
9use crate::cc::CcDummy;
10use crate::cc::CcDyn;
11use crate::cc::GcClone;
12use crate::debug;
13use crate::ref_count::RefCount;
14use crate::ref_count::SingleThreadRefCount;
15use std::cell::Cell;
16use std::cell::RefCell;
17use std::cell::UnsafeCell;
18use std::marker::PhantomData;
19use std::mem;
20use std::ptr;
21use std::ptr::without_provenance;
22
23/// Provides advanced explicit control about where to store [`Cc`](type.Cc.html)
24/// objects.
25///
26/// An [`ObjectSpace`](struct.ObjectSpace.html) provides an alternative place for
27/// a collection of [`Cc`](type.Cc.html) objects. Those objects are isolated from
28/// the default thread-local space used by objects crated via `Cc::new`.
29/// The objects in this space can be collected by
30/// [`ObjectSpace::collect_cycles()`](struct.ObjectSpace.html#method.collect_cycles),
31/// but not [`collect_thread_cycles`](fn.collect_thread_cycles.html).
32///
33/// Use [`ObjectSpace::create`](struct.ObjectSpace.html#method.create) to
34/// create new objects within the space.
35///
36/// Objects within a space should not refer to objects in a different space.
37/// Failing to do so might cause memory leak.
38///
39/// # Example
40///
41/// ```
42/// use jrsonnet_gcmodule::{Cc, ObjectSpace, Trace, TraceBox};
43/// use std::cell::RefCell;
44///
45/// let mut space = ObjectSpace::default();
46/// assert_eq!(space.count_tracked(), 0);
47///
48/// {
49///     type List = Cc<RefCell<Vec<TraceBox<dyn Trace>>>>;
50///     let a: List = space.create(Default::default());
51///     let b: List = space.create(Default::default());
52///     a.borrow_mut().push(TraceBox(Box::new(b.clone())));
53///     b.borrow_mut().push(TraceBox(Box::new(a.clone())));
54/// }
55///
56/// assert_eq!(space.count_tracked(), 2);
57/// assert_eq!(space.collect_cycles(), 2);
58/// ```
59pub struct ObjectSpace {
60    /// Linked list to the tracked objects.
61    pub(crate) list: RefCell<OwnedGcHeader>,
62
63    /// Mark `ObjectSpace` as `!Send` and `!Sync`. This enforces thread-exclusive
64    /// access to the linked list so methods can use `&self` instead of
65    /// `&mut self`, together with usage of interior mutability.
66    _phantom: PhantomData<Cc<()>>,
67}
68
69/// This is a private type.
70pub trait AbstractObjectSpace: 'static + Sized {
71    type RefCount: RefCount;
72    type Header;
73
74    /// Insert "header" and "value" to the linked list.
75    fn insert(&self, header: *const Self::Header, value: &dyn CcDyn);
76
77    /// Remove from linked list.
78    fn remove(header: &Self::Header);
79
80    /// Create a `RefCount` object.
81    fn new_ref_count(&self, tracked: bool) -> Self::RefCount;
82
83    fn empty_header(&self) -> Self::Header;
84}
85
86impl AbstractObjectSpace for ObjectSpace {
87    type RefCount = SingleThreadRefCount;
88    type Header = GcHeader;
89
90    fn insert(&self, header: *const Self::Header, value: &dyn CcDyn) {
91        let list = self.list.borrow();
92        let prev: &GcHeader = list.inner();
93        debug_assert!(unsafe { (*header).next.get() }.is_null());
94        let next = prev.next.get();
95        unsafe { (*header).prev.set(prev) };
96        unsafe { (*header).next.set(next) };
97        unsafe {
98            // safety: The linked list is maintained, and pointers are valid.
99            (*next).prev.set(header);
100            // safety: To access vtable pointer. Test by test_gc_header_value.
101            let fat_ptr: [*mut (); 2] = mem::transmute(value);
102            (*header).ccdyn_vptr.set(fat_ptr[1]);
103        }
104        prev.next.set(header);
105    }
106
107    #[inline]
108    fn remove(header: &Self::Header) {
109        let header: &GcHeader = header;
110        debug_assert!(!header.next.get().is_null());
111        debug_assert!(!header.prev.get().is_null());
112        let next = unmask_ptr(header.next.get());
113        let prev = unmask_ptr(header.prev.get());
114        // safety: The linked list is maintained. Pointers in it are valid.
115        unsafe {
116            (*prev).next.set(next);
117            (*next).prev.set(prev);
118        }
119        header.next.set(ptr::null_mut());
120    }
121
122    #[inline]
123    fn new_ref_count(&self, tracked: bool) -> Self::RefCount {
124        SingleThreadRefCount::new(tracked)
125    }
126
127    #[inline]
128    fn empty_header(&self) -> Self::Header {
129        GcHeader::empty()
130    }
131}
132
133impl Default for ObjectSpace {
134    /// Constructs an empty [`ObjectSpace`](struct.ObjectSpace.html).
135    fn default() -> Self {
136        let header = new_gc_list();
137        Self {
138            list: RefCell::new(header),
139            _phantom: PhantomData,
140        }
141    }
142}
143
144impl ObjectSpace {
145    /// Count objects tracked by this [`ObjectSpace`](struct.ObjectSpace.html).
146    pub fn count_tracked(&self) -> usize {
147        let list = self.list.borrow();
148        let list: &GcHeader = list.inner();
149        let mut count = 0;
150        visit_list(list, |_| count += 1);
151        count
152    }
153
154    /// Collect cyclic garbage tracked by this [`ObjectSpace`](struct.ObjectSpace.html).
155    /// Return the number of objects collected.
156    pub fn collect_cycles(&self) -> usize {
157        let list = self.list.borrow();
158        let list: &GcHeader = list.inner();
159        collect_list(list, ())
160    }
161
162    /// Constructs a new [`Cc<T>`](type.Cc.html) in this
163    /// [`ObjectSpace`](struct.ObjectSpace.html).
164    ///
165    /// The returned object should only refer to objects in the same space.
166    /// Otherwise the collector might fail to collect cycles.
167    pub fn create<T: Trace>(&self, value: T) -> Cc<T> {
168        // `&mut self` ensures thread-exclusive access.
169        Cc::new_in_space(value, self)
170    }
171
172    /// Leak all objects allocated in this space
173    pub fn leak(&self) {
174        *self.list.borrow_mut() = new_gc_list();
175    }
176
177    // TODO: Consider implementing "merge" or method to collect multiple spaces
178    // together, to make it easier to support generational collection.
179}
180
181impl Drop for ObjectSpace {
182    fn drop(&mut self) {
183        self.collect_cycles();
184    }
185}
186
187pub trait Linked {
188    fn next(&self) -> *const Self;
189    fn prev(&self) -> *const Self;
190    fn set_prev(&self, other: *const Self);
191
192    fn value_ptr(this: *const Self) -> *const dyn CcDyn;
193    /// Get the trait object to operate on the actual `CcBox`.
194    fn value(&self) -> &dyn CcDyn;
195}
196
197/// Internal metadata used by the cycle collector.
198#[repr(C, align(8))]
199pub struct GcHeader {
200    pub(crate) next: Cell<*const GcHeader>,
201    pub(crate) prev: Cell<*const GcHeader>,
202
203    /// Vtable of (`&CcBox<T> as &dyn CcDyn`)
204    pub(crate) ccdyn_vptr: Cell<*const ()>,
205
206    /// <https://github.com/rust-lang/unsafe-code-guidelines/issues/256#issuecomment-2506767812>
207    pub(crate) _marker: UnsafeCell<()>,
208}
209
210impl Linked for GcHeader {
211    #[inline]
212    fn next(&self) -> *const Self {
213        self.next.get()
214    }
215    #[inline]
216    fn prev(&self) -> *const Self {
217        self.prev.get()
218    }
219    #[inline]
220    fn set_prev(&self, other: *const Self) {
221        self.prev.set(other);
222    }
223    fn value_ptr(this: *const Self) -> *const dyn CcDyn {
224        // safety: To build trait object from self and vtable pointer.
225        // Test by test_gc_header_value_consistency().
226        unsafe {
227            let fat_ptr: (*const (), *const ()) = (this.add(1).cast(), (*this).ccdyn_vptr.get());
228            mem::transmute(fat_ptr)
229        }
230    }
231    #[inline]
232    fn value(&self) -> &dyn CcDyn {
233        unsafe { &*Self::value_ptr(self) }
234    }
235}
236
237impl GcHeader {
238    /// Create an empty header.
239    pub(crate) fn empty() -> Self {
240        Self {
241            next: Cell::new(ptr::null()),
242            prev: Cell::new(ptr::null()),
243            ccdyn_vptr: Cell::new(CcDummy::ccdyn_vptr()),
244            _marker: UnsafeCell::new(()),
245        }
246    }
247}
248
249/// Collect cyclic garbage in the current thread created by
250/// [`Cc::new`](type.Cc.html#method.new).
251/// Return the number of objects collected.
252#[must_use]
253pub fn collect_thread_cycles() -> usize {
254    debug::log(|| ("collect", "collect_thread_cycles"));
255    THREAD_OBJECT_SPACE.with(ObjectSpace::collect_cycles)
256}
257
258/// Count number of objects tracked by the collector in the current thread
259/// created by [`Cc::new`](type.Cc.html#method.new).
260/// Return the number of objects tracked.
261#[must_use]
262pub fn count_thread_tracked() -> usize {
263    THREAD_OBJECT_SPACE.with(ObjectSpace::count_tracked)
264}
265
266thread_local!(pub(crate) static THREAD_OBJECT_SPACE: ObjectSpace = ObjectSpace::default());
267
268/// Acquire reference to thread-local global object space
269pub fn with_thread_object_space<R>(handler: impl FnOnce(&ObjectSpace) -> R) -> R {
270    THREAD_OBJECT_SPACE.with(handler)
271}
272
273pub(crate) struct OwnedGcHeader {
274    raw: *mut GcHeader,
275}
276impl OwnedGcHeader {
277    fn inner(&self) -> &GcHeader {
278        unsafe { &*self.raw }
279    }
280}
281impl Drop for OwnedGcHeader {
282    fn drop(&mut self) {
283        drop(unsafe { Box::from_raw(self.raw) });
284    }
285}
286
287/// Create an empty linked list with a dummy `GcHeader`.
288pub(crate) fn new_gc_list() -> OwnedGcHeader {
289    let header = Box::into_raw(Box::new(GcHeader::empty()));
290    unsafe { (*header).prev.set(header) };
291    unsafe { (*header).next.set(header) };
292
293    OwnedGcHeader { raw: header }
294}
295
296/// Scan the specified linked list. Collect cycles.
297pub(crate) fn collect_list<L: Linked, K>(list: &L, lock: K) -> usize {
298    update_refs(list);
299    subtract_refs(list);
300    release_unreachable(list, lock)
301}
302
303/// Visit the linked list.
304pub(crate) fn visit_list<'a, L: Linked>(list: &'a L, mut func: impl FnMut(&'a L)) {
305    // Skip the first dummy entry.
306    let mut ptr = list.next();
307    while !ptr::eq(ptr, list) {
308        // The linked list is maintained so the pointer is valid.
309        let header: &L = unsafe { &*ptr };
310        ptr = header.next();
311        func(header);
312    }
313}
314
315const PTR_MASK: usize = usize::MAX & !(0b11);
316const PREV_MASK_COLLECTING: usize = 1;
317const PREV_MASK_VISITED: usize = 2;
318const PREV_SHIFT: u32 = 2;
319
320#[inline]
321fn unmask_ptr<T>(ptr: *const T) -> *const T {
322    ptr.map_addr(|ptr| ptr & PTR_MASK)
323}
324
325/// Temporarily use `GcHeader.prev` as `gc_ref_count`.
326/// Idea comes from <https://bugs.python.org/issue33597>.
327fn update_refs<L: Linked>(list: &L) {
328    visit_list(list, |header| {
329        let ref_count = header.value().gc_ref_count();
330        // It's possible that the ref_count becomes 0 in a multi-thread context:
331        //  thread 1> drop()
332        //  thread 1> drop() -> dec_ref()
333        //  thread 2> collect_cycles()        # take linked list lock
334        //  thread 1> drop() -> drop_ccbox()  # blocked by the linked list lock
335        //  thread 2> observe that ref_count is 0, but T is not dropped yet.
336        // In such case just ignore the object by not marking it as COLLECTING.
337        if ref_count > 0 {
338            let shifted = (ref_count << PREV_SHIFT) | PREV_MASK_COLLECTING;
339            header.set_prev(without_provenance(shifted));
340        } else {
341            debug_assert!(header.prev() as usize & PREV_MASK_COLLECTING == 0);
342        }
343    });
344}
345
346/// Subtract ref counts in `GcHeader.prev` by calling the non-recursive
347/// `Trace::trace` on every track objects.
348///
349/// After this, potential unreachable objects will have ref count down
350/// to 0. If vertexes in a connected component _all_ have ref count 0,
351/// they are unreachable and can be released.
352fn subtract_refs<L: Linked>(list: &L) {
353    let mut tracer = |header: *const ()| {
354        // safety: The type is known to be GcHeader.
355        let header = unsafe { &*header.cast::<L>() };
356        if is_collecting(header) {
357            debug_assert!(
358                !is_unreachable(header),
359                "bug: object {} becomes unreachable while trying to dec_ref (is Trace impl correct?)",
360                debug_name(header)
361            );
362            edit_gc_ref_count(header, -1);
363        }
364    };
365    visit_list(list, |header| {
366        set_visited(header);
367        header.value().gc_traverse(&mut tracer);
368    });
369}
370
371/// Mark objects as reachable recursively. So ref count 0 means unreachable
372/// values. This also removes the COLLECTING flag for reachable objects so
373/// unreachable objects all have the COLLECTING flag set.
374fn mark_reachable<L: Linked>(list: &L) {
375    fn revive<L: Linked>(header: *const ()) {
376        // safety: The type is known to be GcHeader.
377        let header = unsafe { &*header.cast::<L>() };
378        // hasn't visited?
379        if is_collecting(header) {
380            unset_collecting(header);
381            if is_unreachable(header) {
382                edit_gc_ref_count(header, 1); // revive
383            }
384            header.value().gc_traverse(&mut revive::<L>); // revive recursively
385        }
386    }
387    visit_list(list, |header| {
388        if is_collecting(header) && !is_unreachable(header) {
389            unset_collecting(header);
390            header.value().gc_traverse(&mut revive::<L>);
391        }
392    });
393}
394
395/// Release unreachable objects in the linked list.
396fn release_unreachable<L: Linked, K>(list: &L, lock: K) -> usize {
397    // Mark reachable objects. For example, A refers B. A's gc_ref_count
398    // is 1 while B's gc_ref_count is 0. In this case B should be revived
399    // by A's non-zero gc_ref_count.
400    mark_reachable(list);
401
402    let mut count = 0;
403
404    // Count unreachable objects. This is an optimization to avoid realloc.
405    visit_list(list, |header| {
406        if is_unreachable(header) {
407            count += 1;
408        }
409    });
410
411    debug::log(|| ("collect", format!("{count} unreachable objects")));
412
413    // Build a list of what to drop. The collecting steps change the linked list
414    // so `visit_list` cannot be used.
415    //
416    // Here we keep extra references to the `CcBox<T>` to keep them alive. This
417    // ensures metadata fields like `ref_count` is available.
418    let mut to_drop: Vec<Box<dyn GcClone>> = Vec::with_capacity(count);
419    visit_list(list, |header| {
420        if is_unreachable(header) {
421            to_drop.push(header.value().gc_clone());
422        }
423    });
424
425    // Restore "prev" so deleting nodes from the linked list can work.
426    restore_prev(list);
427
428    // Drop the lock so deref() can work, reference counts and the linked list
429    // can be changed. This is needed because gc_drop_t might change the ref
430    // counts. This is okay for linked list because objects has been cloned
431    // to a separate `to_drop` list and the original linked list is no longer
432    // used.
433    drop(lock);
434    // Drop the reference to the list so we don't reuse it.
435    // drop(list);
436
437    #[cfg(feature = "debug")]
438    {
439        crate::debug::GC_DROPPING.with(|d| d.set(true));
440    }
441
442    // Drop `T` without releasing memory of `CcBox<T>`. This might trigger some
443    // recursive drops of other `Cc<T>`. `CcBox<T>` need to stay alive so
444    // `Cc<T>::drop` can read the ref count metadata.
445    for value in &to_drop {
446        value.gc_drop_t();
447    }
448
449    // At this point the only references to the `CcBox<T>`s are inside the
450    // `to_drop` list. Dropping `to_drop` would release the memory.
451    for value in &to_drop {
452        let ref_count = value.gc_ref_count();
453        assert_eq!(
454            ref_count, 1,
455            concat!(
456                "bug: unexpected ref-count after dropping cycles\n",
457                "This usually indicates a buggy Trace or Drop implementation."
458            )
459        );
460    }
461
462    #[cfg(feature = "debug")]
463    {
464        crate::debug::GC_DROPPING.with(|d| d.set(false));
465    }
466
467    count
468}
469
470/// Restore `GcHeader.prev` as a pointer used in the linked list.
471fn restore_prev<L: Linked>(list: &L) {
472    let mut prev = list;
473    visit_list(list, |header| {
474        header.set_prev(prev);
475        prev = header;
476    });
477}
478
479fn is_unreachable<L: Linked>(header: &L) -> bool {
480    let prev = header.prev().addr();
481    is_collecting(header) && (prev >> PREV_SHIFT) == 0
482}
483
484pub(crate) fn is_collecting<L: Linked>(header: &L) -> bool {
485    let prev = header.prev().addr();
486    (prev & PREV_MASK_COLLECTING) != 0
487}
488
489fn set_visited<L: Linked>(header: &L) -> bool {
490    let prev = header.prev();
491    let visited = (prev.addr() & PREV_MASK_VISITED) != 0;
492    debug_assert!(
493        !visited,
494        "bug: double visit: {} (is Trace impl correct?)",
495        debug_name(header)
496    );
497    let new_prev = prev.map_addr(|prev| prev | PREV_MASK_VISITED);
498    header.set_prev(new_prev);
499    visited
500}
501
502fn unset_collecting<L: Linked>(header: &L) {
503    let prev = header.prev();
504    let new_prev = prev.map_addr(|prev| (prev & PREV_MASK_COLLECTING) ^ prev);
505    header.set_prev(new_prev);
506}
507
508fn edit_gc_ref_count<L: Linked>(header: &L, delta: isize) {
509    let prev = header.prev() as isize;
510    let new_prev = prev + (1 << PREV_SHIFT) * delta;
511    header.set_prev(without_provenance(new_prev.cast_unsigned()));
512}
513
514#[allow(unused_variables)]
515fn debug_name<L: Linked>(header: &L) -> String {
516    #[cfg(feature = "debug")]
517    {
518        header.value().gc_debug_name()
519    }
520
521    #[cfg(not(feature = "debug"))]
522    {
523        "(enable gcmodule \"debug\" feature for debugging)".to_string()
524    }
525}