jrsonnet_gcmodule/
collect.rs

// The main idea comes from CPython 3.8's `gcmodule.c` [1].
//
// [1]: https://github.com/python/cpython/blob/v3.8.0/Modules/gcmodule.c

// NOTE: Consider adding generation support if necessary. It won't be too hard.

use crate::cc::CcDummy;
use crate::cc::CcDyn;
use crate::cc::GcClone;
use crate::debug;
use crate::ref_count::RefCount;
use crate::ref_count::SingleThreadRefCount;
use crate::Cc;
use crate::Trace;
use std::cell::Cell;
use std::cell::RefCell;
use std::cell::UnsafeCell;
use std::marker::PhantomData;
use std::mem;
use std::ptr::without_provenance;

/// Provides advanced explicit control about where to store [`Cc`](type.Cc.html)
/// objects.
///
/// An [`ObjectSpace`](struct.ObjectSpace.html) provides an alternative place for
/// a collection of [`Cc`](type.Cc.html) objects. Those objects are isolated from
/// the default thread-local space used by objects created via `Cc::new`.
/// The objects in this space can be collected by
/// [`ObjectSpace::collect_cycles()`](struct.ObjectSpace.html#method.collect_cycles),
/// but not by [`collect_thread_cycles`](fn.collect_thread_cycles.html).
///
/// Use [`ObjectSpace::create`](struct.ObjectSpace.html#method.create) to
/// create new objects within the space.
///
/// Objects within a space should not refer to objects in a different space.
/// Violating this rule might cause memory leaks.
///
/// # Example
///
/// ```
/// use jrsonnet_gcmodule::{Cc, ObjectSpace, Trace, TraceBox};
/// use std::cell::RefCell;
///
/// let mut space = ObjectSpace::default();
/// assert_eq!(space.count_tracked(), 0);
///
/// {
///     type List = Cc<RefCell<Vec<TraceBox<dyn Trace>>>>;
///     let a: List = space.create(Default::default());
///     let b: List = space.create(Default::default());
///     a.borrow_mut().push(TraceBox(Box::new(b.clone())));
///     b.borrow_mut().push(TraceBox(Box::new(a.clone())));
/// }
///
/// assert_eq!(space.count_tracked(), 2);
/// assert_eq!(space.collect_cycles(), 2);
/// ```
pub struct ObjectSpace {
    /// Linked list to the tracked objects.
    pub(crate) list: RefCell<OwnedGcHeader>,

    /// Marks `ObjectSpace` as `!Send` and `!Sync`. This enforces
    /// thread-exclusive access to the linked list, so methods can take
    /// `&self` and rely on interior mutability instead of `&mut self`.
    _phantom: PhantomData<Cc<()>>,
}

/// This is a private type.
pub trait AbstractObjectSpace: 'static + Sized {
    type RefCount: RefCount;
    type Header;

    /// Insert "header" and "value" to the linked list.
    fn insert(&self, header: *const Self::Header, value: &dyn CcDyn);

    /// Remove from the linked list.
    fn remove(header: &Self::Header);

    /// Create a `RefCount` object.
    fn new_ref_count(&self, tracked: bool) -> Self::RefCount;

    fn empty_header(&self) -> Self::Header;
}

impl AbstractObjectSpace for ObjectSpace {
    type RefCount = SingleThreadRefCount;
    type Header = GcHeader;

    fn insert(&self, header: *const Self::Header, value: &dyn CcDyn) {
        let list = self.list.borrow();
        let prev: &GcHeader = list.inner();
        debug_assert!(unsafe { (*header).next.get() }.is_null());
        let next = prev.next.get();
        unsafe { (*header).prev.set(prev) };
        unsafe { (*header).next.set(next) };
        unsafe {
            // safety: The linked list is maintained, and pointers are valid.
            (*next).prev.set(header);
            // safety: Splits the `&dyn CcDyn` fat pointer to reach its vtable
            // pointer. Tested by test_gc_header_value.
            let fat_ptr: [*mut (); 2] = mem::transmute(value);
            (*header).ccdyn_vptr.set(fat_ptr[1]);
        }
        prev.next.set(header);
    }

    #[inline]
    fn remove(header: &Self::Header) {
        let header: &GcHeader = header;
        debug_assert!(!header.next.get().is_null());
        debug_assert!(!header.prev.get().is_null());
        let next = unmask_ptr(header.next.get());
        let prev = unmask_ptr(header.prev.get());
        // safety: The linked list is maintained. Pointers in it are valid.
        unsafe {
            (*prev).next.set(next);
            (*next).prev.set(prev);
        }
        header.next.set(std::ptr::null_mut());
    }

    #[inline]
    fn new_ref_count(&self, tracked: bool) -> Self::RefCount {
        SingleThreadRefCount::new(tracked)
    }

    #[inline]
    fn empty_header(&self) -> Self::Header {
        GcHeader::empty()
    }
}

impl Default for ObjectSpace {
    /// Constructs an empty [`ObjectSpace`](struct.ObjectSpace.html).
    fn default() -> Self {
        let header = new_gc_list();
        Self {
            list: RefCell::new(header),
            _phantom: PhantomData,
        }
    }
}

impl ObjectSpace {
    /// Count objects tracked by this [`ObjectSpace`](struct.ObjectSpace.html).
    pub fn count_tracked(&self) -> usize {
        let list = self.list.borrow();
        let list: &GcHeader = list.inner();
        let mut count = 0;
        visit_list(list, |_| count += 1);
        count
    }

    /// Collect cyclic garbage tracked by this [`ObjectSpace`](struct.ObjectSpace.html).
    /// Return the number of objects collected.
    pub fn collect_cycles(&self) -> usize {
        let list = self.list.borrow();
        let list: &GcHeader = list.inner();
        collect_list(list, ())
    }

    /// Constructs a new [`Cc<T>`](type.Cc.html) in this
    /// [`ObjectSpace`](struct.ObjectSpace.html).
    ///
    /// The returned object should only refer to objects in the same space.
    /// Otherwise the collector might fail to collect cycles.
    pub fn create<T: Trace>(&self, value: T) -> Cc<T> {
        // `ObjectSpace` is `!Send + !Sync`, so `&self` still implies
        // thread-exclusive access.
        Cc::new_in_space(value, self)
    }

    /// Leak all objects allocated in this space.
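    ///
    /// # Example
    ///
    /// A sketch of intentionally leaking a cycle instead of collecting it,
    /// reusing the cycle shape from the type-level example above:
    ///
    /// ```
    /// use jrsonnet_gcmodule::{Cc, ObjectSpace, Trace, TraceBox};
    /// use std::cell::RefCell;
    ///
    /// let space = ObjectSpace::default();
    /// type List = Cc<RefCell<Vec<TraceBox<dyn Trace>>>>;
    /// let a: List = space.create(Default::default());
    /// let b: List = space.create(Default::default());
    /// a.borrow_mut().push(TraceBox(Box::new(b.clone())));
    /// b.borrow_mut().push(TraceBox(Box::new(a.clone())));
    /// drop((a, b));
    ///
    /// assert_eq!(space.count_tracked(), 2);
    /// space.leak(); // the cycle is detached from the collector, never freed
    /// assert_eq!(space.count_tracked(), 0);
    /// ```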
    pub fn leak(&self) {
        *self.list.borrow_mut() = new_gc_list();
    }

    // TODO: Consider implementing "merge" or a method to collect multiple
    // spaces together, to make it easier to support generational collection.
}

impl Drop for ObjectSpace {
    fn drop(&mut self) {
        self.collect_cycles();
    }
}

pub trait Linked {
    fn next(&self) -> *const Self;
    fn prev(&self) -> *const Self;
    fn set_prev(&self, other: *const Self);

    fn value_ptr(this: *const Self) -> *const dyn CcDyn;
    /// Get the trait object to operate on the actual `CcBox`.
    fn value(&self) -> &dyn CcDyn;
}

/// Internal metadata used by the cycle collector.
#[repr(C)]
pub struct GcHeader {
    pub(crate) next: Cell<*const GcHeader>,
    pub(crate) prev: Cell<*const GcHeader>,

    /// Vtable of (`&CcBox<T> as &dyn CcDyn`)
    pub(crate) ccdyn_vptr: Cell<*const ()>,

    /// https://github.com/rust-lang/unsafe-code-guidelines/issues/256#issuecomment-2506767812
    pub(crate) _marker: UnsafeCell<()>,
}

impl Linked for GcHeader {
    #[inline]
    fn next(&self) -> *const Self {
        self.next.get()
    }
    #[inline]
    fn prev(&self) -> *const Self {
        self.prev.get()
    }
    #[inline]
    fn set_prev(&self, other: *const Self) {
        self.prev.set(other)
    }
    fn value_ptr(this: *const Self) -> *const dyn CcDyn {
        // safety: Rebuilds the trait object from the data pointer (the value
        // lives immediately after this header in the same allocation) and the
        // stored vtable pointer. Tested by test_gc_header_value_consistency().
        unsafe {
            let fat_ptr: (*const (), *const ()) = (this.offset(1) as _, (*this).ccdyn_vptr.get());
            mem::transmute(fat_ptr)
        }
    }
    #[inline]
    fn value(&self) -> &dyn CcDyn {
        unsafe { mem::transmute(Self::value_ptr(self)) }
    }
}

impl GcHeader {
    /// Create an empty header.
    pub(crate) fn empty() -> Self {
        Self {
            next: Cell::new(std::ptr::null()),
            prev: Cell::new(std::ptr::null()),
            ccdyn_vptr: Cell::new(CcDummy::ccdyn_vptr()),
            _marker: UnsafeCell::new(()),
        }
    }
}

/// Collect cyclic garbage in the current thread created by
/// [`Cc::new`](type.Cc.html#method.new).
/// Return the number of objects collected.
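///
/// # Example
///
/// A minimal sketch, assuming this function is re-exported at the crate root
/// like the other public items used here:
///
/// ```
/// use jrsonnet_gcmodule::{collect_thread_cycles, Cc, Trace, TraceBox};
/// use std::cell::RefCell;
///
/// {
///     type List = Cc<RefCell<Vec<TraceBox<dyn Trace>>>>;
///     let a: List = Cc::new(Default::default());
///     let b: List = Cc::new(Default::default());
///     a.borrow_mut().push(TraceBox(Box::new(b.clone())));
///     b.borrow_mut().push(TraceBox(Box::new(a.clone())));
/// }
/// // `a` and `b` keep each other alive; only the collector can free them.
/// assert_eq!(collect_thread_cycles(), 2);
/// ```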
pub fn collect_thread_cycles() -> usize {
    debug::log(|| ("collect", "collect_thread_cycles"));
    THREAD_OBJECT_SPACE.with(|list| list.collect_cycles())
}

/// Count the objects in the current thread that were created by
/// [`Cc::new`](type.Cc.html#method.new) and are tracked by the collector.
/// Return the number of objects tracked.
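///
/// A small sketch, assuming this function is re-exported at the crate root
/// and that values with no interior `Cc` (such as plain integers) are
/// created untracked:
///
/// ```
/// use jrsonnet_gcmodule::{count_thread_tracked, Cc};
///
/// let value = Cc::new(42);
/// // An `i32` cannot participate in a reference cycle, so it is not tracked.
/// assert_eq!(count_thread_tracked(), 0);
/// drop(value);
/// ```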
pub fn count_thread_tracked() -> usize {
    THREAD_OBJECT_SPACE.with(|list| list.count_tracked())
}

thread_local!(pub(crate) static THREAD_OBJECT_SPACE: ObjectSpace = ObjectSpace::default());

/// Acquire a reference to the thread-local global object space.
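///
/// A small usage sketch, assuming this function is re-exported at the crate
/// root:
///
/// ```
/// use jrsonnet_gcmodule::with_thread_object_space;
///
/// let tracked = with_thread_object_space(|space| space.count_tracked());
/// assert_eq!(tracked, 0);
/// ```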
pub fn with_thread_object_space<R>(handler: impl FnOnce(&ObjectSpace) -> R) -> R {
    THREAD_OBJECT_SPACE.with(handler)
}

pub(crate) struct OwnedGcHeader {
    raw: *mut GcHeader,
}
impl OwnedGcHeader {
    fn inner(&self) -> &GcHeader {
        unsafe { &*self.raw }
    }
}
impl Drop for OwnedGcHeader {
    fn drop(&mut self) {
        drop(unsafe { Box::from_raw(self.raw) });
    }
}

/// Create an empty linked list with a dummy GcHeader.
pub(crate) fn new_gc_list() -> OwnedGcHeader {
    let header = Box::into_raw(Box::new(GcHeader::empty()));
    unsafe { (*header).prev.set(header) };
    unsafe { (*header).next.set(header) };

    OwnedGcHeader { raw: header }
}

/// Scan the specified linked list. Collect cycles.
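///
/// This follows the trial-deletion scheme from CPython's `gcmodule.c`:
/// snapshot each object's reference count, subtract the references that
/// originate from other tracked objects, then release whatever remains
/// unreachable from outside the list.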
pub(crate) fn collect_list<L: Linked, K>(list: &L, lock: K) -> usize {
    update_refs(list);
    subtract_refs(list);
    release_unreachable(list, lock)
}

/// Visit the linked list.
pub(crate) fn visit_list<'a, L: Linked>(list: &'a L, mut func: impl FnMut(&'a L)) {
    // Skip the first dummy entry.
    let mut ptr = list.next();
    while ptr as *const _ != list as *const _ {
        // The linked list is maintained so the pointer is valid.
        let header: &L = unsafe { &*ptr };
        ptr = header.next();
        func(header);
    }
}

const PTR_MASK: usize = usize::MAX & !(0b11);
const PREV_MASK_COLLECTING: usize = 1;
const PREV_MASK_VISITED: usize = 2;
const PREV_SHIFT: u32 = 2;
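
// Layout of the tagged `prev` word while a collection is running (a sketch;
// outside of collection, `prev` is a plain pointer whose low two bits are
// zero because `GcHeader` is at least 4-byte aligned):
//
//   [ gc_ref_count (bits PREV_SHIFT..) | VISITED (bit 1) | COLLECTING (bit 0) ]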

#[inline]
fn unmask_ptr<T>(ptr: *const T) -> *const T {
    ptr.map_addr(|ptr| ptr & PTR_MASK)
}

/// Temporarily use `GcHeader.prev` as `gc_ref_count`.
/// The idea comes from https://bugs.python.org/issue33597.
fn update_refs<L: Linked>(list: &L) {
    visit_list(list, |header| {
        let ref_count = header.value().gc_ref_count();
        // It's possible that the ref_count becomes 0 in a multi-thread context:
        //  thread 1> drop()
        //  thread 1> drop() -> dec_ref()
        //  thread 2> collect_cycles()        # take linked list lock
        //  thread 1> drop() -> drop_ccbox()  # blocked by the linked list lock
        //  thread 2> observe that ref_count is 0, but T is not dropped yet.
        // In such a case, just ignore the object by not marking it as COLLECTING.
        if ref_count > 0 {
            let shifted = (ref_count << PREV_SHIFT) | PREV_MASK_COLLECTING;
            header.set_prev(without_provenance(shifted));
        } else {
            debug_assert!(header.prev().addr() & PREV_MASK_COLLECTING == 0);
        }
    });
}

/// Subtract ref counts in `GcHeader.prev` by calling the non-recursive
/// `Trace::trace` on every tracked object.
///
/// After this, potentially unreachable objects will have their ref count
/// down to 0. If the vertices in a connected component _all_ have ref
/// count 0, they are unreachable and can be released.
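///
/// For example, take a two-node cycle `A <-> B` where `A` also has one
/// reference from a live local variable. `update_refs` stores the counts
/// `A: 2, B: 1`; subtracting the two internal edges leaves `A: 1, B: 0`.
/// `B` is not garbage yet: `mark_reachable` below revives it through `A`'s
/// non-zero count.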
fn subtract_refs<L: Linked>(list: &L) {
    let mut tracer = |header: *const ()| {
        // safety: The type is known to be GcHeader.
        let header = unsafe { &*(header as *const L) };
        if is_collecting(header) {
            debug_assert!(
                !is_unreachable(header),
                "bug: object {} becomes unreachable while trying to dec_ref (is Trace impl correct?)",
                debug_name(header)
            );
            edit_gc_ref_count(header, -1);
        }
    };
    visit_list(list, |header| {
        set_visited(header);
        header.value().gc_traverse(&mut tracer);
    });
}

/// Mark objects as reachable recursively. So ref count 0 means unreachable
/// values. This also removes the COLLECTING flag for reachable objects so
/// unreachable objects all have the COLLECTING flag set.
fn mark_reachable<L: Linked>(list: &L) {
    fn revive<L: Linked>(header: *const ()) {
        // safety: The type is known to be GcHeader.
        let header = unsafe { &*(header as *const L) };
        // hasn't visited?
        if is_collecting(header) {
            unset_collecting(header);
            if is_unreachable(header) {
                edit_gc_ref_count(header, 1); // revive
            }
            header.value().gc_traverse(&mut revive::<L>); // revive recursively
        }
    }
    visit_list(list, |header| {
        if is_collecting(header) && !is_unreachable(header) {
            unset_collecting(header);
            header.value().gc_traverse(&mut revive::<L>)
        }
    });
}

/// Release unreachable objects in the linked list.
fn release_unreachable<L: Linked, K>(list: &L, lock: K) -> usize {
    // Mark reachable objects. For example, A refers to B. A's gc_ref_count
    // is 1 while B's gc_ref_count is 0. In this case B should be revived
    // by A's non-zero gc_ref_count.
    mark_reachable(list);

    let mut count = 0;

    // Count unreachable objects. This is an optimization to avoid realloc.
    visit_list(list, |header| {
        if is_unreachable(header) {
            count += 1;
        }
    });

    debug::log(|| ("collect", format!("{} unreachable objects", count)));

    // Build a list of what to drop. The dropping steps below change the
    // linked list, so `visit_list` cannot be used while dropping.
    //
    // Here we keep extra references to the `CcBox<T>` to keep them alive.
    // This ensures metadata fields like `ref_count` remain available.
    let mut to_drop: Vec<Box<dyn GcClone>> = Vec::with_capacity(count);
    visit_list(list, |header| {
        if is_unreachable(header) {
            to_drop.push(header.value().gc_clone());
        }
    });

    // Restore "prev" so deleting nodes from the linked list can work.
    restore_prev(list);

    // Drop the lock so deref() can work, and reference counts and the linked
    // list can be changed. This is needed because gc_drop_t might change the
    // ref counts. This is okay for the linked list because the objects have
    // been cloned into a separate `to_drop` list and the original linked list
    // is no longer used.
    drop(lock);
    // Drop the reference to the list so we don't reuse it.
    // drop(list);

    #[cfg(feature = "debug")]
    {
        crate::debug::GC_DROPPING.with(|d| d.set(true));
    }

    // Drop `T` without releasing the memory of `CcBox<T>`. This might trigger
    // recursive drops of other `Cc<T>`. `CcBox<T>` needs to stay alive so
    // `Cc<T>::drop` can read the ref count metadata.
    for value in to_drop.iter() {
        value.gc_drop_t();
    }

    // At this point the only references to the `CcBox<T>`s are inside the
    // `to_drop` list. Dropping `to_drop` releases the memory.
    for value in to_drop.iter() {
        let ref_count = value.gc_ref_count();
        assert_eq!(
            ref_count, 1,
            concat!(
                "bug: unexpected ref-count after dropping cycles\n",
                "This usually indicates a buggy Trace or Drop implementation."
            )
        );
    }

    #[cfg(feature = "debug")]
    {
        crate::debug::GC_DROPPING.with(|d| d.set(false));
    }

    count
}

/// Restore `GcHeader.prev` as a pointer used in the linked list.
fn restore_prev<L: Linked>(list: &L) {
    let mut prev = list;
    visit_list(list, |header| {
        header.set_prev(prev);
        prev = header;
    });
}

fn is_unreachable<L: Linked>(header: &L) -> bool {
    let prev = header.prev().addr();
    is_collecting(header) && (prev >> PREV_SHIFT) == 0
}

pub(crate) fn is_collecting<L: Linked>(header: &L) -> bool {
    let prev = header.prev().addr();
    (prev & PREV_MASK_COLLECTING) != 0
}

fn set_visited<L: Linked>(header: &L) -> bool {
    let prev = header.prev();
    let visited = (prev.addr() & PREV_MASK_VISITED) != 0;
    debug_assert!(
        !visited,
        "bug: double visit: {} (is Trace impl correct?)",
        debug_name(header)
    );
    let new_prev = prev.map_addr(|prev| prev | PREV_MASK_VISITED);
    header.set_prev(new_prev);
    visited
}

fn unset_collecting<L: Linked>(header: &L) {
    let prev = header.prev();
    // Clear the COLLECTING bit while keeping the rest of the word intact.
    let new_prev = prev.map_addr(|prev| prev & !PREV_MASK_COLLECTING);
    header.set_prev(new_prev);
}

fn edit_gc_ref_count<L: Linked>(header: &L, delta: isize) {
    let prev = header.prev().addr() as isize;
    let new_prev = prev + (1 << PREV_SHIFT) * delta;
    header.set_prev(without_provenance(new_prev as usize));
}

#[allow(unused_variables)]
fn debug_name<L: Linked>(header: &L) -> String {
    #[cfg(feature = "debug")]
    {
        return header.value().gc_debug_name();
    }

    #[cfg(not(feature = "debug"))]
    {
        "(enable gcmodule \"debug\" feature for debugging)".to_string()
    }
}