boa_gc/
lib.rs

//! Boa's **`boa_gc`** crate implements a garbage collector.
//!
//! # Crate Overview
//! **`boa_gc`** is a mark-sweep garbage collector that provides the [`Trace`] and [`Finalize`]
//! traits for garbage-collected values.
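//!
//! # Example
//!
//! A minimal usage sketch (the `Counter` type is illustrative):
//!
//! ```
//! use boa_gc::{Finalize, Gc, GcRefCell, Trace};
//!
//! // `Finalize` and `Trace` can be derived for types that will live on the GC heap.
//! #[derive(Finalize, Trace)]
//! struct Counter {
//!     value: GcRefCell<u32>,
//! }
//!
//! let counter = Gc::new(Counter {
//!     value: GcRefCell::new(0),
//! });
//!
//! // `Gc` dereferences to the inner value, and `GcRefCell` provides interior mutability.
//! *counter.value.borrow_mut() += 1;
//! assert_eq!(*counter.value.borrow(), 1);
//! ```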
#![doc = include_str!("../ABOUT.md")]
#![doc(
    html_logo_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo_black.svg",
    html_favicon_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo_black.svg"
)]
#![cfg_attr(not(test), forbid(clippy::unwrap_used))]
#![allow(
    clippy::module_name_repetitions,
    clippy::redundant_pub_crate,
    clippy::let_unit_value
)]

extern crate self as boa_gc;

mod cell;
mod pointers;
mod trace;

pub(crate) mod internals;

use internals::{EphemeronBox, ErasedEphemeronBox, ErasedWeakMapBox, WeakMapBox};
use pointers::{NonTraceable, RawWeakMap};
use std::{
    cell::{Cell, RefCell},
    mem,
    ptr::NonNull,
};

pub use crate::trace::{Finalize, Trace, Tracer};
pub use boa_macros::{Finalize, Trace};
pub use cell::{GcRef, GcRefCell, GcRefMut};
pub use internals::GcBox;
pub use pointers::{Ephemeron, Gc, GcErased, WeakGc, WeakMap};

type GcErasedPointer = NonNull<GcBox<NonTraceable>>;
type EphemeronPointer = NonNull<dyn ErasedEphemeronBox>;
type ErasedWeakMapBoxPointer = NonNull<dyn ErasedWeakMapBox>;

thread_local!(static GC_DROPPING: Cell<bool> = const { Cell::new(false) });
thread_local!(static BOA_GC: RefCell<BoaGc> = RefCell::new(BoaGc {
    config: GcConfig::default(),
    runtime: GcRuntimeData::default(),
    strongs: Vec::default(),
    weaks: Vec::default(),
    weak_maps: Vec::default(),
}));

#[derive(Debug, Clone, Copy)]
struct GcConfig {
    /// The threshold at which the garbage collector will trigger a collection.
    threshold: usize,
    /// The maximum percentage of the threshold that live data may occupy after a collection
    /// before the threshold itself is grown.
    used_space_percentage: usize,
}

// Setting the defaults to arbitrary values currently.
//
// TODO: Make these values configurable.
impl Default for GcConfig {
    fn default() -> Self {
        Self {
            // Start at 1 MiB; the nursery size for V8 is ~1-8 MB and SpiderMonkey's can be up to 16 MB.
            threshold: 1_048_576,
            used_space_percentage: 70,
        }
    }
}

#[derive(Default, Debug, Clone, Copy)]
struct GcRuntimeData {
    collections: usize,
    bytes_allocated: usize,
}

#[derive(Debug)]
struct BoaGc {
    config: GcConfig,
    runtime: GcRuntimeData,
    strongs: Vec<GcErasedPointer>,
    weaks: Vec<EphemeronPointer>,
    weak_maps: Vec<ErasedWeakMapBoxPointer>,
}

impl Drop for BoaGc {
    fn drop(&mut self) {
        Collector::dump(self);
    }
}

// Whether or not the thread is currently in the sweep phase of garbage collection.
// During this phase, attempts to dereference a `Gc<T>` pointer will trigger a panic.
/// `DropGuard` flags whether the `Collector` is currently running `Collector::sweep()` or `Collector::dump()`.
///
/// While the `DropGuard` is active, no `GcBox` may be dereferenced or accessed, since doing so
/// could cause undefined behavior.
#[derive(Debug, Clone)]
struct DropGuard;

impl DropGuard {
    fn new() -> Self {
        GC_DROPPING.with(|dropping| dropping.set(true));
        Self
    }
}

impl Drop for DropGuard {
    fn drop(&mut self) {
        GC_DROPPING.with(|dropping| dropping.set(false));
    }
}

/// Returns `true` if it is safe for a type to run [`Finalize::finalize`].
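///
/// # Example
///
/// A sketch of the guard pattern a hand-written destructor might use before running cleanup
/// that touches garbage-collected data (the `Resource` type is illustrative):
///
/// ```
/// use boa_gc::{finalizer_safe, Finalize};
///
/// struct Resource;
///
/// impl Finalize for Resource {
///     fn finalize(&self) {
///         // Custom cleanup logic goes here.
///     }
/// }
///
/// impl Drop for Resource {
///     fn drop(&mut self) {
///         // Only finalize when the collector is not currently tearing down the heap.
///         if finalizer_safe() {
///             Finalize::finalize(self);
///         }
///     }
/// }
///
/// drop(Resource);
/// ```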
#[must_use]
#[inline]
pub fn finalizer_safe() -> bool {
    GC_DROPPING.with(|dropping| !dropping.get())
}

/// The `Allocator` handles allocation of garbage-collected values.
///
/// The allocator can trigger a garbage collection.
#[derive(Debug, Clone, Copy)]
struct Allocator;

impl Allocator {
    /// Allocates a new garbage-collected value on the garbage collector's heap.
    fn alloc_gc<T: Trace>(value: GcBox<T>) -> NonNull<GcBox<T>> {
        let element_size = size_of_val::<GcBox<T>>(&value);
        BOA_GC.with(|st| {
            let mut gc = st.borrow_mut();

            Self::manage_state(&mut gc);
            // SAFETY: the pointer cannot be null, since `Box` never returns null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) };
            let erased: NonNull<GcBox<NonTraceable>> = ptr.cast();

            gc.strongs.push(erased);
            gc.runtime.bytes_allocated += element_size;

            ptr
        })
    }

    fn alloc_ephemeron<K: Trace + ?Sized, V: Trace>(
        value: EphemeronBox<K, V>,
    ) -> NonNull<EphemeronBox<K, V>> {
        let element_size = size_of_val::<EphemeronBox<K, V>>(&value);
        BOA_GC.with(|st| {
            let mut gc = st.borrow_mut();

            Self::manage_state(&mut gc);
            // SAFETY: the pointer cannot be null, since `Box` never returns null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) };
            let erased: NonNull<dyn ErasedEphemeronBox> = ptr;

            gc.weaks.push(erased);
            gc.runtime.bytes_allocated += element_size;

            ptr
        })
    }

    fn alloc_weak_map<K: Trace + ?Sized, V: Trace + Clone>() -> WeakMap<K, V> {
        let weak_map = WeakMap {
            inner: Gc::new(GcRefCell::new(RawWeakMap::new())),
        };
        let weak = WeakGc::new(&weak_map.inner);

        BOA_GC.with(|st| {
            let mut gc = st.borrow_mut();

            let weak_box = WeakMapBox { map: weak };

            // SAFETY: the pointer cannot be null, since `Box` never returns null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(weak_box))) };
            let erased: ErasedWeakMapBoxPointer = ptr;

            gc.weak_maps.push(erased);

            weak_map
        })
    }

    fn manage_state(gc: &mut BoaGc) {
        if gc.runtime.bytes_allocated > gc.config.threshold {
            Collector::collect(gc);

            // Post-collection check:
            // if the bytes that remain allocated still occupy more than
            // `used_space_percentage` of the threshold, grow the threshold.
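            //
            // For example, with the default config (threshold = 1_048_576,
            // used_space_percentage = 70): if ~900_000 bytes survive the collection,
            // then 900_000 > 1_048_576 / 100 * 70 (= 733_950), so the threshold grows
            // to 900_000 / 70 * 100 = 1_285_700 bytes, keeping live data at roughly
            // 70% of the new threshold.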
            if gc.runtime.bytes_allocated
                > gc.config.threshold / 100 * gc.config.used_space_percentage
            {
                gc.config.threshold =
                    gc.runtime.bytes_allocated / gc.config.used_space_percentage * 100;
            }
        }
    }
}

struct Unreachables {
    strong: Vec<GcErasedPointer>,
    weak: Vec<NonNull<dyn ErasedEphemeronBox>>,
}

/// This collector currently functions in four main phases:
///
/// Mark -> Finalize -> Mark -> Sweep
///
/// 1. Mark nodes as reachable.
/// 2. Finalize the unreachable nodes.
/// 3. Mark again, because `Finalize::finalize` can potentially resurrect dead nodes.
/// 4. Sweep and drop all dead nodes.
///
/// A better approach in a more concurrent structure may be to reorder the phases:
///
/// Mark -> Sweep -> Finalize
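///
/// As a sketch of why the second mark pass is needed (the `Node`, `handle`, and
/// `KEEP_ALIVE` names below are purely illustrative):
///
/// ```ignore
/// impl Finalize for Node {
///     fn finalize(&self) {
///         // Cloning an otherwise-dead `Gc` into a live registry resurrects it,
///         // so the collector must re-mark before sweeping.
///         KEEP_ALIVE.with(|list| list.borrow_mut().push(self.handle.clone()));
///     }
/// }
/// ```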
struct Collector;

impl Collector {
    /// Run a collection on the full heap.
    fn collect(gc: &mut BoaGc) {
        gc.runtime.collections += 1;

        Self::trace_non_roots(gc);

        let mut tracer = Tracer::new();

        let unreachables = Self::mark_heap(&mut tracer, &gc.strongs, &gc.weaks, &gc.weak_maps);

        assert!(tracer.is_empty(), "The queue should be empty");

        // Only finalize if there are any unreachable nodes.
        if !unreachables.strong.is_empty() || !unreachables.weak.is_empty() {
            // Finalize all the unreachable nodes.
            // SAFETY: All passed pointers are valid, since we won't deallocate until `Self::sweep`.
            unsafe { Self::finalize(unreachables) };

            // Reuse the tracer's already allocated capacity.
            let _final_unreachables =
                Self::mark_heap(&mut tracer, &gc.strongs, &gc.weaks, &gc.weak_maps);
        }

        // SAFETY: All pointers in `strongs` and `weaks` are valid per the invariants of our GC.
        unsafe {
            Self::sweep(
                &mut gc.strongs,
                &mut gc.weaks,
                &mut gc.runtime.bytes_allocated,
            );
        }

        // Weak maps have to be cleared after the sweep, since the process dereferences GcBoxes.
        gc.weak_maps.retain(|w| {
            // SAFETY: The `Allocator` always ensures that the pointers in `weak_maps` are valid.
            let node_ref = unsafe { w.as_ref() };

            if node_ref.is_live() {
                node_ref.clear_dead_entries();

                true
            } else {
                // SAFETY:
                // The `Allocator` must always ensure that every stored pointer is a valid, non-null
                // pointer that was allocated by `Box::into_raw(Box::new(..))`.
                let _unmarked_node = unsafe { Box::from_raw(w.as_ptr()) };

                false
            }
        });

        gc.strongs.shrink_to(gc.strongs.len() >> 2);
        gc.weaks.shrink_to(gc.weaks.len() >> 2);
        gc.weak_maps.shrink_to(gc.weak_maps.len() >> 2);
    }

    fn trace_non_roots(gc: &BoaGc) {
        // Count the handles to each node that are reachable from within the GC heap itself.
        // Any node with references that this scan does not account for is referenced from
        // somewhere outside the heap, and is therefore a root.
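        //
        // For example, if a node has 3 strong handles and all 3 are found while scanning
        // other GC'd values, every handle lives inside the heap and the node is not a root;
        // if only 2 are found, some handle must live outside the heap (e.g. on the stack),
        // so the node is treated as a root.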
        for node in &gc.strongs {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let trace_non_roots_fn = unsafe { node.as_ref() }.trace_non_roots_fn();

            // SAFETY: The function pointer is appropriate for this node type because we extract it from its vtable.
            unsafe {
                trace_non_roots_fn(*node);
            }
        }

        for eph in &gc.weaks {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let eph_ref = unsafe { eph.as_ref() };
            eph_ref.trace_non_roots();
        }
    }

    /// Walk the heap and mark any nodes deemed reachable
    fn mark_heap(
        tracer: &mut Tracer,
        strongs: &[GcErasedPointer],
        weaks: &[EphemeronPointer],
        weak_maps: &[ErasedWeakMapBoxPointer],
    ) -> Unreachables {
        // Walk the list, tracing and marking the nodes
        let mut strong_dead = Vec::new();
        let mut pending_ephemerons = Vec::new();

        // === Preliminary mark phase ===
        //
        // 0. Get the naive list of possibly dead nodes.
        for node in strongs {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let node_ref = unsafe { node.as_ref() };
            if node_ref.is_rooted() {
                tracer.enqueue(*node);

                // SAFETY: all nodes must be valid as this phase cannot drop any node.
                unsafe {
                    tracer.trace_until_empty();
                }
            } else if !node_ref.is_marked() {
                strong_dead.push(*node);
            }
        }

        // 0.1. Early return if there are no ephemerons in the GC.
        if weaks.is_empty() {
            strong_dead.retain_mut(|node| {
                // SAFETY: node must be valid as this phase cannot drop any node.
                unsafe { !node.as_ref().is_marked() }
            });
            return Unreachables {
                strong: strong_dead,
                weak: Vec::new(),
            };
        }

        // === Weak mark phase ===
        //
        // 1. Get the naive list of ephemerons that are either unreachable or whose key is dead,
        // and trace all the ephemerons that are rooted and have live keys. Also remove from this
        // list the ephemerons that are marked but whose value is dead.
        for eph in weaks {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let eph_ref = unsafe { eph.as_ref() };
            let header = eph_ref.header();
            if header.is_rooted() {
                header.mark();
            }
            // SAFETY: the garbage collector ensures `eph_ref` always points to valid data.
            if unsafe { !eph_ref.trace(tracer) } {
                pending_ephemerons.push(*eph);
            }

            // SAFETY: all nodes must be valid as this phase cannot drop any node.
            unsafe {
                tracer.trace_until_empty();
            }
        }

        // 2. Trace all the weak pointers in the live weak maps to make sure they do not get swept.
        for w in weak_maps {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let node_ref = unsafe { w.as_ref() };

            // SAFETY: The garbage collector ensures that all nodes are valid.
            unsafe { node_ref.trace(tracer) };

            // SAFETY: all nodes must be valid as this phase cannot drop any node.
            unsafe {
                tracer.trace_until_empty();
            }
        }

        // 3. Iterate through all pending ephemerons, removing the ones which have been successfully
        // traced. If there are no changes in the pending ephemerons list, it means that there are no
        // more reachable ephemerons from the remaining ephemeron values.
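        //
        // For example, if ephemeron A's value holds the key of ephemeron B, tracing A in one
        // pass can newly mark B's key, so B only becomes traceable on a later pass; looping
        // until the pending list stops shrinking handles such chains.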
        let mut previous_len = pending_ephemerons.len();
        loop {
            pending_ephemerons.retain_mut(|eph| {
                // SAFETY: node must be valid as this phase cannot drop any node.
                let eph_ref = unsafe { eph.as_ref() };
                // SAFETY: the garbage collector ensures `eph_ref` always points to valid data.
                let is_key_marked = unsafe { !eph_ref.trace(tracer) };

                // SAFETY: all nodes must be valid as this phase cannot drop any node.
                unsafe {
                    tracer.trace_until_empty();
                }

                is_key_marked
            });

            if previous_len == pending_ephemerons.len() {
                break;
            }

            previous_len = pending_ephemerons.len();
        }

        // 4. The remaining list should contain the ephemerons that are either unreachable or whose
        // key is dead. Clean up the strong pointers, since this procedure could have marked some
        // more strong pointers.
        strong_dead.retain_mut(|node| {
            // SAFETY: node must be valid as this phase cannot drop any node.
            unsafe { !node.as_ref().is_marked() }
        });

        Unreachables {
            strong: strong_dead,
            weak: pending_ephemerons,
        }
    }

    /// # Safety
    ///
    /// Passing an `unreachables` whose `strong` or `weak` vec contains invalid pointers will
    /// result in Undefined Behaviour.
    unsafe fn finalize(unreachables: Unreachables) {
        for node in unreachables.strong {
            // SAFETY: The caller must ensure all pointers inside `unreachables.strong` are valid.
            let node_ref = unsafe { node.as_ref() };
            let run_finalizer_fn = node_ref.run_finalizer_fn();

            // SAFETY: The function pointer is appropriate for this node type because we extract it from its vtable.
            unsafe {
                run_finalizer_fn(node);
            }
        }
        for node in unreachables.weak {
            // SAFETY: The caller must ensure all pointers inside `unreachables.weak` are valid.
            let node = unsafe { node.as_ref() };
            node.finalize_and_clear();
        }
    }

    /// # Safety
    ///
    /// - Providing an invalid pointer in `strong` or `weak`, or in any of the headers of each
    ///   node, will result in Undefined Behaviour.
    /// - Providing a list of pointers that weren't allocated by `Box::into_raw(Box::new(..))`
    ///   will result in Undefined Behaviour.
    unsafe fn sweep(
        strong: &mut Vec<GcErasedPointer>,
        weak: &mut Vec<EphemeronPointer>,
        total_allocated: &mut usize,
    ) {
        let _guard = DropGuard::new();

        strong.retain(|node| {
            // SAFETY: The caller must ensure the validity of every node of `strong`.
            let node_ref = unsafe { node.as_ref() };
            if node_ref.is_marked() {
                node_ref.header.unmark();
                node_ref.reset_non_root_count();

                true
            } else {
                // SAFETY: The algorithm ensures only unmarked/unreachable pointers are dropped.
                // The caller must ensure all pointers were allocated by `Box::into_raw(Box::new(..))`.
                let drop_fn = node_ref.drop_fn();
                let size = node_ref.size();
                *total_allocated -= size;

                // SAFETY: The function pointer is appropriate for this node type because we extract it from its vtable.
                unsafe {
                    drop_fn(*node);
                }

                false
            }
        });

        weak.retain(|eph| {
            // SAFETY: The caller must ensure the validity of every node of `weak`.
            let eph_ref = unsafe { eph.as_ref() };
            let header = eph_ref.header();
            if header.is_marked() {
                header.unmark();
                header.reset_non_root_count();

                true
            } else {
                // SAFETY: The algorithm ensures only unmarked/unreachable pointers are dropped.
                // The caller must ensure all pointers were allocated by `Box::into_raw(Box::new(..))`.
                let unmarked_eph = unsafe { Box::from_raw(eph.as_ptr()) };
                let unallocated_bytes = size_of_val(&*unmarked_eph);
                *total_allocated -= unallocated_bytes;

                false
            }
        });
    }

    // Clean up the heap when `BoaGc` is dropped.
    fn dump(gc: &mut BoaGc) {
        // Weak maps have to be dropped first, since the process dereferences GcBoxes.
        // This can be done without initializing a dropguard since no `GcBox`es are being dropped.
        for node in mem::take(&mut gc.weak_maps) {
            // SAFETY:
            // The `Allocator` must always ensure that every stored pointer is a valid, non-null
            // pointer that was allocated by `Box::into_raw(Box::new(..))`.
            let _unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
        }

        // This should only be invoked while `BOA_GC` is being dropped; initialize a dropguard so
        // that destructors can tell (via `finalizer_safe`) that it is no longer safe to access
        // `GcBox`es.
        let _guard = DropGuard::new();

        for node in mem::take(&mut gc.strongs) {
            // SAFETY:
            // The `Allocator` must always ensure that every stored pointer is a valid, non-null
            // pointer that was allocated by `Box::into_raw(Box::new(..))`.
            let drop_fn = unsafe { node.as_ref() }.drop_fn();

            // SAFETY: The function pointer is appropriate for this node type because we extract it from its vtable.
            unsafe {
                drop_fn(node);
            }
        }

        for node in mem::take(&mut gc.weaks) {
            // SAFETY:
            // The `Allocator` must always ensure that every stored pointer is a valid, non-null
            // pointer that was allocated by `Box::into_raw(Box::new(..))`.
            let _unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
        }
    }
}

/// Forcefully runs a garbage collection of all inaccessible nodes.
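///
/// # Example
///
/// A minimal sketch (assumes nothing else is keeping the allocation alive):
///
/// ```
/// use boa_gc::{force_collect, Finalize, Gc, Trace};
///
/// #[derive(Finalize, Trace)]
/// struct Data(u32);
///
/// let data = Gc::new(Data(42));
/// assert_eq!(data.0, 42);
/// drop(data);
///
/// // The allocation is now unreachable; reclaim it without waiting for the
/// // allocation threshold to be reached.
/// force_collect();
/// ```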
pub fn force_collect() {
    BOA_GC.with(|current| {
        let mut gc = current.borrow_mut();

        if gc.runtime.bytes_allocated > 0 {
            Collector::collect(&mut gc);
        }
    });
}

#[cfg(test)]
mod test;

/// Returns `true` if any weak maps are currently allocated.
#[cfg(test)]
#[must_use]
pub fn has_weak_maps() -> bool {
    BOA_GC.with(|current| {
        let gc = current.borrow();

        !gc.weak_maps.is_empty()
    })
}