boa_gc/lib.rs
//! Boa's **`boa_gc`** crate implements a garbage collector.
//!
//! # Crate Overview
//! **`boa_gc`** is a mark-sweep garbage collector that provides the [`Trace`] and [`Finalize`]
//! traits for garbage collected values.
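//!
//! # Example
//!
//! A minimal sketch of allocating a garbage-collected value with the [`Gc`] pointer and the
//! derive macros re-exported below (the `Data` type is purely illustrative):
//!
//! ```
//! use boa_gc::{Finalize, Gc, Trace};
//!
//! // A user-defined type that can be stored on the GC heap.
//! #[derive(Trace, Finalize)]
//! struct Data {
//!     value: u32,
//! }
//!
//! // Allocate the value on the GC heap; it is reclaimed once it becomes unreachable.
//! let data = Gc::new(Data { value: 42 });
//! assert_eq!(data.value, 42);
//! ```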
#![doc = include_str!("../ABOUT.md")]
#![doc(
    html_logo_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo_black.svg",
    html_favicon_url = "https://raw.githubusercontent.com/boa-dev/boa/main/assets/logo_black.svg"
)]
#![cfg_attr(not(test), forbid(clippy::unwrap_used))]
#![allow(
    clippy::module_name_repetitions,
    clippy::redundant_pub_crate,
    clippy::let_unit_value
)]

extern crate self as boa_gc;

mod cell;
mod pointers;
mod trace;

pub(crate) mod internals;

use internals::{EphemeronBox, ErasedEphemeronBox, ErasedWeakMapBox, WeakMapBox};
use pointers::{NonTraceable, RawWeakMap};
use std::{
    cell::{Cell, RefCell},
    mem,
    ptr::NonNull,
};

pub use crate::trace::{Finalize, Trace, Tracer};
pub use boa_macros::{Finalize, Trace};
pub use cell::{GcRef, GcRefCell, GcRefMut};
pub use internals::GcBox;
pub use pointers::{Ephemeron, Gc, GcErased, WeakGc, WeakMap};

type GcErasedPointer = NonNull<GcBox<NonTraceable>>;
type EphemeronPointer = NonNull<dyn ErasedEphemeronBox>;
type ErasedWeakMapBoxPointer = NonNull<dyn ErasedWeakMapBox>;

thread_local!(static GC_DROPPING: Cell<bool> = const { Cell::new(false) });
thread_local!(static BOA_GC: RefCell<BoaGc> = RefCell::new(BoaGc {
    config: GcConfig::default(),
    runtime: GcRuntimeData::default(),
    strongs: Vec::default(),
    weaks: Vec::default(),
    weak_maps: Vec::default(),
}));

#[derive(Debug, Clone, Copy)]
struct GcConfig {
    /// The threshold (in bytes) at which the garbage collector will trigger a collection.
    threshold: usize,
    /// The percentage of used space at which the garbage collector will trigger a collection.
    used_space_percentage: usize,
}

// The defaults are arbitrary values for now.
//
// TODO: Add a way to configure these later.
impl Default for GcConfig {
    fn default() -> Self {
        Self {
            // Start at 1MB; the nursery size for V8 is ~1-8MB and SpiderMonkey's can be up to 16MB.
            threshold: 1_048_576,
            used_space_percentage: 70,
        }
    }
}

#[derive(Default, Debug, Clone, Copy)]
struct GcRuntimeData {
    collections: usize,
    bytes_allocated: usize,
}

#[derive(Debug)]
struct BoaGc {
    config: GcConfig,
    runtime: GcRuntimeData,
    strongs: Vec<GcErasedPointer>,
    weaks: Vec<EphemeronPointer>,
    weak_maps: Vec<ErasedWeakMapBoxPointer>,
}

impl Drop for BoaGc {
    fn drop(&mut self) {
        Collector::dump(self);
    }
}

// Whether or not the thread is currently in the sweep phase of garbage collection.
// During this phase, attempts to dereference a `Gc<T>` pointer will trigger a panic.
/// `DropGuard` flags whether the `Collector` is currently running `Collector::sweep()` or `Collector::dump()`.
///
/// While the `DropGuard` is active, no `GcBox` may be dereferenced or accessed, as doing so could
/// cause undefined behavior.
#[derive(Debug, Clone)]
struct DropGuard;

impl DropGuard {
    fn new() -> Self {
        GC_DROPPING.with(|dropping| dropping.set(true));
        Self
    }
}

impl Drop for DropGuard {
    fn drop(&mut self) {
        GC_DROPPING.with(|dropping| dropping.set(false));
    }
}

/// Returns `true` if it is safe for a type to run [`Finalize::finalize`].
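///
/// # Example
///
/// A rough sketch of checking this inside a custom [`Finalize`] implementation (the
/// `Resource` type is illustrative):
///
/// ```
/// use boa_gc::{finalizer_safe, Finalize, Trace};
///
/// #[derive(Trace)]
/// struct Resource;
///
/// impl Finalize for Resource {
///     fn finalize(&self) {
///         // While the collector is sweeping, other GC'd data must not be touched.
///         if finalizer_safe() {
///             // Release any external resources here.
///         }
///     }
/// }
/// ```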
#[must_use]
#[inline]
pub fn finalizer_safe() -> bool {
    GC_DROPPING.with(|dropping| !dropping.get())
}

/// The `Allocator` handles allocation of garbage collected values.
///
/// The allocator can trigger a garbage collection.
#[derive(Debug, Clone, Copy)]
struct Allocator;

impl Allocator {
    /// Allocates a new garbage collected value on the garbage collector's heap.
    fn alloc_gc<T: Trace>(value: GcBox<T>) -> NonNull<GcBox<T>> {
        let element_size = size_of_val::<GcBox<T>>(&value);
        BOA_GC.with(|st| {
            let mut gc = st.borrow_mut();

            Self::manage_state(&mut gc);
            // Safety: value cannot be a null pointer, since `Box` cannot return null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) };
            let erased: NonNull<GcBox<NonTraceable>> = ptr.cast();

            gc.strongs.push(erased);
            gc.runtime.bytes_allocated += element_size;

            ptr
        })
    }

    fn alloc_ephemeron<K: Trace + ?Sized, V: Trace>(
        value: EphemeronBox<K, V>,
    ) -> NonNull<EphemeronBox<K, V>> {
        let element_size = size_of_val::<EphemeronBox<K, V>>(&value);
        BOA_GC.with(|st| {
            let mut gc = st.borrow_mut();

            Self::manage_state(&mut gc);
            // Safety: value cannot be a null pointer, since `Box` cannot return null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) };
            let erased: NonNull<dyn ErasedEphemeronBox> = ptr;

            gc.weaks.push(erased);
            gc.runtime.bytes_allocated += element_size;

            ptr
        })
    }

    fn alloc_weak_map<K: Trace + ?Sized, V: Trace + Clone>() -> WeakMap<K, V> {
        let weak_map = WeakMap {
            inner: Gc::new(GcRefCell::new(RawWeakMap::new())),
        };
        let weak = WeakGc::new(&weak_map.inner);

        BOA_GC.with(|st| {
            let mut gc = st.borrow_mut();

            let weak_box = WeakMapBox { map: weak };

            // Safety: value cannot be a null pointer, since `Box` cannot return null pointers.
            let ptr = unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(weak_box))) };
            let erased: ErasedWeakMapBoxPointer = ptr;

            gc.weak_maps.push(erased);

            weak_map
        })
    }

    fn manage_state(gc: &mut BoaGc) {
        if gc.runtime.bytes_allocated > gc.config.threshold {
            Collector::collect(gc);

            // Post-collection check:
            // If the allocated bytes still exceed `used_space_percentage` of the threshold,
            // grow the threshold.
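            //
            // For illustration (with the default config): after collecting against a 1 MiB
            // threshold, if ~800 KiB are still allocated, that exceeds 70% of the threshold
            // (~717 KiB), so the threshold is raised to `bytes_allocated / 70 * 100`
            // (~1.17 MB), keeping live data at roughly 70% of the new threshold.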
            if gc.runtime.bytes_allocated
                > gc.config.threshold / 100 * gc.config.used_space_percentage
            {
                gc.config.threshold =
                    gc.runtime.bytes_allocated / gc.config.used_space_percentage * 100;
            }
        }
    }
}

struct Unreachables {
    strong: Vec<GcErasedPointer>,
    weak: Vec<NonNull<dyn ErasedEphemeronBox>>,
}

/// This collector currently functions in four main phases:
///
/// Mark -> Finalize -> Mark -> Sweep
///
/// 1. Mark nodes as reachable.
/// 2. Finalize the unreachable nodes.
/// 3. Mark again because `Finalize::finalize` can potentially resurrect dead nodes.
/// 4. Sweep and drop all dead nodes.
///
/// A better approach in a more concurrent structure may be to reorder the phases:
///
/// Mark -> Sweep -> Finalize
struct Collector;

impl Collector {
    /// Run a collection on the full heap.
    fn collect(gc: &mut BoaGc) {
        gc.runtime.collections += 1;

        Self::trace_non_roots(gc);

        let mut tracer = Tracer::new();

        let unreachables = Self::mark_heap(&mut tracer, &gc.strongs, &gc.weaks, &gc.weak_maps);

        assert!(tracer.is_empty(), "The queue should be empty");

        // Only finalize if there are any unreachable nodes.
        if !unreachables.strong.is_empty() || !unreachables.weak.is_empty() {
            // Finalize all the unreachable nodes.
            // SAFETY: All passed pointers are valid, since we won't deallocate until `Self::sweep`.
            unsafe { Self::finalize(unreachables) };

            // Reuse the tracer's already allocated capacity.
            let _final_unreachables =
                Self::mark_heap(&mut tracer, &gc.strongs, &gc.weaks, &gc.weak_maps);
        }

        // SAFETY: Every node pointer in `strongs` and `weaks` is valid per the invariants of our GC.
        unsafe {
            Self::sweep(
                &mut gc.strongs,
                &mut gc.weaks,
                &mut gc.runtime.bytes_allocated,
            );
        }

        // Weak maps have to be cleared after the sweep, since the process dereferences GcBoxes.
        gc.weak_maps.retain(|w| {
            // SAFETY: The `Allocator` ensures the validity of every node in `weak_maps`.
            let node_ref = unsafe { w.as_ref() };

            if node_ref.is_live() {
                node_ref.clear_dead_entries();

                true
            } else {
                // SAFETY:
                // The `Allocator` must always ensure every node is a valid, non-null pointer that
                // was allocated by `Box::into_raw(Box::new(..))`.
                let _unmarked_node = unsafe { Box::from_raw(w.as_ptr()) };

                false
            }
        });

        gc.strongs.shrink_to(gc.strongs.len() >> 2);
        gc.weaks.shrink_to(gc.weaks.len() >> 2);
        gc.weak_maps.shrink_to(gc.weak_maps.len() >> 2);
    }

    fn trace_non_roots(gc: &BoaGc) {
        // Count all the `Gc` handles that live inside the GC heap itself. Any node with more
        // total references than heap-internal handles must be referenced from outside the heap,
        // and those nodes are the roots.
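        //
        // For example (illustrative): a node referenced by 3 `Gc` handles, 2 of which are stored
        // inside other GC'd values, ends up with a non-root count of 2; since 3 > 2, at least one
        // handle is held from outside the heap (e.g. on the stack), so the node is a root.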
        for node in &gc.strongs {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let trace_non_roots_fn = unsafe { node.as_ref() }.trace_non_roots_fn();

            // SAFETY: The function pointer is appropriate for this node type because we extract it from its vtable.
            unsafe {
                trace_non_roots_fn(*node);
            }
        }

        for eph in &gc.weaks {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let eph_ref = unsafe { eph.as_ref() };
            eph_ref.trace_non_roots();
        }
    }

    /// Walk the heap and mark any nodes deemed reachable.
    fn mark_heap(
        tracer: &mut Tracer,
        strongs: &[GcErasedPointer],
        weaks: &[EphemeronPointer],
        weak_maps: &[ErasedWeakMapBoxPointer],
    ) -> Unreachables {
        // Walk the heap, tracing and marking the nodes.
        let mut strong_dead = Vec::new();
        let mut pending_ephemerons = Vec::new();

        // === Preliminary mark phase ===
        //
        // 0. Get the naive list of possibly dead nodes.
        for node in strongs {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let node_ref = unsafe { node.as_ref() };
            if node_ref.is_rooted() {
                tracer.enqueue(*node);

                // SAFETY: all nodes must be valid as this phase cannot drop any node.
                unsafe {
                    tracer.trace_until_empty();
                }
            } else if !node_ref.is_marked() {
                strong_dead.push(*node);
            }
        }

        // 0.1. Early return if there are no ephemerons in the GC.
        if weaks.is_empty() {
            strong_dead.retain_mut(|node| {
                // SAFETY: node must be valid as this phase cannot drop any node.
                unsafe { !node.as_ref().is_marked() }
            });
            return Unreachables {
                strong: strong_dead,
                weak: Vec::new(),
            };
        }

        // === Weak mark phase ===
        //
        // 1. Get the naive list of ephemerons that are supposedly dead or whose key is dead, and
        //    trace all the ephemerons that are rooted and have live keys. Also remove from this
        //    list the ephemerons that are marked but whose value is dead.
        for eph in weaks {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let eph_ref = unsafe { eph.as_ref() };
            let header = eph_ref.header();
            if header.is_rooted() {
                header.mark();
            }
            // SAFETY: the garbage collector ensures `eph_ref` always points to valid data.
            if unsafe { !eph_ref.trace(tracer) } {
                pending_ephemerons.push(*eph);
            }

            // SAFETY: all nodes must be valid as this phase cannot drop any node.
            unsafe {
                tracer.trace_until_empty();
            }
        }

        // 2. Trace all the weak pointers in the live weak maps to make sure they do not get swept.
        for w in weak_maps {
            // SAFETY: node must be valid as this phase cannot drop any node.
            let node_ref = unsafe { w.as_ref() };

            // SAFETY: The garbage collector ensures that all nodes are valid.
            unsafe { node_ref.trace(tracer) };

            // SAFETY: all nodes must be valid as this phase cannot drop any node.
            unsafe {
                tracer.trace_until_empty();
            }
        }

        // 3. Iterate through all pending ephemerons, removing the ones which have been successfully
        //    traced. If there are no changes in the pending ephemerons list, it means that there are
        //    no more reachable ephemerons from the remaining ephemeron values.
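        //
        //    For example (illustrative): if ephemeron A's value is the key of ephemeron B, B can
        //    only be traced after A's key has been found live, which is why this runs as a
        //    fixpoint loop until the pending list stops shrinking.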
        let mut previous_len = pending_ephemerons.len();
        loop {
            pending_ephemerons.retain_mut(|eph| {
                // SAFETY: node must be valid as this phase cannot drop any node.
                let eph_ref = unsafe { eph.as_ref() };
                // SAFETY: the garbage collector ensures `eph_ref` always points to valid data.
                let is_key_marked = unsafe { !eph_ref.trace(tracer) };

                // SAFETY: all nodes must be valid as this phase cannot drop any node.
                unsafe {
                    tracer.trace_until_empty();
                }

                is_key_marked
            });

            if previous_len == pending_ephemerons.len() {
                break;
            }

            previous_len = pending_ephemerons.len();
        }

        // 4. The remaining list should contain the ephemerons that are either unreachable or whose
        //    key is dead. Clean up the strong pointers, since this procedure could have marked some
        //    more strong pointers.
        strong_dead.retain_mut(|node| {
            // SAFETY: node must be valid as this phase cannot drop any node.
            unsafe { !node.as_ref().is_marked() }
        });

        Unreachables {
            strong: strong_dead,
            weak: pending_ephemerons,
        }
    }

    /// # Safety
    ///
    /// Passing a `strong` or a `weak` vec with invalid pointers will result in undefined behavior.
    unsafe fn finalize(unreachables: Unreachables) {
        for node in unreachables.strong {
            // SAFETY: The caller must ensure all pointers inside `unreachables.strong` are valid.
            let node_ref = unsafe { node.as_ref() };
            let run_finalizer_fn = node_ref.run_finalizer_fn();

            // SAFETY: The function pointer is appropriate for this node type because we extract it from its vtable.
            unsafe {
                run_finalizer_fn(node);
            }
        }
        for node in unreachables.weak {
            // SAFETY: The caller must ensure all pointers inside `unreachables.weak` are valid.
            let node = unsafe { node.as_ref() };
            node.finalize_and_clear();
        }
    }

    /// # Safety
    ///
    /// - Providing an invalid pointer in `strong` or `weak`, or in any of the node headers,
    ///   will result in undefined behavior.
    /// - Providing a list of pointers that weren't allocated by `Box::into_raw(Box::new(..))`
    ///   will result in undefined behavior.
    unsafe fn sweep(
        strong: &mut Vec<GcErasedPointer>,
        weak: &mut Vec<EphemeronPointer>,
        total_allocated: &mut usize,
    ) {
        let _guard = DropGuard::new();

        strong.retain(|node| {
            // SAFETY: The caller must ensure the validity of every node in `strong`.
            let node_ref = unsafe { node.as_ref() };
            if node_ref.is_marked() {
                node_ref.header.unmark();
                node_ref.reset_non_root_count();

                true
            } else {
                // SAFETY: The algorithm ensures only unmarked/unreachable pointers are dropped.
                // The caller must ensure all pointers were allocated by `Box::into_raw(Box::new(..))`.
                let drop_fn = node_ref.drop_fn();
                let size = node_ref.size();
                *total_allocated -= size;

                // SAFETY: The function pointer is appropriate for this node type because we extract it from its vtable.
                unsafe {
                    drop_fn(*node);
                }

                false
            }
        });

        weak.retain(|eph| {
            // SAFETY: The caller must ensure the validity of every node in `weak`.
            let eph_ref = unsafe { eph.as_ref() };
            let header = eph_ref.header();
            if header.is_marked() {
                header.unmark();
                header.reset_non_root_count();

                true
            } else {
                // SAFETY: The algorithm ensures only unmarked/unreachable pointers are dropped.
                // The caller must ensure all pointers were allocated by `Box::into_raw(Box::new(..))`.
                let unmarked_eph = unsafe { Box::from_raw(eph.as_ptr()) };
                let unallocated_bytes = size_of_val(&*unmarked_eph);
                *total_allocated -= unallocated_bytes;

                false
            }
        });
    }

    // Clean up the heap when `BoaGc` is dropped.
    fn dump(gc: &mut BoaGc) {
        // Weak maps have to be dropped first, since the process dereferences GcBoxes.
        // This can be done without initializing a dropguard since no `GcBox`es are being dropped.
        for node in mem::take(&mut gc.weak_maps) {
            // SAFETY:
            // The `Allocator` must always ensure every node is a valid, non-null pointer that
            // was allocated by `Box::into_raw(Box::new(..))`.
            let _unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
        }

        // Initialize a dropguard, since the rest of this function drops `GcBox`es; it is only
        // invoked while `BOA_GC` itself is being dropped.
        let _guard = DropGuard::new();

        for node in mem::take(&mut gc.strongs) {
            // SAFETY:
            // The `Allocator` must always ensure every node is a valid, non-null pointer that
            // was allocated by `Box::into_raw(Box::new(..))`.
            let drop_fn = unsafe { node.as_ref() }.drop_fn();

            // SAFETY: The function pointer is appropriate for this node type because we extract it from its vtable.
            unsafe {
                drop_fn(node);
            }
        }

        for node in mem::take(&mut gc.weaks) {
            // SAFETY:
            // The `Allocator` must always ensure every node is a valid, non-null pointer that
            // was allocated by `Box::into_raw(Box::new(..))`.
            let _unmarked_node = unsafe { Box::from_raw(node.as_ptr()) };
        }
    }
}

/// Forcefully runs a garbage collection of all unreachable nodes.
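///
/// # Example
///
/// A small sketch of triggering a collection by hand (the `Data` type is illustrative):
///
/// ```
/// use boa_gc::{Finalize, Gc, Trace};
///
/// #[derive(Trace, Finalize)]
/// struct Data(u32);
///
/// let data = Gc::new(Data(7));
/// drop(data);
///
/// // With every handle to `Data` gone, a forced collection can reclaim it.
/// boa_gc::force_collect();
/// ```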
pub fn force_collect() {
    BOA_GC.with(|current| {
        let mut gc = current.borrow_mut();

        if gc.runtime.bytes_allocated > 0 {
            Collector::collect(&mut gc);
        }
    });
}

#[cfg(test)]
mod test;

/// Returns `true` if any weak maps are currently allocated.
#[cfg(test)]
#[must_use]
pub fn has_weak_maps() -> bool {
    BOA_GC.with(|current| {
        let gc = current.borrow();

        !gc.weak_maps.is_empty()
    })
}