// mica_language/gc.rs

1//! Garbage collection.
2
3use std::alloc::{handle_alloc_error, Layout};
4use std::borrow::Borrow;
5use std::cell::Cell;
6use std::ops::Deref;
7use std::{fmt, mem, ptr};
8
9use crate::bytecode::DispatchTable;
10use crate::value::{RawValue, ValueKind};
11
/// The strategy used for running the GC automatically.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutoStrategy {
   /// Don't run the GC when `auto_collect` is called.
   Disabled,
   /// Always run the GC when `auto_collect` is called.
   AlwaysRun,
   /// Run the GC if its `allocated_bytes` exceeds `next_run`, and grow `next_run` to
   /// `allocated_bytes * growth_factor / 256` after collection.
   Ceiling {
      /// The allocation threshold, in bytes, that triggers the next collection.
      next_run: usize,
      /// Fixed-point growth multiplier in 1/256ths (e.g. 384 = 1.5x).
      growth_factor: usize,
   },
}
26
27impl AutoStrategy {
28   /// Returns whether the strategy's running condition is satisfied.
29   fn satisfied(&self, gc: &Memory) -> bool {
30      match self {
31         Self::Disabled => false,
32         Self::AlwaysRun => true,
33         Self::Ceiling { next_run, .. } => gc.allocated_bytes >= *next_run,
34      }
35   }
36
37   /// Returns an updated version of the strategy after a successful collection.
38   fn update(self, gc: &Memory) -> Self {
39      if let Self::Ceiling { growth_factor, .. } = self {
40         Self::Ceiling {
41            next_run: gc.allocated_bytes * growth_factor / 256,
42            growth_factor,
43         }
44      } else {
45         self
46      }
47   }
48}
49
50/// An allocator and garbage collector for memory.
51pub struct Memory {
52   /// Determines when the next GC cycle should run.
53   pub auto_strategy: AutoStrategy,
54   allocated_bytes: usize,
55
56   /// Things managed by the GC.
57   allocations: Vec<GcRaw<()>>,
58
59   /// The "gray stack". Without going too much into what colors mean in GCs, it's used as a way of
60   /// combatting stack overflows by doing actual work on the heap.
61   gray_stack: Vec<RawValue>,
62}
63
impl Memory {
   /// Creates a new GC.
   ///
   /// The default auto-collection strategy is a 64 KiB ceiling that grows by 1.5x
   /// (expressed in 1/256ths) after each collection.
   pub fn new() -> Self {
      Self {
         auto_strategy: AutoStrategy::Ceiling {
            next_run: 64 * 1024, // 64 KiB
            growth_factor: 384,  // = 1.5 * 256
         },
         allocated_bytes: 0,

         allocations: Vec::new(),

         // NOTE: Preallocate some stack area here to cut down on GC times in typical programs that
         // don't allocate deeply recursive objects with lots of fields.
         // The value of 32 was picked as a sweet spot. Having less or more causes collection times
         // to be slower for some reason.
         gray_stack: Vec::with_capacity(32),
      }
   }

   /// Returns the amount of bytes currently allocated by the GC.
   ///
   /// This counts only the payload of each allocation (`size_of::<T>()`); the
   /// per-allocation `GcMem` header overhead is not included (see `register`).
   pub fn allocated_bytes(&self) -> usize {
      self.allocated_bytes
   }

   /// Marks and sweeps unused allocations.
   ///
   /// # Safety
   /// All root pointers in values yielded by the iterator must be valid.
   pub(crate) unsafe fn collect(&mut self, roots: impl Iterator<Item = RawValue>) {
      // Clears the `reachable` flag on every allocation so the mark phase starts from
      // a clean slate.
      unsafe fn mark_all_unreachable<T>(memories: impl Iterator<Item = GcRaw<T>>) {
         for memory in memories {
            let mem = memory.get_mem();
            mem.reachable.set(false);
         }
      }

      // Releases every allocation whose `reachable` flag was not set during the mark
      // phase, keeping the byte counter in sync. `swap_remove` keeps removal O(1);
      // the order of `allocations` doesn't matter, which is also why `i` is only
      // advanced when the current element is kept.
      unsafe fn sweep_unreachable<T>(memories: &mut Vec<GcRaw<T>>, allocated_bytes: &mut usize) {
         let mut i = 0;
         while i < memories.len() {
            let memory = memories[i];
            let mem = memory.get_mem();
            if !mem.reachable.get() {
               let data_size = mem.data_size;
               GcMem::release(memory);
               *allocated_bytes -= data_size;
               #[cfg(feature = "trace-gc")]
               {
                  // NOTE: `memory.0` is only formatted as an address here, never
                  // dereferenced; `release` may already have freed the allocation.
                  println!(
                     "gc | freed {} bytes ({:p}), now at {}",
                     data_size, memory.0, *allocated_bytes
                  );
               }
               memories.swap_remove(i);
            } else {
               i += 1;
            }
         }
      }

      // NOTE: Marking all objects as unreachable beforehand is *somehow* faster than doing it
      // during the sweep phase. I believe it might have something to do with the objects being
      // loaded into the CPU cache but I'm really not sure.
      mark_all_unreachable(self.allocations.iter().copied());
      for value in roots {
         self.gray_stack.push(value);
         self.mark_all_gray_reachable();
      }
      sweep_unreachable(&mut self.allocations, &mut self.allocated_bytes);
   }

   /// Recursively (as in, actually recursively) marks the dtable and its methods reachable.
   ///
   /// The `reachable` check at the top doubles as the recursion guard: an
   /// already-marked dtable is never traversed again.
   unsafe fn mark_dtable_reachable_rec(&mut self, mem: GcRaw<DispatchTable>) {
      if !mem.get_mem().reachable.get() {
         mem.mark_reachable();
         let dtable = mem.get();
         if let Some(instance) = dtable.instance {
            // NOTE: Recurring here is okay because we never have dtables that are more than two
            // levels deep.
            self.mark_dtable_reachable_rec(instance);
         }
         for method in dtable.methods() {
            self.gray_stack.push(RawValue::from(method));
            self.mark_all_gray_reachable();
         }
      }
   }

   /// Recursively marks all values on the gray stack reachable, beginning with the bottom-most
   /// value.
   ///
   /// Each composite value is marked *before* its children are pushed, so cycles
   /// terminate: a value that is already `reachable` is skipped without re-pushing
   /// its children.
   unsafe fn mark_all_gray_reachable(&mut self) {
      // NOTE: Unlike `mark_dtable_reachable_rec` this function does not actually recur.
      // This is to prevent scripters from trivially causing a stack overflow.
      while let Some(value) = self.gray_stack.pop() {
         match value.kind() {
            // Primitives own no GC memory; nothing to mark.
            ValueKind::Nil | ValueKind::Boolean | ValueKind::Number => (),
            ValueKind::String => {
               // Strings have no children, so an unconditional mark suffices.
               let raw = value.get_raw_string_unchecked();
               raw.mark_reachable();
            }
            ValueKind::Function => {
               let raw = value.get_raw_function_unchecked();
               if !raw.get_mem().reachable.get() {
                  raw.mark_reachable();
                  let closure = raw.get();
                  // Captured upvalues keep their referents alive.
                  for upvalue in &closure.captures {
                     self.gray_stack.push(upvalue.get());
                  }
               }
            }
            ValueKind::List => {
               let raw = value.get_raw_list_unchecked();
               if !raw.get_mem().reachable.get() {
                  raw.mark_reachable();
                  let elements = raw.get().as_slice();
                  for &element in elements {
                     self.gray_stack.push(element);
                  }
               }
            }
            ValueKind::Dict => {
               let raw = value.get_raw_dict_unchecked();
               if !raw.get_mem().reachable.get() {
                  raw.mark_reachable();
                  // Both keys and values may reference GC memory.
                  for (key, value) in raw.get().iter() {
                     self.gray_stack.push(key);
                     self.gray_stack.push(value);
                  }
               }
            }
            ValueKind::Struct => {
               let raw = value.get_raw_struct_unchecked();
               if !raw.get_mem().reachable.get() {
                  raw.mark_reachable();
                  let struct_v = raw.get();
                  let dtable = *raw.get().dtable.get();
                  self.mark_dtable_reachable_rec(dtable);
                  for field in struct_v.fields() {
                     self.gray_stack.push(field);
                  }
               }
            }
            ValueKind::Trait => {
               let raw = value.get_raw_trait_unchecked();
               if !raw.get_mem().reachable.get() {
                  raw.mark_reachable();
                  self.mark_dtable_reachable_rec(raw.get().dtable);
               }
            }
            // TODO: Allow user data to specify its own references.
            ValueKind::UserData => {
               let raw = value.get_raw_user_data_unchecked();
               if !raw.get_mem().reachable.get() {
                  raw.mark_reachable();
               }
               // NOTE: The dtable is traversed even when the user data itself was
               // already marked; `mark_dtable_reachable_rec` bails out early if the
               // dtable is already reachable, so this stays cheap.
               let dtable = raw.get().dtable_gcraw();
               self.mark_dtable_reachable_rec(dtable);
            }
         }
      }
   }

   /// Performs an _automatic_ collection.
   ///
   /// Automatic collections only trigger upon specific conditions, such as a specific amount of
   /// generations passing.
   ///
   /// # Safety
   /// Same as [`Memory::collect`]: all root pointers yielded by the iterator must be valid.
   pub(crate) unsafe fn auto_collect(&mut self, roots: impl Iterator<Item = RawValue>) {
      #[cfg(feature = "trace-gc")]
      {
         println!(
            "gc | auto_collect called with strategy {:?}",
            self.auto_strategy
         );
      }
      if self.auto_strategy.satisfied(self) {
         #[cfg(feature = "trace-gc")]
         {
            println!("gc | strategy satisfied, collecting",);
         }
         self.collect(roots);
         // Let the strategy adapt (e.g. grow the ceiling) based on post-collection usage.
         self.auto_strategy = self.auto_strategy.update(self);
      }
   }

   /// Registers `mem` inside this GC.
   ///
   /// `allocated_bytes` grows by `size_of::<T>()`, which matches the `data_size`
   /// recorded in the `GcMem` header that the sweep phase later subtracts.
   fn register<T>(&mut self, mem: GcRaw<T>) {
      self.allocations.push(mem.erase_type());
      self.allocated_bytes += std::mem::size_of::<T>();
      #[cfg(feature = "trace-gc")]
      {
         println!(
            "gc | allocated {} bytes, now at {}",
            std::mem::size_of::<T>(),
            self.allocated_bytes
         );
      }
   }

   /// Allocates a new `GcRaw<T>` managed by this GC.
   pub fn allocate<T>(&mut self, data: T) -> GcRaw<T> {
      let gcmem = GcMem::allocate(data, drop_finalizer::<T>);
      self.register(gcmem);
      gcmem
   }

   /// If the provided `Gc<T>` isn't managed by a GC, makes it managed by this GC.
   /// Otherwise does nothing.
   pub fn manage<T>(&mut self, gc: &Gc<T>) -> GcRaw<T> {
      unsafe {
         let mem = gc.mem.get_mem();
         if !mem.managed_by_gc.get() {
            self.register(gc.mem);
            mem.managed_by_gc.set(true);
         }
         gc.mem
      }
   }
}
282
283impl Default for Memory {
284   fn default() -> Self {
285      Self::new()
286   }
287}
288
impl Drop for Memory {
   fn drop(&mut self) {
      // With no roots, every managed allocation is unreachable, so this frees (or
      // un-manages, when foreign `Gc` handles remain — see `GcMem::release`)
      // everything the GC still owns.
      // SAFETY: an empty root iterator trivially yields only valid pointers.
      unsafe { self.collect(std::iter::empty()) }
   }
}
294
/// An allocation with metadata.
///
/// `#[repr(C)]` keeps the header fields at their declaration-order offsets in front
/// of `data`.
#[repr(C, align(8))]
pub(crate) struct GcMem<T> {
   /// Whether the memory is still reachable.
   /// This is used by the mark phase to determine which memories should (not) be swept.
   reachable: Cell<bool>,
   /// Whether the memory is still being managed by the garbage collector.
   managed_by_gc: Cell<bool>,
   /// Foreign references to this memory.
   rc: Cell<usize>,
   /// The "finalizer", its task is to deinitialize the data stored in the `GcMem<T>`.
   finalizer: unsafe fn(*mut u8),
   /// The size of the allocated data.
   data_size: usize,
   /// The layout that was used for allocating this `GcMem<T>`; this is needed to deallocate
   /// without triggering undefined behavior.
   layout: Layout,
   /// The data.
   data: T,
}
315
316impl<T> fmt::Debug for GcMem<T> {
317   fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
318      f.debug_struct("GcMem")
319         .field("reachable", &self.reachable)
320         .field("managed_by_gc", &self.managed_by_gc)
321         .field("rc", &self.rc)
322         .field("finalizer", &self.finalizer)
323         .finish_non_exhaustive()
324   }
325}
326
impl<T> GcMem<T> {
   /// Returns the allocation layout of a `GcMem<T>`.
   fn layout() -> Layout {
      Layout::new::<Self>()
   }

   /// Allocates a `GcMem<T>`.
   ///
   /// The returned memory starts out managed by the GC with a zero foreign refcount.
   /// Aborts via [`handle_alloc_error`] if the system allocator fails.
   fn allocate(data: T, finalizer: unsafe fn(*mut u8)) -> GcRaw<T> {
      let layout = Self::layout();
      let mem = Self {
         // NOTE: `reachable` is initially set to `false` because reachability is only determined
         // during the marking phase.
         reachable: Cell::new(false),
         managed_by_gc: Cell::new(true),
         rc: Cell::new(0),
         finalizer,
         data_size: std::mem::size_of::<T>(),
         layout,
         data,
      };
      let allocation = unsafe { std::alloc::alloc(layout) } as *mut Self;
      if allocation.is_null() {
         handle_alloc_error(layout);
      }
      // `ptr::write` moves the fully-initialized value into the raw allocation
      // without reading (and thus without dropping) the uninitialized destination.
      unsafe { ptr::write(allocation, mem) }
      #[cfg(feature = "trace-gc")]
      {
         println!(
            "gcmem | allocated {:p}, T: {}",
            allocation,
            std::any::type_name::<T>()
         );
      }
      GcRaw(allocation as *const _)
   }

   /// Deallocates a `GcMem<T>`.
   ///
   /// Runs the stored finalizer on the payload, then frees the block using the
   /// layout recorded at allocation time.
   ///
   /// # Safety
   /// `mem` must be a pointer returned by [`allocate`][`Self::allocate`].
   unsafe fn deallocate(mem: GcRaw<T>) {
      let mem = mem.0 as *mut GcMem<T>;
      #[cfg(feature = "trace-gc")]
      {
         println!("gcmem | deallocating {:p}", mem);
      }
      // Copy the layout out before freeing; reading it after `dealloc` would be a
      // use-after-free.
      let layout;
      {
         let mem = &mut *mem;
         (mem.finalizer)(&mut mem.data as *mut T as *mut u8);
         layout = mem.layout;
      }
      // Ugh, that cast from *const to *mut hurts.
      std::alloc::dealloc(mem as *mut u8, layout)
   }

   /// Deallocates the given memory or marks it as unmanaged if there are foreign references to it.
   ///
   /// When foreign `Gc<T>` handles still exist (`rc > 0`), ownership is handed over
   /// to them: the memory is flagged unmanaged, and the last `Gc<T>` drop frees it.
   unsafe fn release(memory: GcRaw<T>) {
      #[cfg(feature = "trace-gc")]
      {
         println!("gcmem | releasing {:p}", memory.0);
      }
      let mem = memory.get_mem();
      if mem.rc.get() > 0 {
         mem.managed_by_gc.set(false);
      } else {
         GcMem::deallocate(memory);
      }
   }
}
397
/// The default finalizer: drops the `T` stored at `x` in place.
///
/// # Safety
/// `x` must point to a valid, initialized `T` that has not been dropped yet.
unsafe fn drop_finalizer<T>(x: *mut u8) {
   #[cfg(feature = "trace-gc")]
   {
      println!("drop | T: {}", std::any::type_name::<T>());
   }
   ptr::drop_in_place(x.cast::<T>());
}
406
/// An unmanaged reference to GC memory.
///
/// Be careful around this, as values of this type must not outlive the [`Memory`] they were born
/// in.
///
/// `#[repr(transparent)]` guarantees this wrapper is layout-compatible with the raw
/// pointer it contains.
#[derive(Debug)]
#[repr(transparent)]
pub struct GcRaw<T>(*const GcMem<T>);
414
415impl<T> GcRaw<T> {
416   /// Returns a reference to the data inside the `GcRaw<T>`.
417   ///
418   /// # Safety
419   /// The caller must ensure that the `GcRaw<T>` points to existing data.
420   pub unsafe fn get<'a>(&self) -> &'a T {
421      let mem = &*self.0;
422      &mem.data
423   }
424
425   pub(crate) fn from_raw(raw: *const GcMem<T>) -> Self {
426      Self(raw)
427   }
428
429   pub(crate) fn get_raw(&self) -> *const GcMem<T> {
430      self.0
431   }
432
433   /// Returns a reference to the `GcMem<T>` behind this `GcRaw<T>`.
434   ///
435   /// # Safety
436   /// The caller must ensure that the `GcRaw<T>` points to existing data.
437   unsafe fn get_mem(&self) -> &GcMem<T> {
438      &*self.0
439   }
440
441   /// Marks the reference as still reachable.
442   ///
443   /// # Safety
444   /// The caller must ensure that the `GcRaw<T>` points to existing data.
445   unsafe fn mark_reachable(&self) {
446      self.get_mem().reachable.set(true);
447   }
448
449   /// Returns a `GcRaw` with the type erased.
450   ///
451   /// This operation is safe because it cannot trigger undefined behavior.
452   /// Using the returned value though, can.
453   fn erase_type(self) -> GcRaw<()> {
454      unsafe { mem::transmute(self) }
455   }
456}
457
458impl<T> Clone for GcRaw<T> {
459   fn clone(&self) -> Self {
460      Self(self.0)
461   }
462}
463
464impl<T> Copy for GcRaw<T> {}
465
466impl<T> PartialEq for GcRaw<T> {
467   fn eq(&self, other: &Self) -> bool {
468      self.0 == other.0
469   }
470}
471
472impl<T> Eq for GcRaw<T> {}
473
/// An automatically managed, safe reference to GC memory.
///
/// `Gc<T>` is reference-counted via the `rc` field of the underlying `GcMem`: the
/// allocation stays alive while any handle exists, even after the GC releases it.
pub struct Gc<T> {
   /// The shared allocation; all bookkeeping lives in its `GcMem` header.
   mem: GcRaw<T>,
}
478
479impl<T> Gc<T> {
480   /// Creates a new `Gc` that is not managed by a garbage collector.
481   pub fn new(data: T) -> Self {
482      let mem = GcMem::allocate(data, drop_finalizer::<T>);
483      unsafe {
484         let mem = mem.get_mem();
485         mem.managed_by_gc.set(false);
486         mem.rc.set(1);
487      }
488      Self { mem }
489   }
490
491   /// Constructs a `Gc` handle from a raw pointer to a `GcMem`.
492   ///
493   /// # Safety
494   /// Assumes the pointer passed points to valid memory.
495   pub unsafe fn from_raw(raw: GcRaw<T>) -> Self {
496      let mem = &*raw.0;
497      mem.rc.set(mem.rc.get() + 1);
498      Self { mem: raw }
499   }
500
501   /// Returns the raw reference to the GC'd memory without affecting the reference count.
502   ///
503   /// Note that this is an associated function, and must be called like `Gc::as_raw(&gc)`.
504   pub fn as_raw(gc: &Self) -> GcRaw<T> {
505      gc.mem
506   }
507}
508
impl<T> AsRef<T> for Gc<T> {
   fn as_ref(&self) -> &T {
      // SAFETY: a live `Gc` holds a strong reference (`rc >= 1`), so the allocation
      // is still valid.
      unsafe { self.mem.get() }
   }
}

impl<T> Borrow<T> for Gc<T> {
   fn borrow(&self) -> &T {
      // SAFETY: as in `as_ref` — the handle keeps the allocation alive.
      unsafe { self.mem.get() }
   }
}

impl<T> Deref for Gc<T> {
   type Target = T;

   fn deref(&self) -> &Self::Target {
      // SAFETY: as in `as_ref` — the handle keeps the allocation alive.
      unsafe { self.mem.get() }
   }
}
528
529impl<T> Clone for Gc<T> {
530   fn clone(&self) -> Gc<T> {
531      unsafe { Gc::from_raw(self.mem) }
532   }
533}
534
impl<T> Drop for Gc<T> {
   fn drop(&mut self) {
      let mem = unsafe { &*self.mem.0 };
      // Give up this handle's strong reference.
      mem.rc.set(mem.rc.get() - 1);
      // Free only when no handles remain AND the GC has already released the memory
      // (see `GcMem::release`); otherwise the GC's sweep remains responsible for it.
      if mem.rc.get() == 0 && !mem.managed_by_gc.get() {
         unsafe { GcMem::deallocate(self.mem) }
      }
   }
}
544
impl<T> fmt::Debug for Gc<T>
where
   T: fmt::Debug,
{
   fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
      // SAFETY: a live `Gc` keeps its allocation valid; delegate to the payload's impl.
      unsafe { fmt::Debug::fmt(self.mem.get(), f) }
   }
}

impl<T> fmt::Display for Gc<T>
where
   T: fmt::Display,
{
   fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
      // SAFETY: as above — the handle keeps the allocation alive.
      unsafe { fmt::Display::fmt(self.mem.get(), f) }
   }
}
562
// The comparison and hashing impls below all delegate to the payload `T`, so two
// distinct `Gc` handles compare equal whenever their payloads do — value semantics,
// not pointer identity.

impl<T> PartialEq for Gc<T>
where
   T: PartialEq,
{
   fn eq(&self, other: &Self) -> bool {
      // SAFETY: both handles are alive, so both payloads are valid.
      unsafe { self.mem.get() == other.mem.get() }
   }
}

impl<T> Eq for Gc<T> where T: Eq {}

impl<T> PartialOrd for Gc<T>
where
   T: PartialOrd,
{
   fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
      // SAFETY: both handles are alive, so both payloads are valid.
      unsafe { self.mem.get().partial_cmp(other.mem.get()) }
   }
}

impl<T> Ord for Gc<T>
where
   T: Ord,
{
   fn cmp(&self, other: &Self) -> std::cmp::Ordering {
      // SAFETY: both handles are alive, so both payloads are valid.
      unsafe { self.mem.get().cmp(other.mem.get()) }
   }
}

impl<T> std::hash::Hash for Gc<T>
where
   T: std::hash::Hash,
{
   fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
      // SAFETY: the handle is alive, so the payload is valid.
      unsafe { self.mem.get().hash(state) };
   }
}