// shape_vm/executor/gc_integration.rs
//! Garbage collection integration for the VM
//!
//! Without `gc` feature: no-ops (all values use Arc reference counting).
//! With `gc` feature: real root scanning, safepoint polling, and GC triggering.
//!
//! ## Collection strategies (gc feature)
//!
//! The VM integrates three collection strategies from `shape-gc`:
//!
//! 1. **Generational (Young/Old)**: Uses `collect_young()` / `collect_old()` via
//!    the adaptive scheduler to minimize pause times. Young-gen collections are
//!    fast and frequent; old-gen collections happen predictively.
//!
//! 2. **Incremental marking**: When a marking cycle is active (`is_marking()`),
//!    the dispatch loop calls `gc_incremental_mark_step()` every 1024 instructions
//!    to make bounded progress on the gray worklist without stopping the world.
//!
//! 3. **Full STW (fallback)**: The original `collect()` path, used when the
//!    scheduler recommends `CollectionType::Full` or when no scheduler is enabled.
use crate::memory::{GCResult, GCStats, GarbageCollector};

/// Garbage collection integration for `VirtualMachine`.
///
/// Implemented twice behind `cfg(feature = "gc")`: the non-GC build is a set
/// of no-ops over Arc reference counting; the GC build drives the real heap.
pub trait GCIntegration {
    /// Maybe trigger garbage collection based on config
    fn maybe_collect_garbage(&mut self);

    /// Force garbage collection (a full collection in the GC build; an
    /// empty `GCResult` in the non-GC build).
    fn force_gc(&mut self) -> GCResult;

    /// Get GC statistics
    fn gc_stats(&self) -> GCStats;

    /// Get GC heap size in bytes (0 when no GC heap is configured)
    fn gc_heap_size(&self) -> usize;

    /// Get GC object count (0 for allocators that do not track individual
    /// objects, e.g. the bump allocator in the GC build)
    fn gc_object_count(&self) -> usize;

    /// Access the garbage collector
    fn gc(&self) -> &GarbageCollector;

    /// Access the garbage collector mutably
    fn gc_mut(&mut self) -> &mut GarbageCollector;
}
46
47// --- Non-GC implementation (Arc refcounting) ---
48
49#[cfg(not(feature = "gc"))]
50impl GCIntegration for super::VirtualMachine {
51    fn maybe_collect_garbage(&mut self) {
52        // No-op: Arc reference counting handles memory
53    }
54
55    fn force_gc(&mut self) -> GCResult {
56        // No-op: return empty result
57        GCResult::new(0, 0, std::time::Duration::ZERO)
58    }
59
60    fn gc_stats(&self) -> GCStats {
61        self.gc.stats()
62    }
63
64    fn gc_heap_size(&self) -> usize {
65        self.gc.heap_size()
66    }
67
68    fn gc_object_count(&self) -> usize {
69        self.gc.object_count()
70    }
71
72    fn gc(&self) -> &GarbageCollector {
73        &self.gc
74    }
75
76    fn gc_mut(&mut self) -> &mut GarbageCollector {
77        &mut self.gc
78    }
79}
80
// --- Real GC implementation ---

#[cfg(feature = "gc")]
impl GCIntegration for super::VirtualMachine {
    fn maybe_collect_garbage(&mut self) {
        self.maybe_collect_gc_adaptive();
    }

    /// Force a full stop-the-world collection.
    ///
    /// Reclaimed bytes are derived from the heap's cumulative
    /// `total_collected_bytes` counter, sampled before and after the
    /// collection.
    fn force_gc(&mut self) -> GCResult {
        let start = std::time::Instant::now();
        let stats_before = self
            .gc_heap
            .as_ref()
            .map(|h| h.stats().total_collected_bytes)
            .unwrap_or(0);
        self.run_gc_collection_full();
        let stats_after = self
            .gc_heap
            .as_ref()
            .map(|h| h.stats().total_collected_bytes)
            .unwrap_or(0);
        // `saturating_sub` guards against the cumulative counter ever
        // resetting (or `gc_heap` changing) between the two samples, which
        // would underflow and panic in debug builds.
        let result = GCResult::new(
            0,
            stats_after.saturating_sub(stats_before),
            start.elapsed(),
        );
        // Record stats in the GarbageCollector for reporting
        self.gc.record_collection(&result);
        result
    }

    fn gc_stats(&self) -> GCStats {
        self.gc.stats()
    }

    fn gc_heap_size(&self) -> usize {
        self.gc_heap.as_ref().map(|h| h.heap_size()).unwrap_or(0)
    }

    fn gc_object_count(&self) -> usize {
        0 // Bump allocator doesn't track individual objects
    }

    fn gc(&self) -> &GarbageCollector {
        &self.gc
    }

    fn gc_mut(&mut self) -> &mut GarbageCollector {
        &mut self.gc
    }
}
128
#[cfg(feature = "gc")]
impl super::VirtualMachine {
    // ── Adaptive collection dispatch ────────────────────────────────────

    /// Use the adaptive scheduler to decide what kind of collection to run.
    ///
    /// Prefers generational collection (young/old) when possible, falling back
    /// to full STW only when the scheduler recommends it or is not configured.
    fn maybe_collect_gc_adaptive(&mut self) {
        let Some(ref gc_heap) = self.gc_heap else {
            return;
        };

        let collection_type = gc_heap.should_collect_adaptive();

        match collection_type {
            shape_gc::scheduler::CollectionType::None => {}
            shape_gc::scheduler::CollectionType::Young => {
                self.run_gc_collection_young();
            }
            shape_gc::scheduler::CollectionType::Old => {
                self.run_gc_collection_old();
            }
            shape_gc::scheduler::CollectionType::Full => {
                self.run_gc_collection_full();
            }
        }
    }

    // ── Root scanning ──────────────────────────────────────────────────

    /// Collect all GC root pointers into a Vec.
    ///
    /// Root categories: stack, module bindings (globals), closure upvalues,
    /// task scheduler callables/results, uncaught exception.
    ///
    /// This collects roots into a flat `Vec<*mut u8>` suitable for passing
    /// to `collect_young()` / `collect_old()`. Fields are borrowed individually
    /// to avoid a whole-`self` borrow conflict with `gc_heap`.
    fn collect_gc_roots(&self) -> Vec<*mut u8> {
        // Capacity hint: one slot per stack value and global, plus slack for
        // upvalues / scheduler roots / exception.
        let mut roots = Vec::with_capacity(self.sp + self.module_bindings.len() + 32);

        // 1. Stack slots [0..sp]
        for i in 0..self.sp {
            shape_gc::roots::trace_nanboxed_bits(self.stack[i].raw_bits(), &mut |ptr| {
                roots.push(ptr);
            });
        }

        // 2. Module bindings (global variables)
        for binding in &self.module_bindings {
            shape_gc::roots::trace_nanboxed_bits(binding.raw_bits(), &mut |ptr| {
                roots.push(ptr);
            });
        }

        // 3. Call stack — closure upvalues
        for frame in &self.call_stack {
            if let Some(ref upvalues) = frame.upvalues {
                for upvalue in upvalues {
                    let nb = upvalue.get();
                    shape_gc::roots::trace_nanboxed_bits(nb.raw_bits(), &mut |ptr| {
                        roots.push(ptr);
                    });
                }
            }
        }

        // 4. Task scheduler — spawned callables and completed results
        self.task_scheduler.scan_roots(&mut |ptr| {
            roots.push(ptr);
        });

        // 5. Uncaught exception
        if let Some(ref exc) = self.last_uncaught_exception {
            shape_gc::roots::trace_nanboxed_bits(exc.raw_bits(), &mut |ptr| {
                roots.push(ptr);
            });
        }

        roots
    }

    // ── Young-generation collection ────────────────────────────────────

    /// Run a young-generation collection, scanning VM roots.
    ///
    /// Collects only young-gen regions + dirty card references. Objects that
    /// have survived enough young-gen cycles are promoted to old gen.
    fn run_gc_collection_young(&mut self) {
        // Bail out before the root scan: collecting roots walks the whole
        // stack, globals, call frames, and scheduler — wasted work when no
        // GC heap is configured.
        if self.gc_heap.is_none() {
            return;
        }

        let roots = self.collect_gc_roots();
        let start = std::time::Instant::now();

        let Some(ref mut gc_heap) = self.gc_heap else {
            return;
        };

        let stats = gc_heap.collect_young(&roots);
        let pause_us = start.elapsed().as_micros() as u64;

        // Record in GarbageCollector stats
        let result = GCResult::new(
            stats.objects_collected as u64,
            stats.bytes_collected as u64,
            start.elapsed(),
        );
        self.gc.record_collection(&result);

        // Record in VmMetrics if enabled
        if let Some(ref mut metrics) = self.metrics {
            metrics.record_gc_pause(crate::metrics::GcPauseEvent {
                collection_type: 0, // Young
                pause_us,
                bytes_collected: stats.bytes_collected,
                bytes_promoted: 0, // Promotion tracking is internal to GenerationalCollector
                timestamp_us: metrics.elapsed_us(),
            });
        }
    }

    // ── Old-generation collection ──────────────────────────────────────

    /// Run an old-generation collection, scanning VM roots.
    ///
    /// Full mark-sweep across both generations. Called less frequently than
    /// young-gen collection, typically when the adaptive scheduler predicts
    /// old gen is about to fill up.
    fn run_gc_collection_old(&mut self) {
        // Same early-out as the young path: skip the root scan entirely
        // when there is no GC heap.
        if self.gc_heap.is_none() {
            return;
        }

        let roots = self.collect_gc_roots();
        let start = std::time::Instant::now();

        let Some(ref mut gc_heap) = self.gc_heap else {
            return;
        };

        let stats = gc_heap.collect_old(&roots);
        let pause_us = start.elapsed().as_micros() as u64;

        // Record in GarbageCollector stats
        let result = GCResult::new(
            stats.objects_collected as u64,
            stats.bytes_collected as u64,
            start.elapsed(),
        );
        self.gc.record_collection(&result);

        // Record in VmMetrics if enabled
        if let Some(ref mut metrics) = self.metrics {
            metrics.record_gc_pause(crate::metrics::GcPauseEvent {
                collection_type: 1, // Old
                pause_us,
                bytes_collected: stats.bytes_collected,
                bytes_promoted: 0,
                timestamp_us: metrics.elapsed_us(),
            });
        }
    }

    // ── Full STW collection (fallback) ─────────────────────────────────

    /// Run a full stop-the-world collection, scanning VM roots.
    ///
    /// Root categories: stack, module bindings (globals), closure upvalues,
    /// task scheduler callables/results, uncaught exception.
    ///
    /// Fields are accessed individually to avoid a whole-`self` borrow conflict
    /// with the mutable borrow of `gc_heap`.
    fn run_gc_collection_full(&mut self) {
        let start = std::time::Instant::now();

        let Some(ref mut gc_heap) = self.gc_heap else {
            return;
        };

        // Borrow each root source separately to satisfy the borrow checker.
        // The STW `collect()` API takes a callback, so we cannot use
        // `collect_gc_roots()` here (it would require borrowing `self`
        // while `gc_heap` is already mutably borrowed).
        let stack = &self.stack;
        let sp = self.sp;
        let module_bindings = &self.module_bindings;
        let call_stack = &self.call_stack;
        let task_scheduler = &self.task_scheduler;
        let last_uncaught_exception = &self.last_uncaught_exception;

        gc_heap.collect(&mut |visitor| {
            // 1. Stack slots [0..sp]
            for i in 0..sp {
                shape_gc::roots::trace_nanboxed_bits(stack[i].raw_bits(), visitor);
            }

            // 2. Module bindings (global variables)
            for binding in module_bindings {
                shape_gc::roots::trace_nanboxed_bits(binding.raw_bits(), visitor);
            }

            // 3. Call stack — closure upvalues
            for frame in call_stack {
                if let Some(ref upvalues) = frame.upvalues {
                    for upvalue in upvalues {
                        let nb = upvalue.get();
                        shape_gc::roots::trace_nanboxed_bits(nb.raw_bits(), visitor);
                    }
                }
            }

            // 4. Task scheduler — spawned callables and completed results
            task_scheduler.scan_roots(visitor);

            // 5. Uncaught exception
            if let Some(exc) = last_uncaught_exception {
                shape_gc::roots::trace_nanboxed_bits(exc.raw_bits(), visitor);
            }
        });

        let pause_us = start.elapsed().as_micros() as u64;

        // Record in VmMetrics if enabled
        if let Some(ref mut metrics) = self.metrics {
            metrics.record_gc_pause(crate::metrics::GcPauseEvent {
                collection_type: 2, // Full
                pause_us,
                bytes_collected: 0, // STW collect() doesn't return per-call stats easily
                bytes_promoted: 0,
                timestamp_us: metrics.elapsed_us(),
            });
        }
    }

    // ── Incremental marking step ───────────────────────────────────────

    /// Perform a bounded incremental marking step.
    ///
    /// Called from the dispatch loop every 1024 instructions when a marking
    /// cycle is active (`gc_heap.is_marking()`). Processes up to `MARK_BUDGET`
    /// gray objects from the worklist without stopping the world.
    ///
    /// When the incremental cycle completes (mark termination + sweep), the
    /// stats are recorded.
    pub(crate) fn gc_incremental_mark_step(&mut self) {
        // Budget: number of gray objects to process per dispatch-loop check.
        // 64 is a good balance between latency (small pauses) and throughput
        // (not spending too much time in GC overhead).
        const MARK_BUDGET: usize = 64;

        // This runs on a hot path (every 1024 instructions), so return before
        // the root scan when no GC heap is configured.
        if self.gc_heap.is_none() {
            return;
        }

        // Collect roots upfront. For incremental marking, roots are only
        // needed on the first call (to start the cycle) — subsequent calls
        // just process the gray worklist. However, `collect_incremental`
        // handles this internally: it only scans roots when transitioning
        // from Idle to Marking phase.
        let roots = self.collect_gc_roots();
        let start = std::time::Instant::now();

        let Some(ref mut gc_heap) = self.gc_heap else {
            return;
        };

        // `roots` is a local Vec, so the closure may borrow it directly even
        // while `gc_heap` is mutably borrowed from `self`.
        let result = gc_heap.collect_incremental(MARK_BUDGET, &mut |visitor| {
            for &ptr in &roots {
                visitor(ptr);
            }
        });

        if let shape_gc::CollectResult::Complete(stats) = result {
            let pause_us = start.elapsed().as_micros() as u64;

            // Record in GarbageCollector stats
            let gc_result = GCResult::new(
                stats.objects_collected as u64,
                stats.bytes_collected as u64,
                start.elapsed(),
            );
            self.gc.record_collection(&gc_result);

            // Record in VmMetrics if enabled
            if let Some(ref mut metrics) = self.metrics {
                metrics.record_gc_pause(crate::metrics::GcPauseEvent {
                    collection_type: 3, // Incremental (completed cycle)
                    pause_us,
                    bytes_collected: stats.bytes_collected,
                    bytes_promoted: 0,
                    timestamp_us: metrics.elapsed_us(),
                });
            }
        }
    }

    // ── Safepoint polling ──────────────────────────────────────────────

    /// Poll the GC safepoint. Called at interrupt check points.
    #[inline(always)]
    pub(crate) fn gc_safepoint_poll(&self) {
        if let Some(ref gc_heap) = self.gc_heap {
            shape_gc::safepoint::safepoint_poll(gc_heap.safepoint());
        }
    }
}