// shape_vm/executor/gc_integration.rs
//! Garbage collection integration for the VM
//!
//! Without `gc` feature: no-ops (all values use Arc reference counting).
//! With `gc` feature: real root scanning, safepoint polling, and GC triggering.
//!
//! ## Collection strategies (gc feature)
//!
//! The VM integrates three collection strategies from `shape-gc`:
//!
//! 1. **Generational (Young/Old)**: Uses `collect_young()` / `collect_old()` via
//!    the adaptive scheduler to minimize pause times. Young-gen collections are
//!    fast and frequent; old-gen collections happen predictively.
//!
//! 2. **Incremental marking**: When a marking cycle is active (`is_marking()`),
//!    the dispatch loop calls `gc_incremental_mark_step()` every 1024 instructions
//!    to make bounded progress on the gray worklist without stopping the world.
//!
//! 3. **Full STW (fallback)**: The original `collect()` path, used when the
//!    scheduler recommends `CollectionType::Full` or when no scheduler is enabled.

use crate::memory::{GCResult, GCStats, GarbageCollector};

23/// Garbage collection integration for VirtualMachine
24pub trait GCIntegration {
25 /// Maybe trigger garbage collection based on config
26 fn maybe_collect_garbage(&mut self);
27
28 /// Force garbage collection
29 fn force_gc(&mut self) -> GCResult;
30
31 /// Get GC statistics
32 fn gc_stats(&self) -> GCStats;
33
34 /// Get GC heap size
35 fn gc_heap_size(&self) -> usize;
36
37 /// Get GC object count
38 fn gc_object_count(&self) -> usize;
39
40 /// Access the garbage collector
41 fn gc(&self) -> &GarbageCollector;
42
43 /// Access the garbage collector mutably
44 fn gc_mut(&mut self) -> &mut GarbageCollector;
45}
46
47// --- Non-GC implementation (Arc refcounting) ---
48
49#[cfg(not(feature = "gc"))]
50impl GCIntegration for super::VirtualMachine {
51 fn maybe_collect_garbage(&mut self) {
52 // No-op: Arc reference counting handles memory
53 }
54
55 fn force_gc(&mut self) -> GCResult {
56 // No-op: return empty result
57 GCResult::new(0, 0, std::time::Duration::ZERO)
58 }
59
60 fn gc_stats(&self) -> GCStats {
61 self.gc.stats()
62 }
63
64 fn gc_heap_size(&self) -> usize {
65 self.gc.heap_size()
66 }
67
68 fn gc_object_count(&self) -> usize {
69 self.gc.object_count()
70 }
71
72 fn gc(&self) -> &GarbageCollector {
73 &self.gc
74 }
75
76 fn gc_mut(&mut self) -> &mut GarbageCollector {
77 &mut self.gc
78 }
79}
80
// --- Real GC implementation ---

#[cfg(feature = "gc")]
impl GCIntegration for super::VirtualMachine {
    fn maybe_collect_garbage(&mut self) {
        self.maybe_collect_gc_adaptive();
    }

    fn force_gc(&mut self) -> GCResult {
        let start = std::time::Instant::now();
        // `total_collected_bytes` is a cumulative counter, so the delta across
        // the collection is the bytes freed by this call.
        let stats_before = self
            .gc_heap
            .as_ref()
            .map(|h| h.stats().total_collected_bytes)
            .unwrap_or(0);
        self.run_gc_collection_full();
        let stats_after = self
            .gc_heap
            .as_ref()
            .map(|h| h.stats().total_collected_bytes)
            .unwrap_or(0);
        let result = GCResult::new(0, stats_after - stats_before, start.elapsed());
        // Record stats in the GarbageCollector for reporting
        self.gc.record_collection(&result);
        result
    }

    fn gc_stats(&self) -> GCStats {
        self.gc.stats()
    }

    fn gc_heap_size(&self) -> usize {
        self.gc_heap.as_ref().map(|h| h.heap_size()).unwrap_or(0)
    }

    fn gc_object_count(&self) -> usize {
        0 // Bump allocator doesn't track individual objects
    }

    fn gc(&self) -> &GarbageCollector {
        &self.gc
    }

    fn gc_mut(&mut self) -> &mut GarbageCollector {
        &mut self.gc
    }
}

#[cfg(feature = "gc")]
impl super::VirtualMachine {
    // ── Adaptive collection dispatch ────────────────────────────────────

    /// Use the adaptive scheduler to decide what kind of collection to run.
    ///
    /// Prefers generational collection (young/old) when possible, falling back
    /// to full STW only when the scheduler recommends it or is not configured.
    fn maybe_collect_gc_adaptive(&mut self) {
        let Some(ref gc_heap) = self.gc_heap else {
            return;
        };

        let collection_type = gc_heap.should_collect_adaptive();

        match collection_type {
            shape_gc::scheduler::CollectionType::None => {}
            shape_gc::scheduler::CollectionType::Young => {
                self.run_gc_collection_young();
            }
            shape_gc::scheduler::CollectionType::Old => {
                self.run_gc_collection_old();
            }
            shape_gc::scheduler::CollectionType::Full => {
                self.run_gc_collection_full();
            }
        }
    }

    // ── Root scanning ──────────────────────────────────────────────────

    /// Collect all GC root pointers into a Vec.
    ///
    /// Root categories: stack, module bindings (globals), closure upvalues,
    /// task scheduler callables/results, uncaught exception.
    ///
    /// This collects roots into a flat `Vec<*mut u8>` suitable for passing
    /// to `collect_young()` / `collect_old()`. Fields are borrowed individually
    /// to avoid a whole-`self` borrow conflict with `gc_heap`.
    fn collect_gc_roots(&self) -> Vec<*mut u8> {
        // Capacity guess: one slot per live stack entry and global, plus
        // headroom for upvalues/tasks/exception roots.
        let mut roots = Vec::with_capacity(self.sp + self.module_bindings.len() + 32);

        // 1. Stack slots [0..sp]
        for i in 0..self.sp {
            shape_gc::roots::trace_nanboxed_bits(self.stack[i].raw_bits(), &mut |ptr| {
                roots.push(ptr);
            });
        }

        // 2. Module bindings (global variables)
        for binding in &self.module_bindings {
            shape_gc::roots::trace_nanboxed_bits(binding.raw_bits(), &mut |ptr| {
                roots.push(ptr);
            });
        }

        // 3. Call stack — closure upvalues
        for frame in &self.call_stack {
            if let Some(ref upvalues) = frame.upvalues {
                for upvalue in upvalues {
                    let nb = upvalue.get();
                    shape_gc::roots::trace_nanboxed_bits(nb.raw_bits(), &mut |ptr| {
                        roots.push(ptr);
                    });
                }
            }
        }

        // 4. Task scheduler — spawned callables and completed results
        self.task_scheduler.scan_roots(&mut |ptr| {
            roots.push(ptr);
        });

        // 5. Uncaught exception
        if let Some(ref exc) = self.last_uncaught_exception {
            shape_gc::roots::trace_nanboxed_bits(exc.raw_bits(), &mut |ptr| {
                roots.push(ptr);
            });
        }

        roots
    }

    // ── Young-generation collection ────────────────────────────────────

    /// Run a young-generation collection, scanning VM roots.
    ///
    /// Collects only young-gen regions + dirty card references. Objects that
    /// have survived enough young-gen cycles are promoted to old gen.
    fn run_gc_collection_young(&mut self) {
        // Roots are gathered before mutably borrowing `gc_heap` to satisfy
        // the borrow checker.
        let roots = self.collect_gc_roots();
        let start = std::time::Instant::now();

        let Some(ref mut gc_heap) = self.gc_heap else {
            return;
        };

        let stats = gc_heap.collect_young(&roots);
        let pause_us = start.elapsed().as_micros() as u64;

        // Record in GarbageCollector stats
        let result = GCResult::new(
            stats.objects_collected as u64,
            stats.bytes_collected as u64,
            start.elapsed(),
        );
        self.gc.record_collection(&result);

        // Record in VmMetrics if enabled
        if let Some(ref mut metrics) = self.metrics {
            metrics.record_gc_pause(crate::metrics::GcPauseEvent {
                collection_type: 0, // Young
                pause_us,
                bytes_collected: stats.bytes_collected,
                bytes_promoted: 0, // Promotion tracking is internal to GenerationalCollector
                timestamp_us: metrics.elapsed_us(),
            });
        }
    }

    // ── Old-generation collection ──────────────────────────────────────

    /// Run an old-generation collection, scanning VM roots.
    ///
    /// Full mark-sweep across both generations. Called less frequently than
    /// young-gen collection, typically when the adaptive scheduler predicts
    /// old gen is about to fill up.
    fn run_gc_collection_old(&mut self) {
        let roots = self.collect_gc_roots();
        let start = std::time::Instant::now();

        let Some(ref mut gc_heap) = self.gc_heap else {
            return;
        };

        let stats = gc_heap.collect_old(&roots);
        let pause_us = start.elapsed().as_micros() as u64;

        // Record in GarbageCollector stats
        let result = GCResult::new(
            stats.objects_collected as u64,
            stats.bytes_collected as u64,
            start.elapsed(),
        );
        self.gc.record_collection(&result);

        // Record in VmMetrics if enabled
        if let Some(ref mut metrics) = self.metrics {
            metrics.record_gc_pause(crate::metrics::GcPauseEvent {
                collection_type: 1, // Old
                pause_us,
                bytes_collected: stats.bytes_collected,
                bytes_promoted: 0,
                timestamp_us: metrics.elapsed_us(),
            });
        }
    }

    // ── Full STW collection (fallback) ─────────────────────────────────

    /// Run a full stop-the-world collection, scanning VM roots.
    ///
    /// Root categories: stack, module bindings (globals), closure upvalues,
    /// task scheduler callables/results, uncaught exception.
    ///
    /// Fields are accessed individually to avoid a whole-`self` borrow conflict
    /// with the mutable borrow of `gc_heap`.
    fn run_gc_collection_full(&mut self) {
        let start = std::time::Instant::now();

        let Some(ref mut gc_heap) = self.gc_heap else {
            return;
        };

        // Borrow each root source separately to satisfy the borrow checker.
        // The STW `collect()` API takes a callback, so we cannot use
        // `collect_gc_roots()` here (it would require borrowing `self`
        // while `gc_heap` is already mutably borrowed).
        let stack = &self.stack;
        let sp = self.sp;
        let module_bindings = &self.module_bindings;
        let call_stack = &self.call_stack;
        let task_scheduler = &self.task_scheduler;
        let last_uncaught_exception = &self.last_uncaught_exception;

        gc_heap.collect(&mut |visitor| {
            // 1. Stack slots [0..sp]
            for i in 0..sp {
                shape_gc::roots::trace_nanboxed_bits(stack[i].raw_bits(), visitor);
            }

            // 2. Module bindings (global variables)
            for binding in module_bindings {
                shape_gc::roots::trace_nanboxed_bits(binding.raw_bits(), visitor);
            }

            // 3. Call stack — closure upvalues
            for frame in call_stack {
                if let Some(ref upvalues) = frame.upvalues {
                    for upvalue in upvalues {
                        let nb = upvalue.get();
                        shape_gc::roots::trace_nanboxed_bits(nb.raw_bits(), visitor);
                    }
                }
            }

            // 4. Task scheduler — spawned callables and completed results
            task_scheduler.scan_roots(visitor);

            // 5. Uncaught exception
            if let Some(exc) = last_uncaught_exception {
                shape_gc::roots::trace_nanboxed_bits(exc.raw_bits(), visitor);
            }
        });

        let pause_us = start.elapsed().as_micros() as u64;

        // Record in VmMetrics if enabled
        if let Some(ref mut metrics) = self.metrics {
            metrics.record_gc_pause(crate::metrics::GcPauseEvent {
                collection_type: 2, // Full
                pause_us,
                bytes_collected: 0, // STW collect() doesn't return per-call stats easily
                bytes_promoted: 0,
                timestamp_us: metrics.elapsed_us(),
            });
        }
    }

    // ── Incremental marking step ───────────────────────────────────────

    /// Perform a bounded incremental marking step.
    ///
    /// Called from the dispatch loop every 1024 instructions when a marking
    /// cycle is active (`gc_heap.is_marking()`). Processes up to `mark_budget`
    /// gray objects from the worklist without stopping the world.
    ///
    /// When the incremental cycle completes (mark termination + sweep), the
    /// stats are recorded and the byte counter is reset.
    pub(crate) fn gc_incremental_mark_step(&mut self) {
        // Budget: number of gray objects to process per dispatch-loop check.
        // 64 is a good balance between latency (small pauses) and throughput
        // (not spending too much time in GC overhead).
        const MARK_BUDGET: usize = 64;

        // Collect roots upfront. For incremental marking, roots are only
        // needed on the first call (to start the cycle) — subsequent calls
        // just process the gray worklist. However, `collect_incremental`
        // handles this internally: it only scans roots when transitioning
        // from Idle to Marking phase.
        let roots = self.collect_gc_roots();
        let start = std::time::Instant::now();

        let Some(ref mut gc_heap) = self.gc_heap else {
            return;
        };

        // Borrow the root vec so it lives long enough for the closure.
        let roots_ref = &roots;

        let result = gc_heap.collect_incremental(MARK_BUDGET, &mut |visitor| {
            for &ptr in roots_ref.iter() {
                visitor(ptr);
            }
        });

        if let shape_gc::CollectResult::Complete(stats) = result {
            let pause_us = start.elapsed().as_micros() as u64;

            // Record in GarbageCollector stats
            let gc_result = GCResult::new(
                stats.objects_collected as u64,
                stats.bytes_collected as u64,
                start.elapsed(),
            );
            self.gc.record_collection(&gc_result);

            // Record in VmMetrics if enabled
            if let Some(ref mut metrics) = self.metrics {
                metrics.record_gc_pause(crate::metrics::GcPauseEvent {
                    collection_type: 3, // Incremental (completed cycle)
                    pause_us,
                    bytes_collected: stats.bytes_collected,
                    bytes_promoted: 0,
                    timestamp_us: metrics.elapsed_us(),
                });
            }
        }
    }

    // ── Safepoint polling ──────────────────────────────────────────────

    /// Poll the GC safepoint. Called at interrupt check points.
    #[inline(always)]
    pub(crate) fn gc_safepoint_poll(&self) {
        if let Some(ref gc_heap) = self.gc_heap {
            shape_gc::safepoint::safepoint_poll(gc_heap.safepoint());
        }
    }
}