// shape_gc/scheduler.rs
//! Adaptive GC scheduling — decides WHEN and WHAT to collect.
//!
//! The scheduler monitors allocation rates, generation utilization, and pause
//! history to make informed collection decisions:
//!
//! - **Young GC**: Triggered when young-gen utilization exceeds a threshold.
//! - **Old GC**: Triggered predictively when the old-gen fill rate suggests
//!   it will run out of space before the next scheduled collection.
//! - **Full GC**: Fallback when neither generational strategy applies.
use std::time::{Duration, Instant};
/// What kind of collection the scheduler recommends.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CollectionType {
    /// No collection needed.
    None,
    /// Collect only the young generation.
    Young,
    /// Collect only the old generation (full mark-sweep of old regions).
    Old,
    /// Full collection (both generations).
    Full,
}
25
/// Heap metrics snapshot for the scheduler to inspect.
///
/// Instead of taking a reference to GcHeap (which would create circular
/// dependencies), the caller provides a snapshot of the relevant metrics.
#[derive(Debug, Clone)]
pub struct HeapMetrics {
    /// Young generation utilization (0.0 to 1.0).
    pub young_utilization: f64,
    /// Free bytes in old generation.
    pub old_free_bytes: usize,
    /// Total bytes allocated since the last GC cycle.
    pub bytes_since_gc: usize,
    /// GC threshold (bytes). Used for fallback decisions.
    pub gc_threshold: usize,
    /// Average GC pause time in seconds (from GcHeapStats).
    pub avg_gc_pause_secs: f64,
}
43
/// Adaptive GC scheduler.
///
/// Tracks allocation rates and collection history to decide when and what
/// type of collection to perform. The scheduler is updated after each
/// allocation (via `record_allocation`) and after each collection (via
/// `record_young_gc` / `record_old_gc`).
pub struct AdaptiveScheduler {
    /// Timestamp of the last young GC.
    last_young_gc: Instant,
    /// Timestamp of the last old GC.
    last_old_gc: Instant,
    /// Bytes allocated since the last young GC.
    young_alloc_bytes: usize,
    /// Number of young GCs performed.
    young_gc_count: u32,
    /// Number of old GCs performed.
    old_gc_count: u32,
    /// Young-gen utilization threshold before triggering young GC (default 0.8).
    young_utilization_threshold: f64,
    /// Headroom factor for old-gen predictive collection (default 2.0).
    /// If time_to_full < headroom_factor * avg_gc_pause, trigger old GC.
    headroom_factor: f64,
    /// Total allocation bytes tracked (for rate computation).
    total_alloc_bytes: u64,
    /// Rolling average of young GC pause durations.
    young_pause_avg: Duration,
    /// Rolling average of old GC pause durations.
    old_pause_avg: Duration,
}
73
74impl AdaptiveScheduler {
75    /// Create a new scheduler with default thresholds.
76    pub fn new() -> Self {
77        Self {
78            last_young_gc: Instant::now(),
79            last_old_gc: Instant::now(),
80            young_alloc_bytes: 0,
81            young_gc_count: 0,
82            old_gc_count: 0,
83            young_utilization_threshold: 0.8,
84            headroom_factor: 2.0,
85            total_alloc_bytes: 0,
86            young_pause_avg: Duration::ZERO,
87            old_pause_avg: Duration::ZERO,
88        }
89    }
90
91    /// Create a scheduler with custom thresholds.
92    pub fn with_config(young_utilization_threshold: f64, headroom_factor: f64) -> Self {
93        Self {
94            young_utilization_threshold,
95            headroom_factor,
96            ..Self::new()
97        }
98    }
99
100    /// Determine what collection should happen based on current heap metrics.
101    pub fn should_collect(&self, metrics: &HeapMetrics) -> CollectionType {
102        // Check 1: Young-gen utilization exceeds threshold
103        if metrics.young_utilization > self.young_utilization_threshold {
104            return CollectionType::Young;
105        }
106
107        // Check 2: Predict old-gen fill based on allocation rate.
108        // Only meaningful if we have a previous old GC timestamp and nonzero time.
109        let elapsed = self.last_old_gc.elapsed();
110        let elapsed_secs = elapsed.as_secs_f64();
111        if elapsed_secs > 0.0 && self.young_alloc_bytes > 0 {
112            let alloc_rate = self.young_alloc_bytes as f64 / elapsed_secs;
113            let old_free = metrics.old_free_bytes as f64;
114
115            // Avoid division by zero: if alloc_rate is zero, we never fill up
116            if alloc_rate > 0.0 {
117                let time_to_full = old_free / alloc_rate;
118
119                // Determine the reference pause time. Use actual average if available,
120                // otherwise fall back to the metrics-provided average.
121                let ref_pause = if metrics.avg_gc_pause_secs > 0.0 {
122                    metrics.avg_gc_pause_secs
123                } else {
124                    self.old_pause_avg.as_secs_f64()
125                };
126
127                // If the reference pause is zero (no collections yet), use a small
128                // sentinel value so we don't erroneously trigger.
129                let effective_pause = if ref_pause > 0.0 {
130                    ref_pause
131                } else {
132                    // No pause history — only trigger if truly about to overflow
133                    0.001 // 1ms sentinel
134                };
135
136                if time_to_full < self.headroom_factor * effective_pause {
137                    return CollectionType::Old;
138                }
139            }
140        }
141
142        // Check 3: Fallback byte-threshold trigger
143        if metrics.bytes_since_gc >= metrics.gc_threshold {
144            return CollectionType::Full;
145        }
146
147        CollectionType::None
148    }
149
150    // ── Recording events ────────────────────────────────────────────
151
152    /// Record that a young GC happened. Resets the per-young-cycle counters.
153    pub fn record_young_gc(&mut self, pause_duration: Duration) {
154        self.young_gc_count += 1;
155        self.young_alloc_bytes = 0;
156        self.last_young_gc = Instant::now();
157
158        // Exponential moving average for pause times (alpha = 0.3)
159        self.young_pause_avg = ema_duration(self.young_pause_avg, pause_duration, 0.3);
160    }
161
162    /// Record that an old GC happened.
163    pub fn record_old_gc(&mut self, pause_duration: Duration) {
164        self.old_gc_count += 1;
165        self.young_alloc_bytes = 0; // Reset since old GC is a superset
166        self.last_old_gc = Instant::now();
167
168        self.old_pause_avg = ema_duration(self.old_pause_avg, pause_duration, 0.3);
169    }
170
171    /// Record a full GC (resets both generation counters).
172    pub fn record_full_gc(&mut self, pause_duration: Duration) {
173        self.record_young_gc(pause_duration);
174        self.record_old_gc(pause_duration);
175    }
176
177    /// Record an allocation of `bytes` bytes.
178    pub fn record_allocation(&mut self, bytes: usize) {
179        self.young_alloc_bytes += bytes;
180        self.total_alloc_bytes += bytes as u64;
181    }
182
183    // ── Query accessors ─────────────────────────────────────────────
184
185    /// Number of young GCs recorded by this scheduler.
186    pub fn young_gc_count(&self) -> u32 {
187        self.young_gc_count
188    }
189
190    /// Number of old GCs recorded by this scheduler.
191    pub fn old_gc_count(&self) -> u32 {
192        self.old_gc_count
193    }
194
195    /// Total bytes allocated since last young GC.
196    pub fn young_alloc_bytes(&self) -> usize {
197        self.young_alloc_bytes
198    }
199
200    /// Total bytes allocated since scheduler creation.
201    pub fn total_alloc_bytes(&self) -> u64 {
202        self.total_alloc_bytes
203    }
204
205    /// Young-gen utilization threshold.
206    pub fn young_utilization_threshold(&self) -> f64 {
207        self.young_utilization_threshold
208    }
209
210    /// Headroom factor for old-gen prediction.
211    pub fn headroom_factor(&self) -> f64 {
212        self.headroom_factor
213    }
214
215    /// Average young GC pause duration (EMA).
216    pub fn young_pause_avg(&self) -> Duration {
217        self.young_pause_avg
218    }
219
220    /// Average old GC pause duration (EMA).
221    pub fn old_pause_avg(&self) -> Duration {
222        self.old_pause_avg
223    }
224
225    /// Current allocation rate (bytes/sec) since the last old GC.
226    /// Returns 0.0 if no time has elapsed.
227    pub fn allocation_rate(&self) -> f64 {
228        let elapsed = self.last_old_gc.elapsed().as_secs_f64();
229        if elapsed > 0.0 {
230            self.young_alloc_bytes as f64 / elapsed
231        } else {
232            0.0
233        }
234    }
235}
236
237impl Default for AdaptiveScheduler {
238    fn default() -> Self {
239        Self::new()
240    }
241}
242
/// Compute an exponential moving average for `Duration` values.
///
/// `alpha` is the weight given to `new_sample` (0.0..=1.0). A zero `current`
/// is treated as "no history yet" and the sample is adopted directly.
fn ema_duration(current: Duration, new_sample: Duration, alpha: f64) -> Duration {
    if current == Duration::ZERO {
        // First sample — use it directly
        return new_sample;
    }
    let current_nanos = current.as_nanos() as f64;
    let sample_nanos = new_sample.as_nanos() as f64;
    let result_nanos = alpha * sample_nanos + (1.0 - alpha) * current_nanos;
    Duration::from_nanos(result_nanos as u64)
}
254
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    // ── Basic scheduler creation ────────────────────────────────────

    #[test]
    fn test_scheduler_defaults() {
        let s = AdaptiveScheduler::new();
        assert_eq!(s.young_gc_count(), 0);
        assert_eq!(s.old_gc_count(), 0);
        assert_eq!(s.young_alloc_bytes(), 0);
        assert_eq!(s.young_utilization_threshold(), 0.8);
        assert_eq!(s.headroom_factor(), 2.0);
    }

    #[test]
    fn test_scheduler_custom_config() {
        let s = AdaptiveScheduler::with_config(0.5, 3.0);
        assert_eq!(s.young_utilization_threshold(), 0.5);
        assert_eq!(s.headroom_factor(), 3.0);
    }

    // ── should_collect triggers ─────────────────────────────────────

    #[test]
    fn test_triggers_young_gc_on_utilization() {
        let s = AdaptiveScheduler::with_config(0.8, 2.0);

        let metrics = HeapMetrics {
            young_utilization: 0.9, // Above 0.8 threshold
            old_free_bytes: 10_000_000,
            bytes_since_gc: 0,
            gc_threshold: 4_000_000,
            avg_gc_pause_secs: 0.001,
        };

        assert_eq!(s.should_collect(&metrics), CollectionType::Young);
    }

    #[test]
    fn test_no_collection_below_thresholds() {
        let s = AdaptiveScheduler::new();

        let metrics = HeapMetrics {
            young_utilization: 0.3,
            old_free_bytes: 10_000_000,
            bytes_since_gc: 1_000,
            gc_threshold: 4_000_000,
            avg_gc_pause_secs: 0.001,
        };

        assert_eq!(s.should_collect(&metrics), CollectionType::None);
    }

    #[test]
    fn test_fallback_full_gc_on_byte_threshold() {
        let s = AdaptiveScheduler::new();

        let metrics = HeapMetrics {
            young_utilization: 0.3,     // Below threshold
            old_free_bytes: 10_000_000, // Plenty of space
            bytes_since_gc: 5_000_000,  // Above gc_threshold
            gc_threshold: 4_000_000,
            avg_gc_pause_secs: 0.001,
        };

        assert_eq!(s.should_collect(&metrics), CollectionType::Full);
    }

    // ── Recording events ────────────────────────────────────────────

    #[test]
    fn test_record_allocation() {
        let mut s = AdaptiveScheduler::new();
        s.record_allocation(1000);
        s.record_allocation(500);
        assert_eq!(s.young_alloc_bytes(), 1500);
        assert_eq!(s.total_alloc_bytes(), 1500);
    }

    #[test]
    fn test_record_young_gc_resets_alloc_counter() {
        let mut s = AdaptiveScheduler::new();
        s.record_allocation(5000);
        assert_eq!(s.young_alloc_bytes(), 5000);

        s.record_young_gc(Duration::from_millis(1));
        assert_eq!(s.young_alloc_bytes(), 0);
        assert_eq!(s.young_gc_count(), 1);

        // Total should still reflect the allocation
        assert_eq!(s.total_alloc_bytes(), 5000);
    }

    #[test]
    fn test_record_old_gc_resets_counter() {
        let mut s = AdaptiveScheduler::new();
        s.record_allocation(3000);

        s.record_old_gc(Duration::from_millis(5));
        assert_eq!(s.young_alloc_bytes(), 0);
        assert_eq!(s.old_gc_count(), 1);
    }

    #[test]
    fn test_record_full_gc() {
        let mut s = AdaptiveScheduler::new();
        s.record_allocation(7000);

        s.record_full_gc(Duration::from_millis(10));
        assert_eq!(s.young_gc_count(), 1);
        assert_eq!(s.old_gc_count(), 1);
        assert_eq!(s.young_alloc_bytes(), 0);
    }

    // ── Zero allocation rate handling ───────────────────────────────

    #[test]
    fn test_zero_allocation_rate_no_old_gc() {
        // If no allocations have been made, old-gen prediction should not trigger
        let s = AdaptiveScheduler::new();

        let metrics = HeapMetrics {
            young_utilization: 0.3,
            old_free_bytes: 100, // Very low, but no allocation pressure
            bytes_since_gc: 0,
            gc_threshold: 4_000_000,
            avg_gc_pause_secs: 0.001,
        };

        // Should be None because young_alloc_bytes is 0 (no rate)
        assert_eq!(s.should_collect(&metrics), CollectionType::None);
    }

    #[test]
    fn test_allocation_rate_computation() {
        let mut s = AdaptiveScheduler::new();
        // When no time has elapsed, rate is 0
        assert_eq!(s.allocation_rate(), 0.0);

        s.record_allocation(10000);
        // Rate depends on elapsed time since last old GC (just created)
        // It should be > 0 since we just allocated
        let rate = s.allocation_rate();
        // Just verify it's non-negative (timing-dependent)
        assert!(rate >= 0.0);
    }

    // ── Pause time EMA ──────────────────────────────────────────────

    #[test]
    fn test_pause_avg_first_sample() {
        let mut s = AdaptiveScheduler::new();
        assert_eq!(s.young_pause_avg(), Duration::ZERO);

        s.record_young_gc(Duration::from_millis(10));
        // First sample should be used directly
        assert_eq!(s.young_pause_avg(), Duration::from_millis(10));
    }

    #[test]
    fn test_pause_avg_ema_converges() {
        let mut s = AdaptiveScheduler::new();

        // Record several pauses
        for _ in 0..20 {
            s.record_young_gc(Duration::from_millis(5));
        }

        // After many samples of 5ms, EMA should converge near 5ms
        let avg_ms = s.young_pause_avg().as_millis();
        assert!(
            avg_ms >= 4 && avg_ms <= 6,
            "Expected ~5ms, got {}ms",
            avg_ms
        );
    }

    // ── EMA helper ──────────────────────────────────────────────────

    #[test]
    fn test_ema_duration_first_sample() {
        let result = ema_duration(Duration::ZERO, Duration::from_millis(100), 0.3);
        assert_eq!(result, Duration::from_millis(100));
    }

    #[test]
    fn test_ema_duration_blends() {
        let current = Duration::from_millis(10);
        let new_sample = Duration::from_millis(20);
        let result = ema_duration(current, new_sample, 0.5);
        // 0.5 * 20 + 0.5 * 10 = 15ms
        assert_eq!(result.as_millis(), 15);
    }

    // ── Old-gen predictive trigger ──────────────────────────────────

    #[test]
    fn test_old_gen_predictive_trigger() {
        let mut s = AdaptiveScheduler::with_config(0.8, 2.0);

        // Simulate: allocate a lot very quickly
        s.record_allocation(10_000_000);

        // Sleep briefly so the elapsed time since construction is reliably
        // nonzero — without this the rate computation could be skipped on
        // platforms with coarse timers, making the test flaky.
        std::thread::sleep(Duration::from_millis(1));

        let metrics = HeapMetrics {
            young_utilization: 0.3, // Below young threshold
            old_free_bytes: 100,    // Almost no space left
            bytes_since_gc: 1_000,  // Below gc_threshold
            gc_threshold: 4_000_000,
            avg_gc_pause_secs: 10.0, // Very long pause time → headroom large
        };

        // With tiny old_free_bytes and large allocation rate, time_to_full is tiny.
        // headroom_factor * avg_gc_pause = 2.0 * 10.0 = 20 seconds
        // time_to_full ≈ 100 / (10_000_000 / elapsed) which should be << 20s
        let result = s.should_collect(&metrics);
        assert_eq!(result, CollectionType::Old);
    }
}