// shape_vm/metrics.rs

//! Bounded observability infrastructure for the Shape VM.
//!
//! Provides [`VmMetrics`] — a lightweight, zero-overhead-when-disabled metrics
//! collector with fixed-size ring buffers for tier/GC events and a log-linear
//! histogram for GC pause latencies.

use std::mem::MaybeUninit;

// ---------------------------------------------------------------------------
// RingBuffer<T, N>
// ---------------------------------------------------------------------------

/// Fixed-size circular buffer that overwrites the oldest element when full.
///
/// Backed by an inline `[MaybeUninit<T>; N]` array — no heap growth after
/// construction.
#[derive(Debug)]
pub struct RingBuffer<T, const N: usize> {
    buf: [MaybeUninit<T>; N],
    /// Next write position.
    head: usize,
    /// Number of live elements (≤ N).
    len: usize,
}

impl<T, const N: usize> RingBuffer<T, N> {
    /// Create an empty ring buffer.
    pub const fn new() -> Self {
        Self {
            // SAFETY: an array of `MaybeUninit` is valid in any byte state, so
            // it may be produced without initialising any element.
            buf: unsafe { MaybeUninit::uninit().assume_init() },
            head: 0,
            len: 0,
        }
    }

    /// Push a value, overwriting the oldest entry when the buffer is full.
    pub fn push(&mut self, value: T) {
        if self.len < N {
            // Still filling up — the target slot is uninitialised.
            self.len += 1;
        } else {
            // SAFETY: at capacity every slot holds a live value, so the one at
            // `head` must be dropped before its slot is reused.
            unsafe { self.buf[self.head].assume_init_drop() };
        }
        self.buf[self.head] = MaybeUninit::new(value);
        self.head = (self.head + 1) % N;
    }

    /// Number of live elements.
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns `true` when the buffer contains no elements.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns `true` when the buffer is at capacity.
    #[inline]
    pub fn is_full(&self) -> bool {
        self.len == N
    }

    /// Slot index of the oldest live element (meaningful only when `len > 0`).
    fn oldest(&self) -> usize {
        // Before the first wrap the buffer fills from slot 0; once full, the
        // slot about to be overwritten (`head`) holds the oldest value.
        if self.len == N {
            self.head
        } else {
            0
        }
    }

    /// Drop all elements and reset to empty.
    pub fn clear(&mut self) {
        let first = self.oldest();
        for offset in 0..self.len {
            // SAFETY: every slot in the live window `first..first+len` (mod N)
            // is initialised.
            unsafe { self.buf[(first + offset) % N].assume_init_drop() };
        }
        self.head = 0;
        self.len = 0;
    }

    /// Iterate from oldest to newest.
    pub fn iter(&self) -> RingBufferIter<'_, T, N> {
        RingBufferIter {
            src: self,
            cursor: self.oldest(),
            left: self.len,
        }
    }

    /// Reference to the most recently pushed element.
    pub fn last(&self) -> Option<&T> {
        match self.len {
            0 => None,
            _ => {
                // `head` is one past the newest element; step back with wrap.
                let idx = (self.head + N - 1) % N;
                // SAFETY: len > 0 guarantees the slot before `head` is live.
                Some(unsafe { self.buf[idx].assume_init_ref() })
            }
        }
    }
}

impl<T, const N: usize> Drop for RingBuffer<T, N> {
    fn drop(&mut self) {
        // `clear` drops every live element exactly once.
        self.clear();
    }
}

/// Iterator over a [`RingBuffer`] from oldest to newest.
pub struct RingBufferIter<'a, T, const N: usize> {
    /// Buffer being walked.
    src: &'a RingBuffer<T, N>,
    /// Slot index of the next element to yield.
    cursor: usize,
    /// Elements not yet yielded.
    left: usize,
}

impl<'a, T, const N: usize> Iterator for RingBufferIter<'a, T, N> {
    type Item = &'a T;

    fn next(&mut self) -> Option<Self::Item> {
        if self.left == 0 {
            return None;
        }
        self.left -= 1;
        // SAFETY: `cursor` stays inside the live window while elements remain.
        let item = unsafe { self.src.buf[self.cursor].assume_init_ref() };
        self.cursor = (self.cursor + 1) % N;
        Some(item)
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.left, Some(self.left))
    }
}

impl<'a, T, const N: usize> ExactSizeIterator for RingBufferIter<'a, T, N> {}

// ---------------------------------------------------------------------------
// Histogram (log-linear buckets)
// ---------------------------------------------------------------------------

/// Log-linear histogram for recording latency values in microseconds.
///
/// Bucket boundaries (µs): 1, 2, 5, 10, 20, 50, 100, 200, 500, 1_000, 2_000,
/// 5_000, 10_000, 20_000, 50_000, 100_000. Values above 100_000 µs land in the
/// overflow bucket.
#[derive(Debug)]
pub struct Histogram {
    /// Count per bucket (len = boundaries.len() + 1 for the overflow bucket).
    buckets: Vec<u64>,
    /// Inclusive upper bound of each bucket in microseconds, sorted ascending.
    boundaries: Vec<u64>,
    /// Number of values recorded since creation / last reset.
    total_count: u64,
    /// Running sum of recorded values (µs); basis for `mean`.
    total_sum: u64,
    /// Smallest recorded value; `u64::MAX` while empty.
    min: u64,
    /// Largest recorded value; 0 while empty.
    max: u64,
}

impl Histogram {
    /// Create a new histogram with default log-linear bucket boundaries.
    pub fn new() -> Self {
        let boundaries: Vec<u64> = vec![
            1, 2, 5, 10, 20, 50, 100, 200, 500, 1_000, 2_000, 5_000, 10_000, 20_000, 50_000,
            100_000,
        ];
        let bucket_count = boundaries.len() + 1; // +1 overflow
        Self {
            buckets: vec![0u64; bucket_count],
            boundaries,
            total_count: 0,
            total_sum: 0,
            min: u64::MAX,
            max: 0,
        }
    }

    /// Record a value in microseconds.
    pub fn record(&mut self, value_us: u64) {
        self.total_count += 1;
        // Saturate rather than overflow: the mean degrades gracefully instead
        // of panicking (debug) or wrapping (release) on a pathological sum.
        self.total_sum = self.total_sum.saturating_add(value_us);
        self.min = self.min.min(value_us);
        self.max = self.max.max(value_us);

        // Find the first boundary that is >= value_us. `Err(i)` is the
        // insertion point — the first boundary strictly greater than the
        // value — and an index of `boundaries.len()` selects the overflow
        // bucket.
        let bucket_idx = self
            .boundaries
            .binary_search(&value_us)
            .unwrap_or_else(|insertion| insertion);
        self.buckets[bucket_idx] += 1;
    }

    /// Approximate percentile (0.0–1.0), reported as the upper bound of the
    /// bucket containing the p-th sample. Returns 0 when the histogram is
    /// empty; samples in the overflow bucket report the maximum observed.
    pub fn percentile(&self, p: f64) -> u64 {
        if self.total_count == 0 {
            return 0;
        }
        // Rank of the target sample. Clamped to >= 1 so that p == 0.0 resolves
        // to the first *non-empty* bucket (the minimum) rather than always
        // returning the first bucket's boundary.
        let threshold = ((p * self.total_count as f64).ceil() as u64).max(1);
        let mut cumulative: u64 = 0;
        for (i, &count) in self.buckets.iter().enumerate() {
            cumulative += count;
            if cumulative >= threshold {
                return if i < self.boundaries.len() {
                    self.boundaries[i]
                } else {
                    // Overflow bucket — best available estimate is the max.
                    self.max
                };
            }
        }
        self.max
    }

    /// Mean value in microseconds. Returns 0.0 for an empty histogram.
    pub fn mean(&self) -> f64 {
        if self.total_count == 0 {
            0.0
        } else {
            self.total_sum as f64 / self.total_count as f64
        }
    }

    /// Total number of recorded values.
    #[inline]
    pub fn count(&self) -> u64 {
        self.total_count
    }

    /// Reset all buckets and statistics.
    pub fn reset(&mut self) {
        self.buckets.fill(0);
        self.total_count = 0;
        self.total_sum = 0;
        self.min = u64::MAX;
        self.max = 0;
    }
}

impl Default for Histogram {
    /// Equivalent to [`Histogram::new`].
    fn default() -> Self {
        Self::new()
    }
}

// ---------------------------------------------------------------------------
// Event structs
// ---------------------------------------------------------------------------

/// A tier transition event for a single function.
///
/// Stored in the `tier_events` ring of [`VmMetrics`] via
/// [`VmMetrics::record_tier_event`].
#[derive(Clone, Debug)]
pub struct TierEvent {
    /// Bytecode function id.
    pub function_id: u16,
    /// Source tier: 0 = Interpreted, 1 = BaselineJit, 2 = OptimizingJit.
    pub from_tier: u8,
    /// Target tier (same encoding as `from_tier`).
    pub to_tier: u8,
    /// Cumulative call count at transition time.
    pub call_count: u32,
    /// Microseconds since VM start.
    pub timestamp_us: u64,
}

/// A garbage-collection pause event.
///
/// Recorded through [`VmMetrics::record_gc_pause`], which also feeds
/// `pause_us` into the GC pause latency histogram.
#[derive(Clone, Debug)]
pub struct GcPauseEvent {
    /// Collection type: 0 = Young, 1 = Old, 2 = Full.
    pub collection_type: u8,
    /// Pause duration in microseconds.
    pub pause_us: u64,
    /// Bytes freed by this collection.
    pub bytes_collected: usize,
    /// Bytes promoted to an older generation.
    pub bytes_promoted: usize,
    /// Microseconds since VM start.
    pub timestamp_us: u64,
}

// ---------------------------------------------------------------------------
// VmMetrics
// ---------------------------------------------------------------------------

/// Aggregated VM metrics with bounded memory usage.
///
/// When enabled on the owning virtual machine, counters are bumped inline and
/// events are pushed into fixed-size ring buffers — memory use is bounded no
/// matter how long the VM runs. The structure is `Option`-wrapped in the VM so
/// disabled metrics have **zero** per-instruction overhead.
#[derive(Debug)]
pub struct VmMetrics {
    /// Total bytecode instructions dispatched.
    pub instructions_executed: u64,
    /// Typed opcodes that ran without a guard check.
    pub typed_trusted_ops: u64,
    /// Typed opcodes that required a runtime type guard.
    pub typed_guarded_ops: u64,
    /// Calls dispatched through JIT-compiled code.
    pub jit_dispatches: u64,
    /// Calls dispatched through the interpreter (also bumped by
    /// `record_deopt_fallback`, since a deopt re-enters the interpreter).
    pub interpreter_calls: u64,
    /// Recent tier transition events (last 256; oldest overwritten).
    pub tier_events: RingBuffer<TierEvent, 256>,
    /// Recent GC pause events (last 256; oldest overwritten).
    pub gc_pauses: RingBuffer<GcPauseEvent, 256>,
    /// GC pause duration histogram (µs).
    pub gc_pause_histogram: Histogram,
    /// Instant at which this metrics session started; basis for `elapsed_us`.
    start_time: std::time::Instant,
}
314
315impl VmMetrics {
316    /// Create a fresh metrics collector.
317    pub fn new() -> Self {
318        Self {
319            instructions_executed: 0,
320            typed_trusted_ops: 0,
321            typed_guarded_ops: 0,
322            jit_dispatches: 0,
323            interpreter_calls: 0,
324            tier_events: RingBuffer::new(),
325            gc_pauses: RingBuffer::new(),
326            gc_pause_histogram: Histogram::new(),
327            start_time: std::time::Instant::now(),
328        }
329    }
330
331    #[inline]
332    pub fn record_instruction(&mut self) {
333        self.instructions_executed += 1;
334    }
335
336    #[inline]
337    pub fn record_trusted_op(&mut self) {
338        self.typed_trusted_ops += 1;
339    }
340
341    #[inline]
342    pub fn record_guarded_op(&mut self) {
343        self.typed_guarded_ops += 1;
344    }
345
346    #[inline]
347    pub fn record_jit_dispatch(&mut self) {
348        self.jit_dispatches += 1;
349    }
350
351    #[inline]
352    pub fn record_interpreter_call(&mut self) {
353        self.interpreter_calls += 1;
354    }
355
356    /// Record a deopt fallback (re-exec-from-entry) — should be rare in production.
357    #[inline]
358    pub fn record_deopt_fallback(&mut self) {
359        // Counted under interpreter_calls since we re-enter the interpreter.
360        self.interpreter_calls += 1;
361    }
362
363    pub fn record_tier_event(&mut self, event: TierEvent) {
364        self.tier_events.push(event);
365    }
366
367    pub fn record_gc_pause(&mut self, event: GcPauseEvent) {
368        self.gc_pause_histogram.record(event.pause_us);
369        self.gc_pauses.push(event);
370    }
371
372    /// Microseconds elapsed since this metrics session started.
373    pub fn elapsed_us(&self) -> u64 {
374        self.start_time.elapsed().as_micros() as u64
375    }
376
377    /// Compute a summary snapshot for logging / display.
378    pub fn summary(&self) -> MetricsSummary {
379        let total_typed = self.typed_trusted_ops + self.typed_guarded_ops;
380        let total_dispatch = self.jit_dispatches + self.interpreter_calls;
381        MetricsSummary {
382            instructions_executed: self.instructions_executed,
383            trusted_ratio: if total_typed > 0 {
384                self.typed_trusted_ops as f64 / total_typed as f64
385            } else {
386                0.0
387            },
388            jit_ratio: if total_dispatch > 0 {
389                self.jit_dispatches as f64 / total_dispatch as f64
390            } else {
391                0.0
392            },
393            gc_pause_p50_us: self.gc_pause_histogram.percentile(0.50),
394            gc_pause_p99_us: self.gc_pause_histogram.percentile(0.99),
395            total_gc_pauses: self.gc_pause_histogram.count(),
396        }
397    }
398}

/// A point-in-time summary of VM metrics, produced by [`VmMetrics::summary`].
#[derive(Debug, Clone)]
pub struct MetricsSummary {
    /// Total bytecode instructions dispatched.
    pub instructions_executed: u64,
    /// Fraction of typed ops that were trusted (0.0–1.0); 0.0 when no typed
    /// ops were recorded.
    pub trusted_ratio: f64,
    /// Fraction of dispatches that went through JIT (0.0–1.0); 0.0 when no
    /// dispatches were recorded.
    pub jit_ratio: f64,
    /// Median GC pause (µs).
    pub gc_pause_p50_us: u64,
    /// 99th-percentile GC pause (µs).
    pub gc_pause_p99_us: u64,
    /// Total number of GC pauses recorded.
    pub total_gc_pauses: u64,
}

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;

    // -- RingBuffer tests ---------------------------------------------------

    #[test]
    fn ring_buffer_empty() {
        let rb: RingBuffer<u32, 4> = RingBuffer::new();
        assert!(rb.is_empty());
        assert!(!rb.is_full());
        assert_eq!(rb.len(), 0);
        assert!(rb.last().is_none());
        assert_eq!(rb.iter().count(), 0);
    }

    #[test]
    fn ring_buffer_push_within_capacity() {
        let mut rb: RingBuffer<u32, 4> = RingBuffer::new();
        rb.push(10);
        rb.push(20);
        rb.push(30);

        assert_eq!(rb.len(), 3);
        assert!(!rb.is_full());
        assert_eq!(*rb.last().unwrap(), 30);

        // Iteration order is oldest → newest.
        let items: Vec<&u32> = rb.iter().collect();
        assert_eq!(items, vec![&10, &20, &30]);
    }

    #[test]
    fn ring_buffer_push_exactly_full() {
        let mut rb: RingBuffer<u32, 4> = RingBuffer::new();
        for i in 0..4 {
            rb.push(i);
        }
        assert!(rb.is_full());
        assert_eq!(rb.len(), 4);
        assert_eq!(*rb.last().unwrap(), 3);

        let items: Vec<u32> = rb.iter().copied().collect();
        assert_eq!(items, vec![0, 1, 2, 3]);
    }

    #[test]
    fn ring_buffer_overflow_wraps() {
        let mut rb: RingBuffer<u32, 4> = RingBuffer::new();
        for i in 0..7 {
            rb.push(i);
        }
        // Should contain [3, 4, 5, 6] — oldest three overwritten
        assert!(rb.is_full());
        assert_eq!(rb.len(), 4);
        assert_eq!(*rb.last().unwrap(), 6);

        let items: Vec<u32> = rb.iter().copied().collect();
        assert_eq!(items, vec![3, 4, 5, 6]);
    }

    #[test]
    fn ring_buffer_overflow_many_wraps() {
        // Many full wrap-arounds: only the newest N elements survive.
        let mut rb: RingBuffer<u32, 3> = RingBuffer::new();
        for i in 0..100 {
            rb.push(i);
        }
        assert_eq!(rb.len(), 3);
        let items: Vec<u32> = rb.iter().copied().collect();
        assert_eq!(items, vec![97, 98, 99]);
    }

    #[test]
    fn ring_buffer_clear() {
        let mut rb: RingBuffer<u32, 4> = RingBuffer::new();
        rb.push(1);
        rb.push(2);
        rb.push(3);
        rb.clear();
        assert!(rb.is_empty());
        assert_eq!(rb.len(), 0);
        assert!(rb.last().is_none());
        assert_eq!(rb.iter().count(), 0);

        // Can push after clear
        rb.push(10);
        assert_eq!(rb.len(), 1);
        assert_eq!(*rb.last().unwrap(), 10);
    }

    #[test]
    fn ring_buffer_clear_when_full() {
        // Clearing a wrapped buffer exercises the head-relative drop path.
        let mut rb: RingBuffer<u32, 3> = RingBuffer::new();
        for i in 0..5 {
            rb.push(i);
        }
        rb.clear();
        assert!(rb.is_empty());
        rb.push(100);
        let items: Vec<u32> = rb.iter().copied().collect();
        assert_eq!(items, vec![100]);
    }

    #[test]
    fn ring_buffer_size_one() {
        // Degenerate capacity: every push replaces the single slot.
        let mut rb: RingBuffer<u32, 1> = RingBuffer::new();
        rb.push(42);
        assert!(rb.is_full());
        assert_eq!(*rb.last().unwrap(), 42);
        assert_eq!(rb.iter().copied().collect::<Vec<_>>(), vec![42]);

        rb.push(99);
        assert_eq!(*rb.last().unwrap(), 99);
        assert_eq!(rb.iter().copied().collect::<Vec<_>>(), vec![99]);
    }

    #[test]
    fn ring_buffer_drop_non_copy_types() {
        // Use String to verify Drop is called correctly (no double-free / leak).
        let mut rb: RingBuffer<String, 3> = RingBuffer::new();
        rb.push("hello".to_string());
        rb.push("world".to_string());
        rb.push("foo".to_string());
        rb.push("bar".to_string()); // overwrites "hello"

        let items: Vec<&str> = rb.iter().map(|s| s.as_str()).collect();
        assert_eq!(items, vec!["world", "foo", "bar"]);
    }

    #[test]
    fn ring_buffer_iter_exact_size() {
        let mut rb: RingBuffer<u32, 4> = RingBuffer::new();
        rb.push(1);
        rb.push(2);
        let iter = rb.iter();
        assert_eq!(iter.len(), 2);
    }

    // -- Histogram tests ----------------------------------------------------

    #[test]
    fn histogram_empty() {
        let h = Histogram::new();
        assert_eq!(h.count(), 0);
        assert_eq!(h.mean(), 0.0);
        assert_eq!(h.percentile(0.5), 0);
        assert_eq!(h.percentile(0.99), 0);
    }

    #[test]
    fn histogram_single_value() {
        let mut h = Histogram::new();
        h.record(50); // bucket boundary = 50
        assert_eq!(h.count(), 1);
        assert_eq!(h.mean(), 50.0);
        assert_eq!(h.percentile(0.5), 50);
        assert_eq!(h.percentile(0.99), 50);
    }

    #[test]
    fn histogram_min_max() {
        let mut h = Histogram::new();
        h.record(10);
        h.record(500);
        h.record(200);
        assert_eq!(h.min, 10);
        assert_eq!(h.max, 500);
    }

    #[test]
    fn histogram_percentile_distribution() {
        let mut h = Histogram::new();
        // Record 100 values of 5µs and 100 values of 1000µs.
        for _ in 0..100 {
            h.record(5);
        }
        for _ in 0..100 {
            h.record(1000);
        }
        assert_eq!(h.count(), 200);
        // p50 should be in the low bucket (5µs), p75+ should be in the high bucket.
        assert!(h.percentile(0.25) <= 5);
        assert!(h.percentile(0.75) >= 1000);
    }

    #[test]
    fn histogram_overflow_bucket() {
        let mut h = Histogram::new();
        h.record(500_000); // well above 100_000 boundary
        assert_eq!(h.count(), 1);
        // p50 should return the max (overflow bucket).
        assert_eq!(h.percentile(0.5), 500_000);
    }

    #[test]
    fn histogram_reset() {
        let mut h = Histogram::new();
        h.record(10);
        h.record(20);
        h.reset();
        assert_eq!(h.count(), 0);
        assert_eq!(h.mean(), 0.0);
        assert_eq!(h.min, u64::MAX);
        assert_eq!(h.max, 0);
    }

    #[test]
    fn histogram_mean_accuracy() {
        let mut h = Histogram::new();
        h.record(100);
        h.record(200);
        h.record(300);
        assert!((h.mean() - 200.0).abs() < 0.01);
    }

    // -- VmMetrics tests ----------------------------------------------------

    #[test]
    fn vm_metrics_counters() {
        let mut m = VmMetrics::new();
        m.record_instruction();
        m.record_instruction();
        m.record_trusted_op();
        m.record_guarded_op();
        m.record_guarded_op();
        m.record_jit_dispatch();
        m.record_interpreter_call();
        m.record_interpreter_call();
        m.record_interpreter_call();

        assert_eq!(m.instructions_executed, 2);
        assert_eq!(m.typed_trusted_ops, 1);
        assert_eq!(m.typed_guarded_ops, 2);
        assert_eq!(m.jit_dispatches, 1);
        assert_eq!(m.interpreter_calls, 3);
    }

    #[test]
    fn vm_metrics_tier_events() {
        let mut m = VmMetrics::new();
        m.record_tier_event(TierEvent {
            function_id: 42,
            from_tier: 0,
            to_tier: 1,
            call_count: 1000,
            timestamp_us: 123456,
        });
        assert_eq!(m.tier_events.len(), 1);
        let last = m.tier_events.last().unwrap();
        assert_eq!(last.function_id, 42);
        assert_eq!(last.from_tier, 0);
        assert_eq!(last.to_tier, 1);
    }

    #[test]
    fn vm_metrics_gc_pause_events() {
        // GC pauses are recorded twice: once in the ring, once in the histogram.
        let mut m = VmMetrics::new();
        m.record_gc_pause(GcPauseEvent {
            collection_type: 0,
            pause_us: 150,
            bytes_collected: 4096,
            bytes_promoted: 0,
            timestamp_us: 100_000,
        });
        m.record_gc_pause(GcPauseEvent {
            collection_type: 2,
            pause_us: 5000,
            bytes_collected: 1024 * 1024,
            bytes_promoted: 512,
            timestamp_us: 200_000,
        });
        assert_eq!(m.gc_pauses.len(), 2);
        assert_eq!(m.gc_pause_histogram.count(), 2);
    }

    #[test]
    fn vm_metrics_summary() {
        let mut m = VmMetrics::new();
        m.instructions_executed = 10_000;
        m.typed_trusted_ops = 800;
        m.typed_guarded_ops = 200;
        m.jit_dispatches = 300;
        m.interpreter_calls = 700;

        // Record some GC pauses.
        for _ in 0..10 {
            m.record_gc_pause(GcPauseEvent {
                collection_type: 0,
                pause_us: 50,
                bytes_collected: 1024,
                bytes_promoted: 0,
                timestamp_us: 0,
            });
        }

        let s = m.summary();
        assert_eq!(s.instructions_executed, 10_000);
        assert!((s.trusted_ratio - 0.8).abs() < 0.01);
        assert!((s.jit_ratio - 0.3).abs() < 0.01);
        assert_eq!(s.total_gc_pauses, 10);
        assert!(s.gc_pause_p50_us <= 50);
    }

    #[test]
    fn vm_metrics_summary_zero_division() {
        // Ratios must come back 0.0 (not NaN) when nothing was recorded.
        let m = VmMetrics::new();
        let s = m.summary();
        assert_eq!(s.trusted_ratio, 0.0);
        assert_eq!(s.jit_ratio, 0.0);
        assert_eq!(s.total_gc_pauses, 0);
    }

    #[test]
    fn vm_metrics_elapsed() {
        let m = VmMetrics::new();
        // Elapsed should be non-negative and very small.
        // NOTE(review): a 1-second bound could flake on a heavily loaded CI
        // machine — consider loosening if this ever proves noisy.
        assert!(m.elapsed_us() < 1_000_000); // less than 1 second
    }
}