Skip to main content

piano_runtime/
collector.rs

1//! Thread-local timing collector with RAII guards.
2//!
3//! Each instrumented function calls `enter(name)` which pushes a `StackEntry`
4//! onto a thread-local call stack and returns an RAII `Guard`. When the guard
5//! drops (on any exit path), it pops the stack entry, computes elapsed time,
6//! propagates children time to the parent, and records a `RawRecord`.
7//!
8//! `collect()` aggregates raw records into per-function summaries sorted by
9//! self-time descending. `reset()` clears all state for the current thread.
10//!
11//! Flush strategy: `RECORDS` is wrapped in `AutoFlushRecords`, which implements
12//! `Drop`. When thread-local storage is destroyed (normal process exit, thread
13//! join), the `Drop` impl flushes records to disk. This avoids the classic
14//! atexit-vs-TLS ordering problem where atexit fires after TLS is destroyed.
15//!
16//! Thread-locality: all state (stack, records) is thread-local. Each thread
17//! produces an independent call tree. Spawned threads or async tasks that
18//! cross thread boundaries appear as separate, unrelated profiles. This is
19//! the correct model for CPU profiling — wall-clock time on one thread is
20//! independent of work on another.
21
22use std::cell::RefCell;
23use std::collections::HashMap;
24use std::io::Write;
25use std::path::PathBuf;
26use std::sync::LazyLock;
27use std::time::{Instant, SystemTime, UNIX_EPOCH};
28
29/// Process-wide run identifier.
30///
31/// All threads within a single process share this ID, so that JSON files
32/// written by different threads can be correlated during report consolidation.
33static RUN_ID: LazyLock<String> =
34    LazyLock::new(|| format!("{}_{}", std::process::id(), timestamp_ms()));
35
36/// Aggregated timing data for a single function.
37#[derive(Debug, Clone)]
38pub struct FunctionRecord {
39    pub name: String,
40    pub calls: u64,
41    pub total_ms: f64,
42    pub self_ms: f64,
43}
44
45/// Entry on the thread-local timing stack.
46struct StackEntry {
47    name: &'static str,
48    start: Instant,
49    children_ms: f64,
50}
51
52/// Raw measurement produced when a Guard drops.
53struct RawRecord {
54    name: &'static str,
55    elapsed_ms: f64,
56    children_ms: f64,
57}
58
59/// Wrapper around `Vec<RawRecord>` that auto-flushes to disk on Drop.
60///
61/// When thread-local storage is destroyed (process exit, thread join), Drop
62/// fires while the data is still alive — no TLS lookup needed.
63struct AutoFlushRecords {
64    records: Vec<RawRecord>,
65}
66
67impl AutoFlushRecords {
68    const fn new() -> Self {
69        Self {
70            records: Vec::new(),
71        }
72    }
73}
74
75impl Drop for AutoFlushRecords {
76    fn drop(&mut self) {
77        let registered: Vec<&str> = REGISTERED
78            .try_with(|reg| reg.borrow().clone())
79            .unwrap_or_default();
80        if self.records.is_empty() && registered.is_empty() {
81            return;
82        }
83        let aggregated = aggregate(&self.records, &registered);
84        if aggregated.is_empty() {
85            return;
86        }
87        let Some(dir) = runs_dir() else { return };
88        let path = dir.join(format!("{}.json", timestamp_ms()));
89        let _ = write_json(&aggregated, &path);
90    }
91}
92
93thread_local! {
94    static STACK: RefCell<Vec<StackEntry>> = const { RefCell::new(Vec::new()) };
95    static RECORDS: RefCell<AutoFlushRecords> = const { RefCell::new(AutoFlushRecords::new()) };
96    static REGISTERED: RefCell<Vec<&'static str>> = const { RefCell::new(Vec::new()) };
97}
98
99/// RAII timing guard. Records elapsed time on drop.
100#[must_use = "dropping the guard immediately records ~0ms; bind it with `let _guard = ...`"]
101pub struct Guard {
102    /// Prevents manual construction outside this module.
103    _private: (),
104}
105
106impl Drop for Guard {
107    fn drop(&mut self) {
108        STACK.with(|stack| {
109            let entry = stack.borrow_mut().pop();
110            let Some(entry) = entry else {
111                eprintln!("piano-runtime: guard dropped without matching stack entry (bug)");
112                return;
113            };
114            let elapsed_ms = entry.start.elapsed().as_secs_f64() * 1000.0;
115            let children_ms = entry.children_ms;
116
117            // Safe to re-borrow: the RefMut from pop() was dropped above.
118            if let Some(parent) = stack.borrow_mut().last_mut() {
119                parent.children_ms += elapsed_ms;
120            }
121
122            RECORDS.with(|records| {
123                records.borrow_mut().records.push(RawRecord {
124                    name: entry.name,
125                    elapsed_ms,
126                    children_ms,
127                });
128            });
129        });
130    }
131}
132
133/// Start timing a function. Returns a Guard that records the measurement on drop.
134pub fn enter(name: &'static str) -> Guard {
135    STACK.with(|stack| {
136        stack.borrow_mut().push(StackEntry {
137            name,
138            start: Instant::now(),
139            children_ms: 0.0,
140        });
141    });
142    Guard { _private: () }
143}
144
145/// Register a function name so it appears in output even if never called.
146pub fn register(name: &'static str) {
147    REGISTERED.with(|reg| {
148        let mut reg = reg.borrow_mut();
149        if !reg.contains(&name) {
150            reg.push(name);
151        }
152    });
153}
154
155/// Aggregate raw records into per-function summaries, sorted by self_ms descending.
156fn aggregate(raw: &[RawRecord], registered: &[&str]) -> Vec<FunctionRecord> {
157    let mut map: HashMap<&str, (u64, f64, f64)> = HashMap::new();
158
159    for name in registered {
160        map.entry(name).or_insert((0, 0.0, 0.0));
161    }
162
163    for rec in raw {
164        let entry = map.entry(rec.name).or_insert((0, 0.0, 0.0));
165        entry.0 += 1;
166        entry.1 += rec.elapsed_ms;
167        entry.2 += (rec.elapsed_ms - rec.children_ms).max(0.0);
168    }
169
170    let mut result: Vec<FunctionRecord> = map
171        .into_iter()
172        .map(|(name, (calls, total_ms, self_ms))| FunctionRecord {
173            name: name.to_owned(),
174            calls,
175            total_ms,
176            self_ms,
177        })
178        .collect();
179
180    result.sort_by(|a, b| {
181        b.self_ms
182            .partial_cmp(&a.self_ms)
183            .unwrap_or(std::cmp::Ordering::Equal)
184    });
185    result
186}
187
188/// Aggregate raw records into per-function summaries, sorted by self_ms descending.
189pub fn collect() -> Vec<FunctionRecord> {
190    RECORDS
191        .with(|records| REGISTERED.with(|reg| aggregate(&records.borrow().records, &reg.borrow())))
192}
193
194/// Clear all collected timing data.
195pub fn reset() {
196    STACK.with(|stack| stack.borrow_mut().clear());
197    RECORDS.with(|records| records.borrow_mut().records.clear());
198    REGISTERED.with(|reg| reg.borrow_mut().clear());
199}
200
201/// Return the current time as milliseconds since the Unix epoch.
202fn timestamp_ms() -> u128 {
203    SystemTime::now()
204        .duration_since(UNIX_EPOCH)
205        .unwrap_or_default()
206        .as_millis()
207}
208
209/// Return the directory where run files should be written.
210///
211/// Uses `PIANO_RUNS_DIR` env var if set, otherwise `~/.piano/runs/`.
212fn runs_dir() -> Option<PathBuf> {
213    if let Ok(dir) = std::env::var("PIANO_RUNS_DIR") {
214        return Some(PathBuf::from(dir));
215    }
216    dirs_fallback().map(|home| home.join(".piano").join("runs"))
217}
218
219/// Best-effort home directory detection (no deps).
220fn dirs_fallback() -> Option<PathBuf> {
221    std::env::var_os("HOME").map(PathBuf::from)
222}
223
224/// Write a JSON run file from the given function records.
225///
226/// Hand-written JSON via `write!()` — zero serde dependency.
227fn write_json(records: &[FunctionRecord], path: &std::path::Path) -> std::io::Result<()> {
228    if let Some(parent) = path.parent() {
229        std::fs::create_dir_all(parent)?;
230    }
231    let mut f = std::fs::File::create(path)?;
232    let ts = timestamp_ms();
233    let run_id = &*RUN_ID;
234    write!(
235        f,
236        "{{\"run_id\":\"{run_id}\",\"timestamp_ms\":{ts},\"functions\":["
237    )?;
238    for (i, rec) in records.iter().enumerate() {
239        if i > 0 {
240            write!(f, ",")?;
241        }
242        // Escape the function name (in practice only ASCII identifiers, but be safe).
243        let name = rec.name.replace('\\', "\\\\").replace('"', "\\\"");
244        write!(
245            f,
246            "{{\"name\":\"{name}\",\"calls\":{},\"total_ms\":{:.3},\"self_ms\":{:.3}}}",
247            rec.calls, rec.total_ms, rec.self_ms
248        )?;
249    }
250    writeln!(f, "]}}")?;
251    Ok(())
252}
253
254/// Flush collected timing data to a JSON file on disk.
255///
256/// Writes to `PIANO_RUNS_DIR/<timestamp>.json` or `~/.piano/runs/<timestamp>.json`.
257/// No-op if no records were collected or the output directory cannot be determined.
258///
259/// Normally you don't need to call this — `AutoFlushRecords::drop` flushes
260/// automatically when thread-local storage is destroyed (process exit). This
261/// function exists for explicit mid-program flushes.
262pub fn flush() {
263    let records = collect();
264    if records.is_empty() {
265        return;
266    }
267    let Some(dir) = runs_dir() else {
268        return;
269    };
270    let path = dir.join(format!("{}.json", timestamp_ms()));
271    let _ = write_json(&records, &path);
272    reset();
273}
274
275/// No-op retained for API compatibility.
276///
277/// Auto-flush now happens via `AutoFlushRecords::drop` when TLS is destroyed.
278/// Instrumented code may still call `init()` — it's harmless.
279pub fn init() {}
280
281#[cfg(test)]
282mod tests {
283    use super::*;
284    use std::thread;
285    use std::time::Duration;
286
287    /// CPU-bound workload: hash a buffer `iterations` times.
288    /// Uses wrapping arithmetic to prevent optimization while staying deterministic.
289    fn burn_cpu(iterations: u64) {
290        let mut buf = [0x42u8; 4096];
291        for i in 0..iterations {
292            for b in &mut buf {
293                *b = b.wrapping_add(i as u8).wrapping_mul(31);
294            }
295        }
296        std::hint::black_box(&buf);
297    }
298
299    #[test]
300    fn burn_cpu_takes_measurable_time() {
301        let start = std::time::Instant::now();
302        burn_cpu(50_000);
303        let elapsed = start.elapsed();
304        assert!(
305            elapsed.as_millis() >= 1,
306            "burn_cpu(50_000) should take at least 1ms, took {:?}",
307            elapsed
308        );
309    }
310
311    #[test]
312    fn flush_writes_valid_json_to_env_dir() {
313        reset();
314        {
315            let _g = enter("flush_test");
316            thread::sleep(Duration::from_millis(5));
317        }
318
319        let tmp = std::env::temp_dir().join(format!("piano_test_{}", std::process::id()));
320        std::fs::create_dir_all(&tmp).unwrap();
321
322        // Point flush at our temp dir.
323        // SAFETY: Test runs serially (no concurrent env access).
324        unsafe { std::env::set_var("PIANO_RUNS_DIR", &tmp) };
325        flush();
326        unsafe { std::env::remove_var("PIANO_RUNS_DIR") };
327
328        // Find the written file.
329        let files: Vec<_> = std::fs::read_dir(&tmp)
330            .unwrap()
331            .filter_map(|e| e.ok())
332            .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
333            .collect();
334        assert!(!files.is_empty(), "expected at least one JSON file");
335
336        let content = std::fs::read_to_string(files[0].path()).unwrap();
337        assert!(
338            content.contains("\"flush_test\""),
339            "should contain function name"
340        );
341        assert!(
342            content.contains("\"timestamp_ms\""),
343            "should contain timestamp_ms"
344        );
345        assert!(content.contains("\"self_ms\""), "should contain self_ms");
346
347        // Cleanup.
348        let _ = std::fs::remove_dir_all(&tmp);
349    }
350
351    #[test]
352    fn write_json_produces_valid_format() {
353        let records = vec![
354            FunctionRecord {
355                name: "walk".into(),
356                calls: 3,
357                total_ms: 12.5,
358                self_ms: 8.3,
359            },
360            FunctionRecord {
361                name: "resolve".into(),
362                calls: 1,
363                total_ms: 4.2,
364                self_ms: 4.2,
365            },
366        ];
367        let tmp = std::env::temp_dir().join(format!("piano_json_{}.json", std::process::id()));
368        write_json(&records, &tmp).unwrap();
369
370        let content = std::fs::read_to_string(&tmp).unwrap();
371
372        // Verify structure.
373        assert!(
374            content.starts_with("{\"run_id\":\""),
375            "should start with run_id"
376        );
377        assert!(
378            content.contains("\"timestamp_ms\":"),
379            "should contain timestamp_ms"
380        );
381        assert!(
382            content.contains("\"functions\":["),
383            "should have functions array"
384        );
385        assert!(content.contains("\"walk\""), "should contain walk");
386        assert!(content.contains("\"resolve\""), "should contain resolve");
387        assert!(content.contains("\"calls\":3"), "should have calls count");
388
389        let _ = std::fs::remove_file(&tmp);
390    }
391
392    #[test]
393    fn init_can_be_called_multiple_times() {
394        // init() is a no-op retained for API compatibility.
395        init();
396        init();
397        init();
398    }
399
400    #[test]
401    fn single_function_timing() {
402        reset();
403        {
404            let _g = enter("work");
405            thread::sleep(Duration::from_millis(10));
406        }
407        let records = collect();
408        assert_eq!(records.len(), 1);
409        assert_eq!(records[0].name, "work");
410        assert_eq!(records[0].calls, 1);
411        assert!(
412            records[0].total_ms >= 5.0,
413            "total_ms={}",
414            records[0].total_ms
415        );
416        assert!(records[0].self_ms >= 5.0, "self_ms={}", records[0].self_ms);
417    }
418
419    #[test]
420    fn nested_function_self_time() {
421        reset();
422        {
423            let _outer = enter("outer");
424            thread::sleep(Duration::from_millis(10));
425            {
426                let _inner = enter("inner");
427                thread::sleep(Duration::from_millis(10));
428            }
429        }
430        let records = collect();
431        let outer = records
432            .iter()
433            .find(|r| r.name == "outer")
434            .expect("outer not found");
435        let inner = records
436            .iter()
437            .find(|r| r.name == "inner")
438            .expect("inner not found");
439
440        assert!(outer.total_ms >= 15.0, "outer.total_ms={}", outer.total_ms);
441        assert!(outer.self_ms >= 5.0, "outer.self_ms={}", outer.self_ms);
442        assert!(
443            outer.self_ms < outer.total_ms,
444            "self should be less than total"
445        );
446        // inner's self_ms should be approximately equal to its total_ms (no children)
447        let diff = (inner.self_ms - inner.total_ms).abs();
448        assert!(
449            diff < 2.0,
450            "inner self_ms={} total_ms={}",
451            inner.self_ms,
452            inner.total_ms
453        );
454    }
455
456    #[test]
457    fn call_count_tracking() {
458        reset();
459        for _ in 0..5 {
460            let _g = enter("repeated");
461        }
462        let records = collect();
463        assert_eq!(records.len(), 1);
464        assert_eq!(records[0].name, "repeated");
465        assert_eq!(records[0].calls, 5);
466    }
467
468    #[test]
469    fn reset_clears_state() {
470        reset();
471        {
472            let _g = enter("something");
473            thread::sleep(Duration::from_millis(1));
474        }
475        reset();
476        let records = collect();
477        assert!(
478            records.is_empty(),
479            "expected empty after reset, got {} records",
480            records.len()
481        );
482    }
483
484    #[test]
485    fn collect_sorts_by_self_time_descending() {
486        reset();
487        {
488            let _g = enter("fast");
489            thread::sleep(Duration::from_millis(1));
490        }
491        {
492            let _g = enter("slow");
493            thread::sleep(Duration::from_millis(15));
494        }
495        let records = collect();
496        assert_eq!(records.len(), 2);
497        assert_eq!(
498            records[0].name, "slow",
499            "expected slow first, got {:?}",
500            records[0].name
501        );
502        assert_eq!(
503            records[1].name, "fast",
504            "expected fast second, got {:?}",
505            records[1].name
506        );
507    }
508
509    #[test]
510    fn registered_but_uncalled_functions_appear_with_zero_calls() {
511        reset();
512        register("never_called");
513        {
514            let _g = enter("called_once");
515            thread::sleep(Duration::from_millis(1));
516        }
517        let records = collect();
518        assert_eq!(records.len(), 2, "should have both functions");
519        let never = records
520            .iter()
521            .find(|r| r.name == "never_called")
522            .expect("never_called");
523        assert_eq!(never.calls, 0);
524        assert!((never.total_ms).abs() < f64::EPSILON);
525        assert!((never.self_ms).abs() < f64::EPSILON);
526        let called = records
527            .iter()
528            .find(|r| r.name == "called_once")
529            .expect("called_once");
530        assert_eq!(called.calls, 1);
531    }
532
533    #[test]
534    fn json_output_contains_run_id() {
535        reset();
536        {
537            let _g = enter("rid_test");
538            thread::sleep(Duration::from_millis(1));
539        }
540        let tmp = std::env::temp_dir().join(format!("piano_rid_{}", std::process::id()));
541        std::fs::create_dir_all(&tmp).unwrap();
542        unsafe { std::env::set_var("PIANO_RUNS_DIR", &tmp) };
543        flush();
544        unsafe { std::env::remove_var("PIANO_RUNS_DIR") };
545        let files: Vec<_> = std::fs::read_dir(&tmp)
546            .unwrap()
547            .filter_map(|e| e.ok())
548            .filter(|e| e.path().extension().is_some_and(|ext| ext == "json"))
549            .collect();
550        assert!(!files.is_empty());
551        let content = std::fs::read_to_string(files[0].path()).unwrap();
552        assert!(
553            content.contains("\"run_id\":\""),
554            "should contain run_id field: {content}"
555        );
556        let _ = std::fs::remove_dir_all(&tmp);
557    }
558
559    #[test]
560    fn conservation_sequential_calls() {
561        reset();
562        {
563            let _main = enter("main_seq");
564            burn_cpu(10_000);
565            {
566                let _a = enter("a");
567                burn_cpu(30_000);
568            }
569            {
570                let _b = enter("b");
571                burn_cpu(20_000);
572            }
573        }
574        let records = collect();
575        let main_r = records.iter().find(|r| r.name == "main_seq").unwrap();
576        let a_r = records.iter().find(|r| r.name == "a").unwrap();
577        let b_r = records.iter().find(|r| r.name == "b").unwrap();
578
579        let sum_self = main_r.self_ms + a_r.self_ms + b_r.self_ms;
580        let root_total = main_r.total_ms;
581        let error_pct = ((sum_self - root_total) / root_total).abs() * 100.0;
582        assert!(
583            error_pct < 5.0,
584            "conservation violated: sum_self={sum_self:.3}ms, root_total={root_total:.3}ms, error={error_pct:.1}%"
585        );
586    }
587
588    #[test]
589    fn conservation_nested_calls() {
590        reset();
591        {
592            let _main = enter("main_nest");
593            burn_cpu(5_000);
594            {
595                let _a = enter("a_nest");
596                burn_cpu(5_000);
597                {
598                    let _b = enter("b_nest");
599                    burn_cpu(30_000);
600                }
601            }
602        }
603        let records = collect();
604        let main_r = records.iter().find(|r| r.name == "main_nest").unwrap();
605        let a_r = records.iter().find(|r| r.name == "a_nest").unwrap();
606        let b_r = records.iter().find(|r| r.name == "b_nest").unwrap();
607
608        let sum_self = main_r.self_ms + a_r.self_ms + b_r.self_ms;
609        let root_total = main_r.total_ms;
610        let error_pct = ((sum_self - root_total) / root_total).abs() * 100.0;
611        assert!(
612            error_pct < 5.0,
613            "conservation violated: sum_self={sum_self:.3}ms, root_total={root_total:.3}ms, error={error_pct:.1}%"
614        );
615
616        // b has no children, so self_ms should equal total_ms
617        let b_diff = (b_r.self_ms - b_r.total_ms).abs();
618        assert!(
619            b_diff < 0.1,
620            "leaf self_ms should equal total_ms: self={:.3}, total={:.3}",
621            b_r.self_ms,
622            b_r.total_ms
623        );
624    }
625
626    #[test]
627    fn conservation_mixed_topology() {
628        reset();
629        {
630            let _main = enter("main_mix");
631            burn_cpu(5_000);
632            {
633                let _a = enter("a_mix");
634                burn_cpu(10_000);
635                {
636                    let _b = enter("b_mix");
637                    burn_cpu(20_000);
638                }
639            }
640            {
641                let _c = enter("c_mix");
642                burn_cpu(15_000);
643            }
644        }
645        let records = collect();
646        let main_r = records.iter().find(|r| r.name == "main_mix").unwrap();
647
648        let sum_self: f64 = records.iter().map(|r| r.self_ms).sum();
649        let root_total = main_r.total_ms;
650        let error_pct = ((sum_self - root_total) / root_total).abs() * 100.0;
651        assert!(
652            error_pct < 5.0,
653            "conservation violated: sum_self={sum_self:.3}ms, root_total={root_total:.3}ms, error={error_pct:.1}%"
654        );
655    }
656
657    #[test]
658    fn conservation_repeated_calls() {
659        reset();
660        {
661            let _main = enter("main_rep");
662            burn_cpu(5_000);
663            for _ in 0..10 {
664                let _a = enter("a_rep");
665                burn_cpu(5_000);
666            }
667        }
668        let records = collect();
669        let main_r = records.iter().find(|r| r.name == "main_rep").unwrap();
670        let a_r = records.iter().find(|r| r.name == "a_rep").unwrap();
671
672        assert_eq!(a_r.calls, 10);
673
674        let sum_self = main_r.self_ms + a_r.self_ms;
675        let root_total = main_r.total_ms;
676        let error_pct = ((sum_self - root_total) / root_total).abs() * 100.0;
677        assert!(
678            error_pct < 5.0,
679            "conservation violated: sum_self={sum_self:.3}ms, root_total={root_total:.3}ms, error={error_pct:.1}%"
680        );
681    }
682
683    #[test]
684    fn negative_self_time_clamped_to_zero() {
685        // Regression test for the f64 drift clamp in aggregate().
686        // Construct a synthetic RawRecord where children_ms slightly exceeds elapsed_ms
687        // (simulating floating-point accumulation drift).
688        let raw = vec![RawRecord {
689            name: "drifted",
690            elapsed_ms: 10.0,
691            children_ms: 10.001,
692        }];
693        let result = aggregate(&raw, &[]);
694        assert_eq!(result.len(), 1);
695        assert_eq!(
696            result[0].self_ms, 0.0,
697            "negative self-time should be clamped to zero"
698        );
699    }
700
701    #[test]
702    fn guard_overhead_under_1us() {
703        reset();
704        let iterations = 1_000_000u64;
705        let start = std::time::Instant::now();
706        for _ in 0..iterations {
707            let _g = enter("overhead");
708        }
709        let elapsed = start.elapsed();
710        let per_call_ns = elapsed.as_nanos() as f64 / iterations as f64;
711        eprintln!("guard overhead: {per_call_ns:.1}ns per call ({iterations} iterations)");
712        assert!(
713            per_call_ns < 1000.0,
714            "per-call overhead {per_call_ns:.1}ns exceeds 1us limit"
715        );
716        reset();
717    }
718
719    #[test]
720    fn deep_nesting_100_levels() {
721        reset();
722
723        // Pre-generate static names for each level.
724        let names: Vec<&'static str> = (0..100)
725            .map(|i| -> &'static str { Box::leak(format!("level_{i}").into_boxed_str()) })
726            .collect();
727
728        // Build nested call tree iteratively using a vec of guards.
729        let mut guards = Vec::with_capacity(100);
730        for name in &names {
731            guards.push(enter(name));
732            burn_cpu(1_000);
733        }
734        // Drop guards in reverse order (innermost first).
735        while let Some(g) = guards.pop() {
736            drop(g);
737        }
738
739        let records = collect();
740        assert_eq!(records.len(), 100, "expected 100 functions");
741
742        let root = records.iter().find(|r| r.name == "level_0").unwrap();
743
744        // Conservation: sum of self-times should equal root total.
745        let sum_self: f64 = records.iter().map(|r| r.self_ms).sum();
746        let error_pct = ((sum_self - root.total_ms) / root.total_ms).abs() * 100.0;
747        assert!(
748            error_pct < 5.0,
749            "conservation violated at 100 levels: sum_self={sum_self:.3}ms, root_total={:.3}ms, error={error_pct:.1}%",
750            root.total_ms
751        );
752
753        // No negative self-times.
754        for rec in &records {
755            assert!(
756                rec.self_ms >= 0.0,
757                "{} has negative self_ms: {}",
758                rec.name,
759                rec.self_ms
760            );
761        }
762
763        // Each non-leaf should have self_ms < total_ms (except the innermost).
764        let innermost = records.iter().find(|r| r.name == "level_99").unwrap();
765        let diff = (innermost.self_ms - innermost.total_ms).abs();
766        assert!(
767            diff < 0.5,
768            "innermost level should have self ≈ total: self={:.3}, total={:.3}",
769            innermost.self_ms,
770            innermost.total_ms
771        );
772
773        reset();
774    }
775}