Skip to main content

stryke/
profiler.rs

1//! Wall-clock profiler for `stryke --profile`.
2//!
3//! **Tree-walker**: per-statement line times and [`Profiler::enter_sub`] / [`Profiler::exit_sub`]
4//! around subroutine bodies.
5//!
6//! **Bytecode VM**: per-opcode wall time is charged to that opcode's source line; `Call` / `Return`
7//! add inclusive subroutine samples (Cranelift JIT is disabled while profiling).
8
9use std::collections::HashMap;
10use std::io::Write;
11use std::time::Duration;
12
/// Line- and sub-level timings (nanoseconds).
///
/// Samples are fed in through [`Profiler::on_line`], [`Profiler::enter_sub`] and
/// [`Profiler::exit_sub`]; the report methods read the accumulated tables.
#[derive(Debug)]
pub struct Profiler {
    /// Script name handed to [`Profiler::new`]; printed in report headers.
    file: String,
    /// `(file, line)` → accumulated time (ns).
    line_ns: HashMap<(String, usize), u64>,
    /// Names of subroutines entered but not yet exited, outermost first.
    sub_stack: Vec<String>,
    /// Collapsed stacks `a;b;c` → total ns (flamegraph.pl folded input).
    folded_ns: HashMap<String, u64>,
    /// Per-subroutine name → inclusive time (ns).
    sub_inclusive_ns: HashMap<String, u64>,
}
23
24impl Profiler {
25    pub fn new(file: impl Into<String>) -> Self {
26        Self {
27            file: file.into(),
28            line_ns: HashMap::new(),
29            sub_stack: Vec::new(),
30            folded_ns: HashMap::new(),
31            sub_inclusive_ns: HashMap::new(),
32        }
33    }
34
35    pub fn on_line(&mut self, file: &str, line: usize, dt: Duration) {
36        let ns = dt.as_nanos() as u64;
37        *self.line_ns.entry((file.to_string(), line)).or_insert(0) += ns;
38    }
39
40    pub fn enter_sub(&mut self, name: &str) {
41        self.sub_stack.push(name.to_string());
42    }
43
44    pub fn exit_sub(&mut self, dt: Duration) {
45        let ns = dt.as_nanos() as u64;
46        let Some(name) = self.sub_stack.pop() else {
47            return;
48        };
49        *self.sub_inclusive_ns.entry(name.clone()).or_insert(0) += ns;
50        let prefix = self.sub_stack.join(";");
51        let full = if prefix.is_empty() {
52            name
53        } else {
54            format!("{};{}", prefix, name)
55        };
56        *self.folded_ns.entry(full).or_insert(0) += ns;
57    }
58
59    /// stderr: folded stacks (flamegraph.pl) + line totals + sub totals.
60    pub fn print_report(&mut self) {
61        // Incomplete enter/exit pairs (e.g. `die` before `return`) would confuse folded output.
62        self.sub_stack.clear();
63
64        eprintln!("# stryke --profile: collapsed stacks (name stack → ns); feed to flamegraph.pl");
65        let mut stacks: Vec<_> = self.folded_ns.iter().collect();
66        stacks.sort_by(|a, b| b.1.cmp(a.1));
67        for (k, ns) in stacks.iter() {
68            eprintln!("{} {}", k, ns);
69        }
70
71        eprintln!("# stryke --profile: lines (file:line → total ns)");
72        let mut lines: Vec<_> = self.line_ns.iter().collect();
73        lines.sort_by(|a, b| b.1.cmp(a.1));
74        for ((f, ln), ns) in lines.iter() {
75            eprintln!("{}:{} {}", f, ln, ns);
76        }
77
78        eprintln!("# stryke --profile: subs (name → inclusive ns)");
79        let mut subs: Vec<_> = self.sub_inclusive_ns.iter().collect();
80        subs.sort_by(|a, b| b.1.cmp(a.1));
81        for (name, ns) in subs {
82            eprintln!("{} {}", name, ns);
83        }
84        eprintln!("# profile script: {}", self.file);
85    }
86
87    /// Render an SVG flamegraph to `writer` using the collected folded stacks.
88    pub fn render_flame_svg<W: Write>(&mut self, writer: W) -> std::io::Result<()> {
89        self.sub_stack.clear();
90
91        let lines: Vec<String> = self
92            .folded_ns
93            .iter()
94            .map(|(stack, ns)| format!("{} {}", stack, ns))
95            .collect();
96        let line_refs: Vec<&str> = lines.iter().map(|s| s.as_str()).collect();
97
98        let mut opts = inferno::flamegraph::Options::default();
99        opts.title = format!("stryke --flame: {}", self.file);
100        opts.count_name = "ns".to_string();
101        opts.colors = inferno::flamegraph::color::Palette::Basic(
102            inferno::flamegraph::color::BasicPalette::Hot,
103        );
104        inferno::flamegraph::from_lines(&mut opts, line_refs, writer)
105    }
106
107    /// Render a colored terminal flamegraph to stderr.
108    ///
109    /// Shows: (1) per-sub inclusive bars sorted hottest-first,
110    /// (2) per-stack-frame bars with call depth indentation,
111    /// (3) hottest source lines.
112    pub fn render_flame_tty(&mut self) {
113        self.sub_stack.clear();
114        let total_ns = self.folded_ns.values().copied().max().unwrap_or(1);
115        let term_width = term_width();
116        // reserve columns: "100.0%  " (8) + name (dynamic) + " " + bar + " 999.9ms"
117        let time_suffix_len = 10;
118        let pct_prefix_len = 8;
119
120        // ── header ──────────────────────────────────────────────────
121        eprintln!("\x1b[1;97m── stryke --flame: {} ──\x1b[0m", self.file);
122        eprintln!();
123
124        // ── subroutine inclusive time (flat) ─────────────────────────
125        if !self.sub_inclusive_ns.is_empty() {
126            eprintln!("\x1b[1;97m  Subroutines (inclusive)\x1b[0m");
127            let mut subs: Vec<_> = self.sub_inclusive_ns.iter().collect();
128            subs.sort_by(|a, b| b.1.cmp(a.1));
129            let max_name = subs.iter().map(|(n, _)| n.len()).max().unwrap_or(4).min(40);
130            let bar_budget =
131                term_width.saturating_sub(pct_prefix_len + max_name + 2 + time_suffix_len);
132            for (name, &ns) in &subs {
133                let pct = ns as f64 / total_ns as f64 * 100.0;
134                let bar_len = (ns as f64 / total_ns as f64 * bar_budget as f64) as usize;
135                let color = heat_color(pct);
136                let display_name = if name.len() > 40 {
137                    format!("…{}", &name[name.len() - 39..])
138                } else {
139                    name.to_string()
140                };
141                eprintln!(
142                    "  {:>5.1}%  {:<width$} {}{}\x1b[0m {}",
143                    pct,
144                    display_name,
145                    color,
146                    "█".repeat(bar_len.max(1)),
147                    format_ns(ns),
148                    width = max_name,
149                );
150            }
151            eprintln!();
152        }
153
154        // ── call stacks (tree-style) ────────────────────────────────
155        if !self.folded_ns.is_empty() {
156            eprintln!("\x1b[1;97m  Call stacks\x1b[0m");
157            let mut stacks: Vec<_> = self.folded_ns.iter().collect();
158            stacks.sort_by(|a, b| b.1.cmp(a.1));
159            let max_show = 20;
160            for (stack, &ns) in stacks.iter().take(max_show) {
161                let pct = ns as f64 / total_ns as f64 * 100.0;
162                let depth = stack.matches(';').count();
163                let leaf = stack.rsplit(';').next().unwrap_or(stack);
164                let indent = "  ".repeat(depth);
165                let display = format!("{}{}", indent, leaf);
166                let name_width = display.len().min(50);
167                let bar_budget =
168                    term_width.saturating_sub(pct_prefix_len + name_width + 2 + time_suffix_len);
169                let bar_len = (ns as f64 / total_ns as f64 * bar_budget as f64) as usize;
170                let color = heat_color(pct);
171                eprintln!(
172                    "  {:>5.1}%  {:<width$} {}{}\x1b[0m {}",
173                    pct,
174                    display,
175                    color,
176                    "█".repeat(bar_len.max(1)),
177                    format_ns(ns),
178                    width = name_width,
179                );
180            }
181            if stacks.len() > max_show {
182                eprintln!("  … and {} more stacks", stacks.len() - max_show);
183            }
184            eprintln!();
185        }
186
187        // ── hottest source lines ────────────────────────────────────
188        if !self.line_ns.is_empty() {
189            eprintln!("\x1b[1;97m  Hot lines\x1b[0m");
190            let mut lines: Vec<_> = self.line_ns.iter().collect();
191            lines.sort_by(|a, b| b.1.cmp(a.1));
192            let max_show = 10;
193            let line_total: u64 = lines.iter().map(|(_, &ns)| ns).sum();
194            for ((f, ln), &ns) in lines.iter().take(max_show) {
195                let pct = ns as f64 / line_total as f64 * 100.0;
196                let color = heat_color(pct);
197                eprintln!(
198                    "  {:>5.1}%  {}{}:{}\x1b[0m  {}",
199                    pct,
200                    color,
201                    f,
202                    ln,
203                    format_ns(ns),
204                );
205            }
206        }
207        eprintln!();
208    }
209}
210
211fn term_width() -> usize {
212    #[cfg(unix)]
213    {
214        let mut ws = libc::winsize {
215            ws_row: 0,
216            ws_col: 0,
217            ws_xpixel: 0,
218            ws_ypixel: 0,
219        };
220        if unsafe { libc::ioctl(2, libc::TIOCGWINSZ, &mut ws) } == 0 && ws.ws_col > 0 {
221            return ws.ws_col as usize;
222        }
223    }
224    80
225}
226
/// ANSI color escape for a percentage: hotter shares get warmer colors.
///
/// Thresholds are inclusive lower bounds: 60 → bright red, 30 → bright yellow, 10 → yellow;
/// everything else (including NaN) is green.
fn heat_color(pct: f64) -> &'static str {
    const BRIGHT_RED: &str = "\x1b[1;91m";
    const BRIGHT_YELLOW: &str = "\x1b[1;93m";
    const YELLOW: &str = "\x1b[33m";
    const GREEN: &str = "\x1b[32m";

    match pct {
        hot if hot >= 60.0 => BRIGHT_RED,
        warm if warm >= 30.0 => BRIGHT_YELLOW,
        mild if mild >= 10.0 => YELLOW,
        _ => GREEN,
    }
}
238
/// Human-readable duration for `ns` nanoseconds: `ns` as an integer, larger units with one
/// decimal (`µs`, `ms`, `s`).
///
/// A unit hands over to the next one at the first value that one-decimal rounding would
/// otherwise print as `1000.0`, so 999_999 ns reads `1.0ms` rather than `1000.0µs`.
fn format_ns(ns: u64) -> String {
    // 999.95 of a unit is where `{:.1}` starts rounding up to 1000.0.
    const MILLIS_FROM_NS: u64 = 999_950;
    const SECONDS_FROM_NS: u64 = 999_950_000;

    if ns >= SECONDS_FROM_NS {
        format!("{:.1}s", ns as f64 / 1e9)
    } else if ns >= MILLIS_FROM_NS {
        format!("{:.1}ms", ns as f64 / 1e6)
    } else if ns >= 1_000 {
        format!("{:.1}µs", ns as f64 / 1e3)
    } else {
        format!("{}ns", ns)
    }
}
250
#[cfg(test)]
impl Profiler {
    /// Test-only: accumulated ns for `file:line`, or 0 when that line was never sampled.
    fn line_total_ns(&self, file: &str, line: usize) -> u64 {
        let key = (file.to_string(), line);
        self.line_ns.get(&key).map_or(0, |&ns| ns)
    }

    /// Test-only: accumulated ns for the collapsed stack `key`, or 0 when absent.
    fn folded_total_ns(&self, key: &str) -> u64 {
        match self.folded_ns.get(key) {
            Some(&ns) => ns,
            None => 0,
        }
    }

    /// Test-only: accumulated ns for subroutine `name`, or 0 when absent.
    fn sub_inclusive_total_ns(&self, name: &str) -> u64 {
        self.sub_inclusive_ns.get(name).map_or(0, |&ns| ns)
    }
}
268
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Profiler for the fixture script shared by every test.
    fn profiler() -> Profiler {
        Profiler::new("a.pl")
    }

    /// Shorthand for a sample of `n` nanoseconds.
    fn nanos(n: u64) -> Duration {
        Duration::from_nanos(n)
    }

    /// Two samples on the same line add up under one `(file, line)` key.
    #[test]
    fn on_line_accumulates_per_file_line() {
        let mut prof = profiler();
        for sample in [100, 50] {
            prof.on_line("a.pl", 2, nanos(sample));
        }
        assert_eq!(prof.line_total_ns("a.pl", 2), 150);
    }

    /// Exiting nested subs records each under its own name and under its full stack key.
    #[test]
    fn exit_sub_nested_stack_folded_keys() {
        let mut prof = profiler();
        prof.enter_sub("outer");
        prof.enter_sub("inner");

        prof.exit_sub(nanos(7));
        assert_eq!(prof.sub_inclusive_total_ns("inner"), 7);
        assert_eq!(prof.folded_total_ns("outer;inner"), 7);

        prof.exit_sub(nanos(11));
        assert_eq!(prof.sub_inclusive_total_ns("outer"), 11);
        assert_eq!(prof.folded_total_ns("outer"), 11);
    }

    /// An unmatched exit neither panics nor records anything.
    #[test]
    fn exit_sub_without_matching_enter_is_silent() {
        let mut prof = profiler();
        prof.exit_sub(nanos(1));
        assert_eq!(prof.sub_inclusive_total_ns("nope"), 0);
    }
}
301}