iai_callgrind_runner/runner/callgrind/
mod.rs

1pub mod args;
2pub mod flamegraph;
3pub mod flamegraph_parser;
4pub mod hashmap_parser;
5pub mod model;
6pub mod parser;
7pub mod summary_parser;
8
9use std::convert::Into;
10use std::path::PathBuf;
11
12use colored::Colorize;
13use itertools::Itertools;
14use parser::{CallgrindProperties, ParserOutput};
15
16use self::model::Metrics;
17use super::summary::{
18    CallgrindRegression, MetricsSummary, ToolMetricSummary, ToolRun, ToolRunSegment,
19};
20use crate::api::{self, EventKind};
21use crate::util::{to_string_signed_short, EitherOrBoth};
22
23#[derive(Debug, Clone)]
24pub struct Summary {
25    pub details: EitherOrBoth<(PathBuf, CallgrindProperties)>,
26    pub metrics_summary: MetricsSummary,
27}
28
29#[derive(Debug, Clone)]
30pub struct Summaries {
31    pub summaries: Vec<Summary>,
32    pub total: MetricsSummary,
33}
34
/// The cache hit counts and the estimated cycles derived from the cache simulation events
///
/// The values are calculated in the `TryFrom<&Metrics>` implementation of this struct.
#[derive(Debug, Clone)]
pub struct CacheSummary {
    /// Accesses which were served by the first level caches
    l1_hits: u64,
    /// Accesses which missed the first level caches but not the last level cache
    l3_hits: u64,
    /// Accesses which missed the last level cache
    ram_hits: u64,
    /// The sum of the instruction reads (`Ir`), data reads (`Dr`) and data writes (`Dw`)
    total_memory_rw: u64,
    /// The estimated cycles: `l1_hits + 5 * l3_hits + 35 * ram_hits`
    cycles: u64,
}
43
44#[derive(Debug, Clone)]
45pub struct RegressionConfig {
46    pub limits: Vec<(EventKind, f64)>,
47    pub fail_fast: bool,
48}
49
50impl TryFrom<&Metrics> for CacheSummary {
51    type Error = anyhow::Error;
52
53    fn try_from(value: &Metrics) -> std::result::Result<Self, Self::Error> {
54        use EventKind::*;
55        //         0   1  2    3    4    5    6    7    8
56        // events: Ir Dr Dw I1mr D1mr D1mw ILmr DLmr DLmw
57        let instructions = value.try_metric_by_kind(&Ir)?;
58        let total_data_cache_reads = value.try_metric_by_kind(&Dr)?;
59        let total_data_cache_writes = value.try_metric_by_kind(&Dw)?;
60        let l1_instructions_cache_read_misses = value.try_metric_by_kind(&I1mr)?;
61        let l1_data_cache_read_misses = value.try_metric_by_kind(&D1mr)?;
62        let l1_data_cache_write_misses = value.try_metric_by_kind(&D1mw)?;
63        let l3_instructions_cache_read_misses = value.try_metric_by_kind(&ILmr)?;
64        let l3_data_cache_read_misses = value.try_metric_by_kind(&DLmr)?;
65        let l3_data_cache_write_misses = value.try_metric_by_kind(&DLmw)?;
66
67        let ram_hits = l3_instructions_cache_read_misses
68            + l3_data_cache_read_misses
69            + l3_data_cache_write_misses;
70        let l1_data_accesses = l1_data_cache_read_misses + l1_data_cache_write_misses;
71        let l1_miss = l1_instructions_cache_read_misses + l1_data_accesses;
72        let l3_accesses = l1_miss;
73        let l3_hits = l3_accesses - ram_hits;
74
75        let total_memory_rw = instructions + total_data_cache_reads + total_data_cache_writes;
76        let l1_hits = total_memory_rw - ram_hits - l3_hits;
77
78        // Uses Itamar Turner-Trauring's formula from https://pythonspeed.com/articles/consistent-benchmarking-in-ci/
79        let cycles = l1_hits + (5 * l3_hits) + (35 * ram_hits);
80
81        Ok(Self {
82            l1_hits,
83            l3_hits,
84            ram_hits,
85            total_memory_rw,
86            cycles,
87        })
88    }
89}
90
91impl RegressionConfig {
92    /// Check regression of the [`super::metrics::Metrics`] for the configured [`EventKind`]s and
93    /// print it
94    ///
95    /// If the old `Metrics` is None then no regression checks are performed and this method returns
96    /// [`Ok`].
97    ///
98    /// # Errors
99    ///
100    /// Returns an [`anyhow::Error`] with the only source [`crate::error::Error::RegressionError`]
101    /// if a regression error occurred
102    pub fn check_and_print(&self, metrics_summary: &MetricsSummary) -> Vec<CallgrindRegression> {
103        let regression = self.check(metrics_summary);
104
105        for CallgrindRegression {
106            event_kind,
107            new,
108            old,
109            diff_pct,
110            limit,
111        } in &regression
112        {
113            if limit.is_sign_positive() {
114                eprintln!(
115                    "Performance has {0}: {1} ({new} > {old}) regressed by {2:>+6} (>{3:>+6})",
116                    "regressed".bold().bright_red(),
117                    event_kind.to_string().bold(),
118                    format!("{}%", to_string_signed_short(*diff_pct))
119                        .bold()
120                        .bright_red(),
121                    to_string_signed_short(*limit).bright_black()
122                );
123            } else {
124                eprintln!(
125                    "Performance has {0}: {1} ({new} < {old}) regressed by {2:>+6} (<{3:>+6})",
126                    "regressed".bold().bright_red(),
127                    event_kind.to_string().bold(),
128                    format!("{}%", to_string_signed_short(*diff_pct))
129                        .bold()
130                        .bright_red(),
131                    to_string_signed_short(*limit).bright_black()
132                );
133            }
134        }
135
136        regression
137    }
138
139    // Check the `MetricsSummary` for regressions.
140    //
141    // The limits for event kinds which are not present in the `MetricsSummary` are ignored.
142    pub fn check(&self, metrics_summary: &MetricsSummary) -> Vec<CallgrindRegression> {
143        let mut regressions = vec![];
144        for (event_kind, new_cost, old_cost, pct, limit) in
145            self.limits.iter().filter_map(|(event_kind, limit)| {
146                metrics_summary.diff_by_kind(event_kind).and_then(|d| {
147                    if let EitherOrBoth::Both(new, old) = d.metrics {
148                        // This unwrap is safe since the diffs are calculated if both costs are
149                        // present
150                        Some((event_kind, new, old, d.diffs.unwrap().diff_pct, limit))
151                    } else {
152                        None
153                    }
154                })
155            })
156        {
157            if limit.is_sign_positive() {
158                if pct > *limit {
159                    let regression = CallgrindRegression {
160                        event_kind: *event_kind,
161                        new: new_cost,
162                        old: old_cost,
163                        diff_pct: pct,
164                        limit: *limit,
165                    };
166                    regressions.push(regression);
167                }
168            } else if pct < *limit {
169                let regression = CallgrindRegression {
170                    event_kind: *event_kind,
171                    new: new_cost,
172                    old: old_cost,
173                    diff_pct: pct,
174                    limit: *limit,
175                };
176                regressions.push(regression);
177            } else {
178                // no regression
179            }
180        }
181        regressions
182    }
183}
184
185impl From<api::RegressionConfig> for RegressionConfig {
186    fn from(value: api::RegressionConfig) -> Self {
187        let api::RegressionConfig { limits, fail_fast } = value;
188        RegressionConfig {
189            limits: if limits.is_empty() {
190                vec![(EventKind::Ir, 10f64)]
191            } else {
192                limits
193            },
194            fail_fast: fail_fast.unwrap_or(false),
195        }
196    }
197}
198
199impl Default for RegressionConfig {
200    fn default() -> Self {
201        Self {
202            limits: vec![(EventKind::Ir, 10f64)],
203            fail_fast: Default::default(),
204        }
205    }
206}
207
208impl Summaries {
209    /// Group the output by pid, then by parts and then by threads
210    ///
211    /// The grouping simplifies the zipping of the new and old parser output later.
212    ///
213    /// A simplified example. `(pid, part, thread)`
214    ///
215    /// ```rust,ignore
216    /// let parsed: Vec<(i32, u64, usize)> = [
217    ///     (10, 1, 1),
218    ///     (10, 1, 2),
219    ///     (20, 1, 1)
220    /// ];
221    ///
222    /// let grouped = group(parsed);
223    /// assert_eq!(grouped,
224    /// vec![
225    ///     vec![
226    ///         vec![
227    ///             (10, 1, 1),
228    ///             (10, 1, 2)
229    ///         ]
230    ///     ],
231    ///     vec![
232    ///         vec![
233    ///             (20, 1, 1)
234    ///         ]
235    ///     ]
236    /// ])
237    /// ```
238    fn group(
239        parsed: impl Iterator<Item = (PathBuf, CallgrindProperties, Metrics)>,
240    ) -> Vec<Vec<Vec<(PathBuf, CallgrindProperties, Metrics)>>> {
241        let mut grouped = vec![];
242        let mut cur_pid = 0_i32;
243        let mut cur_part = 0;
244
245        for element in parsed {
246            let pid = element.1.pid.unwrap_or(0_i32);
247            let part = element.1.part.unwrap_or(0);
248
249            if pid != cur_pid {
250                grouped.push(vec![vec![element]]);
251                cur_pid = pid;
252                cur_part = part;
253            } else if part != cur_part {
254                let parts = grouped.last_mut().unwrap();
255                parts.push(vec![element]);
256                cur_part = part;
257            } else {
258                let parts = grouped.last_mut().unwrap();
259                let threads = parts.last_mut().unwrap();
260                threads.push(element);
261            }
262        }
263        grouped
264    }
265
266    /// Create a new `Summaries` from the output(s) of the callgrind parser.
267    ///
268    /// The summaries created from the new parser outputs and the old parser outputs are grouped by
269    /// pid (subprocesses recorded with `--trace-children`), then by part (for example cause by a
270    /// `--dump-every-bb=xxx`) and then by thread (caused by `--separate-threads`). Since each of
271    /// these components can differ between the new and the old parser output, this complicates the
272    /// creation of each `Summary`. We can't just zip the new and old parser output directly to get
273    /// (as far as possible) correct comparisons between the new and old costs. To remedy the
274    /// possibly incorrect comparisons, there is always a total created.
275    ///
276    /// In a first step the parsed outputs are grouped in vectors by pid, then by parts and then by
277    /// threads. This solution is not very efficient but there are not too many parsed outputs to be
278    /// expected. 100 at most and maybe 2-10 on average, so the tradeoff between performance and
279    /// clearer structure of this method looks reasonable.
280    ///
281    /// Secondly and finally, the groups are processed and summarized in a total.
282    pub fn new(parsed_new: ParserOutput, parsed_old: Option<ParserOutput>) -> Self {
283        let grouped_new = Self::group(parsed_new.into_iter());
284        let grouped_old = Self::group(parsed_old.into_iter().flatten());
285
286        let mut total = MetricsSummary::default();
287        let mut summaries = vec![];
288
289        for e_pids in grouped_new.into_iter().zip_longest(grouped_old) {
290            match e_pids {
291                itertools::EitherOrBoth::Both(new_parts, old_parts) => {
292                    for e_parts in new_parts.into_iter().zip_longest(old_parts) {
293                        match e_parts {
294                            itertools::EitherOrBoth::Both(new_threads, old_threads) => {
295                                for e_threads in new_threads.into_iter().zip_longest(old_threads) {
296                                    let summary = match e_threads {
297                                        itertools::EitherOrBoth::Both(new, old) => {
298                                            Summary::from_new_and_old(new, old)
299                                        }
300                                        itertools::EitherOrBoth::Left(new) => {
301                                            Summary::from_new(new.0, new.1, new.2)
302                                        }
303                                        itertools::EitherOrBoth::Right(old) => {
304                                            Summary::from_old(old.0, old.1, old.2)
305                                        }
306                                    };
307                                    total.add(&summary.metrics_summary);
308                                    summaries.push(summary);
309                                }
310                            }
311                            itertools::EitherOrBoth::Left(left) => {
312                                for new in left {
313                                    let summary = Summary::from_new(new.0, new.1, new.2);
314                                    total.add(&summary.metrics_summary);
315                                    summaries.push(summary);
316                                }
317                            }
318                            itertools::EitherOrBoth::Right(right) => {
319                                for old in right {
320                                    let summary = Summary::from_old(old.0, old.1, old.2);
321                                    total.add(&summary.metrics_summary);
322                                    summaries.push(summary);
323                                }
324                            }
325                        }
326                    }
327                }
328                itertools::EitherOrBoth::Left(left) => {
329                    for new in left.into_iter().flatten() {
330                        let summary = Summary::from_new(new.0, new.1, new.2);
331                        total.add(&summary.metrics_summary);
332                        summaries.push(summary);
333                    }
334                }
335                itertools::EitherOrBoth::Right(right) => {
336                    for old in right.into_iter().flatten() {
337                        let summary = Summary::from_old(old.0, old.1, old.2);
338                        total.add(&summary.metrics_summary);
339                        summaries.push(summary);
340                    }
341                }
342            }
343        }
344
345        Self { summaries, total }
346    }
347
348    pub fn has_multiple(&self) -> bool {
349        self.summaries.len() > 1
350    }
351}
352
353impl From<Summaries> for ToolRun {
354    fn from(value: Summaries) -> Self {
355        let segments = value.summaries.into_iter().map(Into::into).collect();
356        Self {
357            total: ToolMetricSummary::CallgrindSummary(value.total),
358            segments,
359        }
360    }
361}
362
363impl From<&Summaries> for ToolRun {
364    fn from(value: &Summaries) -> Self {
365        value.clone().into()
366    }
367}
368
369impl Summary {
370    pub fn new(
371        details: EitherOrBoth<(PathBuf, CallgrindProperties)>,
372        metrics_summary: MetricsSummary,
373    ) -> Self {
374        Self {
375            details,
376            metrics_summary,
377        }
378    }
379
380    pub fn from_new(path: PathBuf, properties: CallgrindProperties, metrics: Metrics) -> Self {
381        Self {
382            details: EitherOrBoth::Left((path, properties)),
383            metrics_summary: MetricsSummary::new(EitherOrBoth::Left(metrics)),
384        }
385    }
386
387    pub fn from_old(path: PathBuf, properties: CallgrindProperties, metrics: Metrics) -> Self {
388        Self {
389            details: EitherOrBoth::Right((path, properties)),
390            metrics_summary: MetricsSummary::new(EitherOrBoth::Right(metrics)),
391        }
392    }
393
394    pub fn from_new_and_old(
395        new: (PathBuf, CallgrindProperties, Metrics),
396        old: (PathBuf, CallgrindProperties, Metrics),
397    ) -> Self {
398        Self {
399            details: EitherOrBoth::Both((new.0, new.1), (old.0, old.1)),
400            metrics_summary: MetricsSummary::new(EitherOrBoth::Both(new.2, old.2)),
401        }
402    }
403}
404
405impl From<Summary> for ToolRunSegment {
406    fn from(value: Summary) -> Self {
407        match value.details {
408            EitherOrBoth::Left((new_path, new_props)) => ToolRunSegment {
409                metrics_summary: ToolMetricSummary::CallgrindSummary(value.metrics_summary),
410                details: EitherOrBoth::Left(new_props.into_info(&new_path)),
411            },
412            EitherOrBoth::Right((old_path, old_props)) => ToolRunSegment {
413                metrics_summary: ToolMetricSummary::CallgrindSummary(value.metrics_summary),
414                details: EitherOrBoth::Right(old_props.into_info(&old_path)),
415            },
416            EitherOrBoth::Both((new_path, new_props), (old_path, old_props)) => ToolRunSegment {
417                metrics_summary: ToolMetricSummary::CallgrindSummary(value.metrics_summary),
418                details: EitherOrBoth::Both(
419                    new_props.into_info(&new_path),
420                    old_props.into_info(&old_path),
421                ),
422            },
423        }
424    }
425}
426
#[cfg(test)]
mod tests {
    use rstest::rstest;
    use EventKind::*;

    use super::*;

    /// Build the `Metrics` of a cache simulation from the costs in the order
    /// `Ir Dr Dw I1mr D1mr D1mw ILmr DLmr DLmw`
    fn cachesim_costs(costs: [u64; 9]) -> Metrics {
        let [ir, dr, dw, i1mr, d1mr, d1mw, ilmr, dlmr, dlmw] = costs;
        Metrics::with_metric_kinds([
            (Ir, ir),
            (Dr, dr),
            (Dw, dw),
            (I1mr, i1mr),
            (D1mr, d1mr),
            (D1mw, d1mw),
            (ILmr, ilmr),
            (DLmr, dlmr),
            (DLmw, dlmw),
        ])
    }

    // Without old metrics there is nothing to compare against
    #[rstest]
    fn test_regression_check_when_old_is_none() {
        let config = RegressionConfig::default();
        let summary = MetricsSummary::new(EitherOrBoth::Left(cachesim_costs([0; 9])));

        assert!(config.check(&summary).is_empty());
    }

    // The expected tuples are `(event_kind, new, old, diff_pct, limit)`
    #[rstest]
    #[case::ir_all_zero(
        vec![(Ir, 0f64)],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        vec![]
    )]
    #[case::ir_when_regression(
        vec![(Ir, 0f64)],
        [2, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        vec![(Ir, 2, 1, 100f64, 0f64)]
    )]
    #[case::ir_when_improved(
        vec![(Ir, 0f64)],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [2, 0, 0, 0, 0, 0, 0, 0, 0],
        vec![]
    )]
    #[case::ir_when_negative_limit(
        vec![(Ir, -49f64)],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [2, 0, 0, 0, 0, 0, 0, 0, 0],
        vec![(Ir, 1, 2, -50f64, -49f64)]
    )]
    #[case::derived_all_zero(
        vec![(EstimatedCycles, 0f64)],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        vec![]
    )]
    #[case::derived_when_regression(
        vec![(EstimatedCycles, 0f64)],
        [2, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        vec![(EstimatedCycles, 2, 1, 100f64, 0f64)]
    )]
    #[case::derived_when_regression_multiple(
        vec![(EstimatedCycles, 5f64), (Ir, 10f64)],
        [2, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        vec![(EstimatedCycles, 2, 1, 100f64, 5f64), (Ir, 2, 1, 100f64, 10f64)]
    )]
    #[case::derived_when_improved(
        vec![(EstimatedCycles, 0f64)],
        [1, 0, 0, 0, 0, 0, 0, 0, 0],
        [2, 0, 0, 0, 0, 0, 0, 0, 0],
        vec![]
    )]
    #[case::derived_when_regression_mixed(
        vec![(EstimatedCycles, 0f64)],
        [96, 24, 18, 6, 0, 2, 6, 0, 2],
        [48, 12, 9, 3, 0, 1, 3, 0, 1],
        vec![(EstimatedCycles, 410, 205, 100f64, 0f64)]
    )]
    fn test_regression_check_when_old_is_some(
        #[case] limits: Vec<(EventKind, f64)>,
        #[case] new: [u64; 9],
        #[case] old: [u64; 9],
        #[case] expected: Vec<(EventKind, u64, u64, f64, f64)>,
    ) {
        let config = RegressionConfig {
            limits,
            ..Default::default()
        };
        let summary = MetricsSummary::new(EitherOrBoth::Both(
            cachesim_costs(new),
            cachesim_costs(old),
        ));

        let expected: Vec<CallgrindRegression> = expected
            .into_iter()
            .map(|(event_kind, new, old, diff_pct, limit)| CallgrindRegression {
                event_kind,
                new,
                old,
                diff_pct,
                limit,
            })
            .collect();

        assert_eq!(config.check(&summary), expected);
    }
}