criterion_table/
lib.rs

1#![warn(missing_docs)]
2
3//! Generate markdown comparison tables from
4//! [Cargo Criterion](https://github.com/bheisler/cargo-criterion) benchmark JSON
5//! output.
6//!
//! Currently, the tool is limited to GitHub Flavored Markdown (GFM), but adding
8//! new output types is relatively simple.
9//!
10//! ## Generated Markdown Example
11//!
12//! [Benchmark Report](https://github.com/nu11ptr/criterion-table/blob/master/example/README.md)
13
14/// This module holds the various formatters that can be used to format the output
15pub mod formatter;
16
17use std::cmp::{max, Ordering};
18use std::collections::HashMap;
19use std::fs::File;
20use std::io::{BufReader, ErrorKind, Read};
21use std::ops::Div;
22use std::path::Path;
23
24use anyhow::anyhow;
25use flexstr::{flex_fmt, FlexStr, IntoFlex, ToCase, ToFlex, ToFlexStr};
26use indexmap::map::Entry;
27use indexmap::IndexMap;
28use serde::Deserialize;
29
30// Trick to test README samples (from: https://github.com/rust-lang/cargo/issues/383#issuecomment-720873790)
// Only compiled when rustdoc is collecting doctests, so normal builds never see this module
#[cfg(doctest)]
mod test_readme {
    // Attaches the given string as the doc comment of an empty `extern "C"` block so that any
    // code samples it contains are picked up as doctests
    macro_rules! external_doc_test {
        ($x:expr) => {
            #[doc = $x]
            extern "C" {}
        };
    }

    // The README is included by a path relative to this source file
    external_doc_test!(include_str!("../../README.md"));
}
42
// Starting capacity (in bytes) for the String buffer used to build the page. This is only the
// initial allocation passed to `String::with_capacity`; the buffer can still grow beyond it.
const BUFFER_CAPACITY: usize = 65535;
45
46// *** Raw JSON Data Structs ***
47
48// NOTE: These were shamelessly copied (with translation) from:
49// https://github.com/bheisler/cargo-criterion/blob/main/src/message_formats/json.rs
50
// An estimate with its bounds and unit, as found in the benchmark JSON. Not every field is read
// by the code in this file, hence the `dead_code` allowance.
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
struct ConfidenceInterval {
    // The estimated value (this is what gets turned into a `TimeUnit` for the `typical` interval)
    estimate: f64,
    // Lower bound of the interval
    lower_bound: f64,
    // Upper bound of the interval
    upper_bound: f64,
    // Unit string for the values above (passed to `TimeUnit::try_new` for the `typical` interval)
    unit: FlexStr,
}
59
// Throughput entry of a benchmark record. It is deserialized but never read in this file,
// hence the `dead_code` allowance.
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
struct Throughput {
    // Amount processed per iteration, expressed in `unit`
    per_iteration: u64,
    // Unit string for `per_iteration`
    unit: FlexStr,
}
66
// Classification stored in `ChangeDetails::change`. With the plain derive, the JSON value must
// be a string that matches one of the variant names exactly.
#[derive(Debug, Deserialize)]
enum ChangeType {
    NoChange,
    Improved,
    Regressed,
}
73
// Change information attached to a benchmark record (optional, see `BenchmarkComplete::change`).
// It is deserialized but never read in this file, hence the `dead_code` allowance.
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
struct ChangeDetails {
    // Interval for the change in the mean
    mean: ConfidenceInterval,
    // Interval for the change in the median
    median: ConfidenceInterval,

    // Overall classification of the change
    change: ChangeType,
}
82
/// Raw deserialized JSON Criterion benchmark data
// Only `id` and `typical` are read when the tables are built in this file; the remaining fields
// are deserialized but unused, hence the `dead_code` allowance.
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
pub struct BenchmarkComplete {
    // Benchmark id. It is split on '/' into table name, column name and an optional row name.
    id: FlexStr,
    report_directory: FlexStr,
    iteration_count: Vec<u64>,
    measured_values: Vec<f64>,
    unit: FlexStr,

    throughput: Vec<Throughput>,

    // The estimate and unit of this interval provide the time shown in the tables
    typical: ConfidenceInterval,
    mean: ConfidenceInterval,
    median: ConfidenceInterval,
    median_abs_dev: ConfidenceInterval,
    // May be absent from the JSON
    slope: Option<ConfidenceInterval>,

    // May be absent from the JSON
    change: Option<ChangeDetails>,
}
103
/// Raw deserialized JSON Criterion benchmark group data
// Group records are parsed but skipped when the tables are built in this file, so none of
// these fields are read, hence the `dead_code` allowance.
#[allow(dead_code)]
#[derive(Debug, Deserialize)]
pub struct BenchmarkGroupComplete {
    // Name of the benchmark group
    group_name: FlexStr,
    // The benchmarks listed for this group
    benchmarks: Vec<FlexStr>,
    report_directory: FlexStr,
}
112
/// Enum that can hold either Raw deserialized JSON benchmark or benchmark group data
// `untagged` means there is no discriminator in the JSON: serde tries the variants in
// declaration order and keeps the first one whose fields match the input.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
pub enum RawCriterionData {
    /// Raw benchmark data
    Benchmark(Box<BenchmarkComplete>),
    /// Raw benchmark group data
    BenchmarkGroup(Box<BenchmarkGroupComplete>),
}
122
123impl RawCriterionData {
124    /// Load raw Criterion JSON data from the given reader. It returns a `Vec` of enum wrapped raw
125    /// benchmark or group data
126    pub fn from_reader(r: impl Read) -> serde_json::error::Result<Vec<Self>> {
127        let reader = BufReader::new(r);
128        let mut de = serde_json::Deserializer::from_reader(reader);
129        let mut data_vec = Vec::new();
130
131        loop {
132            match RawCriterionData::deserialize(&mut de) {
133                Ok(data) => data_vec.push(data),
134                Err(err) if err.is_eof() => break,
135                Err(err) => return Err(err),
136            }
137        }
138
139        Ok(data_vec)
140    }
141}
142
143// *** Tables Config ***
144
145#[derive(Default, Deserialize)]
146/// Configuration file format for adding comments to tables
147pub struct TablesConfig {
148    /// Top level comments
149    pub top_comments: IndexMap<FlexStr, FlexStr>,
150    /// Per table comments (table -> comment)
151    pub table_comments: HashMap<FlexStr, FlexStr>,
152}
153
154impl TablesConfig {
155    /// Try to load the config from the given reader
156    pub fn try_load_config(r: impl Read) -> anyhow::Result<Self> {
157        let mut reader = BufReader::new(r);
158        let mut buffer = String::with_capacity(16384);
159        reader.read_to_string(&mut buffer)?;
160
161        let config: TablesConfig = toml::from_str(&buffer)?;
162        Ok(config)
163    }
164}
165
166// *** Criterion Data Structures ***
167
168// ### Column Info ###
169
/// Column maximum width data
///
/// One of these is kept per table column and is passed to the `Formatter` so it can align its
/// output.
#[derive(Clone, Debug)]
pub struct ColumnInfo {
    /// The name of the column
    pub name: FlexStr,
    /// The maximum display width for this column
    pub max_width: usize,
}
178
179impl ColumnInfo {
180    #[inline]
181    /// Create a new `ColumnInfo` using an initial width
182    pub fn new(name: FlexStr, width: usize) -> Self {
183        Self {
184            name,
185            max_width: width,
186        }
187    }
188
189    #[inline]
190    fn update_info(&mut self, width: usize) {
191        self.max_width = max(self.max_width, width);
192    }
193}
194
195// ### Time Unit ###
196
/// Time unit of a particular measurement
///
/// Each variant carries the measured time expressed in that variant's unit.
#[derive(Clone, Copy, Debug)]
pub enum TimeUnit {
    /// Time is in seconds
    Second(f64),
    /// Time is in milliseconds
    Millisecond(f64),
    /// Time is in microseconds
    Microsecond(f64),
    /// Time is in nanoseconds
    Nanosecond(f64),
    /// Time is in picoseconds
    Picosecond(f64),
}
211
212impl TimeUnit {
213    /// Create a new `TimeUnit` taking the time and initial unit string as input
214    pub fn try_new(time: f64, unit: &str) -> anyhow::Result<Self> {
215        match unit {
216            "ms" if time > 1000.0 => Self::try_new(time / 1000.0, "s"),
217            "us" if time > 1000.0 => Self::try_new(time / 1000.0, "ms"),
218            "ns" if time > 1000.0 => Self::try_new(time / 1000.0, "us"),
219            "ps" if time > 1000.0 => Self::try_new(time / 1000.0, "ns"),
220            "s" => Ok(TimeUnit::Second(time)),
221            "ms" => Ok(TimeUnit::Millisecond(time)),
222            "us" => Ok(TimeUnit::Microsecond(time)),
223            "ns" => Ok(TimeUnit::Nanosecond(time)),
224            "ps" => Ok(TimeUnit::Picosecond(time)),
225            _ => Err(anyhow!("Unrecognized time unit: {unit}")),
226        }
227    }
228
229    /// Returns the display width in chars for this `TimeUnit`
230    #[inline]
231    pub fn width(&self) -> usize {
232        self.to_flex_str().chars().count()
233    }
234
235    fn as_picoseconds(&self) -> f64 {
236        match *self {
237            TimeUnit::Second(s) => s * 1_000_000_000_000.0,
238            TimeUnit::Millisecond(ms) => ms * 1_000_000_000.0,
239            TimeUnit::Microsecond(us) => us * 1_000_000.0,
240            TimeUnit::Nanosecond(ns) => ns * 1_000.0,
241            TimeUnit::Picosecond(ps) => ps,
242        }
243    }
244}
245
246impl Div for TimeUnit {
247    type Output = f64;
248
249    fn div(self, rhs: Self) -> Self::Output {
250        let unit1 = self.as_picoseconds();
251        let unit2 = rhs.as_picoseconds();
252        unit1 / unit2
253    }
254}
255
256impl ToFlexStr for TimeUnit {
257    fn to_flex_str(&self) -> FlexStr {
258        match self {
259            TimeUnit::Second(time) => flex_fmt!("{time:.2} s"),
260            TimeUnit::Millisecond(time) => flex_fmt!("{time:.2} ms"),
261            TimeUnit::Microsecond(time) => flex_fmt!("{time:.2} us"),
262            TimeUnit::Nanosecond(time) => flex_fmt!("{time:.2} ns"),
263            TimeUnit::Picosecond(time) => flex_fmt!("{time:.2} ps"),
264        }
265    }
266}
267
268// ### Comparison ###
269
/// A comparison time of a benchmark to its baseline
///
/// The wrapped value is the ratio of the baseline time to this benchmark's time: values above
/// `1.0` are displayed as "faster" and values below `1.0` as "slower".
#[derive(Clone, Copy, Debug, Default, PartialEq, PartialOrd)]
pub struct Comparison(f64);
273
274impl Comparison {
275    /// The display width in chars of this comparison data
276    #[inline]
277    pub fn width(self) -> usize {
278        self.to_flex_str().chars().count()
279    }
280}
281
282impl ToFlexStr for Comparison {
283    fn to_flex_str(&self) -> FlexStr {
284        if self.0 > 1.0 {
285            flex_fmt!("{:.2}x faster", self.0)
286        } else if self.0 < 1.0 {
287            flex_fmt!("{:.2}x slower", 1.0 / self.0)
288        } else {
289            flex_fmt!("{:.2}x", self.0)
290        }
291    }
292}
293
294impl PartialEq<f64> for Comparison {
295    #[inline]
296    fn eq(&self, other: &f64) -> bool {
297        f64::eq(&self.0, other)
298    }
299}
300
301impl PartialOrd<f64> for Comparison {
302    #[inline]
303    fn partial_cmp(&self, other: &f64) -> Option<Ordering> {
304        f64::partial_cmp(&self.0, other)
305    }
306}
307
// ### Column ###
309
// A single populated cell of a table: one benchmark's timing within a row
#[derive(Clone, Debug)]
struct Column {
    // Stored for reference, but not read anywhere in this file
    #[allow(dead_code)]
    name: FlexStr,
    // The measured time for this cell
    time_unit: TimeUnit,
    // Ratio of the row's first column time to this cell's time (1.0 for the first column itself)
    pct: Comparison,
}
317
318impl Column {
319    pub fn new(name: FlexStr, time_unit: TimeUnit, first_col_time: Option<TimeUnit>) -> Self {
320        let pct = match first_col_time {
321            Some(first_col_time) => Comparison(first_col_time / time_unit),
322            None => Comparison(1.0),
323        };
324
325        Self {
326            name,
327            time_unit,
328            pct,
329        }
330    }
331
332    // This returns the "width" of the resulting text in chars. Since we don't know how it will be
333    // formatted we return width of: TimeUnit + Percent. Any additional spaces or formatting chars
334    // are not considered and must be added by the formatter
335    #[inline]
336    pub fn width(&self) -> usize {
337        self.time_unit.width() + self.pct.width()
338    }
339}
340
341// ### Row ###
342
// One table row: the row name plus its populated cells, keyed by column name. The map keeps
// insertion order, so the first entry is the first column that was added to this row.
#[derive(Clone, Debug)]
struct Row {
    name: FlexStr,
    column_data: IndexMap<FlexStr, Column>,
}
348
349impl Row {
350    #[inline]
351    pub fn new(name: FlexStr) -> Self {
352        Self {
353            name,
354            column_data: Default::default(),
355        }
356    }
357
358    // NOTE: The 'first' column here reflects the first column seen for THIS row NOT for the whole table
359    // This means our timings COULD be based off different columns in different rows
360    fn first_column_time(&self) -> Option<TimeUnit> {
361        self.column_data
362            .first()
363            .map(|(_, Column { time_unit, .. })| *time_unit)
364    }
365
366    fn add_column(&mut self, name: FlexStr, time_unit: TimeUnit) -> anyhow::Result<&Column> {
367        let first_time = self.first_column_time();
368
369        match self.column_data.entry(name.clone()) {
370            Entry::Occupied(_) => Err(anyhow!("Duplicate column: {name}")),
371            Entry::Vacant(entry) => {
372                let col = Column::new(name, time_unit, first_time);
373                Ok(entry.insert(col))
374            }
375        }
376    }
377}
378
// ### Column Info Vec ###
380
// Ordered list of the columns of a table along with their maximum widths. Index 0 is used by
// `Table` for the blank-named column that holds the row names.
#[derive(Clone, Debug, Default)]
struct ColumnInfoVec(Vec<ColumnInfo>);
383
384impl ColumnInfoVec {
385    pub fn update_column_info(&mut self, idx: usize, name: FlexStr, width: usize) {
386        match self.0.iter_mut().find(|col| col.name == name) {
387            Some(col_info) => col_info.update_info(width),
388            None => self.0.insert(idx, ColumnInfo::new(name, width)),
389        }
390    }
391}
392
393// ### Table ###
394
// A single named table: its column layout/width info and its rows keyed by row name (kept in
// insertion order)
#[derive(Clone, Debug)]
struct Table {
    name: FlexStr,
    columns: ColumnInfoVec,
    rows: IndexMap<FlexStr, Row>,
}
401
402impl Table {
403    #[inline]
404    pub fn new(name: FlexStr) -> Self {
405        Self {
406            name,
407            columns: Default::default(),
408            rows: Default::default(),
409        }
410    }
411
412    pub fn add_column_data(
413        &mut self,
414        idx: usize,
415        column_name: FlexStr,
416        row_name: FlexStr,
417        time: TimeUnit,
418    ) -> anyhow::Result<()> {
419        // Assume we have a blank named first column just for holding the row name
420        self.columns
421            .update_column_info(0, Default::default(), row_name.chars().count());
422
423        let row = self.get_row(row_name);
424        let col = row.add_column(column_name.clone(), time)?;
425
426        // Use either the width of the data or the name, whichever is larger
427        let width = max(col.width(), column_name.chars().count());
428        self.columns.update_column_info(idx, column_name, width);
429        Ok(())
430    }
431
432    fn get_row(&mut self, name: FlexStr) -> &mut Row {
433        match self.rows.entry(name.clone()) {
434            Entry::Occupied(entry) => entry.into_mut(),
435            Entry::Vacant(entry) => entry.insert(Row::new(name)),
436        }
437    }
438}
439
440// ### Column Position ###
441
// Counter per (table name, row name) pair. Each call to `next_idx` for a pair bumps its counter,
// and the result is used as the insertion position for the column being added.
#[derive(Default, Debug)]
struct ColumnPosition(IndexMap<(FlexStr, FlexStr), usize>);
444
445impl ColumnPosition {
446    pub fn next_idx(&mut self, table_name: FlexStr, row_name: FlexStr) -> usize {
447        match self.0.entry((table_name, row_name)) {
448            Entry::Occupied(mut entry) => {
449                *entry.get_mut() += 1;
450                *entry.get()
451            }
452            Entry::Vacant(entry) => *entry.insert(1),
453        }
454    }
455}
456
457// ### Criterion Table Data ###
458
/// Fully processed Criterion benchmark data ready for formatting
///
/// Build it with `CriterionTableData::from_raw` and render it with
/// `CriterionTableData::make_tables`.
#[derive(Clone, Debug)]
pub struct CriterionTableData {
    // Tables keyed by name, in the order they were first seen in the input
    tables: IndexMap<FlexStr, Table>,
}
464
465impl CriterionTableData {
466    /// Build table data from the input raw Criterion data
467    pub fn from_raw(raw_data: &[RawCriterionData]) -> anyhow::Result<Self> {
468        let mut data = Self {
469            tables: Default::default(),
470        };
471
472        data.build_from_raw_data(raw_data)?;
473        Ok(data)
474    }
475
476    fn build_from_raw_data(&mut self, raw_data: &[RawCriterionData]) -> anyhow::Result<()> {
477        let mut col_pos = ColumnPosition::default();
478
479        for item in raw_data {
480            // We only process benchmark data - skip anything else
481            if let RawCriterionData::Benchmark(bm) = item {
482                // Break the id into table, column, and row respectively
483                let mut parts: Vec<FlexStr> = bm.id.split('/').map(|s| s.to_flex()).collect();
484                if parts.len() < 2 {
485                    return Err(anyhow::anyhow!("Malformed id: {}", &bm.id));
486                }
487
488                let (table_name, column_name) = (parts.remove(0), parts.remove(0));
489                // If we don't have a row name then we will work with a blank row name
490                let row_name = if !parts.is_empty() {
491                    parts.remove(0)
492                } else {
493                    "".into()
494                };
495
496                // Find our table, calculate our timing, and add data to our column
497                let table = self.get_table(table_name.clone());
498                let time_unit = TimeUnit::try_new(bm.typical.estimate, &bm.typical.unit)?;
499
500                let idx = col_pos.next_idx(table_name, row_name.clone());
501                table.add_column_data(idx, column_name, row_name, time_unit)?;
502            }
503        }
504
505        Ok(())
506    }
507
508    fn get_table(&mut self, name: FlexStr) -> &mut Table {
509        match self.tables.entry(name.clone()) {
510            Entry::Occupied(entry) => entry.into_mut(),
511            Entry::Vacant(entry) => entry.insert(Table::new(name)),
512        }
513    }
514
515    fn encode_key(s: &FlexStr) -> FlexStr {
516        s.replace(' ', "_").into_flex().to_lower()
517    }
518
519    /// Given a `Formatter` and `TablesConfig`, generate formatted tables as a `String`
520    pub fn make_tables(&self, mut f: impl Formatter, config: &TablesConfig) -> String {
521        let mut buffer = String::with_capacity(BUFFER_CAPACITY);
522
523        // Start of doc
524        let table_names: Vec<_> = self.tables.keys().collect();
525        f.start(&mut buffer, &config.top_comments, &table_names);
526
527        for table in self.tables.values() {
528            let col_info = &table.columns.0;
529
530            if let Some(first_col) = col_info.first() {
531                // Start of table
532                let comments = config.table_comments.get(&Self::encode_key(&table.name));
533                f.start_table(&mut buffer, &table.name, comments, col_info);
534
535                for row in table.rows.values() {
536                    // Start of row
537                    f.start_row(&mut buffer, &row.name, first_col.max_width);
538
539                    for col in &col_info[1..] {
540                        match row.column_data.get(&col.name) {
541                            // Used column
542                            Some(col_data) => f.used_column(
543                                &mut buffer,
544                                col_data.time_unit,
545                                col_data.pct,
546                                col.max_width,
547                            ),
548                            // Unused column
549                            None => f.unused_column(&mut buffer, col.max_width),
550                        }
551                    }
552
553                    // End of row
554                    f.end_row(&mut buffer);
555                }
556
557                // End of table
558                f.end_table(&mut buffer);
559            }
560        }
561
562        // End of doc
563        f.end(&mut buffer);
564
565        buffer
566    }
567}
568
569// *** Formatter ***
570
/// Implement this "visitor" trait to create a `Formatter` for a new file type
///
/// Every method appends its output to the shared `buffer`, which is returned to the caller
/// once `end` has been called.
pub trait Formatter {
    /// Called first at the start of output. Passed top level `top_comments` and a slice of table
    /// names (typically used to build a table of contents)
    fn start(
        &mut self,
        buffer: &mut String,
        top_comments: &IndexMap<FlexStr, FlexStr>,
        tables: &[&FlexStr],
    );

    /// Called last after all processing is done
    fn end(&mut self, buffer: &mut String);

    /// Called before each table is output with the `name` of the table, a table `comment`, if any,
    /// and column maximum display width data
    fn start_table(
        &mut self,
        buffer: &mut String,
        name: &FlexStr,
        comment: Option<&FlexStr>,
        columns: &[ColumnInfo],
    );

    /// Called at the end of each table output
    fn end_table(&mut self, buffer: &mut String);

    /// Called at the start of each new row with the row `name` and the `max_width` of the row name
    /// column
    fn start_row(&mut self, buffer: &mut String, name: &FlexStr, max_width: usize);

    /// Called at the end of each row
    fn end_row(&mut self, buffer: &mut String);

    /// Called for each column that is populated with the `time` measurement, a comparison to baseline,
    /// and the maximum display width of the column
    fn used_column(
        &mut self,
        buffer: &mut String,
        time: TimeUnit,
        compare: Comparison,
        max_width: usize,
    );

    /// Called for each column that is blank with the maximum display width of the column
    fn unused_column(&mut self, buffer: &mut String, max_width: usize);
}
618
619// *** Functions ***
620
621fn load_config(cfg_name: impl AsRef<Path>) -> anyhow::Result<TablesConfig> {
622    match File::open(cfg_name) {
623        // If the file exists, but it can't be deserialized then report that error
624        Ok(f) => Ok(TablesConfig::try_load_config(f)?),
625        // If file just isn't there then ignore and return a blank config
626        Err(err) if err.kind() == ErrorKind::NotFound => Ok(TablesConfig::default()),
627        // Report any other I/O errors
628        Err(err) => Err(err.into()),
629    }
630}
631
632/// Top level function that can be used to build table data. It takes a reader (raw `cargo-criterion`
633/// JSON data), a `Formatter` (only option is `GFMFormatter` as of this writing), and the name of
634/// a file in `TablesConfig` toml format (the file is optional, simply skipped if it can't be found)
635pub fn build_tables(
636    read: impl Read,
637    fmt: impl Formatter,
638    cfg_name: impl AsRef<Path>,
639) -> anyhow::Result<String> {
640    let raw_data = RawCriterionData::from_reader(read)?;
641    let data = CriterionTableData::from_raw(&raw_data)?;
642    let config = load_config(cfg_name)?;
643    Ok(data.make_tables(fmt, &config))
644}