iai_callgrind_runner/
util.rs

1//! This module provides common utility functions
2
3// spell-checker: ignore axxxxxbcd
4use std::ffi::OsStr;
5use std::io::{self, BufWriter, Write};
6use std::ops::Neg;
7use std::path::{Path, PathBuf};
8use std::process::Command;
9
10use anyhow::{anyhow, Result};
11use log::{debug, log_enabled, trace, Level};
12use regex::Regex;
13#[cfg(feature = "schema")]
14use schemars::JsonSchema;
15use serde::{Deserialize, Serialize};
16use which::which;
17
18use crate::error::Error;
19use crate::runner::metrics::Metric;
20
// # Developer notes
//
// `EitherOrBoth` is not considered feature-complete. Simply extend it and add new methods as
// needed.
25
/// Either left or right or both can be present
///
/// Most of the time, this enum is used to store (new, old) output, metrics, etc. Per convention
/// left is `new` and right is `old`.
///
/// An `EitherOrBoth` can be created from a pair of `Option`s via `TryFrom`, which fails if both
/// options are `None`.
#[derive(Debug, PartialEq, Serialize, Deserialize, Clone, Eq)]
#[cfg_attr(feature = "schema", derive(JsonSchema))]
pub enum EitherOrBoth<T> {
    /// Both values (`new` and `old`) are present, stored in the order `(left, right)`
    Both(T, T),
    /// Only the left or `new` value is present
    Left(T),
    /// Only the right or `old` value is present
    Right(T),
}
40
/// A simple glob pattern with allowed wildcard characters `*` and `?`
///
/// Match patterns as they are accepted by `valgrind` command line arguments such as
/// `--toggle-collect` (<https://valgrind.org/docs/manual/cl-manual.html#cl-manual.options>)
///
/// The wrapped `String` is the unmodified pattern. It is only interpreted when calling
/// `Glob::is_match`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Glob(String);
47
48impl<T> EitherOrBoth<T> {
49    /// Try to return the left (`new`) value
50    pub fn left(&self) -> Option<&T> {
51        match self {
52            Self::Right(_) => None,
53            Self::Both(left, _) | Self::Left(left) => Some(left),
54        }
55    }
56
57    /// Try to return the right (`old`) value
58    pub fn right(&self) -> Option<&T> {
59        match self {
60            Self::Left(_) => None,
61            Self::Right(right) | Self::Both(_, right) => Some(right),
62        }
63    }
64
65    /// Apply the function `f` to the inner value of `EitherOrBoth` and return a new `EitherOrBoth`
66    pub fn map<F, N>(self, f: F) -> EitherOrBoth<N>
67    where
68        F: Fn(T) -> N,
69    {
70        match self {
71            Self::Left(left) => EitherOrBoth::Left(f(left)),
72            Self::Right(right) => EitherOrBoth::Right(f(right)),
73            Self::Both(l, r) => EitherOrBoth::Both(f(l), f(r)),
74        }
75    }
76
77    /// Converts from `&EitherOrBoth<T>` to `EitherOrBoth<&T>`
78    pub fn as_ref(&self) -> EitherOrBoth<&T> {
79        match self {
80            Self::Left(left) => EitherOrBoth::Left(left),
81            Self::Right(right) => EitherOrBoth::Right(right),
82            Self::Both(left, right) => EitherOrBoth::Both(left, right),
83        }
84    }
85}
86
87impl<T> TryFrom<(Option<T>, Option<T>)> for EitherOrBoth<T> {
88    type Error = String;
89
90    fn try_from(value: (Option<T>, Option<T>)) -> std::result::Result<Self, Self::Error> {
91        match value {
92            (None, None) => Err("Either the left, right or both values must be present".to_owned()),
93            (None, Some(right)) => Ok(Self::Right(right)),
94            (Some(left), None) => Ok(Self::Left(left)),
95            (Some(left), Some(right)) => Ok(Self::Both(left, right)),
96        }
97    }
98}
99
100impl Glob {
101    /// Create a new `Glob` pattern matcher
102    pub fn new<T>(pattern: T) -> Self
103    where
104        T: Into<String>,
105    {
106        Self(pattern.into())
107    }
108
109    /// Return true if the glob pattern matches the `haystack`
110    ///
111    /// Allowed wildcard characters are `*` to match any amount of characters and `?` to match
112    /// exactly one character.
113    ///
114    /// # Examples
115    ///
116    /// ```rust
117    /// use iai_callgrind_runner::util::Glob;
118    ///
119    /// let glob = Glob::new("a*bc?");
120    ///
121    /// assert!(glob.is_match("abcd"));
122    /// assert!(glob.is_match("axxxxxbcd"))
123    /// ```
124    ///
125    /// # Implementation Details
126    ///
127    /// This linear-time glob algorithm originates from the article
128    /// <https://research.swtch.com/glob> written by Russ Cox.
129    ///
130    /// We only need a reduced glob matching algorithm for patterns (only `*` and `?` wildcards)
131    /// accepted by valgrind in callgrind options like `--toggle-collect`, ... After having a quick
132    /// look at the algorithm in the `valgrind` repo, it felt too complex for this task, is
133    /// recursive instead of iterative and as far as I can tell, the computation time is slower
134    /// compared to the algorithm used here. Converting the glob patterns into regex would work, but
135    /// requires an extra step, is slower and the glob patterns would inherently allow regex which
136    /// is hard to explain. Repos like <https://crates.io/crates/glob-match> are great and their
137    /// algorithm is based on the same algorithm used here. However such crates allow more globs
138    /// than required.
139    #[allow(clippy::similar_names)]
140    pub fn is_match(&self, haystack: &str) -> bool {
141        let mut p_idx = 0;
142        let mut h_idx = 0;
143
144        let mut next_p_idx = 0;
145        let mut next_h_idx = 0;
146
147        let pattern = self.0.as_bytes();
148        let haystack = haystack.as_bytes();
149
150        while p_idx < pattern.len() || h_idx < haystack.len() {
151            if p_idx < pattern.len() {
152                match pattern[p_idx] {
153                    b'?' => {
154                        if h_idx < haystack.len() {
155                            p_idx += 1;
156                            h_idx += 1;
157                            continue;
158                        }
159                    }
160                    b'*' => {
161                        next_p_idx = p_idx;
162                        next_h_idx = h_idx + 1;
163                        p_idx += 1;
164                        continue;
165                    }
166                    c => {
167                        if h_idx < haystack.len() && haystack[h_idx] == c {
168                            p_idx += 1;
169                            h_idx += 1;
170                            continue;
171                        }
172                    }
173                }
174            }
175            if 0 < next_h_idx && next_h_idx <= haystack.len() {
176                p_idx = next_p_idx;
177                h_idx = next_h_idx;
178                continue;
179            }
180            return false;
181        }
182        true
183    }
184
185    /// Return the glob as string reference
186    pub fn as_str(&self) -> &str {
187        &self.0
188    }
189}
190
191impl<T> From<T> for Glob
192where
193    T: AsRef<str>,
194{
195    fn from(value: T) -> Self {
196        Self(value.as_ref().to_owned())
197    }
198}
199
/// Convert a boolean value into the string `yes` (true) or `no` (false)
pub fn bool_to_yesno(value: bool) -> String {
    String::from(if value { "yes" } else { "no" })
}
208
209/// Copy a directory recursively from `source` to `dest` preserving mode, ownership and timestamps
210///
211/// If `follow_symlinks` is true copy the symlinked file or directory instead of the symlink itself
212pub fn copy_directory(source: &Path, dest: &Path, follow_symlinks: bool) -> Result<()> {
213    let cp = resolve_binary_path("cp")?;
214    let mut command = Command::new(&cp);
215
216    // Using short options ensures compatibility with FreeBSD and Linux
217    if follow_symlinks {
218        // -H: Follow command-line symbolic links
219        // -L: always follow symbolic links in SOURCE
220        command.args(["-H", "-L"]);
221    }
222
223    // -v: Verbose
224    // -R: Recursive
225    // -p: preserve timestamps, file mode, ownership
226    command.args(["-v", "-R", "-p"]);
227    command.arg(source);
228    command.arg(dest);
229    let (stdout, stderr) = command
230        .output()
231        .map_err(|error| Error::LaunchError(cp.clone(), error.to_string()))
232        .and_then(|output| {
233            if output.status.success() {
234                Ok((output.stdout, output.stderr))
235            } else {
236                let status = output.status;
237                Err(Error::ProcessError(
238                    cp.to_string_lossy().to_string(),
239                    Some(output),
240                    status,
241                    None,
242                ))
243            }
244        })?;
245
246    if !stdout.is_empty() {
247        trace!("copy fixtures: stdout:");
248        if log_enabled!(Level::Trace) {
249            write_all_to_stderr(&stdout);
250        }
251    }
252    if !stderr.is_empty() {
253        trace!("copy fixtures: stderr:");
254        if log_enabled!(Level::Trace) {
255            write_all_to_stderr(&stderr);
256        }
257    }
258    Ok(())
259}
260
261/// Calculate the difference between `new` and `old` as factor
262pub fn factor_diff(new: Metric, old: Metric) -> f64 {
263    if new == old {
264        return 1f64;
265    }
266
267    let new_float: f64 = new.into();
268    let old_float: f64 = old.into();
269
270    if new > old {
271        if old == Metric::Int(0) {
272            f64::INFINITY
273        } else {
274            new_float / old_float
275        }
276    } else if new == Metric::Int(0) {
277        f64::NEG_INFINITY
278    } else {
279        (old_float / new_float).neg()
280    }
281}
282
283/// Convert a valgrind glob pattern into a [`Regex`]
284///
285/// A valgrind glob pattern is a simpler glob pattern usually used to match function calls for
286/// example in `--toggle-collect`, `--dump-before`, ... as described here
287/// <https://valgrind.org/docs/manual/cl-manual.html#cl-manual.options>
288///
289/// In short, there are `*` and `?` which are converted into `.*` and `.?` respectively.
290pub fn glob_to_regex(input: &str) -> Result<Regex> {
291    let pattern = input.chars().fold(String::new(), |mut acc, c| {
292        if c == '*' {
293            acc.push_str(".*");
294        } else if c == '?' {
295            acc.push_str(".?");
296        } else {
297            acc.push(c);
298        }
299
300        acc
301    });
302
303    Regex::new(&pattern).map_err(Into::into)
304}
305
/// Prefix the `path` with the `base_dir` unless it already starts with the `base_dir`
///
/// A `path` which already has `base_dir` as prefix is returned unchanged. Otherwise, the `path`
/// is joined onto the `base_dir` with [`Path::join`].
pub fn make_absolute<B, T>(base_dir: B, path: T) -> PathBuf
where
    B: AsRef<Path>,
    T: AsRef<Path>,
{
    let base = base_dir.as_ref();
    let candidate = path.as_ref();

    if candidate.starts_with(base) {
        candidate.to_path_buf()
    } else {
        base.join(candidate)
    }
}
319
/// Strip the `base_dir` prefix from the `path`
///
/// If the `path` does not start with the `base_dir`, the `path` is returned unchanged.
pub fn make_relative<B, T>(base_dir: B, path: T) -> PathBuf
where
    B: AsRef<Path>,
    T: AsRef<Path>,
{
    let path = path.as_ref();
    match path.strip_prefix(base_dir.as_ref()) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => path.to_path_buf(),
    }
}
329
330/// Calculate the difference between `new` and `old` as percentage
331pub fn percentage_diff(new: Metric, old: Metric) -> f64 {
332    if new == old {
333        return 0f64;
334    }
335
336    let new: f64 = new.into();
337    let old: f64 = old.into();
338
339    let diff = (new - old) / old;
340    diff * 100.0f64
341}
342
343/// Try to resolve the absolute path of a binary from the `PATH` and relative paths
344///
345/// If the binary is a name without path separators the PATH is tried, otherwise if not absolute
346/// a relative path is tried. If the path is already absolute checks if it is executable.
347pub fn resolve_binary_path<T>(binary: T) -> Result<PathBuf>
348where
349    T: AsRef<OsStr>,
350{
351    let binary = binary.as_ref();
352    match which(binary) {
353        Ok(path) => {
354            debug!("Found '{}': '{}'", binary.to_string_lossy(), path.display());
355            Ok(path)
356        }
357        Err(error) => Err(
358            anyhow! {"{error}: '{0}' could not be found. Is '{0}' installed, executable and in the PATH?",
359                binary.to_string_lossy()
360            },
361        ),
362    }
363}
364
/// Format a float as string depending on the number of digits of the integer-part
///
/// The higher the number of integer-part digits the lower the number of fractional-part digits.
/// This procedure accounts for the fractional-part being less significant the higher the value of
/// the floating point number gets.
///
/// Finite and infinite values always carry a sign (`+` or `-`). `NaN` is formatted as `NaN`
/// without a sign.
pub fn to_string_signed_short(n: f64) -> String {
    // All comparisons with `NaN` are false, so `NaN` ends up with a precision of `0`
    let precision = match n.abs() {
        abs if abs < 10.0f64 => 5,
        abs if abs < 100.0f64 => 4,
        abs if abs < 1000.0f64 => 3,
        abs if abs < 10000.0f64 => 2,
        abs if abs < 100_000.0_f64 => 1,
        _ => 0,
    };

    format!("{n:+.precision$}")
}

/// Format a float as string depending on the number of digits of the integer-part without sign
///
/// Same as [`to_string_signed_short`] but without a sign. A negative number is therefore formatted
/// like its absolute value.
pub fn to_string_unsigned_short(n: f64) -> String {
    let signed = to_string_signed_short(n);

    // Only strip a real sign. `NaN` is formatted without a sign and cutting off the first
    // character unconditionally would result in `aN`.
    match signed.strip_prefix(['+', '-']) {
        Some(unsigned) => unsigned.to_owned(),
        None => signed,
    }
}
394
/// Strip leading and trailing ascii whitespace from a slice of `u8`
///
/// Returns an empty slice if `bytes` is empty or consists of ascii whitespace only.
pub fn trim(bytes: &[u8]) -> &[u8] {
    let is_content = |byte: &u8| !byte.is_ascii_whitespace();

    let first = bytes.iter().position(is_content);
    let last = bytes.iter().rposition(is_content);

    match (first, last) {
        (Some(first), Some(last)) => &bytes[first..=last],
        _ => &bytes[..0],
    }
}
406
/// Truncate a utf-8 [`std::str`] to at most `len` bytes without splitting a character
///
/// The returned slice ends at the last character boundary which is not greater than `len`. A
/// `len` greater than the length of the `string` returns the whole `string`.
pub fn truncate_str_utf8(string: &str, len: usize) -> &str {
    let mut end = len.min(string.len());

    // The index `0` is always a character boundary, so this loop terminates
    while !string.is_char_boundary(end) {
        end -= 1;
    }

    &string[..end]
}
419
/// Dump all `bytes` to `stderr` and terminate the output with a newline
///
/// Nothing is written if `bytes` is empty. If the last byte is not a newline, an additional
/// newline is printed.
///
/// # Panics
///
/// Panics if writing to or flushing `stderr` fails.
pub fn write_all_to_stderr(bytes: &[u8]) {
    if bytes.is_empty() {
        return;
    }

    // The lock is held until the end of this function
    let mut writer = BufWriter::new(io::stderr().lock());
    let written = match writer.write_all(bytes) {
        Ok(()) => writer.flush(),
        Err(error) => Err(error),
    };
    written.unwrap();

    if bytes.last() != Some(&b'\n') {
        eprintln!();
    }
}
435
/// Dump all `bytes` to `stdout` and terminate the output with a newline
///
/// Nothing is written if `bytes` is empty. If the last byte is not a newline, an additional
/// newline is printed.
///
/// # Panics
///
/// Panics if writing to or flushing `stdout` fails.
pub fn write_all_to_stdout(bytes: &[u8]) {
    if bytes.is_empty() {
        return;
    }

    // The lock is held until the end of this function
    let mut writer = BufWriter::new(io::stdout().lock());
    let written = match writer.write_all(bytes) {
        Ok(()) => writer.flush(),
        Err(error) => Err(error),
    };
    written.unwrap();

    if bytes.last() != Some(&b'\n') {
        println!();
    }
}
451
/// Convert a `yes` or `no` string to a boolean value
///
/// This method is the counterpart to [`bool_to_yesno`]. Leading and trailing whitespace of the
/// `value` is ignored. The comparison of the remaining string is case-sensitive and `None` is
/// returned if it is neither `yes` nor `no`.
pub fn yesno_to_bool(value: &str) -> Option<bool> {
    let trimmed = value.trim();

    if trimmed == "yes" {
        Some(true)
    } else if trimmed == "no" {
        Some(false)
    } else {
        None
    }
}
463
// Unit tests for the utility functions of this module. All tests are parameterized with `rstest`
// and each `#[case]` is a single test case.
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    // Check `truncate_str_utf8` with empty, ascii and multi-byte input (`µ` is two bytes wide) for
    // lengths below, at and above the byte length of the input.
    #[rstest]
    #[case::empty_0("", 0, "")]
    #[case::empty_1("", 1, "")]
    #[case::single_0("a", 0, "")]
    #[case::single_1("a", 1, "a")]
    #[case::single_2("a", 2, "a")]
    #[case::two_0("ab", 0, "")]
    #[case::two_1("ab", 1, "a")]
    #[case::two_2("ab", 2, "ab")]
    #[case::two_3("ab", 3, "ab")]
    #[case::two_usize_max("ab", usize::MAX, "ab")]
    #[case::hundred_0(&"a".repeat(100), 0, "")]
    #[case::hundred_99(&"ab".repeat(50), 99, &"ab".repeat(50)[..99])]
    #[case::hundred_100(&"a".repeat(100), 100, &"a".repeat(100))]
    #[case::hundred_255(&"a".repeat(100), 255, &"a".repeat(100))]
    #[case::multi_byte_0("µ", 0, "")]
    #[case::multi_byte_1("µ", 1, "")]
    #[case::multi_byte_2("µ", 2, "µ")]
    #[case::multi_byte_3("µ", 3, "µ")]
    #[case::uni_then_multi_byte_0("aµ", 0, "")]
    #[case::uni_then_multi_byte_1("aµ", 1, "a")]
    #[case::uni_then_multi_byte_2("aµ", 2, "a")]
    #[case::uni_then_multi_byte_3("aµ", 3, "aµ")]
    #[case::uni_then_multi_byte_4("aµ", 4, "aµ")]
    #[case::multi_byte_then_uni_0("µa", 0, "")]
    #[case::multi_byte_then_uni_1("µa", 1, "")]
    #[case::multi_byte_then_uni_2("µa", 2, "µ")]
    #[case::multi_byte_then_uni_3("µa", 3, "µa")]
    #[case::multi_byte_then_uni_4("µa", 4, "µa")]
    fn test_truncate_str(#[case] input: &str, #[case] len: usize, #[case] expected: &str) {
        assert_eq!(truncate_str_utf8(input, len), expected);
    }

    // Check `factor_diff` with integer and float inputs, also mixed, which are converted into a
    // `Metric`. The cases cover equal values, a zero on either side (positive and negative
    // infinity) and regular positive and negative factors.
    #[rstest]
    #[case::zero(0, 0, 1f64)]
    #[case::float_zero_int_zero(0, 0f64, 1f64)]
    #[case::int_zero_float_zero(0f64, 0, 1f64)]
    #[case::float_zero(0f64, 0f64, 1f64)]
    #[case::infinity_int(1, 0, f64::INFINITY)]
    #[case::infinity_div_int(1f64, 0, f64::INFINITY)]
    #[case::infinity_float(1f64, 0f64, f64::INFINITY)]
    #[case::infinity_float_mixed(1f64, 0, f64::INFINITY)]
    #[case::infinity_div_float(1, 0f64, f64::INFINITY)]
    #[case::negative_infinity(0, 1, f64::NEG_INFINITY)]
    #[case::negative_infinity_float(0f64, 1, f64::NEG_INFINITY)]
    #[case::factor_one(1, 1, 1f64)]
    #[case::factor_minus_two(1, 2, -2f64)]
    #[case::factor_two(2, 1, 2f64)]
    fn test_factor_diff_eq<L, R>(#[case] a: L, #[case] b: R, #[case] expected: f64)
    where
        L: Into<Metric>,
        R: Into<Metric>,
    {
        assert_eq!(factor_diff(a.into(), b.into()), expected);
    }

    // Check `Glob::is_match` with the `*` and `?` wildcards. The first argument of a case is the
    // pattern, the second the haystack and the third the expected result.
    // spell-checker: disable
    #[rstest]
    #[case::both_empty("", "", true)]
    #[case::star_match_empty("*", "", true)]
    #[case::empty_not_match_single("", "a", false)]
    #[case::empty_not_match_star("", "*", false)]
    #[case::star_match_star("*", "*", true)]
    #[case::two_star_match_star("**", "*", true)]
    #[case::mark_match_star("?", "*", true)]
    #[case::mark_match_char("?", "b", true)]
    #[case::star_match_two_chars("*", "ab", true)]
    #[case::star_match_many("*", &"abc".repeat(30), true)]
    #[case::star_a_match_a("*a", "a", true)]
    #[case::a_star_match_a("a*", "a", true)]
    #[case::two_star_a_match_a("**a", "a", true)]
    #[case::star_match_no_char_middle("a*by", "aby", true)]
    #[case::star_match_one_char_middle("a*by", "axby", true)]
    #[case::star_match_two_char_middle("a*by", "axzby", true)]
    #[case::star_match_same_middle("a*by", "abyby", true)]
    #[case::multi_star_no_match("a*by*by", "aby", false)]
    #[case::multi_star_match("a*by*by", "abyby", true)]
    // spell-checker: enable
    fn test_glob(#[case] pattern: String, #[case] haystack: &str, #[case] expected: bool) {
        let actual = Glob(pattern).is_match(haystack);
        assert_eq!(actual, expected);
    }
}