brace-expander 0.0.2

Brace expansion library
Documentation
//! [![Repository](https://img.shields.io/badge/github-brace--expander-/)](https://github.com/Property404/brace-expander)
//! [![crates.io](https://img.shields.io/crates/v/brace-expander.svg)](https://crates.io/crates/brace-expander)
//! [![Documentation](https://docs.rs/brace-expander/badge.svg)](https://docs.rs/brace-expander)
//!
//! Library to support bash-like brace expansions
//!
//! ## Examples
//!
//! ```rust
//! use brace_expander::BraceExpander;
//! let be = BraceExpander::default();
//!
//! // Basic cartesian product
//! assert_eq!(be.expand("{a,b}").unwrap().join(" "), "a b");
//! assert_eq!(be.expand("hello_{a,b}").unwrap().join(" "), "hello_a hello_b");
//! assert_eq!(be.expand("G{o,u}{b,g}").unwrap().join(" "), "Gob Gog Gub Gug");
//!
//! // Nested product
//! assert_eq!(be
//!     .expand("{{a,b}{c,{e,f}},g}")
//!     .unwrap()
//!     .join(" "), "ac ae af bc be bf g");
//!
//! // Numeric expansion
//! assert_eq!(be.expand("thing{1..3}").unwrap().join(" "), "thing1 thing2 thing3");
//!
//! // Or backwards
//! assert_eq!(be.expand("thing{3..1}").unwrap().join(" "), "thing3 thing2 thing1");
//!
//! // char expansion
//! assert_eq!(be.expand("{A..C}").unwrap().join(" "), "A B C");
//!
//! // Step by
//! assert_eq!(be.expand("{A..E..2}").unwrap().join(" "), "A C E");
//!
//! // Leading zeroes
//! assert_eq!(be.expand("Agent{006..008}").unwrap().join(" "), "Agent006 Agent007 Agent008");
//!
//! ```
//!
//! ## License
//!
//! MIT or Apache-2.0
//!
//! Pull requests encouraged
#![warn(missing_docs)]
#![forbid(unsafe_code)]
use either::Either;
mod error;
use error::Error;
mod tokenizer;
use tokenizer::{Token, TokenKind};
mod parser;
use parser::AstToken;

#[derive(Clone, Debug)]
struct Options {
    ignore_parse_failures: bool,
    interpret_backslashes: bool,
}

impl Options {
    const fn new() -> Self {
        Self {
            ignore_parse_failures: false,
            interpret_backslashes: false,
        }
    }
}

/// Brace expander
#[derive(Clone, Debug)]
pub struct BraceExpander {
    options: Options,
}

impl Default for BraceExpander {
    fn default() -> Self {
        Self::new()
    }
}

// Perf: cartesian product is probably the biggest bottleneck
fn cartesian_product<B>(list_a: Vec<String>, list_b: &[B]) -> Vec<String>
where
    B: AsRef<str>,
{
    list_a
        .into_iter()
        .flat_map(|s| std::iter::repeat_n(s, list_b.len()).zip(list_b.iter()))
        .map(|(mut a, b)| {
            a += b.as_ref();
            a
        })
        .collect()
}

pub(crate) fn expand_ast(input: &[AstToken]) -> Vec<String> {
    let mut segments = vec![String::new()];
    for token in input.iter() {
        match token {
            AstToken::Text(text) => {
                segments = cartesian_product(segments, &[text]);
            }
            AstToken::NumericExpansion {
                start,
                end,
                step,
                leading_zeros,
            } => {
                let leading_zeroes = "0".repeat(*leading_zeros);
                let range = if start <= end {
                    Either::Left(*start..=*end)
                } else {
                    Either::Right((*end..=*start).rev())
                }
                .step_by(usize::from(*step))
                .map(|int| format!("{leading_zeroes}{int}"))
                .collect::<Vec<_>>();
                segments = cartesian_product(segments, &range);
            }
            AstToken::CharExpansion { start, end, step } => {
                let range = if start <= end {
                    Either::Left(*start..=*end)
                } else {
                    Either::Right((*end..=*start).rev())
                }
                .step_by(usize::from(*step))
                .map(char::from)
                // Bash replaces backslash with space in char expansions
                .map(|c| if c == '\\' { ' ' } else { c })
                .map(|c| c.to_string())
                .collect::<Vec<_>>();
                segments = cartesian_product(segments, &range);
            }
            AstToken::CommaExpansion(asts) => {
                segments = cartesian_product(
                    segments,
                    &asts.iter().flat_map(|v| expand_ast(v)).collect::<Vec<_>>(),
                );
            }
        }
    }

    segments
}

impl BraceExpander {
    /// Create a new default [BraceExpander] with sensible options
    ///
    /// This is the same as calling [Default::default()] but it is a const method
    #[must_use]
    pub const fn new() -> Self {
        Self {
            options: Options::new(),
        }
    }

    /// Ignore parse failures instead of erroring out, making the parsing stage infallible. This is
    /// how Bash behaves.
    ///
    /// # Example
    /// ```
    /// # use brace_expander::BraceExpander;
    /// let be = BraceExpander::new().ignore_parse_failures(false);
    /// assert!(be.expand("{1...10}").is_err());
    /// let be = be.ignore_parse_failures(true);
    /// assert_eq!(be.expand("{1...10}").unwrap().join(" "), "{1...10}");
    /// ```
    ///
    /// Default: `false`
    #[must_use]
    pub const fn ignore_parse_failures(mut self, value: bool) -> Self {
        self.options.ignore_parse_failures = value;
        self
    }

    /// Interpret backslashes as a method of escaping a character as text. Enabling this makes the
    /// tokenization process fallible (fails on backslash with nothing following it).
    ///
    /// # Example
    /// ```
    /// # use brace_expander::BraceExpander;
    /// let be = BraceExpander::new().interpret_backslashes(false);
    /// assert_eq!(be.expand("\\{a,b}").unwrap().join(" "), "\\a \\b");
    /// let be = be.interpret_backslashes(true);
    /// assert_eq!(be.expand("\\{a,b}").unwrap().join(" "), "{a,b}");
    /// ```
    ///
    /// Default: `false`
    #[must_use]
    pub const fn interpret_backslashes(mut self, value: bool) -> Self {
        self.options.interpret_backslashes = value;
        self
    }

    /// Expand a string
    pub fn expand(&self, input: &str) -> Result<Vec<String>, Error> {
        let mut expansions = Vec::new();
        let tokens_barrel = tokenizer::tokenize(input, &self.options)?;
        let tokens_barrel = tokens_barrel.split(|token| token.kind == TokenKind::Whitespace);

        for tokens in tokens_barrel {
            if !tokens.is_empty() {
                let ast = parser::parse_section(tokens, &self.options)?;
                // Perf note: expansion takes MUCH longer than tokenization or parsing
                // Start here for perf improvements
                expansions.extend(expand_ast(&ast));
            }
        }

        Ok(expansions)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn test_tv(be: &BraceExpander, input: &str, expected: &[&'static str]) {
        let actual = be.expand(input).unwrap();
        let expected = expected
            .iter()
            .map(|s| String::from(*s))
            .collect::<Vec<String>>();
        if actual != expected {
            panic!(
                "{options:?}\nInput   : '{input}'\nExpected: {expected:?}\nActual  : {actual:?}",
                options = be.options
            );
        }
    }

    #[test]
    fn expand() {
        let be = BraceExpander::default().ignore_parse_failures(false);
        test_tv(&be, "a", &["a"]);
        test_tv(&be, "a,4", &["a,4"]);
        test_tv(&be, "a..4", &["a..4"]);
        test_tv(&be, "{1..4}", &["1", "2", "3", "4"]);
        test_tv(&be, "a{1..4}", &["a1", "a2", "a3", "a4"]);
        test_tv(&be, "a{1..4}b", &["a1b", "a2b", "a3b", "a4b"]);
        test_tv(&be, "{1..2}{1..2}", &["11", "12", "21", "22"]);
        test_tv(&be, "a{1..2}{1..2}b", &["a11b", "a12b", "a21b", "a22b"]);
        test_tv(&be, "{a,b}{c,d}", &["ac", "ad", "bc", "bd"]);
        test_tv(&be, "{_{a,b}_,c}", &["_a_", "_b_", "c"]);
        test_tv(&be, "{_{1..3}_,c}", &["_1_", "_2_", "_3_", "c"]);
        test_tv(&be, "{1..1}", &["1"]);
        test_tv(&be, "{3..1}", &["3", "2", "1"]);
        test_tv(&be, "{3..1}", &["3", "2", "1"]);
        test_tv(&be, "a{,,}", &["a", "a", "a"]);
        test_tv(&be, "{1..2}{,}", &["1", "1", "2", "2"]);
        test_tv(&be, "{,}{1..2}", &["1", "2", "1", "2"]);
        test_tv(&be, "{a,}{1..2}", &["a1", "a2", "1", "2"]);
        test_tv(&be, "a b", &["a", "b"]);
        test_tv(&be, "{1..2} {a,b}c", &["1", "2", "ac", "bc"]);
        test_tv(&be, "", &[]);
        test_tv(&be, "  ", &[]);
        test_tv(&be, "a  ", &["a"]);
        test_tv(&be, "} {1..2}", &["}", "1", "2"]);
        test_tv(&be, "{1..2}}", &["1}", "2}"]);
        test_tv(&be, "{1..2}..", &["1..", "2.."]);
        test_tv(&be, "{1..3..1}", &["1", "2", "3"]);
        test_tv(&be, "{1..3..0}", &["1", "2", "3"]);
        test_tv(&be, "{1..3..-1}", &["1", "2", "3"]);
        test_tv(&be, "{1..3..2}", &["1", "3"]);
        test_tv(&be, "{1..3..3}", &["1"]);
        test_tv(&be, "{3..1..3}", &["3"]);
        test_tv(&be, "{1..03..2}", &["01", "03"]);
        test_tv(&be, "{01..03..2}", &["01", "03"]);
        test_tv(&be, "{01..00003..2}", &["00001", "00003"]);
        test_tv(&be, "{1..3..002}", &["1", "3"]);
        test_tv(&be, "{a..c}", &["a", "b", "c"]);
        test_tv(&be, "{a..c..2}", &["a", "c"]);
        test_tv(&be, "{c..a..2}", &["c", "a"]);
        test_tv(&be, "{a..Z..2}", &["a", "_", "]", "["]);
        test_tv(&be, "{Z..a..2}", &["Z", " ", "^", "`"]);
        test_tv(&be, "{Z..a..2}", &["Z", " ", "^", "`"]);
        test_tv(&be, "{0..-2}", &["0", "-1", "-2"]);
        test_tv(&be, "{-1..-2}", &["-1", "-2"]);
        test_tv(&be, "{-1..1}", &["-1", "0", "1"]);
        test_tv(&be, "{1..-1}", &["1", "0", "-1"]);
        test_tv(&be, "{-1..+1}", &["-1", "0", "1"]);
        test_tv(
            &be,
            "{a,b}{c,d}{e,f}",
            &["ace", "acf", "ade", "adf", "bce", "bcf", "bde", "bdf"],
        );
    }

    #[test]
    fn backslash_behavior() {
        let tvs: &[(&str, &[&str], &[&str])] = &[
            ("{a,\\ }", &["{a,\\", "}"], &["a", " "]),
            ("\\{a,b}", &["\\a", "\\b"], &["{a,b}"]),
            ("_{a,\\ b}", &["_{a,\\", "b}"], &["_a", "_ b"]),
            ("\\\\", &["\\\\"], &["\\"]),
        ];
        for tv in tvs {
            test_tv(
                &BraceExpander::new()
                    .ignore_parse_failures(true)
                    .interpret_backslashes(false),
                tv.0,
                tv.1,
            );
            test_tv(
                &BraceExpander::new()
                    .ignore_parse_failures(true)
                    .interpret_backslashes(true),
                tv.0,
                tv.2,
            );
        }
    }

    #[test]
    fn parse_failures() {
        let tvs: &[(&str, &[&str])] = &[
            ("{a..", &["{a.."]),
            ("{", &["{"]),
            ("{a}", &["{a}"]),
            ("{a}{b,c}", &["{a}b", "{a}c"]),
            ("{1..2..z}", &["{1..2..z}"]),
            ("{1..z}", &["{1..z}"]),
            ("{1..z}{,}", &["{1..z}", "{1..z}"]),
            ("{1..}", &["{1..}"]),
            ("{..}", &["{..}"]),
            ("{{{", &["{{{"]),
            ("{,{,{", &["{,{,{"]),
        ];

        // Ignore parse failures like bash
        let be = BraceExpander::default().ignore_parse_failures(true);
        for tv in tvs {
            test_tv(&be, tv.0, tv.1);
        }

        // Error out on parse failure
        let be = BraceExpander::default().ignore_parse_failures(false);
        for tv in tvs {
            assert!(be.expand(tv.0).is_err());
        }
    }

    #[test]
    fn fuzz() {
        use rand::prelude::*;
        let mut rng = rand::rng();
        fn build_fuzzy_string(rng: &mut rand::rngs::ThreadRng) -> String {
            let length = rng.random_range(0..20);
            let mut string = String::new();
            for _ in 0..length {
                let chars = [
                    '\\',
                    '{',
                    '}',
                    '.',
                    ',',
                    '\'',
                    '"',
                    ' ',
                    '\t',
                    '\r',
                    '\n',
                    rng.random(),
                ];
                string.push(*chars.choose(rng).unwrap());
            }
            string
        }

        let strict_be = BraceExpander::default().ignore_parse_failures(false);
        let loose_be = BraceExpander::default().ignore_parse_failures(true);
        for _ in 0..1000 {
            let string = build_fuzzy_string(&mut rng);

            // BraceExpander in loose mood shouldn't error at all, even on garbage
            if let Err(err @ Error::ParserError { .. }) = loose_be.expand(&string) {
                panic!("Unexpected error on `{string}`: {err}");
            }

            // Just make sure we don't panic on strict
            let _ = strict_be.expand(&string);
        }
    }
}