Skip to main content

brace_expander/
lib.rs

1//! [![Repository](https://img.shields.io/badge/github-brace--expander-/)](https://github.com/Property404/brace-expander)
2//! [![crates.io](https://img.shields.io/crates/v/brace-expander.svg)](https://crates.io/crates/brace-expander)
3//! [![Documentation](https://docs.rs/brace-expander/badge.svg)](https://docs.rs/brace-expander)
4//!
5//! Library to support bash-like brace expansions
6//!
7//! ## Examples
8//!
9//! ```rust
10//! use brace_expander::BraceExpander;
11//! let be = BraceExpander::default();
12//!
13//! // Basic cartesian product
14//! assert_eq!(be.expand("{a,b}").unwrap().join(" "), "a b");
15//! assert_eq!(be.expand("hello_{a,b}").unwrap().join(" "), "hello_a hello_b");
16//! assert_eq!(be.expand("G{o,u}{b,g}").unwrap().join(" "), "Gob Gog Gub Gug");
17//!
18//! // Nested product
19//! assert_eq!(be
20//!     .expand("{{a,b}{c,{e,f}},g}")
21//!     .unwrap()
22//!     .join(" "), "ac ae af bc be bf g");
23//!
24//! // Numeric expansion
25//! assert_eq!(be.expand("thing{1..3}").unwrap().join(" "), "thing1 thing2 thing3");
26//!
27//! // Or backwards
28//! assert_eq!(be.expand("thing{3..1}").unwrap().join(" "), "thing3 thing2 thing1");
29//!
30//! // char expansion
31//! assert_eq!(be.expand("{A..C}").unwrap().join(" "), "A B C");
32//!
33//! // Step by
34//! assert_eq!(be.expand("{A..E..2}").unwrap().join(" "), "A C E");
35//!
36//! // Leading zeroes
37//! assert_eq!(be.expand("Agent{006..008}").unwrap().join(" "), "Agent006 Agent007 Agent008");
38//!
39//! ```
40//!
41//! ## License
42//!
43//! MIT or Apache-2.0
44//!
45//! Pull requests encouraged
46#![warn(missing_docs)]
47#![forbid(unsafe_code)]
48use either::Either;
49mod error;
50use error::Error;
51mod tokenizer;
52use tokenizer::{Token, TokenKind};
53mod parser;
54use parser::AstToken;
55
56#[derive(Clone, Debug)]
57struct Options {
58    ignore_parse_failures: bool,
59    interpret_backslashes: bool,
60}
61
62impl Options {
63    const fn new() -> Self {
64        Self {
65            ignore_parse_failures: false,
66            interpret_backslashes: false,
67        }
68    }
69}
70
71/// Brace expander
72#[derive(Clone, Debug)]
73pub struct BraceExpander {
74    options: Options,
75}
76
77impl Default for BraceExpander {
78    fn default() -> Self {
79        Self::new()
80    }
81}
82
83// Perf: cartesian product is probably the biggest bottleneck
84fn cartesian_product<B>(list_a: Vec<String>, list_b: &[B]) -> Vec<String>
85where
86    B: AsRef<str>,
87{
88    list_a
89        .into_iter()
90        .flat_map(|s| std::iter::repeat_n(s, list_b.len()).zip(list_b.iter()))
91        .map(|(mut a, b)| {
92            a += b.as_ref();
93            a
94        })
95        .collect()
96}
97
98pub(crate) fn expand_ast(input: &[AstToken]) -> Vec<String> {
99    let mut segments = vec![String::new()];
100    for token in input.iter() {
101        match token {
102            AstToken::Text(text) => {
103                segments = cartesian_product(segments, &[text]);
104            }
105            AstToken::NumericExpansion {
106                start,
107                end,
108                step,
109                leading_zeros,
110            } => {
111                let leading_zeroes = "0".repeat(*leading_zeros);
112                let range = if start <= end {
113                    Either::Left(*start..=*end)
114                } else {
115                    Either::Right((*end..=*start).rev())
116                }
117                .step_by(usize::from(*step))
118                .map(|int| format!("{leading_zeroes}{int}"))
119                .collect::<Vec<_>>();
120                segments = cartesian_product(segments, &range);
121            }
122            AstToken::CharExpansion { start, end, step } => {
123                let range = if start <= end {
124                    Either::Left(*start..=*end)
125                } else {
126                    Either::Right((*end..=*start).rev())
127                }
128                .step_by(usize::from(*step))
129                .map(char::from)
130                // Bash replaces backslash with space in char expansions
131                .map(|c| if c == '\\' { ' ' } else { c })
132                .map(|c| c.to_string())
133                .collect::<Vec<_>>();
134                segments = cartesian_product(segments, &range);
135            }
136            AstToken::CommaExpansion(asts) => {
137                segments = cartesian_product(
138                    segments,
139                    &asts.iter().flat_map(|v| expand_ast(v)).collect::<Vec<_>>(),
140                );
141            }
142        }
143    }
144
145    segments
146}
147
148impl BraceExpander {
149    /// Create a new default [BraceExpander] with sensible options
150    ///
151    /// This is the same as calling [Default::default()] but it is a const method
152    #[must_use]
153    pub const fn new() -> Self {
154        Self {
155            options: Options::new(),
156        }
157    }
158
159    /// Ignore parse failures instead of erroring out, making the parsing stage infallible. This is
160    /// how Bash behaves.
161    ///
162    /// # Example
163    /// ```
164    /// # use brace_expander::BraceExpander;
165    /// let be = BraceExpander::new().ignore_parse_failures(false);
166    /// assert!(be.expand("{1...10}").is_err());
167    /// let be = be.ignore_parse_failures(true);
168    /// assert_eq!(be.expand("{1...10}").unwrap().join(" "), "{1...10}");
169    /// ```
170    ///
171    /// Default: `false`
172    #[must_use]
173    pub const fn ignore_parse_failures(mut self, value: bool) -> Self {
174        self.options.ignore_parse_failures = value;
175        self
176    }
177
178    /// Interpret backslashes as a method of escaping a character as text. Enabling this makes the
179    /// tokenization process fallible (fails on backslash with nothing following it).
180    ///
181    /// # Example
182    /// ```
183    /// # use brace_expander::BraceExpander;
184    /// let be = BraceExpander::new().interpret_backslashes(false);
185    /// assert_eq!(be.expand("\\{a,b}").unwrap().join(" "), "\\a \\b");
186    /// let be = be.interpret_backslashes(true);
187    /// assert_eq!(be.expand("\\{a,b}").unwrap().join(" "), "{a,b}");
188    /// ```
189    ///
190    /// Default: `false`
191    #[must_use]
192    pub const fn interpret_backslashes(mut self, value: bool) -> Self {
193        self.options.interpret_backslashes = value;
194        self
195    }
196
197    /// Expand a string
198    pub fn expand(&self, input: &str) -> Result<Vec<String>, Error> {
199        let mut expansions = Vec::new();
200        let tokens_barrel = tokenizer::tokenize(input, &self.options)?;
201        let tokens_barrel = tokens_barrel.split(|token| token.kind == TokenKind::Whitespace);
202
203        for tokens in tokens_barrel {
204            if !tokens.is_empty() {
205                let ast = parser::parse_section(tokens, &self.options)?;
206                // Perf note: expansion takes MUCH longer than tokenization or parsing
207                // Start here for perf improvements
208                expansions.extend(expand_ast(&ast));
209            }
210        }
211
212        Ok(expansions)
213    }
214}
215
216#[cfg(test)]
217mod tests {
218    use super::*;
219
220    fn test_tv(be: &BraceExpander, input: &str, expected: &[&'static str]) {
221        let actual = be.expand(input).unwrap();
222        let expected = expected
223            .iter()
224            .map(|s| String::from(*s))
225            .collect::<Vec<String>>();
226        if actual != expected {
227            panic!(
228                "{options:?}\nInput   : '{input}'\nExpected: {expected:?}\nActual  : {actual:?}",
229                options = be.options
230            );
231        }
232    }
233
234    #[test]
235    fn expand() {
236        let be = BraceExpander::default().ignore_parse_failures(false);
237        test_tv(&be, "a", &["a"]);
238        test_tv(&be, "a,4", &["a,4"]);
239        test_tv(&be, "a..4", &["a..4"]);
240        test_tv(&be, "{1..4}", &["1", "2", "3", "4"]);
241        test_tv(&be, "a{1..4}", &["a1", "a2", "a3", "a4"]);
242        test_tv(&be, "a{1..4}b", &["a1b", "a2b", "a3b", "a4b"]);
243        test_tv(&be, "{1..2}{1..2}", &["11", "12", "21", "22"]);
244        test_tv(&be, "a{1..2}{1..2}b", &["a11b", "a12b", "a21b", "a22b"]);
245        test_tv(&be, "{a,b}{c,d}", &["ac", "ad", "bc", "bd"]);
246        test_tv(&be, "{_{a,b}_,c}", &["_a_", "_b_", "c"]);
247        test_tv(&be, "{_{1..3}_,c}", &["_1_", "_2_", "_3_", "c"]);
248        test_tv(&be, "{1..1}", &["1"]);
249        test_tv(&be, "{3..1}", &["3", "2", "1"]);
250        test_tv(&be, "{3..1}", &["3", "2", "1"]);
251        test_tv(&be, "a{,,}", &["a", "a", "a"]);
252        test_tv(&be, "{1..2}{,}", &["1", "1", "2", "2"]);
253        test_tv(&be, "{,}{1..2}", &["1", "2", "1", "2"]);
254        test_tv(&be, "{a,}{1..2}", &["a1", "a2", "1", "2"]);
255        test_tv(&be, "a b", &["a", "b"]);
256        test_tv(&be, "{1..2} {a,b}c", &["1", "2", "ac", "bc"]);
257        test_tv(&be, "", &[]);
258        test_tv(&be, "  ", &[]);
259        test_tv(&be, "a  ", &["a"]);
260        test_tv(&be, "} {1..2}", &["}", "1", "2"]);
261        test_tv(&be, "{1..2}}", &["1}", "2}"]);
262        test_tv(&be, "{1..2}..", &["1..", "2.."]);
263        test_tv(&be, "{1..3..1}", &["1", "2", "3"]);
264        test_tv(&be, "{1..3..0}", &["1", "2", "3"]);
265        test_tv(&be, "{1..3..-1}", &["1", "2", "3"]);
266        test_tv(&be, "{1..3..2}", &["1", "3"]);
267        test_tv(&be, "{1..3..3}", &["1"]);
268        test_tv(&be, "{3..1..3}", &["3"]);
269        test_tv(&be, "{1..03..2}", &["01", "03"]);
270        test_tv(&be, "{01..03..2}", &["01", "03"]);
271        test_tv(&be, "{01..00003..2}", &["00001", "00003"]);
272        test_tv(&be, "{1..3..002}", &["1", "3"]);
273        test_tv(&be, "{a..c}", &["a", "b", "c"]);
274        test_tv(&be, "{a..c..2}", &["a", "c"]);
275        test_tv(&be, "{c..a..2}", &["c", "a"]);
276        test_tv(&be, "{a..Z..2}", &["a", "_", "]", "["]);
277        test_tv(&be, "{Z..a..2}", &["Z", " ", "^", "`"]);
278        test_tv(&be, "{Z..a..2}", &["Z", " ", "^", "`"]);
279        test_tv(&be, "{0..-2}", &["0", "-1", "-2"]);
280        test_tv(&be, "{-1..-2}", &["-1", "-2"]);
281        test_tv(&be, "{-1..1}", &["-1", "0", "1"]);
282        test_tv(&be, "{1..-1}", &["1", "0", "-1"]);
283        test_tv(&be, "{-1..+1}", &["-1", "0", "1"]);
284        test_tv(
285            &be,
286            "{a,b}{c,d}{e,f}",
287            &["ace", "acf", "ade", "adf", "bce", "bcf", "bde", "bdf"],
288        );
289    }
290
291    #[test]
292    fn backslash_behavior() {
293        let tvs: &[(&str, &[&str], &[&str])] = &[
294            ("{a,\\ }", &["{a,\\", "}"], &["a", " "]),
295            ("\\{a,b}", &["\\a", "\\b"], &["{a,b}"]),
296            ("_{a,\\ b}", &["_{a,\\", "b}"], &["_a", "_ b"]),
297            ("\\\\", &["\\\\"], &["\\"]),
298        ];
299        for tv in tvs {
300            test_tv(
301                &BraceExpander::new()
302                    .ignore_parse_failures(true)
303                    .interpret_backslashes(false),
304                tv.0,
305                tv.1,
306            );
307            test_tv(
308                &BraceExpander::new()
309                    .ignore_parse_failures(true)
310                    .interpret_backslashes(true),
311                tv.0,
312                tv.2,
313            );
314        }
315    }
316
317    #[test]
318    fn parse_failures() {
319        let tvs: &[(&str, &[&str])] = &[
320            ("{a..", &["{a.."]),
321            ("{", &["{"]),
322            ("{a}", &["{a}"]),
323            ("{a}{b,c}", &["{a}b", "{a}c"]),
324            ("{1..2..z}", &["{1..2..z}"]),
325            ("{1..z}", &["{1..z}"]),
326            ("{1..z}{,}", &["{1..z}", "{1..z}"]),
327            ("{1..}", &["{1..}"]),
328            ("{..}", &["{..}"]),
329            ("{{{", &["{{{"]),
330            ("{,{,{", &["{,{,{"]),
331        ];
332
333        // Ignore parse failures like bash
334        let be = BraceExpander::default().ignore_parse_failures(true);
335        for tv in tvs {
336            test_tv(&be, tv.0, tv.1);
337        }
338
339        // Error out on parse failure
340        let be = BraceExpander::default().ignore_parse_failures(false);
341        for tv in tvs {
342            assert!(be.expand(tv.0).is_err());
343        }
344    }
345
346    #[test]
347    fn fuzz() {
348        use rand::prelude::*;
349        let mut rng = rand::rng();
350        fn build_fuzzy_string(rng: &mut rand::rngs::ThreadRng) -> String {
351            let length = rng.random_range(0..20);
352            let mut string = String::new();
353            for _ in 0..length {
354                let chars = [
355                    '\\',
356                    '{',
357                    '}',
358                    '.',
359                    ',',
360                    '\'',
361                    '"',
362                    ' ',
363                    '\t',
364                    '\r',
365                    '\n',
366                    rng.random(),
367                ];
368                string.push(*chars.choose(rng).unwrap());
369            }
370            string
371        }
372
373        let strict_be = BraceExpander::default().ignore_parse_failures(false);
374        let loose_be = BraceExpander::default().ignore_parse_failures(true);
375        for _ in 0..1000 {
376            let string = build_fuzzy_string(&mut rng);
377
378            // BraceExpander in loose mood shouldn't error at all, even on garbage
379            if let Err(err @ Error::ParserError { .. }) = loose_be.expand(&string) {
380                panic!("Unexpected error on `{string}`: {err}");
381            }
382
383            // Just make sure we don't panic on strict
384            let _ = strict_be.expand(&string);
385        }
386    }
387}