brush_parser/
pattern.rs

1//! Implements parsing for shell glob and extglob patterns.
2
3use crate::error;
4
/// Represents the kind of an extended glob.
///
/// The kind is selected by the single character that precedes the parenthesized
/// body of the extended glob (for example, the `@` in `@(a|b)`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExtendedGlobKind {
    /// The `+` extended glob; matches one or more occurrences of the inner pattern.
    Plus,
    /// The `@` extended glob; allows matching an alternation of inner patterns.
    At,
    /// The `!` extended glob; matches the negation of the inner pattern.
    Exclamation,
    /// The `?` extended glob; matches zero or one occurrence of the inner pattern.
    Question,
    /// The `*` extended glob; matches zero or more occurrences of the inner pattern.
    Star,
}
18
19/// Converts a shell pattern to a regular expression string.
20///
21/// # Arguments
22///
23/// * `pattern` - The shell pattern to convert.
24/// * `enable_extended_globbing` - Whether to enable extended globbing (extglob).
25pub fn pattern_to_regex_str(
26    pattern: &str,
27    enable_extended_globbing: bool,
28) -> Result<String, error::WordParseError> {
29    let regex_str = pattern_to_regex_translator::pattern(pattern, enable_extended_globbing)
30        .map_err(|e| error::WordParseError::Pattern(e.into()))?;
31    Ok(regex_str)
32}
33
// PEG grammar that rewrites a shell pattern into regular expression text. The grammar
// takes `enable_extended_globbing` as an argument, which gates the extended glob rules.
// Alternatives separated by `/` are tried in the order written, so rule order matters.
peg::parser! {
    grammar pattern_to_regex_translator(enable_extended_globbing: bool) for str {
        // Entry point: zero or more pieces, concatenated in input order.
        pub(crate) rule pattern() -> String =
            pieces:(pattern_piece()*) {
                pieces.join("")
            }

        // One unit of the pattern. The more specific forms are tried first and the
        // catch-all single character comes last.
        rule pattern_piece() -> String =
            escape_sequence() /
            bracket_expression() /
            // Extended globs are only attempted when the grammar argument enables them.
            extglob_enabled() s:extended_glob_pattern() { s } /
            wildcard() /
            // A character flagged by `regex_char_needs_escaping` is emitted with a
            // leading backslash.
            [c if regex_char_needs_escaping(c)] {
                let mut s = '\\'.to_string();
                s.push(c);
                s
            } /
            // Any other character is copied through unchanged.
            [c] { c.to_string() }

        // A backslash followed by a character flagged by `regex_char_needs_escaping` is
        // kept as-is (both characters). A backslash followed by any other character
        // yields only that character, i.e. the backslash is dropped.
        rule escape_sequence() -> String =
            sequence:$(['\\'] [c if regex_char_needs_escaping(c)]) { sequence.to_owned() } /
            ['\\'] [c] { c.to_string() }

        // `[` + optional inversion marker + one or more members + `]`. The output is a
        // bracketed class with `^` placed first when an inversion marker was present.
        rule bracket_expression() -> String =
            "[" invert:(invert_char()?) members:bracket_member()+ "]" {
                let mut members = members;
                if invert.is_some() {
                    members.insert(0, String::from("^"));
                }

                std::format!("[{}]", members.join(""))
            }

        // Either `!` or `^` is accepted as the inversion marker.
        rule invert_char() -> bool =
            ['!' | '^'] { true }

        // A member of a bracket expression: a named class, then a range, then a plain
        // run of characters, tried in that order.
        rule bracket_member() -> String =
            char_class_expression() /
            char_range() /
            char_list()

        // A named class such as `[:digit:]`; the matched text is copied verbatim.
        rule char_class_expression() -> String =
            e:$("[:" char_class() ":]") { e.to_owned() }

        // The class names accepted between `[:` and `:]`.
        rule char_class() =
            "alnum" / "alpha" / "blank" / "cntrl" / "digit" / "graph" / "lower" / "print" / "punct" / "space" / "upper"/ "xdigit"

        // Any character, a `-`, then any character other than `]`; the matched text is
        // copied verbatim.
        rule char_range() -> String =
            range:$([_] "-" [c if c != ']']) { range.to_owned() }

        // One or more characters other than `]`, passed through
        // `escape_char_class_char_list` before being emitted.
        rule char_list() -> String =
            chars:$([c if c != ']']+) { escape_char_class_char_list(chars) }

        // `?` becomes `.` and `*` becomes `.*`.
        rule wildcard() -> String =
            "?" { String::from(".") } /
            "*" { String::from(".*") }

        // Guard rule: `&[_]` is a lookahead, so no input is consumed, but it does
        // require at least one remaining character. The conditional action then fails
        // the rule unless extended globbing is enabled.
        rule extglob_enabled() -> () =
            &[_] {? if enable_extended_globbing { Ok(()) } else { Err("extglob disabled") } }

        // A prefix character, `(`, the `|`-separated branches, and `)`.
        pub(crate) rule extended_glob_pattern() -> String =
            kind:extended_glob_prefix() "(" branches:extended_glob_body() ")" {
                let mut s = String::new();

                // fancy_regex uses ?! to indicate a negative lookahead.
                if matches!(kind, ExtendedGlobKind::Exclamation) {
                    if !branches.is_empty() {
                        // With B = the branches joined by `|`, this emits
                        // `(?:(?!B).*|(?>B).+?|)`.
                        s.push_str("(?:(?!");
                        s.push_str(&branches.join("|"));
                        s.push_str(").*|(?>");
                        s.push_str(&branches.join("|"));
                        s.push_str(").+?|)");
                    } else {
                        // `!()` has nothing to negate; emit a group requiring at least
                        // one character.
                        s.push_str("(?:.+)");
                    }
                } else {
                    // All other kinds emit a parenthesized alternation of the branches,
                    // followed by a quantifier where the kind calls for one.
                    s.push('(');
                    s.push_str(&branches.join("|"));
                    s.push(')');

                    match kind {
                        ExtendedGlobKind::Plus => s.push('+'),
                        ExtendedGlobKind::Question => s.push('?'),
                        ExtendedGlobKind::Star => s.push('*'),
                        // `@` needs no quantifier; `!` was fully handled above.
                        ExtendedGlobKind::At | ExtendedGlobKind::Exclamation => (),
                    }
                }

                s
            }

        // Maps the prefix character of an extended glob to its kind.
        rule extended_glob_prefix() -> ExtendedGlobKind =
            "+" { ExtendedGlobKind::Plus } /
            "@" { ExtendedGlobKind::At } /
            "!" { ExtendedGlobKind::Exclamation } /
            "?" { ExtendedGlobKind::Question } /
            "*" { ExtendedGlobKind::Star }

        // The contents between the parentheses, as one regex string per branch.
        pub(crate) rule extended_glob_body() -> Vec<String> =
            // Cover case with *no* branches.
            &[')'] { vec![] } /
            // Otherwise, look for branches separated by '|'.
            extended_glob_branch() ** "|"

        // A single branch: one or more pieces, stopping before an unconsumed `|` or `)`.
        rule extended_glob_branch() -> String =
            // Cover case of empty branch.
            &['|' | ')'] { String::new() } /
            pieces:(!['|' | ')'] piece:pattern_piece() { piece })+ {
                pieces.join("")
            }
    }
}
146
/// Reports whether a character must be backslash-escaped to be treated as a literal in a
/// regular expression.
///
/// # Arguments
///
/// * `c` - The character to classify.
pub const fn regex_char_needs_escaping(c: char) -> bool {
    matches!(
        c,
        // Grouping, class, and repetition-count delimiters.
        '(' | ')' | '[' | ']' | '{' | '}' |
        // Quantifiers and the any-character wildcard.
        '*' | '+' | '?' | '.' |
        // Anchors, alternation, and the escape character itself.
        '^' | '$' | '|' | '\\'
    )
}
158
/// Escapes a run of characters for use inside a regex character class.
///
/// Every `[` is prefixed with a backslash; all other characters are copied unchanged.
fn escape_char_class_char_list(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '[' {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
162
#[cfg(test)]
#[expect(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use anyhow::Result;

    /// Bracket expressions are translated with extended globbing enabled.
    #[test]
    fn test_bracket_exprs() -> Result<()> {
        // Each entry is (shell pattern, expected regex text).
        let cases = [
            ("[a-z]", "[a-z]"),
            ("[abc]", "[abc]"),
            (r"[\(]", r"[\(]"),
            (r"[(]", "[(]"),
            ("[[:digit:]]", "[[:digit:]]"),
            (r"[-(),!]*", r"[-(),!].*"),
            (r"[-\(\),\!]*", r"[-\(\),\!].*"),
        ];

        for (shell_pattern, expected_regex) in cases {
            assert_eq!(pattern_to_regex_str(shell_pattern, true)?, expected_regex);
        }

        Ok(())
    }

    /// Extended glob patterns and bodies are parsed through their dedicated rules.
    #[test]
    fn test_extended_glob() -> Result<()> {
        // Each entry is (extended glob, expected regex text).
        let pattern_cases = [
            ("@(a|b)", "(a|b)"),
            ("@(|a)", "(|a)"),
            ("@(|)", "(|)"),
            ("*(ab|ac)", "(ab|ac)*"),
        ];

        for (glob, expected_regex) in pattern_cases {
            let translated = pattern_to_regex_translator::extended_glob_pattern(glob, true)?;
            assert_eq!(translated, expected_regex);
        }

        // A body with two branches yields one entry per branch.
        let two_branches = pattern_to_regex_translator::extended_glob_body("ab|ac", true)?;
        assert_eq!(two_branches, vec!["ab", "ac"]);

        // An empty body yields no branches at all.
        let no_branches = pattern_to_regex_translator::extended_glob_body("", true)?;
        assert_eq!(no_branches, Vec::<String>::new());

        Ok(())
    }
}
215}