1use crate::error;
4
/// Identifies which extended-glob operator prefixes a parenthesized list of
/// `|`-separated sub-patterns.
///
/// The variant decides how the translated regex group is quantified (or, for
/// [`ExtendedGlobKind::Exclamation`], negated).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ExtendedGlobKind {
    /// The `+(...)` form; translated to a group followed by `+`.
    Plus,
    /// The `@(...)` form; translated to a plain group with no quantifier.
    At,
    /// The `!(...)` form; translated to a negative-lookahead based construct.
    Exclamation,
    /// The `?(...)` form; translated to a group followed by `?`.
    Question,
    /// The `*(...)` form; translated to a group followed by `*`.
    Star,
}
18
19pub fn pattern_to_regex_str(
26 pattern: &str,
27 enable_extended_globbing: bool,
28) -> Result<String, error::WordParseError> {
29 let regex_str = pattern_to_regex_translator::pattern(pattern, enable_extended_globbing)
30 .map_err(|e| error::WordParseError::Pattern(e.into()))?;
31 Ok(regex_str)
32}
33
34peg::parser! {
35 grammar pattern_to_regex_translator(enable_extended_globbing: bool) for str {
36 pub(crate) rule pattern() -> String =
37 pieces:(pattern_piece()*) {
38 pieces.join("")
39 }
40
41 rule pattern_piece() -> String =
42 escape_sequence() /
43 bracket_expression() /
44 extglob_enabled() s:extended_glob_pattern() { s } /
45 wildcard() /
46 [c if regex_char_needs_escaping(c)] {
47 let mut s = '\\'.to_string();
48 s.push(c);
49 s
50 } /
51 [c] { c.to_string() }
52
53 rule escape_sequence() -> String =
54 sequence:$(['\\'] [c if regex_char_needs_escaping(c)]) { sequence.to_owned() } /
55 ['\\'] [c] { c.to_string() }
56
57 rule bracket_expression() -> String =
58 "[" invert:(invert_char()?) members:bracket_member()+ "]" {
59 let mut members = members.into_iter().flatten().collect::<Vec<_>>();
60
61 if members.is_empty() {
65 if invert.is_some() {
66 String::from(".")
67 } else {
68 String::from("(?!)")
69 }
70 } else {
71 if invert.is_some() {
72 members.insert(0, String::from("^"));
73 }
74
75 std::format!("[{}]", members.join(""))
76 }
77 }
78
79 rule invert_char() -> bool =
80 ['!' | '^'] { true }
81
82 rule bracket_member() -> Option<String> =
83 e:char_class_expression() { Some(e) } /
84 r:char_range() { r } /
85 m:single_char_bracket_member() {
86 let (char_str, _) = m;
87 Some(char_str)
88 }
89
90 rule char_class_expression() -> String =
91 e:$("[:" char_class() ":]") { e.to_owned() }
92
93 rule char_class() =
94 "alnum" / "alpha" / "blank" / "cntrl" / "digit" / "graph" / "lower" / "print" / "punct" / "space" / "upper"/ "xdigit"
95
96 rule char_range() -> Option<String> =
97 from:single_char_bracket_member() "-" to:single_char_bracket_member() {
98 let (from_str, from_c) = from;
99 let (to_str, to_c) = to;
100
101 if from_c <= to_c {
103 Some(std::format!("{from_str}-{to_str}"))
104 } else {
105 None
106 }
107 }
108
109 rule single_char_bracket_member() -> (String, char) =
110 ['\\'] [c] { (std::format!("\\{c}"), c) } /
112 ['['] { (String::from(r"\["), '[') } /
114 [c if c != ']'] { (c.to_string(), c) }
116
117 rule wildcard() -> String =
118 "?" { String::from(".") } /
119 "*" { String::from(".*") }
120
121 rule extglob_enabled() -> () =
122 &[_] {? if enable_extended_globbing { Ok(()) } else { Err("extglob disabled") } }
123
124 pub(crate) rule extended_glob_pattern() -> String =
125 kind:extended_glob_prefix() "(" branches:extended_glob_body() ")" {
126 let mut s = String::new();
127
128 if matches!(kind, ExtendedGlobKind::Exclamation) {
130 if !branches.is_empty() {
131 s.push_str("(?:(?!");
132 s.push_str(&branches.join("|"));
133 s.push_str(").*|(?>");
134 s.push_str(&branches.join("|"));
135 s.push_str(").+?|)");
136 } else {
137 s.push_str("(?:.+)");
138 }
139 } else {
140 s.push('(');
141 s.push_str(&branches.join("|"));
142 s.push(')');
143
144 match kind {
145 ExtendedGlobKind::Plus => s.push('+'),
146 ExtendedGlobKind::Question => s.push('?'),
147 ExtendedGlobKind::Star => s.push('*'),
148 ExtendedGlobKind::At | ExtendedGlobKind::Exclamation => (),
149 }
150 }
151
152 s
153 }
154
155 rule extended_glob_prefix() -> ExtendedGlobKind =
156 "+" { ExtendedGlobKind::Plus } /
157 "@" { ExtendedGlobKind::At } /
158 "!" { ExtendedGlobKind::Exclamation } /
159 "?" { ExtendedGlobKind::Question } /
160 "*" { ExtendedGlobKind::Star }
161
162 pub(crate) rule extended_glob_body() -> Vec<String> =
163 &[')'] { vec![] } /
165 extended_glob_branch() ** "|"
167
168 rule extended_glob_branch() -> String =
169 &['|' | ')'] { String::new() } /
171 pieces:(!['|' | ')'] piece:pattern_piece() { piece })+ {
172 pieces.join("")
173 }
174
175 rule glob_piece() =
177 bracket_expression() /
178 extglob_enabled() extended_glob_pattern() /
179 wildcard()
180
181 rule non_glob_piece() =
183 escape_sequence() /
184 !glob_piece() [_]
185
186 pub(crate) rule has_glob_metacharacters() -> bool =
191 non_glob_piece()* glob_piece() [_]* { true }
192 }
193}
194
195pub fn pattern_has_glob_metacharacters(pattern: &str, enable_extended_globbing: bool) -> bool {
205 pattern_to_regex_translator::has_glob_metacharacters(pattern, enable_extended_globbing)
206 .unwrap_or(false)
207}
208
/// Returns whether `c` is one of the characters this module backslash-escapes
/// when emitting it literally into a regular expression.
///
/// # Arguments
///
/// * `c` - The character to classify.
pub const fn regex_char_needs_escaping(c: char) -> bool {
    let is_delimiter = matches!(c, '[' | ']' | '(' | ')' | '{' | '}');
    let is_quantifier = matches!(c, '*' | '?' | '+');
    let is_other_special = matches!(c, '.' | '^' | '$' | '|' | '\\' | '-');

    is_delimiter || is_quantifier || is_other_special
}
220
#[cfg(test)]
#[expect(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use anyhow::Result;

    /// Bracket expressions are translated to regex character classes.
    #[test]
    fn test_bracket_exprs() -> Result<()> {
        // (shell pattern, expected regex); extended globbing is on for every case.
        let cases = [
            ("[a-z]", "[a-z]"),
            // A reversed range leaves no usable members, so nothing can match.
            ("[z-a]", "(?!)"),
            ("[+-/]", "[+-/]"),
            (r"[\*-/]", r"[\*-/]"),
            ("[abc]", "[abc]"),
            (r"[\(]", r"[\(]"),
            (r"[(]", "[(]"),
            ("[[:digit:]]", "[[:digit:]]"),
            (r"[-(),!]*", r"[-(),!].*"),
            (r"[-\(\),\!]*", r"[-\(\),\!].*"),
            (r"[a\-b]", r"[a\-b]"),
            (r"[a\-\*]", r"[a\-\*]"),
        ];

        for (glob, expected) in cases {
            assert_eq!(pattern_to_regex_str(glob, true)?, expected, "pattern: {glob}");
        }

        Ok(())
    }

    /// Extended-glob forms and their `|`-separated bodies.
    #[test]
    fn test_extended_glob() -> Result<()> {
        // (extended glob, expected regex).
        let pattern_cases = [
            ("@(a|b)", "(a|b)"),
            ("@(|a)", "(|a)"),
            ("@(|)", "(|)"),
            ("*(ab|ac)", "(ab|ac)*"),
        ];

        for (glob, expected) in pattern_cases {
            assert_eq!(
                pattern_to_regex_translator::extended_glob_pattern(glob, true)?,
                expected,
                "pattern: {glob}"
            );
        }

        let two_branches = pattern_to_regex_translator::extended_glob_body("ab|ac", true)?;
        assert_eq!(two_branches, vec!["ab", "ac"]);

        let no_branches = pattern_to_regex_translator::extended_glob_body("", true)?;
        assert!(no_branches.is_empty());

        Ok(())
    }

    /// Detection of glob metacharacters, with and without extended globbing.
    #[test]
    fn test_has_glob_metacharacters() {
        // (pattern, extended globbing enabled, expected result).
        let cases: &[(&str, bool, bool)] = &[
            // Plain wildcards.
            ("*", false, true),
            ("?", false, true),
            ("a*b", false, true),
            ("a?b", false, true),
            // Complete bracket expressions.
            ("[abc]", false, true),
            ("[a-z]", false, true),
            ("[!a]", false, true),
            // A closing bracket with no opener.
            ("]", false, false),
            ("foo]", false, false),
            ("a]b", false, false),
            // An opening bracket that is never closed.
            ("[", false, false),
            ("[abc", false, false),
            ("a[b", false, false),
            // Ordinary text and the empty pattern.
            ("hello", false, false),
            ("", false, false),
            // Escaped metacharacters.
            (r"\*", false, false),
            (r"\?", false, false),
            (r"\[abc]", false, false),
            // Extended-glob forms while extended globbing is disabled.
            ("@(a)", false, false),
            ("!(a)", false, false),
            ("+(a)", false, false),
            // The same forms while extended globbing is enabled.
            ("@(a)", true, true),
            ("!(a)", true, true),
            ("+(a)", true, true),
            // These begin with an ordinary wildcard, so they count either way.
            ("*(a)", false, true),
            ("?(a)", false, true),
        ];

        for &(pattern, extglob, expected) in cases {
            assert_eq!(
                pattern_has_glob_metacharacters(pattern, extglob),
                expected,
                "pattern: {pattern:?}, extglob: {extglob}"
            );
        }
    }
}