1use std::mem;
59use std::vec::IntoIter as VecIntoIter;
60
61use anyhow::anyhow;
62use itertools::{Either, Itertools as _};
63use regex::{Regex, RegexBuilder};
64
65use crate::error::Error as FError;
66
67#[derive(Debug)]
69enum ClassItem {
70 Char(char),
72 Range(char, char),
74}
75
76#[derive(Debug)]
78struct ClassAccumulator {
79 negated: bool,
81 items: Vec<ClassItem>,
83}
84
85#[derive(Debug)]
87enum State {
88 Start,
90 End,
92 Literal,
94 Escape,
96 ClassStart,
98 Class(ClassAccumulator),
100 ClassRange(ClassAccumulator, char),
103 ClassRangeDash(ClassAccumulator),
106 ClassEscape(ClassAccumulator),
108 Alternate(String, Vec<String>),
110 AlternateEscape(String, Vec<String>),
112}
113
114impl Default for State {
116 fn default() -> Self {
117 Self::Start
118 }
119}
120
121fn escape_in_class(chr: char) -> String {
124 if chr == ']' || chr == '\\' {
125 format!("\\{chr}")
126 } else {
127 chr.to_string()
128 }
129}
130
131fn escape(chr: char) -> String {
133 if "[{(|^$.*?+\\".contains(chr) {
134 format!("\\{chr}")
135 } else {
136 chr.to_string()
137 }
138}
139
140const fn map_letter_escape(chr: char) -> char {
142 match chr {
143 'a' => '\x07',
144 'b' => '\x08',
145 'e' => '\x1b',
146 'f' => '\x0c',
147 'n' => '\x0a',
148 'r' => '\x0d',
149 't' => '\x09',
150 'v' => '\x0b',
151 other => other,
152 }
153}
154
155fn escape_special(chr: char) -> String {
157 escape(map_letter_escape(chr))
158}
159
160struct ExcIter<I>
162where
163 I: Iterator<Item = ClassItem>,
164{
165 it: I,
167}
168
169impl<I> Iterator for ExcIter<I>
170where
171 I: Iterator<Item = ClassItem>,
172{
173 type Item = VecIntoIter<ClassItem>;
174
175 fn next(&mut self) -> Option<Self::Item> {
176 self.it.next().map(|cls| {
177 match cls {
178 ClassItem::Char('/') => vec![],
179 ClassItem::Char(_) => vec![cls],
180 ClassItem::Range('.', '/') => vec![ClassItem::Char('.')],
181 ClassItem::Range(start, '/') => vec![ClassItem::Range(start, '.')],
182 ClassItem::Range('/', '0') => vec![ClassItem::Char('0')],
183 ClassItem::Range('/', end) => vec![ClassItem::Range('0', end)],
184 ClassItem::Range(start, end) if start > '/' || end < '/' => vec![cls],
185 ClassItem::Range(start, end) => vec![
186 if start == '.' {
187 ClassItem::Char('.')
188 } else {
189 ClassItem::Range(start, '.')
190 },
191 if end == '0' {
192 ClassItem::Char('0')
193 } else {
194 ClassItem::Range('0', end)
195 },
196 ],
197 }
198 .into_iter()
199 })
200 }
201}
202
203fn handle_slash(mut acc: ClassAccumulator) -> ClassAccumulator {
207 if acc.negated {
208 let slash_found = acc.items.iter().any(|item| match *item {
209 ClassItem::Char('/') => true,
210 ClassItem::Char(_) => false,
211 ClassItem::Range(start, end) => start <= '/' && end >= '/',
212 });
213 if !slash_found {
214 acc.items.push(ClassItem::Char('/'));
215 }
216 acc
217 } else {
218 ClassAccumulator {
219 items: ExcIter {
220 it: acc.items.into_iter(),
221 }
222 .flatten()
223 .collect(),
224 ..acc
225 }
226 }
227}
228
229fn close_class(glob_acc: ClassAccumulator) -> String {
234 let acc = handle_slash(glob_acc);
235 let (chars_vec, classes_vec): (Vec<_>, Vec<_>) =
236 acc.items.into_iter().partition_map(|item| match item {
237 ClassItem::Char(chr) => Either::Left(chr),
238 ClassItem::Range(start, end) => Either::Right((start, end)),
239 });
240
241 let (chars, final_dash) = {
242 let mut has_dash = false;
243 let res = chars_vec
244 .into_iter()
245 .filter(|chr| {
246 if *chr == '-' {
247 has_dash = true;
248 false
249 } else {
250 true
251 }
252 })
253 .sorted_unstable()
254 .dedup()
255 .map(escape_in_class);
256 (res, if has_dash { "-" } else { "" })
257 };
258
259 let classes = classes_vec
260 .into_iter()
261 .sorted_unstable()
262 .dedup()
263 .map(|cls| {
264 format!(
265 "{first}-{last}",
266 first = escape_in_class(cls.0),
267 last = escape_in_class(cls.1)
268 )
269 });
270
271 format!(
272 "[{neg}{ranges}{final_dash}]",
273 neg = if acc.negated { "^" } else { "" },
274 ranges = chars.chain(classes).collect::<String>(),
275 )
276}
277
278fn close_alternate(gathered: Vec<String>) -> String {
280 let items = gathered
281 .into_iter()
282 .map(|item| item.chars().map(escape).collect::<String>())
283 .sorted_unstable()
284 .dedup()
285 .join("|");
286
287 format!("({items})")
288}
289
290struct GlobIterator<I: Iterator<Item = char>> {
292 pattern: I,
294 state: State,
296}
297
298type StringResult = Result<Option<String>, FError>;
300
301impl<I> GlobIterator<I>
302where
303 I: Iterator<Item = char>,
304{
305 fn handle_start(&mut self) -> String {
307 self.state = State::Literal;
308 "^".to_owned()
309 }
310
311 fn handle_literal(&mut self) -> Option<String> {
313 match self.pattern.next() {
314 None => {
315 self.state = State::End;
316 Some("$".to_owned())
317 }
318 Some(chr) => {
319 let (new_state, res) = match chr {
320 '\\' => (State::Escape, None),
321 '[' => (State::ClassStart, None),
322 '{' => (State::Alternate(String::new(), Vec::new()), None),
323 '?' => (State::Literal, Some("[^/]".to_owned())),
324 '*' => (State::Literal, Some(".*".to_owned())),
325 _ => (State::Literal, Some(regex::escape(&chr.to_string()))),
326 };
327 self.state = new_state;
328 res
329 }
330 }
331 }
332
333 fn handle_escape(&mut self) -> StringResult {
335 match self.pattern.next() {
336 Some(chr) => {
337 self.state = State::Literal;
338 Ok(Some(escape_special(chr)))
339 }
340 None => Err(FError::BareEscape),
341 }
342 }
343
344 fn handle_class_start(&mut self) -> StringResult {
346 match self.pattern.next() {
347 Some(chr) => {
348 self.state = match chr {
349 '!' => State::Class(ClassAccumulator {
350 negated: true,
351 items: Vec::new(),
352 }),
353 '-' => State::Class(ClassAccumulator {
354 negated: false,
355 items: vec![ClassItem::Char('-')],
356 }),
357 ']' => State::Class(ClassAccumulator {
358 negated: false,
359 items: vec![ClassItem::Char(']')],
360 }),
361 '\\' => State::ClassEscape(ClassAccumulator {
362 negated: false,
363 items: Vec::new(),
364 }),
365 other => State::Class(ClassAccumulator {
366 negated: false,
367 items: vec![ClassItem::Char(other)],
368 }),
369 };
370 Ok(None)
371 }
372 None => Err(FError::UnclosedClass),
373 }
374 }
375
376 fn handle_class(&mut self, mut acc: ClassAccumulator) -> StringResult {
378 match self.pattern.next() {
379 Some(chr) => Ok(match chr {
380 ']' => {
381 if acc.items.is_empty() {
382 acc.items.push(ClassItem::Char(']'));
383 self.state = State::Class(acc);
384 None
385 } else {
386 self.state = State::Literal;
387 Some(close_class(acc))
388 }
389 }
390 '-' => match acc.items.pop() {
391 None => {
392 acc.items.push(ClassItem::Char('-'));
393 self.state = State::Class(acc);
394 None
395 }
396 Some(ClassItem::Range(start, end)) => {
397 acc.items.push(ClassItem::Range(start, end));
398 self.state = State::ClassRangeDash(acc);
399 None
400 }
401 Some(ClassItem::Char(start)) => {
402 self.state = State::ClassRange(acc, start);
403 None
404 }
405 },
406 '\\' => {
407 self.state = State::ClassEscape(acc);
408 None
409 }
410 other => {
411 acc.items.push(ClassItem::Char(other));
412 self.state = State::Class(acc);
413 None
414 }
415 }),
416 None => Err(FError::UnclosedClass),
417 }
418 }
419
420 fn handle_class_escape(&mut self, mut acc: ClassAccumulator) -> StringResult {
422 match self.pattern.next() {
423 Some(chr) => {
424 acc.items.push(ClassItem::Char(map_letter_escape(chr)));
425 self.state = State::Class(acc);
426 Ok(None)
427 }
428 None => Err(FError::UnclosedClass),
429 }
430 }
431
432 fn handle_class_range(&mut self, mut acc: ClassAccumulator, start: char) -> StringResult {
434 match self.pattern.next() {
435 Some(chr) => match chr {
436 '\\' => Err(FError::NotImplemented(format!(
437 "FIXME: handle class range end escape with {acc:?} start {start:?}"
438 ))),
439 ']' => {
440 acc.items.push(ClassItem::Char(start));
441 acc.items.push(ClassItem::Char('-'));
442 self.state = State::Literal;
443 Ok(Some(close_class(acc)))
444 }
445 end if start > end => Err(FError::ReversedRange(start, end)),
446 end if start == end => {
447 acc.items.push(ClassItem::Char(start));
448 self.state = State::Class(acc);
449 Ok(None)
450 }
451 end => {
452 acc.items.push(ClassItem::Range(start, end));
453 self.state = State::Class(acc);
454 Ok(None)
455 }
456 },
457 None => Err(FError::UnclosedClass),
458 }
459 }
460
461 fn handle_class_range_dash(&mut self, mut acc: ClassAccumulator) -> StringResult {
463 match self.pattern.next() {
464 Some(chr) => {
465 if chr == ']' {
466 acc.items.push(ClassItem::Char('-'));
467 self.state = State::Literal;
468 Ok(Some(close_class(acc)))
469 } else if let Some(ClassItem::Range(start, end)) = acc.items.pop() {
470 Err(FError::RangeAfterRange(start, end))
471 } else {
472 Err(FError::InternalError(anyhow!(
473 "handle_class_range_dash: chr {chr:?} acc {acc:?}"
474 )))
475 }
476 }
477 None => Err(FError::UnclosedClass),
478 }
479 }
480
481 fn handle_alternate(&mut self, mut current: String, mut gathered: Vec<String>) -> StringResult {
483 match self.pattern.next() {
484 Some(chr) => match chr {
485 ',' => {
486 gathered.push(current);
487 self.state = State::Alternate(String::new(), gathered);
488 Ok(None)
489 }
490 '}' => {
491 self.state = State::Literal;
492 if current.is_empty() && gathered.is_empty() {
493 Ok(Some(r"\{\}".to_owned()))
494 } else {
495 gathered.push(current);
496 Ok(Some(close_alternate(gathered)))
497 }
498 }
499 '\\' => {
500 self.state = State::AlternateEscape(current, gathered);
501 Ok(None)
502 }
503 '[' => Err(FError::NotImplemented(
504 "FIXME: alternate character class".to_owned(),
505 )),
506 other => {
507 current.push(other);
508 self.state = State::Alternate(current, gathered);
509 Ok(None)
510 }
511 },
512 None => Err(FError::UnclosedAlternation),
513 }
514 }
515
516 fn handle_alternate_escape(
518 &mut self,
519 mut current: String,
520 gathered: Vec<String>,
521 ) -> StringResult {
522 match self.pattern.next() {
523 Some(chr) => {
524 current.push(map_letter_escape(chr));
525 self.state = State::Alternate(current, gathered);
526 Ok(None)
527 }
528 None => Err(FError::UnclosedAlternation),
529 }
530 }
531}
532
533impl<I> Iterator for GlobIterator<I>
534where
535 I: Iterator<Item = char>,
536{
537 type Item = StringResult;
538
539 fn next(&mut self) -> Option<Self::Item> {
540 match mem::take(&mut self.state) {
541 State::Start => Some(Ok(Some(self.handle_start()))),
542 State::End => None,
543 State::Literal => Some(Ok(self.handle_literal())),
544 State::Escape => Some(self.handle_escape()),
545 State::ClassStart => Some(self.handle_class_start()),
546 State::Class(acc) => Some(self.handle_class(acc)),
547 State::ClassEscape(acc) => Some(self.handle_class_escape(acc)),
548 State::ClassRange(acc, start) => Some(self.handle_class_range(acc, start)),
549 State::ClassRangeDash(acc) => Some(self.handle_class_range_dash(acc)),
550 State::Alternate(current, gathered) => Some(self.handle_alternate(current, gathered)),
551 State::AlternateEscape(current, gathered) => {
552 Some(self.handle_alternate_escape(current, gathered))
553 }
554 }
555 }
556}
557
558#[allow(clippy::missing_inline_in_public_items)]
567pub fn glob_to_regex(pattern: &str) -> Result<Regex, FError> {
568 let parser = GlobIterator {
569 pattern: pattern.chars(),
570 state: State::Start,
571 };
572 let re_pattern = parser.flatten_ok().collect::<Result<Vec<_>, _>>()?.join("");
573 Regex::new(&re_pattern).map_err(|err| FError::InvalidRegex(re_pattern, err))
574}
575
576#[allow(clippy::missing_inline_in_public_items)]
585pub fn glob_to_regex_ext(pattern: &str, ignore_case: bool) -> Result<Regex, FError> {
586 let parser = GlobIterator {
587 pattern: pattern.chars(),
588 state: State::Start,
589 };
590 let re_pattern = parser.flatten_ok().collect::<Result<Vec<_>, _>>()?.join("");
591 RegexBuilder::new(&re_pattern)
592 .case_insensitive(ignore_case)
593 .build()
594 .map_err(|err| FError::InvalidRegex(re_pattern, err))
595}