goglob_common/
lib.rs

1//! # Do not use this library directly!
2//!
3//! See the `goglob` crate instead.
4
5pub mod error;
6pub use crate::error::Result;
7
8pub mod charcls;
9pub mod literal;
10
11#[cfg(feature = "serde")]
12mod serde;
13
14use crate::{
15    charcls::{CharClass as GlobTokenCharClass, CharClassType},
16    error::{Error, ErrorType},
17    literal::Literal as GlobTokenLiteral,
18};
19use std::{borrow::Cow, result::Result as StdResult};
20
21/// Shell pattern matching similar to golang's `path.Match`.
22///
23/// # Further reading
24///
25/// See the `goglob` crate's documentation for the appropriate syntax.
26#[derive(Debug, Clone, Eq, PartialEq)]
27#[repr(transparent)]
28pub struct GlobPattern {
29    tokens: Cow<'static, [GlobToken]>,
30}
31impl GlobPattern {
32    /// Compile the given `pattern` into tokens at runtime, returning a [`GlobPattern`][Self]
33    /// on success or an [error][crate::error::Error] if `pattern` is syntactically invalid.
34    ///
35    /// # Further reading
36    ///
37    /// See the `goglob` crate's documentation for the appropriate syntax, as well as
38    /// [goglob::error::Error][crate:error:Error] for possible syntax errors.
39    #[inline]
40    pub fn new<S: AsRef<str>>(pattern: S) -> Result<Self> {
41        Self::_new(pattern.as_ref())
42    }
43    fn _new(pattern: &str) -> Result<Self> {
44        let mut tokens = Vec::new();
45        crate::scan_patterns(pattern, &mut tokens)?;
46
47        tokens.shrink_to_fit();
48        Ok(Self {
49            tokens: Cow::Owned(tokens),
50        })
51    }
52
53    /// Report whether the `name` matches the compiled shell pattern.
54    ///
55    /// # Further reading
56    ///
57    /// See the `goglob` crate's documentation for the appropriate syntax.
58    #[inline]
59    pub fn matches<S: AsRef<str>>(&self, name: S) -> bool {
60        self._matches(name.as_ref())
61    }
62    fn _matches(&self, name: &str) -> bool {
63        let mut next = name;
64        let mut tokens = self.tokens.iter().peekable();
65        'outer: while let Some(token) = tokens.next() {
66            next = match token.try_matches_next(next) {
67                Ok(Some(next)) => next,
68                Ok(None) => return false,
69                Err(()) => {
70                    // SeqWildcard doesn't implement matches_next. However, it
71                    // can match any number of non-'/' characters (even zero),
72                    // so we must see what matches the remaining tokens up until
73                    // the next SeqWildcard (or the end if no further SeqWildcards
74                    // remain)
75
76                    // If there are no more tokens left, make sure there is no '/'
77                    // in the rest of the string
78                    if tokens.peek().is_none() {
79                        return !next.contains('/');
80                    };
81
82                    // For every remaining position in next until '/', check if
83                    // the remaining tokens until SeqWildcard match.
84                    'star: for (i, c) in next.char_indices() {
85                        let mut tokens_peek = tokens.clone();
86                        let mut next_peek = &next[i..];
87                        let mut fail = false;
88                        let mut finished = true;
89                        'inner: while let Some(token_peek) = tokens_peek.peek() {
90                            next_peek = match token_peek.try_matches_next(next_peek) {
91                                Ok(Some(next_peek)) => next_peek,
92                                Ok(None) => {
93                                    fail = true;
94                                    break 'inner;
95                                }
96                                Err(_) => {
97                                    finished = false;
98                                    break 'inner;
99                                }
100                            };
101                            tokens_peek.next();
102                        }
103
104                        if !fail && (!finished || next_peek.is_empty()) {
105                            // Either we correctly matched until the next SeqWildcard,
106                            // or there are no tokens left and the entirety of the
107                            // string is matched. In either case we continue
108                            tokens = tokens_peek;
109                            next = next_peek;
110                            continue 'outer;
111                        }
112
113                        // Match failed, try from next position.
114
115                        if c == '/' {
116                            // Found '/', abort
117                            break 'star;
118                        }
119                    }
120
121                    // Exhausted available positions without finding a match.
122                    return false;
123                }
124            }
125        }
126        next.is_empty()
127    }
128}
129
130#[derive(Debug, Clone, Eq, PartialEq)]
131pub enum GlobToken {
132    Literal(GlobTokenLiteral),
133    CharClass(GlobTokenCharClass),
134    SeqWildcard,
135    SingleWildcard,
136}
137impl GlobToken {
138    fn try_matches_next<'a>(&self, name: &'a str) -> StdResult<Option<&'a str>, ()> {
139        match self {
140            GlobToken::Literal(l) => Ok(l.matches_next(name)),
141            GlobToken::CharClass(cc) => Ok(cc.matches_next(name)),
142            GlobToken::SingleWildcard => Ok(name.strip_prefix(|c| c != '/')),
143            GlobToken::SeqWildcard => Err(()),
144        }
145    }
146}
147
148/// Internal workspace-only function employed by `goglob-proc-macro`.
149///
150/// The procedural macro will insert calls to this function in the end-user's project,
151/// so it must be declared public.
152pub const fn glob_from_tokens(tokens: &'static [GlobToken]) -> GlobPattern {
153    GlobPattern {
154        tokens: Cow::Borrowed(tokens),
155    }
156}
157
158/// Internal workspace-only function used locally and in `goglob-proc-macro`.
159pub fn scan_patterns(pattern: &str, tokens: &mut Vec<GlobToken>) -> Result<()> {
160    if pattern.is_empty() {
161        return Err(Error::empty_pattern());
162    }
163
164    let mut pattern_iter = pattern.char_indices().peekable();
165    while pattern_iter.peek().is_some() {
166        let mut stars = false;
167
168        // Match star wildcards (e.g. '*ab?cd[e-z]*')
169        //                             ^          ^
170        while let Some((_, '*')) = pattern_iter.peek() {
171            stars = true;
172            pattern_iter.next();
173        }
174        if stars {
175            tokens.push(GlobToken::SeqWildcard)
176        }
177
178        // Match literals (e.g. '*ab?cd[e-z]*')
179        //                        ^^ ^^
180        let mut literal_string = String::new();
181        'literal: while let Some((i, c)) = pattern_iter.peek() {
182            let (i, c) = (*i, *c);
183            let c = match c {
184                ']' =>
185                // we are not in a character class (i.e. '[' was never passed)
186                // therefore ']' is illegal and should be explicitly escaped
187                {
188                    return Err(Error::new(ErrorType::UnescapedChar(']'), i))
189                }
190                '[' | '?' | '*' =>
191                // '[' opens a character class
192                // '?' is a single-character wildcard
193                // '*' is a multi-character wildcard
194                // any of these signal an end to the current literal
195                {
196                    break 'literal
197                }
198                '\\' => {
199                    pattern_iter.next();
200
201                    // '\' escapes the next character, whichever it may be.
202                    // If there is no "next character", then it's considered
203                    // an illegal escape
204                    let (_, escaped_char) = pattern_iter
205                        .next()
206                        .ok_or_else(|| Error::new(ErrorType::IllegalEscape, i))?;
207                    escaped_char
208                }
209                c => {
210                    pattern_iter.next();
211                    c
212                }
213            };
214
215            literal_string.push(c);
216        }
217        if !literal_string.is_empty() {
218            tokens.push(GlobToken::Literal(GlobTokenLiteral::new(literal_string)))
219        }
220
221        // Match question-mark wildcards (e.g. '*ab?cd[e-z]*')
222        //                                         ^
223        while let Some((_, '?')) = pattern_iter.peek() {
224            tokens.push(GlobToken::SingleWildcard);
225            pattern_iter.next();
226        }
227
228        // Match character class (e.g. '*ab?cd[e-z]*')
229        //                                    ^^^^^
230        if let Some((i, '[')) = pattern_iter.peek() {
231            let mut types: Vec<CharClassType> = Vec::new();
232            let mut negated = false;
233            let mut closed = false;
234            let i = *i;
235            let start_i = i;
236            let mut closed_i = usize::MAX;
237
238            pattern_iter.next();
239
240            // Match negation in character class (e.g. '[^A-F]')
241            //                                           ^
242            if let Some((_, '^')) = pattern_iter.peek() {
243                pattern_iter.next();
244                negated = true;
245            }
246
247            let mut start_range = false;
248            let mut in_range: Option<char> = None;
249            'char_cls: while let Some((i, c)) = pattern_iter.next() {
250                debug_assert!(!start_range || c == '-');
251                let c = match c {
252                    ']' => {
253                        // Close the character range
254                        closed = true;
255                        closed_i = i;
256                        break 'char_cls;
257                    }
258                    '^' =>
259                    // The character class was already started or negated.
260                    // Either way, a '^' here must be interpreted as a
261                    // character as-is (at least according to go's impl).
262                    {
263                        '^'
264                    }
265                    '-' if !start_range =>
266                    // Illegal uses of '-':
267                    //
268                    // * As the first character in the class (e.g. [-a][^-z])
269                    //                                              ^    ^
270                    // * After another '-' (e.g. [a--f])
271                    //                              ^
272                    // * Immediately after a character range (e.g. [a-f-z])
273                    //                                                 ^
274                    // If a literal '-' is desired, escape it with a '\' beforehand
275                    // (e.g. [a-f\-z][\-a][^\-z])
276                    //           ^^   ^^    ^^
277                    {
278                        return Err(Error::new(ErrorType::UnescapedChar('-'), i))
279                    }
280                    '-' => {
281                        // Character range (e.g. [0-9abcdefA-F]
282                        //                        ^^^      ^^^
283                        start_range = false;
284                        continue 'char_cls;
285                    }
286                    '\\' => {
287                        // '\' escapes the next character, whichever it may be.
288                        // If there is no "next character", then it's considered
289                        // an illegal escape
290                        let (_, escaped_char) = pattern_iter
291                            .next()
292                            .ok_or_else(|| Error::new(ErrorType::IllegalEscape, i))?;
293                        escaped_char
294                    }
295                    c => c,
296                };
297                if let Some(start) = in_range {
298                    let end = c;
299                    let range = (start..=end)
300                        .try_into()
301                        .map_err(|_| Error::new(ErrorType::InvalidRangeValues(start, end), i))?;
302                    types.push(range);
303                    in_range = None
304                } else if let Some((_, '-')) = pattern_iter.peek() {
305                    in_range = Some(c);
306                    start_range = true
307                } else {
308                    types.push(c.into())
309                }
310            }
311
312            // A character class must be closed with a corresponding ']'.
313            if !closed {
314                return Err(Error::new(ErrorType::UnclosedCharClass, start_i));
315            }
316
317            // A character class must not be empty (e.g. []abc] or [^]abc])
318            //                                            ^          ^
319            // For the character class to include a ']' char it must be
320            // explicitly escaped (e.g. [\]abc] or [^\]abc].
321            //                           ^^          ^^
322            if types.is_empty() {
323                return Err(Error::new(ErrorType::UnescapedChar(']'), closed_i));
324            }
325
326            tokens.push(GlobToken::CharClass(GlobTokenCharClass::new(
327                negated, types,
328            )));
329        }
330    }
331
332    Ok(())
333}
334
335#[cfg(test)]
336//noinspection DuplicatedCode
337mod tests {
338    mod aux {
339        use crate::{GlobPattern, Result as GlobPatternResult};
340        use std::fmt::{Display, Formatter};
341
342        #[derive(Clone)]
343        pub struct MatchTest {
344            pattern: String,
345            name: String,
346            expect_match: Result<bool, ()>,
347        }
348        impl MatchTest {
349            const fn _new(pattern: String, name: String, expect_match: Result<bool, ()>) -> Self {
350                Self {
351                    pattern,
352                    name,
353                    expect_match,
354                }
355            }
356
357            pub fn display(&self) -> TestDisplay {
358                let clone = self.clone();
359                TestDisplay { test: clone }
360            }
361
362            pub fn test(&self) -> GlobPatternResult<bool> {
363                GlobPattern::new(self.pattern.clone()).map(|p| p.matches(self.name.clone()))
364            }
365
366            pub fn succeed(self, result: GlobPatternResult<bool>) -> bool {
367                result.map_err(|_| ()) == self.expect_match
368            }
369        }
370        #[inline]
371        pub fn make_test<S1: Into<String>, S2: Into<String>>(
372            pattern: S1,
373            name: S2,
374            expect_match: Result<bool, ()>,
375        ) -> MatchTest {
376            MatchTest::_new(pattern.into(), name.into(), expect_match)
377        }
378
379        pub struct TestDisplay {
380            test: MatchTest,
381        }
382        impl Display for TestDisplay {
383            fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
384                write!(
385                    f,
386                    "({}, {}) expected {:?}",
387                    self.test.pattern, self.test.name, self.test.expect_match,
388                )
389            }
390        }
391    }
392
393    use aux::*;
394
395    #[test]
396    fn glob_pattern_go_match_test() {
397        let tests = [
398            make_test("abc", "abc", Ok(true)),
399            make_test("*", "abc", Ok(true)),
400            make_test("*c", "abc", Ok(true)),
401            make_test("a*", "a", Ok(true)),
402            make_test("a*", "abc", Ok(true)),
403            make_test("a*", "ab/c", Ok(false)),
404            make_test("a*/b", "abc/b", Ok(true)),
405            make_test("a*/b", "a/c/b", Ok(false)),
406            make_test("a*b*c*d*e*/f", "axbxcxdxe/f", Ok(true)),
407            make_test("a*b*c*d*e*/f", "axbxcxdxexxx/f", Ok(true)),
408            make_test("a*b*c*d*e*/f", "axbxcxdxe/xxx/f", Ok(false)),
409            make_test("a*b*c*d*e*/f", "axbxcxdxexxx/fff", Ok(false)),
410            make_test("a*b?c*x", "abxbbxdbxebxczzx", Ok(true)),
411            make_test("a*b?c*x", "abxbbxdbxebxczzy", Ok(false)),
412            make_test("ab[c]", "abc", Ok(true)),
413            make_test("ab[b-d]", "abc", Ok(true)),
414            make_test("ab[e-g]", "abc", Ok(false)),
415            make_test("ab[^c]", "abc", Ok(false)),
416            make_test("ab[^b-d]", "abc", Ok(false)),
417            make_test("ab[^e-g]", "abc", Ok(true)),
418            make_test("a\\*b", "a*b", Ok(true)),
419            make_test("a\\*b", "ab", Ok(false)),
420            make_test("a?b", "a☺b", Ok(true)),
421            make_test("a[^a]b", "a☺b", Ok(true)),
422            make_test("a???b", "a☺b", Ok(false)),
423            make_test("a[^a][^a][^a]b", "a☺b", Ok(false)),
424            make_test("[a-ζ]*", "α", Ok(true)),
425            make_test("*[a-ζ]", "A", Ok(false)),
426            make_test("a?b", "a/b", Ok(false)),
427            make_test("a*b", "a/b", Ok(false)),
428            make_test("[\\]a]", "]", Ok(true)),
429            make_test("[\\-]", "-", Ok(true)),
430            make_test("[x\\-]", "x", Ok(true)),
431            make_test("[x\\-]", "-", Ok(true)),
432            make_test("[x\\-]", "z", Ok(false)),
433            make_test("[\\-x]", "x", Ok(true)),
434            make_test("[\\-x]", "-", Ok(true)),
435            make_test("[\\-x]", "a", Ok(false)),
436            make_test("[]a]", "]", Err(())),
437            make_test("[-]", "-", Err(())),
438            make_test("[x-]", "x", Err(())),
439            make_test("[x-]", "-", Err(())),
440            make_test("[x-]", "z", Err(())),
441            make_test("[-x]", "x", Err(())),
442            make_test("[-x]", "-", Err(())),
443            make_test("[-x]", "a", Err(())),
444            make_test("\\", "a", Err(())),
445            make_test("[a-b-c]", "a", Err(())),
446            make_test("[", "a", Err(())),
447            make_test("[^", "a", Err(())),
448            make_test("[^bc", "a", Err(())),
449            make_test("a[", "a", Err(())),
450            make_test("a[", "ab", Err(())),
451            make_test("a[", "x", Err(())),
452            make_test("a/b[", "x", Err(())),
453            make_test("*x", "xxx", Ok(true)),
454        ];
455
456        for (i, test) in tests.into_iter().enumerate() {
457            let display = test.display();
458            let result = test.test();
459            let result_display = format!("{:?}", result);
460            assert!(
461                test.succeed(result),
462                "[Test {i}]: {display}, got {result_display}"
463            )
464        }
465    }
466}