simple_path_match/
lib.rs

1#![warn(clippy::pedantic)]
2#![allow(clippy::uninlined_format_args, clippy::missing_errors_doc)]
3#![cfg_attr(not(test), no_std)]
4
5extern crate alloc;
6
7use alloc::collections::{BTreeMap, VecDeque};
8use alloc::string::{String, ToString as _};
9use alloc::vec::Vec;
10use beef::Cow;
11use snafu::Snafu;
12
13const PATH_CURRENT: &str = ".";
14const PATH_PARENT: &str = "..";
15const UNIX_SEP: &str = "/";
16const WILDCARD_ANY: &str = "*";
17
18#[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
19enum PathComponent<'a> {
20    Current,
21    DirectoryMarker,
22    Name(Cow<'a, str>),
23    Parent,
24    RootName(Cow<'a, str>),
25}
26
27impl alloc::fmt::Display for PathComponent<'_> {
28    fn fmt(&self, formatter: &mut alloc::fmt::Formatter<'_>) -> Result<(), alloc::fmt::Error> {
29        match self {
30            PathComponent::Current => formatter.write_str(PATH_CURRENT),
31            PathComponent::DirectoryMarker => Ok(()),
32            PathComponent::Name(s) | PathComponent::RootName(s) => formatter.write_str(s),
33            PathComponent::Parent => formatter.write_str(PATH_PARENT),
34        }
35    }
36}
37
38impl PathComponent<'_> {
39    fn traversal_depth(&self) -> usize {
40        match self {
41            PathComponent::Current | PathComponent::DirectoryMarker => 0,
42            PathComponent::Name(_) | PathComponent::RootName(_) | PathComponent::Parent => 1,
43        }
44    }
45
46    fn into_owned(self) -> PathComponent<'static> {
47        match self {
48            PathComponent::Current => PathComponent::Current,
49            PathComponent::Parent => PathComponent::Parent,
50            PathComponent::DirectoryMarker => PathComponent::DirectoryMarker,
51            PathComponent::Name(n) => PathComponent::Name(n.into_owned().into()),
52            PathComponent::RootName(n) => PathComponent::RootName(n.into_owned().into()),
53        }
54    }
55}
56
57#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
58struct StartsEndsWith(String, String);
59
60impl alloc::fmt::Display for StartsEndsWith {
61    fn fmt(&self, formatter: &mut alloc::fmt::Formatter<'_>) -> Result<(), alloc::fmt::Error> {
62        formatter.write_str(&self.0)?;
63        formatter.write_str(WILDCARD_ANY)?;
64        formatter.write_str(&self.1)
65    }
66}
67
68impl StartsEndsWith {
69    pub fn matches(&self, name: &str) -> bool {
70        name.starts_with(&self.0) && name.ends_with(&self.1)
71    }
72}
73
74#[derive(Clone, Debug, PartialEq, Eq)]
75enum PatternComponent {
76    Literal(PathComponent<'static>),
77    StartsEndsWith(StartsEndsWith),
78}
79
80impl alloc::fmt::Display for PatternComponent {
81    fn fmt(&self, formatter: &mut alloc::fmt::Formatter<'_>) -> Result<(), alloc::fmt::Error> {
82        match self {
83            PatternComponent::Literal(c) => c.fmt(formatter),
84            PatternComponent::StartsEndsWith(m) => m.fmt(formatter),
85        }
86    }
87}
88
89/// Errors that can occur during pattern compilation
90#[derive(Debug, Snafu)]
91pub enum Error {
92    /// The supplied pattern contained a parent traversal (`..`)
93    #[snafu(display("Pattern must not contain parent traversals"))]
94    NoParents,
95
96    /// A wilcard was used in a component in an invalid way
97    #[snafu(display("Only one wilcard allowed in component: `{}`", component))]
98    WildcardPosition { component: String },
99}
100
101struct StringComponentIter<'a> {
102    path_string: core::iter::Enumerate<core::str::Split<'a, &'a str>>,
103    is_dir: bool,
104}
105
106impl<'a> StringComponentIter<'a> {
107    pub fn new(path: &'a str, separator: &'a str) -> StringComponentIter<'a> {
108        StringComponentIter {
109            path_string: path.split(separator).enumerate(),
110            is_dir: false,
111        }
112    }
113}
114
115impl<'a> Iterator for StringComponentIter<'a> {
116    type Item = PathComponent<'a>;
117
118    fn next(&mut self) -> Option<PathComponent<'a>> {
119        for (idx, component) in self.path_string.by_ref() {
120            self.is_dir = false;
121            match component {
122                "" => {
123                    if idx == 0 {
124                        return Some(PathComponent::RootName(component.into()));
125                    }
126                    self.is_dir = true;
127                }
128                PATH_CURRENT => return Some(PathComponent::Current),
129                PATH_PARENT => return Some(PathComponent::Parent),
130                _ => return Some(PathComponent::Name(component.into())),
131            }
132        }
133        if self.is_dir {
134            self.is_dir = false;
135            Some(PathComponent::DirectoryMarker)
136        } else {
137            None
138        }
139    }
140}
141
142fn normalized<'a, I: IntoIterator<Item = PathComponent<'a>>>(components: I) -> Vec<PathComponent<'a>> {
143    let components = components.into_iter();
144    let mut result = Vec::with_capacity(components.size_hint().0);
145    for component in components {
146        match component {
147            PathComponent::Name(_) | PathComponent::RootName(_) => result.push(component),
148            PathComponent::DirectoryMarker => {
149                if result.is_empty() {
150                    result.push(PathComponent::Current);
151                }
152                result.push(PathComponent::DirectoryMarker);
153            }
154            PathComponent::Parent => match result.last() {
155                None | Some(PathComponent::Parent) => result.push(PathComponent::Parent),
156                Some(PathComponent::Name(_)) => drop(result.pop()),
157                Some(PathComponent::RootName(_)) => {}
158                Some(c) => panic!("Component found in unexpected place during normalization: {:?}", c),
159            },
160            PathComponent::Current => {}
161        }
162    }
163    if result.is_empty() {
164        result.push(PathComponent::Current);
165    }
166    result
167}
168
169fn path_to_pattern<'a, I: IntoIterator<Item = PathComponent<'a>>>(
170    components: I,
171) -> Result<Vec<PatternComponent>, Error> {
172    let components = components.into_iter();
173    let mut result = Vec::with_capacity(components.size_hint().0);
174    for component in components {
175        match component {
176            PathComponent::Name(ref name) => {
177                let matcher = if let Some(idx) = name.find(WILDCARD_ANY) {
178                    let (start, end) = name.split_at(idx);
179                    let (_, end) = end.split_at(WILDCARD_ANY.len());
180                    if start.contains(WILDCARD_ANY) || end.contains(WILDCARD_ANY) {
181                        return Err(Error::WildcardPosition {
182                            component: name.to_string(),
183                        });
184                    }
185                    PatternComponent::StartsEndsWith(StartsEndsWith(start.to_string(), end.to_string()))
186                } else {
187                    PatternComponent::Literal(component.into_owned())
188                };
189                result.push(matcher);
190            }
191            PathComponent::Parent => return Err(Error::NoParents),
192            PathComponent::Current => {}
193            PathComponent::DirectoryMarker => {
194                if result.is_empty() {
195                    result.push(PatternComponent::Literal(PathComponent::Current));
196                }
197                result.push(PatternComponent::Literal(component.into_owned()));
198            }
199            PathComponent::RootName(_) => {
200                result.push(PatternComponent::Literal(component.into_owned()));
201            }
202        }
203    }
204    if result.is_empty() {
205        result.push(PatternComponent::Literal(PathComponent::Current));
206    }
207    Ok(result)
208}
209
210#[derive(Clone, Debug)]
211struct PathMatchNode {
212    can_end: bool,
213    literals: BTreeMap<PathComponent<'static>, PathMatchNode>,
214    starts_ends_with: BTreeMap<StartsEndsWith, PathMatchNode>,
215    min_traversals: usize,
216    max_traversals: usize,
217}
218
219impl Default for PathMatchNode {
220    fn default() -> PathMatchNode {
221        PathMatchNode {
222            can_end: false,
223            literals: BTreeMap::new(),
224            starts_ends_with: BTreeMap::new(),
225            min_traversals: 0,
226            max_traversals: usize::MAX,
227        }
228    }
229}
230
231impl alloc::fmt::Display for PathMatchNode {
232    fn fmt(&self, formatter: &mut alloc::fmt::Formatter<'_>) -> Result<(), alloc::fmt::Error> {
233        use alloc::fmt::Write as _;
234
235        let literals_iter = self.literals.iter().map(|(k, v)| (k.to_string(), v));
236        let matchers_iter = self.starts_ends_with.iter().map(|(k, v)| (k.to_string(), v));
237        let subnodes_iter = literals_iter.chain(matchers_iter);
238        let mut output = String::new();
239        let mut has_multiple_options = false;
240        for (idx, (k, v)) in subnodes_iter.enumerate() {
241            if idx > 0 {
242                output += "|";
243                has_multiple_options = true;
244            }
245            output += &k;
246            if v.can_end {
247                output += "$";
248            }
249            if !v.is_empty() {
250                output += UNIX_SEP;
251                write!(&mut output, "{}", v)?;
252            }
253        }
254        if has_multiple_options {
255            formatter.write_str("(")?;
256        }
257        formatter.write_str(&output)?;
258        if has_multiple_options {
259            formatter.write_str(")")?;
260        }
261        Ok(())
262    }
263}
264
265impl PathMatchNode {
266    fn insert_component(&mut self, component: PatternComponent) -> &mut PathMatchNode {
267        self.min_traversals = 0;
268        self.max_traversals = usize::MAX;
269        match component {
270            PatternComponent::Literal(literal) => self.literals.entry(literal).or_default(),
271            PatternComponent::StartsEndsWith(pattern) => self.starts_ends_with.entry(pattern).or_default(),
272        }
273    }
274
275    pub fn is_empty(&self) -> bool {
276        self.starts_ends_with.is_empty() && self.literals.is_empty()
277    }
278
279    fn recompute_depth_bounds(&mut self) -> (usize, usize) {
280        let min = &mut self.min_traversals;
281        let max = &mut self.max_traversals;
282        *min = if self.can_end { 0 } else { usize::MAX };
283        *max = 0;
284        let node_iter = self
285            .literals
286            .iter_mut()
287            .map(|(k, v)| (k.traversal_depth(), v))
288            .chain(self.starts_ends_with.values_mut().map(|v| (1, v)));
289        for (component_depth, node) in node_iter {
290            let (node_min, node_max) = node.recompute_depth_bounds();
291            *min = core::cmp::min(*min, node_min + component_depth);
292            *max = core::cmp::max(*max, node_max + component_depth);
293        }
294        (*min, *max)
295    }
296
297    pub fn insert(&mut self, mut pattern: Vec<PatternComponent>) {
298        let mut node = self;
299        for head in pattern.drain(..) {
300            node = node.insert_component(head);
301        }
302        node.can_end = true;
303    }
304
305    pub fn matches(node: &PathMatchNode, path: &[PathComponent], match_prefix: bool) -> bool {
306        let mut candidates = VecDeque::new();
307        candidates.push_front((node, path));
308        while let Some((node, path)) = candidates.pop_back() {
309            let path = if match_prefix && path.first() == Some(&PathComponent::Current) {
310                // It is invalid to do this in the non-prefix case, since we might need
311                // to match ".". We need to do this for the prefix case since "." is a prefix
312                // of any relative path, but won't match other paths.
313                &path[1..]
314            } else {
315                path
316            };
317            let can_match = node.can_end || match_prefix;
318            let path_is_dir_marker = path.len() == 1 && path.last() == Some(&PathComponent::DirectoryMarker);
319            if path_is_dir_marker && can_match {
320                return true;
321            }
322            if let Some(component) = path.first() {
323                if let Some(matching_node) = node.literals.get(component) {
324                    candidates.push_front((matching_node, &path[1..]));
325                }
326                for (name_matcher, matching_node) in &node.starts_ends_with {
327                    if let PathComponent::Name(name) = component {
328                        if name_matcher.matches(name) {
329                            candidates.push_front((matching_node, &path[1..]));
330                        }
331                    }
332                }
333            } else if can_match {
334                return true;
335            }
336        }
337        false
338    }
339}
340
341/// Matches against a path
342#[derive(Clone, Debug)]
343pub struct PathMatch {
344    separator: String,
345    match_tree: PathMatchNode,
346}
347
348impl alloc::fmt::Display for PathMatch {
349    fn fmt(&self, formatter: &mut alloc::fmt::Formatter<'_>) -> Result<(), alloc::fmt::Error> {
350        self.match_tree.fmt(formatter)
351    }
352}
353
354impl PathMatch {
355    /// Constructs a `PathMatch` for a single pattern.
356    ///
357    /// The pattern must use the forward slash as a separator. The following
358    /// restrictions apply:
359    /// * Each component must either be a literal name or can contain a single
360    ///   asterisk (representing a wildcard) with an optional literal prefix and
361    ///   suffix.
362    /// * `?` is not supported.
363    /// * The pattern must not contain parent traversals (`..`) but `.` is
364    ///   supported.
365    /// * No escaping of special characters is supported.
366    ///
367    /// Construction will return an error if parent traverals are present or
368    /// a component contains multiple wildcard characters.
369    ///
370    /// The supplied separator is used when parsing the supplied paths. The idea
371    /// is that the patterns you use are specified in an OS-independent
372    /// manner so they can be compile-time constant, but the separator is
373    /// supplied at run-time to allow adaptation to OS.
374    pub fn from_pattern(pattern: &str, separator: &str) -> Result<PathMatch, Error> {
375        let components = StringComponentIter::new(pattern, UNIX_SEP);
376        let pattern = path_to_pattern(components)?;
377        let mut match_tree = PathMatchNode::default();
378        match_tree.insert(pattern);
379        match_tree.recompute_depth_bounds();
380        let result = PathMatch {
381            separator: separator.to_string(),
382            match_tree,
383        };
384        Ok(result)
385    }
386
387    /// Returns `true` if the specified string matches the pattern, `false`
388    /// otherwise. Unlike patterns, paths may contain `..`, but if the parent
389    /// traversal cannot be normalized out, no matches can occur.
390    pub fn matches<P: AsRef<str>>(&self, path: P) -> bool {
391        let path = path.as_ref();
392        self.matches_common(path, false)
393    }
394
395    /// Returns `true` if the specified string forms a prefix path of one of the
396    /// patterns matches.
397    ///
398    /// The prefix must consist of full components. e.g. `first/second` is a
399    /// prefix of `first/second/third`, but `first/sec` is not.
400    pub fn matches_prefix<P: AsRef<str>>(&self, path: P) -> bool {
401        let path = path.as_ref();
402        self.matches_common(path, true)
403    }
404
405    fn matches_common(&self, path: &str, match_prefix: bool) -> bool {
406        let components = normalized(StringComponentIter::new(path, &self.separator));
407        PathMatchNode::matches(&self.match_tree, &components, match_prefix)
408    }
409
410    /// Returns the maximum number of components a matching path could have.
411    /// This assumes a normalized path - a matching path could always have
412    /// an arbitrary number of `.` components.
413    #[must_use]
414    pub fn max_depth(&self) -> usize {
415        self.match_tree.max_traversals
416    }
417}
418
419/// Builds a `PathMatch` which can match against multiple expressions.
420pub struct PathMatchBuilder {
421    processed: Vec<Vec<PatternComponent>>,
422    separator: String,
423}
424
425impl PathMatchBuilder {
426    /// Constructs a `PathMatchBuilder` where paths to be matched will use the
427    /// supplied separator.
428    #[must_use]
429    pub fn new(separator: &str) -> PathMatchBuilder {
430        PathMatchBuilder {
431            processed: Vec::new(),
432            separator: separator.into(),
433        }
434    }
435
436    /// Adds the specified pattern to the matcher.
437    ///
438    /// This will return an error if the pattern contains parent traversals or a
439    /// component containing multiple wildcards. See also
440    /// `PathMatch::from_pattern`.
441    pub fn add_pattern(&mut self, pattern: &str) -> Result<(), Error> {
442        let components = StringComponentIter::new(pattern, UNIX_SEP);
443        let processed = path_to_pattern(components)?;
444        self.processed.push(processed);
445        Ok(())
446    }
447
448    /// Constructs the `PathMatch` which can be used to match against paths.
449    pub fn build(self) -> Result<PathMatch, Error> {
450        let mut match_tree = PathMatchNode::default();
451        for pattern in self.processed {
452            match_tree.insert(pattern);
453        }
454        match_tree.recompute_depth_bounds();
455        let result = PathMatch {
456            separator: self.separator,
457            match_tree,
458        };
459        Ok(result)
460    }
461}
462
463#[cfg(test)]
464mod test {
465    use super::*;
466
467    #[test]
468    fn basic_syntax() -> Result<(), Error> {
469        let path = r"foo|bar|hmm|hello|";
470        for separator in ["/", "\\"] {
471            let path = path.replace("|", separator);
472            let pattern = PathMatch::from_pattern(".////foo/*/*/hel*o/", separator)?;
473            assert!(pattern.matches(path));
474        }
475        Ok(())
476    }
477
478    #[test]
479    fn star() -> Result<(), Error> {
480        let path = r"foo|bar|hmm|hello|";
481        for separator in ["/", "\\"] {
482            let path = &path.replace("|", separator);
483
484            let pattern = PathMatch::from_pattern("./*", separator)?;
485            assert!(!pattern.matches(path));
486
487            let pattern = PathMatch::from_pattern("./*/*", separator)?;
488            assert!(!pattern.matches(path));
489
490            let pattern = PathMatch::from_pattern("./*/*/*", separator)?;
491            assert!(!pattern.matches(path));
492
493            let pattern = PathMatch::from_pattern("./*/*/*/*", separator)?;
494            assert!(pattern.matches(path));
495        }
496
497        Ok(())
498    }
499
500    #[test]
501    fn root() -> Result<(), Error> {
502        for pattern in ["/", "/./", "/.", "/////", "///./."] {
503            let pattern = PathMatch::from_pattern(pattern, "/")?;
504            assert!(pattern.matches("/"));
505            assert!(!pattern.matches("./"));
506        }
507        Ok(())
508    }
509
510    #[test]
511    fn cwd() -> Result<(), Error> {
512        // Dot is not necessarily a folder
513        for pattern in [".", "././././.", ".////."] {
514            let pattern = PathMatch::from_pattern(pattern, "/")?;
515            assert!(pattern.matches("."));
516            assert!(pattern.matches("./"));
517            assert!(!pattern.matches("/"));
518            assert!(!pattern.matches("/."));
519            assert!(!pattern.matches("/./"));
520        }
521
522        // Dot is a folder
523        for pattern in ["./", "./././"] {
524            let pattern = PathMatch::from_pattern(pattern, "/")?;
525            assert!(pattern.matches("./"));
526            assert!(!pattern.matches("."));
527            assert!(!pattern.matches("/"));
528            assert!(!pattern.matches("/."));
529            assert!(!pattern.matches("/./"));
530        }
531        Ok(())
532    }
533
534    #[test]
535    fn file_path() -> Result<(), Error> {
536        for pattern in ["hello", "./hello", "././hello"] {
537            let pattern = PathMatch::from_pattern(pattern, "/")?;
538            assert!(pattern.matches("hello"));
539        }
540        Ok(())
541    }
542
543    #[test]
544    fn prefix_matching() -> Result<(), Error> {
545        let pattern = "hello/there/friend";
546        for separator in ["/", "\\"] {
547            let pattern = PathMatch::from_pattern(pattern, separator)?;
548            for path in [
549                ".",
550                ".|",
551                "hello",
552                "hello|",
553                "hello|there",
554                "hello|there|",
555                "hello|there|friend",
556                "hello|there|friend|",
557            ] {
558                let path = path.replace("|", separator);
559                assert!(pattern.matches_prefix(path));
560            }
561        }
562        Ok(())
563    }
564
565    #[test]
566    fn max_depth() -> Result<(), Error> {
567        for (pattern, depth) in [
568            (".", 0),
569            ("./", 0),
570            ("./*", 1),
571            ("./*/", 1),
572            ("./././", 0),
573            ("*/*/*/", 3),
574            ("./hello/", 1),
575            ("./*/*", 2),
576        ] {
577            let pattern_1 = PathMatch::from_pattern(pattern, "/")?;
578            let pattern_2 = {
579                let mut builder = PathMatchBuilder::new("/");
580                builder.add_pattern(pattern)?;
581                builder.build()?
582            };
583            assert_eq!(pattern_1.max_depth(), depth);
584            assert_eq!(pattern_2.max_depth(), depth);
585        }
586        Ok(())
587    }
588
589    #[test]
590    fn multiple_builder_patterns() -> Result<(), Error> {
591        let mut builder = PathMatchBuilder::new("/");
592        for pattern in [
593            "./a",
594            "./b/",
595            "a/b/c/d/e",
596            "./b/foo*",
597            "./b/bar",
598            "./b/test*pattern",
599            "./b/test*pattern/final",
600            "./c",
601            "./c/",
602        ] {
603            builder.add_pattern(pattern)?;
604        }
605        let pattern = builder.build()?;
606
607        // These should match
608        for path in [
609            "a",
610            "a/",
611            "b/",
612            "a/b/c/d/e",
613            "b/foobar",
614            "b/foocar",
615            "b/bar",
616            "b/test_wildcard_pattern",
617            "b/test_wildcard_pattern/final",
618            "c",
619            "c/",
620        ] {
621            assert!(pattern.matches(path));
622        }
623
624        // These should not
625        for path in ["b", "a/b/c/d", "b/folbar", "b/barfoo", "b/tes_attern"] {
626            assert!(!pattern.matches(path));
627        }
628
629        // These should prefix-match
630        for path in [
631            "b",
632            "a/b/c",
633            "a/b/c/",
634            "b/test_wildcard_pattern",
635            "b/test_wildcard_pattern/",
636        ] {
637            assert!(pattern.matches_prefix(path));
638        }
639
640        Ok(())
641    }
642
643    #[test]
644    fn no_patterns_match_nothing() -> Result<(), Error> {
645        let builder = PathMatchBuilder::new("/");
646        let pattern = builder.build()?;
647        assert!(!pattern.matches("non_empty"));
648        assert!(!pattern.matches(""));
649        assert!(!pattern.matches("/"));
650        Ok(())
651    }
652
653    #[test]
654    fn multiple_wildcard() -> Result<(), Error> {
655        let pattern = PathMatch::from_pattern("*/*", r"\")?;
656        assert!(!pattern.matches(r"."));
657        assert!(!pattern.matches(r"hello"));
658        assert!(pattern.matches(r"hello\there"));
659        assert!(!pattern.matches(r"hello\there\friend"));
660        Ok(())
661    }
662
663    #[test]
664    fn single_wildcard() -> Result<(), Error> {
665        let pattern = PathMatch::from_pattern("*", r"\")?;
666        assert!(!pattern.matches(r"."));
667        assert!(pattern.matches(r".hello"));
668        assert!(pattern.matches(r"hello"));
669        Ok(())
670    }
671
672    #[test]
673    fn wildcard_matches_dot_in_middle() -> Result<(), Error> {
674        let pattern = PathMatch::from_pattern("hello*there", r"\")?;
675        assert!(pattern.matches(r"hello.there"));
676        Ok(())
677    }
678}