simple_path_match/
lib.rs

1#![warn(clippy::pedantic)]
2#![allow(clippy::uninlined_format_args, clippy::missing_errors_doc)]
3#![cfg_attr(not(test), no_std)]
4#![forbid(unsafe_code)]
5
6extern crate alloc;
7
8use alloc::collections::{BTreeMap, VecDeque};
9use alloc::string::{String, ToString as _};
10use alloc::vec::Vec;
11use beef::Cow;
12use snafu::Snafu;
13
14const PATH_CURRENT: &str = ".";
15const PATH_PARENT: &str = "..";
16const UNIX_SEP: &str = "/";
17const WILDCARD_ANY: &str = "*";
18
19#[derive(Clone, Debug, PartialEq, Eq, Ord, PartialOrd)]
20enum PathComponent<'a> {
21    Current,
22    DirectoryMarker,
23    Name(Cow<'a, str>),
24    Parent,
25    RootName(Cow<'a, str>),
26}
27
28impl alloc::fmt::Display for PathComponent<'_> {
29    fn fmt(&self, formatter: &mut alloc::fmt::Formatter<'_>) -> Result<(), alloc::fmt::Error> {
30        match self {
31            PathComponent::Current => formatter.write_str(PATH_CURRENT),
32            PathComponent::DirectoryMarker => Ok(()),
33            PathComponent::Name(s) | PathComponent::RootName(s) => formatter.write_str(s),
34            PathComponent::Parent => formatter.write_str(PATH_PARENT),
35        }
36    }
37}
38
39impl PathComponent<'_> {
40    fn traversal_depth(&self) -> usize {
41        match self {
42            PathComponent::Current | PathComponent::DirectoryMarker => 0,
43            PathComponent::Name(_) | PathComponent::RootName(_) | PathComponent::Parent => 1,
44        }
45    }
46
47    fn into_owned(self) -> PathComponent<'static> {
48        match self {
49            PathComponent::Current => PathComponent::Current,
50            PathComponent::Parent => PathComponent::Parent,
51            PathComponent::DirectoryMarker => PathComponent::DirectoryMarker,
52            PathComponent::Name(n) => PathComponent::Name(n.into_owned().into()),
53            PathComponent::RootName(n) => PathComponent::RootName(n.into_owned().into()),
54        }
55    }
56}
57
58#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
59struct StartsEndsWith(String, String);
60
61impl alloc::fmt::Display for StartsEndsWith {
62    fn fmt(&self, formatter: &mut alloc::fmt::Formatter<'_>) -> Result<(), alloc::fmt::Error> {
63        formatter.write_str(&self.0)?;
64        formatter.write_str(WILDCARD_ANY)?;
65        formatter.write_str(&self.1)
66    }
67}
68
69impl StartsEndsWith {
70    pub fn matches(&self, name: &str) -> bool {
71        name.starts_with(&self.0) && name.ends_with(&self.1)
72    }
73}
74
75#[derive(Clone, Debug, PartialEq, Eq)]
76enum PatternComponent {
77    Literal(PathComponent<'static>),
78    StartsEndsWith(StartsEndsWith),
79}
80
81impl alloc::fmt::Display for PatternComponent {
82    fn fmt(&self, formatter: &mut alloc::fmt::Formatter<'_>) -> Result<(), alloc::fmt::Error> {
83        match self {
84            PatternComponent::Literal(c) => c.fmt(formatter),
85            PatternComponent::StartsEndsWith(m) => m.fmt(formatter),
86        }
87    }
88}
89
90/// Errors that can occur during pattern compilation
91#[derive(Debug, Snafu)]
92pub enum Error {
93    /// The supplied pattern contained a parent traversal (`..`)
94    #[snafu(display("Pattern must not contain parent traversals"))]
95    NoParents,
96
97    /// A wilcard was used in a component in an invalid way
98    #[snafu(display("Only one wilcard allowed in component: `{}`", component))]
99    WildcardPosition { component: String },
100}
101
102struct StringComponentIter<'a> {
103    path_string: core::iter::Enumerate<core::str::Split<'a, &'a str>>,
104    is_dir: bool,
105}
106
107impl<'a> StringComponentIter<'a> {
108    pub fn new(path: &'a str, separator: &'a str) -> StringComponentIter<'a> {
109        StringComponentIter {
110            path_string: path.split(separator).enumerate(),
111            is_dir: false,
112        }
113    }
114}
115
116impl<'a> Iterator for StringComponentIter<'a> {
117    type Item = PathComponent<'a>;
118
119    fn next(&mut self) -> Option<PathComponent<'a>> {
120        for (idx, component) in self.path_string.by_ref() {
121            self.is_dir = false;
122            match component {
123                "" => {
124                    if idx == 0 {
125                        return Some(PathComponent::RootName(component.into()));
126                    }
127                    self.is_dir = true;
128                }
129                PATH_CURRENT => return Some(PathComponent::Current),
130                PATH_PARENT => return Some(PathComponent::Parent),
131                _ => return Some(PathComponent::Name(component.into())),
132            }
133        }
134        if self.is_dir {
135            self.is_dir = false;
136            Some(PathComponent::DirectoryMarker)
137        } else {
138            None
139        }
140    }
141}
142
143fn normalized<'a, I: IntoIterator<Item = PathComponent<'a>>>(components: I) -> Vec<PathComponent<'a>> {
144    let components = components.into_iter();
145    let mut result = Vec::with_capacity(components.size_hint().0);
146    for component in components {
147        match component {
148            PathComponent::Name(_) | PathComponent::RootName(_) => result.push(component),
149            PathComponent::DirectoryMarker => {
150                if result.is_empty() {
151                    result.push(PathComponent::Current);
152                }
153                result.push(PathComponent::DirectoryMarker);
154            }
155            PathComponent::Parent => match result.last() {
156                None | Some(PathComponent::Parent) => result.push(PathComponent::Parent),
157                Some(PathComponent::Name(_)) => drop(result.pop()),
158                Some(PathComponent::RootName(_)) => {}
159                Some(c) => panic!("Component found in unexpected place during normalization: {:?}", c),
160            },
161            PathComponent::Current => {}
162        }
163    }
164    if result.is_empty() {
165        result.push(PathComponent::Current);
166    }
167    result
168}
169
170fn path_to_pattern<'a, I: IntoIterator<Item = PathComponent<'a>>>(
171    components: I,
172) -> Result<Vec<PatternComponent>, Error> {
173    let components = components.into_iter();
174    let mut result = Vec::with_capacity(components.size_hint().0);
175    for component in components {
176        match component {
177            PathComponent::Name(ref name) => {
178                let matcher = if let Some(idx) = name.find(WILDCARD_ANY) {
179                    let (start, end) = name.split_at(idx);
180                    let (_, end) = end.split_at(WILDCARD_ANY.len());
181                    if start.contains(WILDCARD_ANY) || end.contains(WILDCARD_ANY) {
182                        return Err(Error::WildcardPosition {
183                            component: name.to_string(),
184                        });
185                    }
186                    PatternComponent::StartsEndsWith(StartsEndsWith(start.to_string(), end.to_string()))
187                } else {
188                    PatternComponent::Literal(component.into_owned())
189                };
190                result.push(matcher);
191            }
192            PathComponent::Parent => return Err(Error::NoParents),
193            PathComponent::Current => {}
194            PathComponent::DirectoryMarker => {
195                if result.is_empty() {
196                    result.push(PatternComponent::Literal(PathComponent::Current));
197                }
198                result.push(PatternComponent::Literal(component.into_owned()));
199            }
200            PathComponent::RootName(_) => {
201                result.push(PatternComponent::Literal(component.into_owned()));
202            }
203        }
204    }
205    if result.is_empty() {
206        result.push(PatternComponent::Literal(PathComponent::Current));
207    }
208    Ok(result)
209}
210
211#[derive(Clone, Debug)]
212struct PathMatchNode {
213    can_end: bool,
214    literals: BTreeMap<PathComponent<'static>, PathMatchNode>,
215    starts_ends_with: BTreeMap<StartsEndsWith, PathMatchNode>,
216    min_traversals: usize,
217    max_traversals: usize,
218}
219
220impl Default for PathMatchNode {
221    fn default() -> PathMatchNode {
222        PathMatchNode {
223            can_end: false,
224            literals: BTreeMap::new(),
225            starts_ends_with: BTreeMap::new(),
226            min_traversals: 0,
227            max_traversals: usize::MAX,
228        }
229    }
230}
231
232impl alloc::fmt::Display for PathMatchNode {
233    fn fmt(&self, formatter: &mut alloc::fmt::Formatter<'_>) -> Result<(), alloc::fmt::Error> {
234        use alloc::fmt::Write as _;
235
236        let literals_iter = self.literals.iter().map(|(k, v)| (k.to_string(), v));
237        let matchers_iter = self.starts_ends_with.iter().map(|(k, v)| (k.to_string(), v));
238        let subnodes_iter = literals_iter.chain(matchers_iter);
239        let mut output = String::new();
240        let mut has_multiple_options = false;
241        for (idx, (k, v)) in subnodes_iter.enumerate() {
242            if idx > 0 {
243                output += "|";
244                has_multiple_options = true;
245            }
246            output += &k;
247            if v.can_end {
248                output += "$";
249            }
250            if !v.is_empty() {
251                output += UNIX_SEP;
252                write!(&mut output, "{}", v)?;
253            }
254        }
255        if has_multiple_options {
256            formatter.write_str("(")?;
257        }
258        formatter.write_str(&output)?;
259        if has_multiple_options {
260            formatter.write_str(")")?;
261        }
262        Ok(())
263    }
264}
265
266impl PathMatchNode {
267    fn insert_component(&mut self, component: PatternComponent) -> &mut PathMatchNode {
268        self.min_traversals = 0;
269        self.max_traversals = usize::MAX;
270        match component {
271            PatternComponent::Literal(literal) => self.literals.entry(literal).or_default(),
272            PatternComponent::StartsEndsWith(pattern) => self.starts_ends_with.entry(pattern).or_default(),
273        }
274    }
275
276    pub fn is_empty(&self) -> bool {
277        self.starts_ends_with.is_empty() && self.literals.is_empty()
278    }
279
280    fn recompute_depth_bounds(&mut self) -> (usize, usize) {
281        let min = &mut self.min_traversals;
282        let max = &mut self.max_traversals;
283        *min = if self.can_end { 0 } else { usize::MAX };
284        *max = 0;
285        let node_iter = self
286            .literals
287            .iter_mut()
288            .map(|(k, v)| (k.traversal_depth(), v))
289            .chain(self.starts_ends_with.values_mut().map(|v| (1, v)));
290        for (component_depth, node) in node_iter {
291            let (node_min, node_max) = node.recompute_depth_bounds();
292            *min = core::cmp::min(*min, node_min + component_depth);
293            *max = core::cmp::max(*max, node_max + component_depth);
294        }
295        (*min, *max)
296    }
297
298    pub fn insert(&mut self, mut pattern: Vec<PatternComponent>) {
299        let mut node = self;
300        for head in pattern.drain(..) {
301            node = node.insert_component(head);
302        }
303        node.can_end = true;
304    }
305
306    pub fn matches(node: &PathMatchNode, path: &[PathComponent], match_prefix: bool) -> bool {
307        let mut candidates = VecDeque::new();
308        candidates.push_front((node, path));
309        while let Some((node, path)) = candidates.pop_back() {
310            let path = if match_prefix && path.first() == Some(&PathComponent::Current) {
311                // It is invalid to do this in the non-prefix case, since we might need
312                // to match ".". We need to do this for the prefix case since "." is a prefix
313                // of any relative path, but won't match other paths.
314                &path[1..]
315            } else {
316                path
317            };
318            let can_match = node.can_end || match_prefix;
319            let path_is_dir_marker = path.len() == 1 && path.last() == Some(&PathComponent::DirectoryMarker);
320            if path_is_dir_marker && can_match {
321                return true;
322            }
323            if let Some(component) = path.first() {
324                if let Some(matching_node) = node.literals.get(component) {
325                    candidates.push_front((matching_node, &path[1..]));
326                }
327                for (name_matcher, matching_node) in &node.starts_ends_with {
328                    if let PathComponent::Name(name) = component {
329                        if name_matcher.matches(name) {
330                            candidates.push_front((matching_node, &path[1..]));
331                        }
332                    }
333                }
334            } else if can_match {
335                return true;
336            }
337        }
338        false
339    }
340}
341
342/// Matches against a path
343#[derive(Clone, Debug)]
344pub struct PathMatch {
345    separator: String,
346    match_tree: PathMatchNode,
347}
348
349impl alloc::fmt::Display for PathMatch {
350    fn fmt(&self, formatter: &mut alloc::fmt::Formatter<'_>) -> Result<(), alloc::fmt::Error> {
351        self.match_tree.fmt(formatter)
352    }
353}
354
355impl PathMatch {
356    /// Constructs a `PathMatch` for a single pattern.
357    ///
358    /// The pattern must use the forward slash as a separator. The following
359    /// restrictions apply:
360    /// * Each component must either be a literal name or can contain a single
361    ///   asterisk (representing a wildcard) with an optional literal prefix and
362    ///   suffix.
363    /// * `?` is not supported.
364    /// * The pattern must not contain parent traversals (`..`) but `.` is
365    ///   supported.
366    /// * No escaping of special characters is supported.
367    ///
368    /// Construction will return an error if parent traverals are present or
369    /// a component contains multiple wildcard characters.
370    ///
371    /// The supplied separator is used when parsing the supplied paths. The idea
372    /// is that the patterns you use are specified in an OS-independent
373    /// manner so they can be compile-time constant, but the separator is
374    /// supplied at run-time to allow adaptation to OS.
375    pub fn from_pattern(pattern: &str, separator: &str) -> Result<PathMatch, Error> {
376        let components = StringComponentIter::new(pattern, UNIX_SEP);
377        let pattern = path_to_pattern(components)?;
378        let mut match_tree = PathMatchNode::default();
379        match_tree.insert(pattern);
380        match_tree.recompute_depth_bounds();
381        let result = PathMatch {
382            separator: separator.to_string(),
383            match_tree,
384        };
385        Ok(result)
386    }
387
388    /// Returns `true` if the specified string matches the pattern, `false`
389    /// otherwise. Unlike patterns, paths may contain `..`, but if the parent
390    /// traversal cannot be normalized out, no matches can occur.
391    pub fn matches<P: AsRef<str>>(&self, path: P) -> bool {
392        let path = path.as_ref();
393        self.matches_common(path, false)
394    }
395
396    /// Returns `true` if the specified string forms a prefix path of one of the
397    /// patterns matches.
398    ///
399    /// The prefix must consist of full components. e.g. `first/second` is a
400    /// prefix of `first/second/third`, but `first/sec` is not.
401    pub fn matches_prefix<P: AsRef<str>>(&self, path: P) -> bool {
402        let path = path.as_ref();
403        self.matches_common(path, true)
404    }
405
406    fn matches_common(&self, path: &str, match_prefix: bool) -> bool {
407        let components = normalized(StringComponentIter::new(path, &self.separator));
408        PathMatchNode::matches(&self.match_tree, &components, match_prefix)
409    }
410
411    /// Returns the maximum number of components a matching path could have.
412    /// This assumes a normalized path - a matching path could always have
413    /// an arbitrary number of `.` components.
414    #[must_use]
415    pub fn max_depth(&self) -> usize {
416        self.match_tree.max_traversals
417    }
418}
419
420/// Builds a `PathMatch` which can match against multiple expressions.
421pub struct PathMatchBuilder {
422    processed: Vec<Vec<PatternComponent>>,
423    separator: String,
424}
425
426impl PathMatchBuilder {
427    /// Constructs a `PathMatchBuilder` where paths to be matched will use the
428    /// supplied separator.
429    #[must_use]
430    pub fn new(separator: &str) -> PathMatchBuilder {
431        PathMatchBuilder {
432            processed: Vec::new(),
433            separator: separator.into(),
434        }
435    }
436
437    /// Adds the specified pattern to the matcher.
438    ///
439    /// This will return an error if the pattern contains parent traversals or a
440    /// component containing multiple wildcards. See also
441    /// `PathMatch::from_pattern`.
442    pub fn add_pattern(&mut self, pattern: &str) -> Result<(), Error> {
443        let components = StringComponentIter::new(pattern, UNIX_SEP);
444        let processed = path_to_pattern(components)?;
445        self.processed.push(processed);
446        Ok(())
447    }
448
449    /// Constructs the `PathMatch` which can be used to match against paths.
450    pub fn build(self) -> Result<PathMatch, Error> {
451        let mut match_tree = PathMatchNode::default();
452        for pattern in self.processed {
453            match_tree.insert(pattern);
454        }
455        match_tree.recompute_depth_bounds();
456        let result = PathMatch {
457            separator: self.separator,
458            match_tree,
459        };
460        Ok(result)
461    }
462}
463
464#[cfg(test)]
465mod test {
466    use super::*;
467
468    #[test]
469    fn basic_syntax() -> Result<(), Error> {
470        let path = r"foo|bar|hmm|hello|";
471        for separator in ["/", "\\"] {
472            let path = path.replace("|", separator);
473            let pattern = PathMatch::from_pattern(".////foo/*/*/hel*o/", separator)?;
474            assert!(pattern.matches(path));
475        }
476        Ok(())
477    }
478
479    #[test]
480    fn star() -> Result<(), Error> {
481        let path = r"foo|bar|hmm|hello|";
482        for separator in ["/", "\\"] {
483            let path = &path.replace("|", separator);
484
485            let pattern = PathMatch::from_pattern("./*", separator)?;
486            assert!(!pattern.matches(path));
487
488            let pattern = PathMatch::from_pattern("./*/*", separator)?;
489            assert!(!pattern.matches(path));
490
491            let pattern = PathMatch::from_pattern("./*/*/*", separator)?;
492            assert!(!pattern.matches(path));
493
494            let pattern = PathMatch::from_pattern("./*/*/*/*", separator)?;
495            assert!(pattern.matches(path));
496        }
497
498        Ok(())
499    }
500
501    #[test]
502    fn root() -> Result<(), Error> {
503        for pattern in ["/", "/./", "/.", "/////", "///./."] {
504            let pattern = PathMatch::from_pattern(pattern, "/")?;
505            assert!(pattern.matches("/"));
506            assert!(!pattern.matches("./"));
507        }
508        Ok(())
509    }
510
511    #[test]
512    fn cwd() -> Result<(), Error> {
513        // Dot is not necessarily a folder
514        for pattern in [".", "././././.", ".////."] {
515            let pattern = PathMatch::from_pattern(pattern, "/")?;
516            assert!(pattern.matches("."));
517            assert!(pattern.matches("./"));
518            assert!(!pattern.matches("/"));
519            assert!(!pattern.matches("/."));
520            assert!(!pattern.matches("/./"));
521        }
522
523        // Dot is a folder
524        for pattern in ["./", "./././"] {
525            let pattern = PathMatch::from_pattern(pattern, "/")?;
526            assert!(pattern.matches("./"));
527            assert!(!pattern.matches("."));
528            assert!(!pattern.matches("/"));
529            assert!(!pattern.matches("/."));
530            assert!(!pattern.matches("/./"));
531        }
532        Ok(())
533    }
534
535    #[test]
536    fn file_path() -> Result<(), Error> {
537        for pattern in ["hello", "./hello", "././hello"] {
538            let pattern = PathMatch::from_pattern(pattern, "/")?;
539            assert!(pattern.matches("hello"));
540        }
541        Ok(())
542    }
543
544    #[test]
545    fn prefix_matching() -> Result<(), Error> {
546        let pattern = "hello/there/friend";
547        for separator in ["/", "\\"] {
548            let pattern = PathMatch::from_pattern(pattern, separator)?;
549            for path in [
550                ".",
551                ".|",
552                "hello",
553                "hello|",
554                "hello|there",
555                "hello|there|",
556                "hello|there|friend",
557                "hello|there|friend|",
558            ] {
559                let path = path.replace("|", separator);
560                assert!(pattern.matches_prefix(path));
561            }
562        }
563        Ok(())
564    }
565
566    #[test]
567    fn max_depth() -> Result<(), Error> {
568        for (pattern, depth) in [
569            (".", 0),
570            ("./", 0),
571            ("./*", 1),
572            ("./*/", 1),
573            ("./././", 0),
574            ("*/*/*/", 3),
575            ("./hello/", 1),
576            ("./*/*", 2),
577        ] {
578            let pattern_1 = PathMatch::from_pattern(pattern, "/")?;
579            let pattern_2 = {
580                let mut builder = PathMatchBuilder::new("/");
581                builder.add_pattern(pattern)?;
582                builder.build()?
583            };
584            assert_eq!(pattern_1.max_depth(), depth);
585            assert_eq!(pattern_2.max_depth(), depth);
586        }
587        Ok(())
588    }
589
590    #[test]
591    fn multiple_builder_patterns() -> Result<(), Error> {
592        let mut builder = PathMatchBuilder::new("/");
593        for pattern in [
594            "./a",
595            "./b/",
596            "a/b/c/d/e",
597            "./b/foo*",
598            "./b/bar",
599            "./b/test*pattern",
600            "./b/test*pattern/final",
601            "./c",
602            "./c/",
603        ] {
604            builder.add_pattern(pattern)?;
605        }
606        let pattern = builder.build()?;
607
608        // These should match
609        for path in [
610            "a",
611            "a/",
612            "b/",
613            "a/b/c/d/e",
614            "b/foobar",
615            "b/foocar",
616            "b/bar",
617            "b/test_wildcard_pattern",
618            "b/test_wildcard_pattern/final",
619            "c",
620            "c/",
621        ] {
622            assert!(pattern.matches(path));
623        }
624
625        // These should not
626        for path in ["b", "a/b/c/d", "b/folbar", "b/barfoo", "b/tes_attern"] {
627            assert!(!pattern.matches(path));
628        }
629
630        // These should prefix-match
631        for path in [
632            "b",
633            "a/b/c",
634            "a/b/c/",
635            "b/test_wildcard_pattern",
636            "b/test_wildcard_pattern/",
637        ] {
638            assert!(pattern.matches_prefix(path));
639        }
640
641        Ok(())
642    }
643
644    #[test]
645    fn no_patterns_match_nothing() -> Result<(), Error> {
646        let builder = PathMatchBuilder::new("/");
647        let pattern = builder.build()?;
648        assert!(!pattern.matches("non_empty"));
649        assert!(!pattern.matches(""));
650        assert!(!pattern.matches("/"));
651        Ok(())
652    }
653
654    #[test]
655    fn multiple_wildcard() -> Result<(), Error> {
656        let pattern = PathMatch::from_pattern("*/*", r"\")?;
657        assert!(!pattern.matches(r"."));
658        assert!(!pattern.matches(r"hello"));
659        assert!(pattern.matches(r"hello\there"));
660        assert!(!pattern.matches(r"hello\there\friend"));
661        Ok(())
662    }
663
664    #[test]
665    fn single_wildcard() -> Result<(), Error> {
666        let pattern = PathMatch::from_pattern("*", r"\")?;
667        assert!(!pattern.matches(r"."));
668        assert!(pattern.matches(r".hello"));
669        assert!(pattern.matches(r"hello"));
670        Ok(())
671    }
672
673    #[test]
674    fn wildcard_matches_dot_in_middle() -> Result<(), Error> {
675        let pattern = PathMatch::from_pattern("hello*there", r"\")?;
676        assert!(pattern.matches(r"hello.there"));
677        Ok(())
678    }
679}