patmatch/
lib.rs

1//! A configurable pattern matching (also known as globbing) library.
2//!
3//! The general use of this library is to compile `Pattern`s, then use them. For example:
4//!
5//! ```rust
6//! use patmatch::{Pattern, MatchOptions};
7//! let pat = Pattern::compile("*.png", MatchOptions::ALL);
8//! assert!(pat.matches("file.png"));
9//! assert!(!pat.matches("file.jpeg"));
10//! ```
11
12mod compiled;
13pub mod options;
14pub use options::*;
15
16use dyn_clone::DynClone;
17use std::{
18    fmt,
19    iter::FromIterator,
20    iter::{FusedIterator, Peekable},
21};
22
23/// A pattern to match strings against.
24#[derive(Debug, Clone)]
25pub struct Pattern {
26    pat: Box<dyn CompiledPat>,
27}
28
29/// A trait for compiled patterns.
30trait CompiledPat: fmt::Debug + DynClone {
31    /// See [`Pattern::matches`], which calls this.
32    fn matches(&self, string: &str) -> bool;
33}
34
35dyn_clone::clone_trait_object!(CompiledPat);
36
/// One building block of a [`Pattern`].
///
/// A sequence of `Chunk`s can be collected into a compiled `Pattern`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Chunk {
    /// Literal text, which has to appear exactly as written.
    Str(String),
    /// A wildcard (usually written as an asterisk).
    /// Matches any number of characters, including none at all.
    Wildcard,
    /// An unknown character (usually written as a question mark).
    UnknownChar,
}

impl Chunk {
    /// Maps a special pattern character to its chunk: `*` gives [`Chunk::Wildcard`] and `?`
    /// gives [`Chunk::UnknownChar`]. Every other character yields `None`.
    ///
    /// Exists mainly to keep the implementation of [`Pattern::compile`] simple.
    fn from_special_char(chr: char) -> Option<Chunk> {
        match chr {
            '*' => Some(Self::Wildcard),
            '?' => Some(Self::UnknownChar),
            _ => None,
        }
    }

    /// Builds a [`Chunk::Str`] holding an owned copy of `s`.
    pub fn str(s: &str) -> Chunk {
        Self::Str(String::from(s))
    }

    /// Consumes the chunk, handing back its text if it is a [`Chunk::Str`] and `None` otherwise.
    fn take_str(self) -> Option<String> {
        if let Self::Str(text) = self {
            Some(text)
        } else {
            None
        }
    }
}

impl From<String> for Chunk {
    /// Wraps an owned string as a literal [`Chunk::Str`].
    fn from(s: String) -> Self {
        Self::Str(s)
    }
}
77
78impl FromIterator<Chunk> for Pattern {
79    /// Used to compile a `Pattern` from an iterator of [`Chunk`]s.
80    /// Note that this allocates memory to store *all* the `Chunk`s.
81    ///
82    /// Example usage:
83    /// ```rust
84    /// use patmatch::{Pattern, Chunk};
85    /// let chunk_vec = vec![Chunk::str("IMG_"), Chunk::Wildcard, Chunk::str(".png")];
86    /// let pat: Pattern = chunk_vec.into_iter().collect();
87    /// assert!(pat.matches("IMG_20170301.png"));
88    /// assert!(!pat.matches("stuff.png")); assert!(!pat.matches("IMG_20170302.jpeg"));
89    /// ```
90    fn from_iter<T: IntoIterator<Item = Chunk>>(iter: T) -> Self {
91        use compiled::*;
92        let mut chunks = Vec::new();
93        for chunk in iter {
94            if !(chunk == Chunk::Wildcard && chunks.ends_with(&[Chunk::Wildcard])) {
95                chunks.push(chunk);
96            }
97        }
98        let chunks = chunks;
99        if chunks.iter().all(|chunk| chunk == &Chunk::UnknownChar) {
100            // (Also handles the empty string.)
101            Pattern::from_compiled(OptionalCharLen { len: chunks.len() })
102        } else if chunks.iter().all(|chunk| chunk == &Chunk::Wildcard) {
103            Pattern::from_compiled(MatchAny {})
104        } else if chunks.iter().all(|chunk| matches!(chunk, Chunk::Str(_))) {
105            Pattern::from_compiled(LiteralStr(
106                chunks.into_iter().map(|x| x.take_str().unwrap()).collect(),
107            ))
108        } else {
109            let mut states = Vec::new();
110            for chunk in chunks {
111                match chunk {
112                    Chunk::Wildcard => states.push(State::Wildcard),
113                    Chunk::UnknownChar => states.push(State::UnknownChar),
114                    Chunk::Str(string) => {
115                        for chr in string.chars() {
116                            states.push(State::Char(chr));
117                        }
118                    }
119                }
120            }
121            Pattern::from_compiled(General { states })
122        }
123    }
124}
125
126/// An iterator for yielding `Chunk`s from an iterator.
127struct CompileIter<T: Iterator<Item = char>> {
128    iter: Peekable<T>,
129    opts: MatchOptions,
130}
131impl<T: Iterator<Item = char>> Iterator for CompileIter<T> {
132    type Item = Chunk;
133    fn next(&mut self) -> Option<Self::Item> {
134        use Chunk::*;
135        match self.iter.next() {
136            None => None,
137            Some(mut chr) => Some(match chr {
138                c if self.opts.contains(c.into()) => Chunk::from_special_char(c).unwrap(),
139                _ => {
140                    let mut string = String::new();
141                    if chr == '\\' {
142                        chr = self.iter.next().unwrap_or('\\');
143                    }
144                    string.push(chr);
145                    loop {
146                        match self.iter.peek() {
147                            Some('\\') => {
148                                self.iter.next();
149                                string.push(self.iter.next().unwrap_or('\\'));
150                            }
151                            Some(peeked) if !self.opts.contains(MatchOptions::from(*peeked)) => {
152                                string.push(*peeked);
153                                self.iter.next();
154                            }
155                            _ => break,
156                        }
157                    }
158                    Str(string)
159                }
160            }),
161        }
162    }
163}
164
165impl<T: Iterator<Item = char>> FusedIterator for CompileIter<T> {}
166
167impl Pattern {
168    /// Compiles a pattern from a string, using shell-like syntax.
169    /// If you want to compile your own custom string format, see [`Pattern::from_iter`].
170    ///
171    /// Each of these can be toggled using [`MatchOptions`].
172    /// * All characters prefixed with a backslash (`\`) are interpreted literally.
173    /// * ([`MatchOptions::WILDCARDS`]) Asterisks (`*`s) are interpreted as wildcards: e.g. `a*b` is interpreted as `a`, a
174    /// wildcard then `b`.
175    /// * ([`MatchOptions::UNKNOWN_CHARS`]) Question marks (`?`s) are interpreted as optional characters.
176    pub fn compile(pat: &str, opts: MatchOptions) -> Pattern {
177        // Yield all chunks and collect them into a `Pattern`.
178        CompileIter {
179            iter: pat.chars().peekable(),
180            opts,
181        }
182        .collect()
183    }
184
185    /// The same as [`Pattern::compile`], but with an iterator instead of a `&str`.
186    pub fn compile_iter<T: IntoIterator<Item = char>>(pat: T, opts: MatchOptions) -> Pattern {
187        CompileIter {
188            iter: pat.into_iter().peekable(),
189            opts,
190        }
191        .collect()
192    }
193
194    /// Checks if `string` matches the pattern.
195    /// The pattern is checked for a match "perfectly",
196    /// i.e. if it is possible to match by choosing all of the matches optimally,
197    /// it will do so.
198    /// This optimizes matching checks if not all features are used.
199    pub fn matches(&self, string: &str) -> bool {
200        self.pat.matches(string)
201    }
202
203    fn from_compiled<T: CompiledPat + 'static>(pat: T) -> Pattern {
204        Pattern { pat: Box::new(pat) }
205    }
206}
207
#[cfg(test)]
mod tests {
    use super::{Chunk, MatchOptions, Pattern};
    use Chunk::*;

    /// Asserts that every pattern in `patterns`, tested against every string in `strings`,
    /// produces the `expected` match result.
    fn check_match(patterns: Vec<Pattern>, strings: Vec<&str>, expected: bool) {
        // This helper backs both the "must match" and the "must not match" assertions, so the
        // failure message has to say which expectation was violated.
        let expectation = if expected {
            "should match"
        } else {
            "should not match"
        };
        for pat in patterns {
            for string in strings.iter() {
                assert_eq!(
                    pat.matches(string),
                    expected,
                    // `{:?}` keeps whitespace-only strings such as "\t" readable in the output.
                    "Pattern {:?} {} {:?}",
                    pat,
                    expectation,
                    string
                );
            }
        }
    }

    /// Asserts that all `patterns` match all `strings`.
    fn matches(patterns: Vec<Pattern>, strings: Vec<&str>) {
        check_match(patterns, strings, true)
    }

    /// Like [`matches`], but each pattern is given as a list of [`Chunk`]s.
    fn matches_v(patterns: Vec<Vec<Chunk>>, strings: Vec<&str>) {
        matches(
            patterns
                .into_iter()
                .map(|v| v.into_iter().collect())
                .collect(),
            strings,
        )
    }

    /// Compiles each pattern string with every option enabled.
    fn strings_to_pats(patterns: Vec<&str>) -> Vec<Pattern> {
        patterns
            .into_iter()
            .map(|pat| Pattern::compile(pat, MatchOptions::ALL))
            .collect()
    }

    /// Like [`matches`], but each pattern is given in string syntax.
    fn matches_s(patterns: Vec<&str>, strings: Vec<&str>) {
        matches(strings_to_pats(patterns), strings)
    }

    /// Asserts that none of the `patterns` (in string syntax) matches any of the `strings`.
    fn no_match_s(patterns: Vec<&str>, strings: Vec<&str>) {
        check_match(strings_to_pats(patterns), strings, false)
    }

    // Turns a bare identifier into that `Chunk` variant and any other token into a `Str` chunk.
    macro_rules! chunk {
        ($i:ident) => {
            $i
        };
        ($s:tt) => {
            Str($s.to_string())
        };
    }
    // Builds a `Vec<Chunk>` from a comma-separated list accepted by `chunk!`.
    macro_rules! chunks {
        ($($t:tt),*) => {
            vec![ $(chunk!($t)),* ]
        }
    }

    #[test]
    fn cat() {
        matches_v(vec![chunks!["c", UnknownChar, "t"]], vec!["cat"]);
        matches_s(
            vec![
                "*", "c*", "*t", "*a*", "c*t", "ca*", "*at", "???", "?a?", "c??", "??t", "ca?",
                "?at", "c?t", "cat", "ca*t", "c*at", "*cat", "cat*", "ca****t",
            ],
            vec!["cat"],
        );
        no_match_s(
            vec!["cat?", "?cat", "c?at", "ca?t", "?", "??", "????", "", " *"],
            vec!["cat"],
        );
    }

    #[test]
    fn empty() {
        matches_s(vec![""], vec![""]);
        no_match_s(vec![""], vec!["a", "b", "c", " ", "\t", "\n"]);
    }

    #[test]
    fn patmatch() {
        matches_s(
            vec![
                "pat?atch",
                "patm?tch",
                "p??????h",
                "p???a??h",
                "p???*??h",
                "p???*???h",
                "p*******************************************atmatch",
                "\\p\\a\\t\\m\\a\\t\\c\\h",
                "\\pat\\match",
                "*",
                "**",
                "***",
                "****",
                "*?*?*?*?*?*",
                "???**?*?*??",
                "???**a*??*?",
                "*p*",
                "*pa*",
                "*ma*",
                "*tm*",
                "**tm**",
                "*tch*",
                "*t*a*",
                "*m*t*",
                "*m*c*",
                "p\\at*",
                "*atmatc*",
            ],
            vec!["patmatch"],
        );
        no_match_s(
            vec![
                "pat\\*match",
                "patmatch?",
                "p\\?tmatch",
                "pat\\\\match",
                "p\\y",
                "y",
                "xsdaf",
                "*x",
                "*x*",
                "*p*k",
                "*c*m*",
                "*c*m*a*",
                "*c*a*m*",
                "p\\a*\\*t",
                "p??t",
                "patmat",
                "patmatc",
                "?atmatc",
                "*atmat",
            ],
            vec!["patmatch"],
        );
    }
}