moenster/
lib.rs

1//! # mønster (n) - pattern.
2//!
3//! Simple glob-style pattern matching for strings.
4//! Always matches the whole string from beginning to end.
5//!
6//! | Wildcard | Description | Note |
7//! | -------- | ----------- | ---- |
8//! | *        | matches any number of any characters including none | |
9//! | ?        | matches any single character | does not handle multi-byte UTF-8 codepoints |
10//! | \[abc]   | matches one character given in the bracket | taken as byte values |
11//! | \[a-z]   | matches one character from the range given in the bracket | range taken from their byte values |
12//! | \[^abc]  | matches one character that is not given in the bracket | taken as byte values |
13//! | \[^a-z]  | matches one character that is not from the range given in the bracket | range taken from their byte values |
14//!
15//! _Note: An empty bracket can never match anything._
16//!
17//! # Example
18//!
19//! ```
20//! # use moenster::stringmatch;
21//! assert!(stringmatch("m*nster", "mønster"));
22//! ```
23
24#![forbid(unsafe_code)]
25#![deny(missing_debug_implementations, nonstandard_style)]
26#![warn(missing_docs, future_incompatible, unreachable_pub, rust_2018_idioms)]
27#![allow(clippy::collapsible_if)]
28
29/// Match a string against the specified pattern.
30///
31/// Returns true if the string matches against the pattern from start to finish.
32/// See the top-level documentation for allowed wildcards.
33pub fn stringmatch(pattern: &str, string: &str) -> bool {
34    stringmatch_bytes(pattern.as_bytes(), string.as_bytes(), Case::Sensitive)
35}
36
/// Selects how letters are compared while matching.
// FIXME: Remove dead_code allowance.
#[allow(dead_code)]
#[derive(Copy, Clone)]
enum Case {
    /// Bytes must be identical.
    Sensitive,
    /// ASCII letters compare equal regardless of case.
    Insensitive,
}

/// Normalise `byte` for comparison: ASCII-lowercased in case-insensitive mode,
/// returned unchanged otherwise.
fn fold_case(byte: u8, case: Case) -> u8 {
    match case {
        Case::Sensitive => byte,
        Case::Insensitive => byte.to_ascii_lowercase(),
    }
}

/// Compare a pattern byte with a string byte under the given case mode.
fn bytes_match(pattern_byte: u8, string_byte: u8, case: Case) -> bool {
    fold_case(pattern_byte, case) == fold_case(string_byte, case)
}

/// Match `string` against the glob-style `pattern`, byte by byte.
///
/// Supported syntax: `*` (any run of bytes, including none), `?` (any single
/// byte), bracket expressions such as `[abc]`, `[a-z]`, `[^abc]`, and `\` to
/// take the following byte literally. `case` selects whether ASCII letters are
/// compared exactly or case-insensitively; this applies to plain bytes,
/// escaped bytes, bracket members and bracket ranges alike.
///
/// Returns `true` only if the whole of `string` is consumed by the whole of
/// `pattern`. A pattern consisting solely of `*` matches the empty string.
/// Malformed brackets never panic: a bracket that is not closed simply ends
/// with the pattern, and a pattern that stops right after `[` matches nothing.
///
/// `*` is resolved by recursive backtracking, so every additional `*` in the
/// pattern adds another level of retries.
fn stringmatch_bytes(mut pattern: &[u8], mut string: &[u8], case: Case) -> bool {
    while !pattern.is_empty() && !string.is_empty() {
        match pattern[0] {
            // any number of any characters
            b'*' => {
                // A run of stars is equivalent to a single star.
                while pattern.len() >= 2 && pattern[1] == b'*' {
                    pattern = &pattern[1..];
                }
                // A trailing star swallows whatever is left of the string.
                if pattern.len() == 1 {
                    return true;
                }

                // Try the remainder of the pattern at every possible offset.
                // `rest` is non-empty and does not start with `*`, so it can
                // never match the empty suffix; only non-empty ones are tried.
                let rest = &pattern[1..];
                return (0..string.len())
                    .any(|skip| stringmatch_bytes(rest, &string[skip..], case));
            }
            // any single character
            b'?' => {
                string = &string[1..];
            }
            // bracketed patterns such as `[abc]` or `[a-z]`
            b'[' => {
                pattern = &pattern[1..];
                // `first()` rather than indexing: the pattern may end right
                // after the opening bracket.
                let negated = pattern.first() == Some(&b'^');
                if negated {
                    pattern = &pattern[1..];
                }

                let subject = string[0];
                let mut matched = false;
                loop {
                    match pattern {
                        // Closing bracket, or the pattern ran out before one.
                        [] | [b']', ..] => break,
                        // Escaped member: compare the byte after the backslash.
                        [b'\\', escaped, ..] => {
                            matched |= bytes_match(*escaped, subject, case);
                            pattern = &pattern[1..];
                        }
                        // Range `low-high`, accepted in either order.
                        [low, b'-', high, ..] => {
                            // Fold first, then order the bounds, so that a
                            // range such as `a-Z` stays non-empty when
                            // matching case-insensitively.
                            let mut low = fold_case(*low, case);
                            let mut high = fold_case(*high, case);
                            if low > high {
                                std::mem::swap(&mut low, &mut high);
                            }

                            let folded = fold_case(subject, case);
                            matched |= low <= folded && folded <= high;
                            pattern = &pattern[2..];
                        }
                        // Plain member.
                        [literal, ..] => matched |= bytes_match(*literal, subject, case),
                    }
                    pattern = &pattern[1..];
                }

                // `[^...]` inverts the outcome of the scan.
                if matched == negated {
                    return false;
                }

                string = &string[1..];
            }
            // everything else
            _ => {
                // A backslash makes the next byte literal; a lone trailing
                // backslash is compared as itself.
                if pattern[0] == b'\\' && pattern.len() >= 2 {
                    pattern = &pattern[1..];
                }

                if !bytes_match(pattern[0], string[0], case) {
                    return false;
                }
                string = &string[1..];
            }
        }

        // Step past the pattern byte that was just handled. The pattern can
        // already be exhausted here if a bracket was never closed.
        if !pattern.is_empty() {
            pattern = &pattern[1..];
        }
    }

    // Once the string is used up, any stars left over match "nothing". Doing
    // this after the loop also covers a string that was empty from the start.
    if string.is_empty() {
        while let [b'*', rest @ ..] = pattern {
            pattern = rest;
        }
    }

    pattern.is_empty() && string.is_empty()
}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// The string most of the tests below are matched against.
    const SUBJECT: &str = "moenster";

    /// Run `pattern` against [`SUBJECT`].
    fn accepts(pattern: &str) -> bool {
        stringmatch(pattern, SUBJECT)
    }

    /// Negation of [`accepts`], so rejections read as positively as matches.
    fn rejects(pattern: &str) -> bool {
        !accepts(pattern)
    }

    // A pattern without wildcards matches itself.
    #[test]
    fn plain_string() {
        assert!(accepts("moenster"));
    }

    // A backslash makes the following character literal.
    #[test]
    fn escaped() {
        assert!(accepts("moenste\\r"));
    }

    // `?` stands for exactly one character, never zero.
    #[test]
    fn questionmark() {
        assert!(accepts("mo?nster"));
        assert!(accepts("m??nster"));
        assert!(accepts("mo?nst?r"));
        assert!(rejects("moenster?"));
    }

    // One or several stars on their own match everything.
    #[test]
    fn wildcard() {
        assert!(accepts("*"));
        assert!(accepts("*****"));
    }

    // Stars combined with literal text at the start, middle and end.
    #[test]
    fn wildcard_and_more() {
        assert!(accepts("m*oenster"));
        assert!(accepts("m*"));
        assert!(accepts("*r"));
    }

    // `[abc]` matches one of the listed characters.
    #[test]
    fn bracketed_chars() {
        assert!(accepts("m[oei]enster"));
        assert!(rejects("m[bcd]enster"));
    }

    // `[^abc]` matches any character that is not listed.
    #[test]
    fn not_bracketed_chars() {
        assert!(accepts("m[^bcd]enster"));
        assert!(rejects("m[^oei]enster"));
    }

    // `[a-z]` matches one character from the range.
    #[test]
    fn bracketed_range() {
        assert!(accepts("m[n-p]enster"));
        assert!(rejects("m[a-c]enster"));
    }

    // `[^a-z]` matches one character outside of the range.
    #[test]
    fn not_bracketed_range() {
        assert!(accepts("m[^a-c]enster"));
        assert!(rejects("m[^n-p]enster"));
    }

    // A bracket that is never closed runs to the end of the pattern.
    #[test]
    fn wrong_bracket() {
        assert!(stringmatch("m[n-p", "mo"));
        assert!(!stringmatch("m[n-pt", "mot"));
    }

    // An escaped `]` is a bracket member and does not close the bracket.
    #[test]
    fn escaped_in_bracket() {
        let pattern = "m[\\].;]o";
        assert!(stringmatch(pattern, "m]o"));
        assert!(stringmatch(pattern, "m;o"));
        assert!(stringmatch(pattern, "m.o"));
    }

    // An empty bracket never matches.
    #[test]
    fn empty_bracket() {
        assert!(!stringmatch("m[]", "m"));
    }
}