Skip to main content

sieve/runtime/tests/
glob.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs Ltd <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-SEL
5 */
6
7use std::char::REPLACEMENT_CHARACTER;
8
9use crate::MAX_MATCH_VARIABLES;
10
11#[derive(Debug, Clone, PartialEq, Eq)]
12pub struct GlobPattern {
13    pattern: Vec<PatternChar>,
14    to_lower: bool,
15}
16
17#[derive(Debug, Clone, PartialEq, Eq)]
18pub enum PatternChar {
19    WildcardMany { num: usize, match_pos: usize },
20    WildcardSingle { match_pos: usize },
21    Char { char: char, match_pos: usize },
22}
23
24impl GlobPattern {
25    pub fn compile(pattern: &str, to_lower: bool) -> Self {
26        let mut chars = Vec::new();
27        let mut is_escaped = false;
28        let mut str = pattern.chars().peekable();
29
30        while let Some(char) = str.next() {
31            match char {
32                '*' if !is_escaped => {
33                    let mut num = 1;
34                    while let Some('*') = str.peek() {
35                        num += 1;
36                        str.next();
37                    }
38                    chars.push(PatternChar::WildcardMany { num, match_pos: 0 });
39                }
40                '?' if !is_escaped => {
41                    chars.push(PatternChar::WildcardSingle { match_pos: 0 });
42                }
43                '\\' if !is_escaped => {
44                    is_escaped = true;
45                    continue;
46                }
47                _ => {
48                    if is_escaped {
49                        is_escaped = false;
50                    }
51                    if to_lower && char.is_uppercase() {
52                        for char in char.to_lowercase() {
53                            chars.push(PatternChar::Char { char, match_pos: 0 });
54                        }
55                    } else {
56                        chars.push(PatternChar::Char { char, match_pos: 0 });
57                    }
58                }
59            }
60        }
61
62        GlobPattern {
63            pattern: chars,
64            to_lower,
65        }
66    }
67
68    // Credits: Algorithm ported from https://research.swtch.com/glob
69    pub fn matches(&self, value: &str) -> bool {
70        let value = if self.to_lower {
71            value.to_lowercase().chars().collect::<Vec<_>>()
72        } else {
73            value.chars().collect::<Vec<_>>()
74        };
75
76        let mut px = 0;
77        let mut nx = 0;
78        let mut next_px = 0;
79        let mut next_nx = 0;
80
81        while px < self.pattern.len() || nx < value.len() {
82            match self.pattern.get(px) {
83                Some(PatternChar::Char { char, .. }) => {
84                    if matches!(value.get(nx), Some(nc) if nc == char ) {
85                        px += 1;
86                        nx += 1;
87                        continue;
88                    }
89                }
90                Some(PatternChar::WildcardSingle { .. }) if nx < value.len() => {
91                    px += 1;
92                    nx += 1;
93                    continue;
94                }
95                Some(PatternChar::WildcardMany { .. }) => {
96                    next_px = px;
97                    next_nx = nx + 1;
98                    px += 1;
99                    continue;
100                }
101                _ => (),
102            }
103            if 0 < next_nx && next_nx <= value.len() {
104                px = next_px;
105                nx = next_nx;
106                continue;
107            }
108            return false;
109        }
110        true
111    }
112
113    pub fn capture(
114        mut self,
115        value_: &str,
116        capture_positions: u64,
117        captured_values: &mut Vec<(usize, String)>,
118    ) -> bool {
119        let value = if self.to_lower {
120            let mut value = Vec::with_capacity(value_.len());
121            for char in value_.chars() {
122                if char.is_uppercase() {
123                    for (pos, lowerchar) in char.to_lowercase().enumerate() {
124                        value.push((
125                            lowerchar,
126                            if pos == 0 {
127                                char
128                            } else {
129                                REPLACEMENT_CHARACTER
130                            },
131                        ));
132                    }
133                } else {
134                    value.push((char, char));
135                }
136            }
137            value
138        } else {
139            value_.chars().map(|char| (char, char)).collect::<Vec<_>>()
140        };
141
142        let mut px = 0;
143        let mut nx = 0;
144        let mut next_px = 0;
145        let mut next_nx = 0;
146
147        while px < self.pattern.len() || nx < value.len() {
148            match self.pattern.get_mut(px) {
149                Some(PatternChar::Char { char, match_pos }) => {
150                    if matches!(value.get(nx), Some(nc) if &nc.0 == char ) {
151                        *match_pos = nx;
152                        px += 1;
153                        nx += 1;
154                        continue;
155                    }
156                }
157                Some(PatternChar::WildcardSingle { match_pos }) if nx < value.len() => {
158                    *match_pos = nx;
159                    px += 1;
160                    nx += 1;
161                    continue;
162                }
163                Some(PatternChar::WildcardMany { match_pos, .. }) => {
164                    *match_pos = nx;
165                    next_px = px;
166                    next_nx = nx + 1;
167                    px += 1;
168                    continue;
169                }
170                _ => (),
171            }
172            if 0 < next_nx && next_nx <= value.len() {
173                px = next_px;
174                nx = next_nx;
175                continue;
176            }
177            return false;
178        }
179
180        let mut last_pos = 0;
181
182        captured_values.clear();
183        if capture_positions & 1 != 0 {
184            captured_values.push((0usize, value_.to_string()));
185        }
186
187        let mut wildcard_pos: usize = 1;
188        for item in &mut self.pattern {
189            if wildcard_pos <= MAX_MATCH_VARIABLES as usize {
190                last_pos = match item {
191                    PatternChar::WildcardMany { num, match_pos } => {
192                        while *num > 1 {
193                            if capture_positions & (1 << wildcard_pos) != 0 {
194                                captured_values.push((wildcard_pos, String::with_capacity(0)));
195                            }
196                            wildcard_pos += 1;
197                            *num -= 1;
198                        }
199
200                        if capture_positions & (1 << wildcard_pos) != 0 {
201                            if let Some(range) = value.get(last_pos..*match_pos) {
202                                captured_values.push((
203                                    wildcard_pos,
204                                    range
205                                        .iter()
206                                        .filter_map(|(_, char)| {
207                                            if char != &REPLACEMENT_CHARACTER {
208                                                Some(char)
209                                            } else {
210                                                None
211                                            }
212                                        })
213                                        .collect::<String>(),
214                                ));
215                            } else {
216                                debug_assert!(false, "Glob pattern failure.");
217                                return false;
218                            }
219                        }
220                        wildcard_pos += 1;
221                        *match_pos
222                    }
223                    PatternChar::WildcardSingle { match_pos } => {
224                        if capture_positions & (1 << wildcard_pos) != 0 {
225                            if let Some((char, orig_char)) = value.get(*match_pos) {
226                                captured_values.push((
227                                    wildcard_pos,
228                                    (if orig_char != &REPLACEMENT_CHARACTER {
229                                        orig_char
230                                    } else {
231                                        char
232                                    })
233                                    .to_string(),
234                                ));
235                            } else {
236                                debug_assert!(false, "Glob pattern failure.");
237                                return false;
238                            }
239                        }
240                        wildcard_pos += 1;
241                        *match_pos
242                    }
243                    PatternChar::Char { match_pos, .. } => *match_pos,
244                } + 1;
245            } else {
246                break;
247            }
248        }
249        true
250    }
251}
252
253#[cfg(test)]
254mod tests {
255    use crate::runtime::tests::glob::GlobPattern;
256
257    #[test]
258    fn glob_match() {
259        for (value, pattern, expected_result) in [
260            (
261                "frop.......frop.........frop....",
262                "?*frop*",
263                vec!["f", "rop.......", ".........frop...."],
264            ),
265            ("frop:frup:frop", "*:*:*", vec!["frop", "frup", "frop"]),
266            (
267                "a b c d e f g",
268                "? ? ? ? ? ? ?",
269                vec!["a", "b", "c", "d", "e", "f", "g"],
270            ),
271            ("puk pok puk pok", "pu*ok", vec!["k pok puk p"]),
272            ("snot kip snot", "snot*snot", vec![" kip "]),
273            (
274                "klopfropstroptop",
275                "*fr??*top",
276                vec!["klop", "o", "p", "strop"],
277            ),
278            ("toptoptop", "*top", vec!["toptop"]),
279            (
280                "Fehlende Straße zur Karte hinzufügen",
281                "FEHLENDE * ZUR Karte HINZUFÜGEN",
282                vec!["Straße"],
283            ),
284        ] {
285            let p = GlobPattern::compile(pattern, true);
286            let mut match_values = Vec::new();
287            assert!(
288                p.clone().capture(value, u64::MAX ^ 1, &mut match_values),
289                "{value:?} {pattern:?}",
290            );
291
292            assert_eq!(
293                match_values.into_iter().map(|(_, v)| v).collect::<Vec<_>>(),
294                expected_result,
295                "{value:?} {pattern:?}",
296            );
297            assert!(p.matches(value), "{value:?} {pattern:?}",);
298        }
299    }
300}