sieve/runtime/tests/
glob.rs

1/*
2 * Copyright (c) 2020-2023, Stalwart Labs Ltd.
3 *
4 * This file is part of the Stalwart Sieve Interpreter.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU Affero General Public License as
8 * published by the Free Software Foundation, either version 3 of
9 * the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Affero General Public License for more details.
15 * in the LICENSE file at the top-level directory of this distribution.
16 * You should have received a copy of the GNU Affero General Public License
17 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18 *
19 * You can be released from the requirements of the AGPLv3 license by
20 * purchasing a commercial license. Please contact licensing@stalw.art
21 * for more details.
22*/
23
24use std::char::REPLACEMENT_CHARACTER;
25
26use crate::MAX_MATCH_VARIABLES;
27
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub struct GlobPattern {
30    pattern: Vec<PatternChar>,
31    to_lower: bool,
32}
33
34#[derive(Debug, Clone, PartialEq, Eq)]
35pub enum PatternChar {
36    WildcardMany { num: usize, match_pos: usize },
37    WildcardSingle { match_pos: usize },
38    Char { char: char, match_pos: usize },
39}
40
41impl GlobPattern {
42    pub fn compile(pattern: &str, to_lower: bool) -> Self {
43        let mut chars = Vec::new();
44        let mut is_escaped = false;
45        let mut str = pattern.chars().peekable();
46
47        while let Some(char) = str.next() {
48            match char {
49                '*' if !is_escaped => {
50                    let mut num = 1;
51                    while let Some('*') = str.peek() {
52                        num += 1;
53                        str.next();
54                    }
55                    chars.push(PatternChar::WildcardMany { num, match_pos: 0 });
56                }
57                '?' if !is_escaped => {
58                    chars.push(PatternChar::WildcardSingle { match_pos: 0 });
59                }
60                '\\' if !is_escaped => {
61                    is_escaped = true;
62                    continue;
63                }
64                _ => {
65                    if is_escaped {
66                        is_escaped = false;
67                    }
68                    if to_lower && char.is_uppercase() {
69                        for char in char.to_lowercase() {
70                            chars.push(PatternChar::Char { char, match_pos: 0 });
71                        }
72                    } else {
73                        chars.push(PatternChar::Char { char, match_pos: 0 });
74                    }
75                }
76            }
77        }
78
79        GlobPattern {
80            pattern: chars,
81            to_lower,
82        }
83    }
84
85    // Credits: Algorithm ported from https://research.swtch.com/glob
86    pub fn matches(&self, value: &str) -> bool {
87        let value = if self.to_lower {
88            value.to_lowercase().chars().collect::<Vec<_>>()
89        } else {
90            value.chars().collect::<Vec<_>>()
91        };
92
93        let mut px = 0;
94        let mut nx = 0;
95        let mut next_px = 0;
96        let mut next_nx = 0;
97
98        while px < self.pattern.len() || nx < value.len() {
99            match self.pattern.get(px) {
100                Some(PatternChar::Char { char, .. }) => {
101                    if matches!(value.get(nx), Some(nc) if nc == char ) {
102                        px += 1;
103                        nx += 1;
104                        continue;
105                    }
106                }
107                Some(PatternChar::WildcardSingle { .. }) => {
108                    if nx < value.len() {
109                        px += 1;
110                        nx += 1;
111                        continue;
112                    }
113                }
114                Some(PatternChar::WildcardMany { .. }) => {
115                    next_px = px;
116                    next_nx = nx + 1;
117                    px += 1;
118                    continue;
119                }
120                _ => (),
121            }
122            if 0 < next_nx && next_nx <= value.len() {
123                px = next_px;
124                nx = next_nx;
125                continue;
126            }
127            return false;
128        }
129        true
130    }
131
132    pub fn capture(
133        mut self,
134        value_: &str,
135        capture_positions: u64,
136        captured_values: &mut Vec<(usize, String)>,
137    ) -> bool {
138        let value = if self.to_lower {
139            let mut value = Vec::with_capacity(value_.len());
140            for char in value_.chars() {
141                if char.is_uppercase() {
142                    for (pos, lowerchar) in char.to_lowercase().enumerate() {
143                        value.push((
144                            lowerchar,
145                            if pos == 0 {
146                                char
147                            } else {
148                                REPLACEMENT_CHARACTER
149                            },
150                        ));
151                    }
152                } else {
153                    value.push((char, char));
154                }
155            }
156            value
157        } else {
158            value_.chars().map(|char| (char, char)).collect::<Vec<_>>()
159        };
160
161        let mut px = 0;
162        let mut nx = 0;
163        let mut next_px = 0;
164        let mut next_nx = 0;
165
166        while px < self.pattern.len() || nx < value.len() {
167            match self.pattern.get_mut(px) {
168                Some(PatternChar::Char { char, match_pos }) => {
169                    if matches!(value.get(nx), Some(nc) if &nc.0 == char ) {
170                        *match_pos = nx;
171                        px += 1;
172                        nx += 1;
173                        continue;
174                    }
175                }
176                Some(PatternChar::WildcardSingle { match_pos }) => {
177                    if nx < value.len() {
178                        *match_pos = nx;
179                        px += 1;
180                        nx += 1;
181                        continue;
182                    }
183                }
184                Some(PatternChar::WildcardMany { match_pos, .. }) => {
185                    *match_pos = nx;
186                    next_px = px;
187                    next_nx = nx + 1;
188                    px += 1;
189                    continue;
190                }
191                _ => (),
192            }
193            if 0 < next_nx && next_nx <= value.len() {
194                px = next_px;
195                nx = next_nx;
196                continue;
197            }
198            return false;
199        }
200
201        let mut last_pos = 0;
202
203        captured_values.clear();
204        if capture_positions & 1 != 0 {
205            captured_values.push((0usize, value_.to_string()));
206        }
207
208        let mut wildcard_pos = 1;
209        for item in &self.pattern {
210            if wildcard_pos <= MAX_MATCH_VARIABLES {
211                last_pos = match item {
212                    PatternChar::WildcardMany { mut num, match_pos } => {
213                        while num > 1 {
214                            if capture_positions & (1 << wildcard_pos) != 0 {
215                                captured_values.push((wildcard_pos, String::with_capacity(0)));
216                            }
217                            wildcard_pos += 1;
218                            num -= 1;
219                        }
220
221                        if capture_positions & (1 << wildcard_pos) != 0 {
222                            if let Some(range) = value.get(last_pos..*match_pos) {
223                                captured_values.push((
224                                    wildcard_pos,
225                                    range
226                                        .iter()
227                                        .filter_map(|(_, char)| {
228                                            if char != &REPLACEMENT_CHARACTER {
229                                                Some(char)
230                                            } else {
231                                                None
232                                            }
233                                        })
234                                        .collect::<String>(),
235                                ));
236                            } else {
237                                debug_assert!(false, "Glob pattern failure.");
238                                return false;
239                            }
240                        }
241                        wildcard_pos += 1;
242                        match_pos
243                    }
244                    PatternChar::WildcardSingle { match_pos } => {
245                        if capture_positions & (1 << wildcard_pos) != 0 {
246                            if let Some((char, orig_char)) = value.get(*match_pos) {
247                                captured_values.push((
248                                    wildcard_pos,
249                                    (if orig_char != &REPLACEMENT_CHARACTER {
250                                        orig_char
251                                    } else {
252                                        char
253                                    })
254                                    .to_string(),
255                                ));
256                            } else {
257                                debug_assert!(false, "Glob pattern failure.");
258                                return false;
259                            }
260                        }
261                        wildcard_pos += 1;
262                        match_pos
263                    }
264                    PatternChar::Char { match_pos, .. } => match_pos,
265                } + 1;
266            } else {
267                break;
268            }
269        }
270        true
271    }
272}
273
274#[cfg(test)]
275mod tests {
276    use crate::runtime::tests::glob::GlobPattern;
277
278    #[test]
279    fn glob_match() {
280        for (value, pattern, expected_result) in [
281            (
282                "frop.......frop.........frop....",
283                "?*frop*",
284                vec!["f", "rop.......", ".........frop...."],
285            ),
286            ("frop:frup:frop", "*:*:*", vec!["frop", "frup", "frop"]),
287            (
288                "a b c d e f g",
289                "? ? ? ? ? ? ?",
290                vec!["a", "b", "c", "d", "e", "f", "g"],
291            ),
292            ("puk pok puk pok", "pu*ok", vec!["k pok puk p"]),
293            ("snot kip snot", "snot*snot", vec![" kip "]),
294            (
295                "klopfropstroptop",
296                "*fr??*top",
297                vec!["klop", "o", "p", "strop"],
298            ),
299            ("toptoptop", "*top", vec!["toptop"]),
300            (
301                "Fehlende Straße zur Karte hinzufügen",
302                "FEHLENDE * ZUR Karte HINZUFÜGEN",
303                vec!["Straße"],
304            ),
305        ] {
306            let p = GlobPattern::compile(pattern, true);
307            let mut match_values = Vec::new();
308            assert!(
309                p.clone().capture(value, u64::MAX ^ 1, &mut match_values),
310                "{value:?} {pattern:?}",
311            );
312
313            assert_eq!(
314                match_values.into_iter().map(|(_, v)| v).collect::<Vec<_>>(),
315                expected_result,
316                "{value:?} {pattern:?}",
317            );
318            assert!(p.matches(value), "{value:?} {pattern:?}",);
319        }
320    }
321}