Skip to main content

soar_dl/
filter.rs

1use fast_glob::glob_match;
2use regex::Regex;
3
4#[derive(Debug, Clone, Default)]
5pub struct Filter {
6    pub regexes: Vec<Regex>,
7    pub globs: Vec<String>,
8    pub include: Vec<String>,
9    pub exclude: Vec<String>,
10    pub case_sensitive: bool,
11}
12
13impl Filter {
14    /// Determines whether a name satisfies this filter's combined criteria.
15    ///
16    /// The name must match every regex in `self.regexes`, match at least one glob in
17    /// `self.globs`, satisfy all include keyword groups in `self.include`, and must
18    /// not match any exclude keyword groups in `self.exclude`.
19    ///
20    /// # Returns
21    ///
22    /// `true` if the name matches all regexes, at least one glob, all include groups,
23    /// and no exclude groups; `false` otherwise.
24    ///
25    /// # Examples
26    ///
27    /// ```
28    /// use soar_dl::filter::Filter;
29    ///
30    /// let f = Filter {
31    ///     regexes: Vec::new(),
32    ///     globs: vec!["*".into()],
33    ///     include: Vec::new(),
34    ///     exclude: Vec::new(),
35    ///     case_sensitive: true,
36    /// };
37    /// assert!(f.matches("anything"));
38    /// ```
39    pub fn matches(&self, name: &str) -> bool {
40        let matches_regex =
41            self.regexes.is_empty() || self.regexes.iter().all(|r| r.is_match(name));
42        let matches_glob = self.globs.is_empty()
43            || if self.case_sensitive {
44                self.globs.iter().any(|g| glob_match(g, name))
45            } else {
46                self.globs
47                    .iter()
48                    .any(|g| glob_match(g.to_lowercase(), name.to_lowercase()))
49            };
50        let matches_include = self.matches_keywords(name, &self.include, true);
51        let matches_exclude = self.matches_keywords(name, &self.exclude, false);
52
53        matches_regex && matches_glob && matches_include && matches_exclude
54    }
55
56    /// Determines whether every keyword group in `keywords` satisfies the required presence or absence
57    /// against `name` according to `must_match`.
58    ///
59    /// - If `keywords` is empty, returns `true`.
60    /// - Splits each keyword string on commas, trims parts, and ignores empty parts.
61    /// - Respects `case_sensitive`: comparisons use the original case when `true`, otherwise both
62    ///   haystack and needles are lowercased.
63    /// - For each keyword (a group of comma-separated alternatives), any one alternative matching
64    ///   `name` counts as a match for that keyword.
65    /// - If `must_match` is `true`, each keyword group must have at least one matching alternative.
66    ///   If `must_match` is `false`, each keyword group must have no matching alternatives.
67    ///
68    /// # Examples
69    ///
70    /// ```
71    /// use regex::Regex;
72    /// use soar_dl::filter::Filter;
73    ///
74    /// let filter = Filter {
75    ///     regexes: vec![],
76    ///     globs: vec![],
77    ///     include: vec!["foo,bar".to_string()],
78    ///     exclude: vec![],
79    ///     case_sensitive: false,
80    /// };
81    ///
82    /// // "barbaz" contains "bar", one of the alternatives in the include group.
83    /// assert!(filter.matches("barbaz"));
84    /// ```
85    fn matches_keywords(&self, name: &str, keywords: &[String], must_match: bool) -> bool {
86        if keywords.is_empty() {
87            return true;
88        }
89
90        let haystack = if self.case_sensitive {
91            name.to_string()
92        } else {
93            name.to_lowercase()
94        };
95
96        keywords.iter().all(|kw| {
97            let parts: Vec<_> = kw
98                .split(',')
99                .map(str::trim)
100                .filter(|s| !s.is_empty())
101                .collect();
102
103            let any_match = parts.iter().any(|&part| {
104                let needle = if self.case_sensitive {
105                    part.to_string()
106                } else {
107                    part.to_lowercase()
108                };
109                haystack.contains(&needle)
110            });
111
112            if must_match {
113                any_match
114            } else {
115                !any_match
116            }
117        })
118    }
119}
120
#[cfg(test)]
mod tests {
    use regex::Regex;

    use super::*;

    /// Base filter with case-sensitive matching and no criteria; tests override
    /// the fields they care about with struct-update syntax.
    fn case_sensitive() -> Filter {
        Filter {
            case_sensitive: true,
            ..Filter::default()
        }
    }

    /// Converts string literals into the owned strings the filter fields hold.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Compiles each pattern, panicking on an invalid one.
    fn compiled(patterns: &[&str]) -> Vec<Regex> {
        patterns
            .iter()
            .map(|pattern| Regex::new(pattern).unwrap())
            .collect()
    }

    /// Asserts that `filter` accepts every name in `accepted` and then rejects
    /// every name in `rejected`.
    fn check(filter: &Filter, accepted: &[&str], rejected: &[&str]) {
        for &name in accepted {
            assert!(filter.matches(name), "expected {:?} to match", name);
        }
        for &name in rejected {
            assert!(!filter.matches(name), "expected {:?} to be rejected", name);
        }
    }

    #[test]
    fn test_filter_default() {
        let Filter {
            regexes,
            globs,
            include,
            exclude,
            case_sensitive,
        } = Filter::default();

        assert!(regexes.is_empty());
        assert!(globs.is_empty());
        assert!(include.is_empty());
        assert!(exclude.is_empty());
        assert!(!case_sensitive);
    }

    #[test]
    fn test_matches_empty_filter() {
        // A filter without criteria accepts everything, including the empty name.
        check(&Filter::default(), &["anything", "", "test.tar.gz"], &[]);
    }

    #[test]
    fn test_matches_regex() {
        let filter = Filter {
            regexes: compiled(&[r"\.tar\.gz$"]),
            ..case_sensitive()
        };

        check(
            &filter,
            &["archive.tar.gz", "file-v1.0.tar.gz"],
            &["archive.zip", "file.tar"],
        );
    }

    #[test]
    fn test_matches_multiple_regexes() {
        let filter = Filter {
            regexes: compiled(&[r"^file", r"linux"]),
            ..case_sensitive()
        };

        check(
            &filter,
            &["file-linux-x86_64"],
            &[
                "archive-linux-x86_64", // doesn't start with "file"
                "file-windows-x86_64",  // doesn't contain "linux"
            ],
        );
    }

    #[test]
    fn test_matches_glob_case_sensitive() {
        let filter = Filter {
            globs: owned(&["*.tar.gz"]),
            ..case_sensitive()
        };

        check(
            &filter,
            &["archive.tar.gz", "file.tar.gz"],
            &["archive.TAR.GZ", "archive.zip"],
        );
    }

    #[test]
    fn test_matches_glob_case_insensitive() {
        let filter = Filter {
            globs: owned(&["*.tar.gz"]),
            case_sensitive: false,
            ..Filter::default()
        };

        check(
            &filter,
            &["archive.tar.gz", "archive.TAR.GZ", "file.Tar.Gz"],
            &["archive.zip"],
        );
    }

    #[test]
    fn test_matches_multiple_globs() {
        let filter = Filter {
            globs: owned(&["*.tar.gz", "*.zip"]),
            ..case_sensitive()
        };

        check(
            &filter,
            &["archive.tar.gz", "file.zip"],
            &["file.tar", "file.7z"],
        );
    }

    #[test]
    fn test_matches_include_single_keyword() {
        let filter = Filter {
            include: owned(&["linux"]),
            ..case_sensitive()
        };

        check(
            &filter,
            &["file-linux-x86_64", "linux-binary"],
            &["file-windows-x86_64", "darwin-binary"],
        );
    }

    #[test]
    fn test_matches_include_multiple_keywords() {
        let filter = Filter {
            include: owned(&["linux", "x86_64"]),
            ..case_sensitive()
        };

        check(
            &filter,
            &["file-linux-x86_64"],
            &[
                "file-linux-arm64",   // missing x86_64
                "file-darwin-x86_64", // missing linux
            ],
        );
    }

    #[test]
    fn test_matches_include_alternatives() {
        let filter = Filter {
            include: owned(&["linux,darwin"]),
            ..case_sensitive()
        };

        check(
            &filter,
            &["file-linux-x86_64", "file-darwin-x86_64"],
            &["file-windows-x86_64"],
        );
    }

    #[test]
    fn test_matches_include_case_insensitive() {
        let filter = Filter {
            include: owned(&["Linux"]),
            case_sensitive: false,
            ..Filter::default()
        };

        check(
            &filter,
            &["file-linux-x86_64", "file-LINUX-x86_64", "file-Linux-x86_64"],
            &[],
        );
    }

    #[test]
    fn test_matches_exclude_single_keyword() {
        let filter = Filter {
            exclude: owned(&["debug"]),
            ..case_sensitive()
        };

        check(&filter, &["file-release"], &["file-debug", "debug-symbols"]);
    }

    #[test]
    fn test_matches_exclude_multiple_keywords() {
        let filter = Filter {
            exclude: owned(&["debug", "test"]),
            ..case_sensitive()
        };

        check(
            &filter,
            &["file-release"],
            &["file-debug", "test-binary", "debug-test-binary"],
        );
    }

    #[test]
    fn test_matches_exclude_alternatives() {
        let filter = Filter {
            exclude: owned(&["debug,test"]),
            ..case_sensitive()
        };

        check(&filter, &["file-release"], &["file-debug", "file-test"]);
    }

    #[test]
    fn test_matches_combined_filters() {
        let filter = Filter {
            regexes: compiled(&[r"^file"]),
            globs: owned(&["*.tar.gz"]),
            include: owned(&["linux", "x86_64"]),
            exclude: owned(&["debug"]),
            case_sensitive: true,
        };

        check(
            &filter,
            &["file-linux-x86_64-v1.0.tar.gz"],
            &[
                "archive-linux-x86_64-v1.0.tar.gz", // doesn't start with "file"
                "file-linux-x86_64-v1.0.zip",       // wrong extension
                "file-darwin-x86_64-v1.0.tar.gz",   // not linux
                "file-linux-arm64-v1.0.tar.gz",     // not x86_64
                "file-linux-x86_64-debug.tar.gz",   // contains "debug"
            ],
        );
    }

    #[test]
    fn test_matches_keywords_empty() {
        let filter = Filter::default();

        // No keyword groups means no constraint in either mode.
        for must_match in [true, false] {
            assert!(filter.matches_keywords("anything", &[], must_match));
        }
    }

    #[test]
    fn test_matches_keywords_whitespace_handling() {
        let filter = Filter {
            include: owned(&["  linux  ,  darwin  "]),
            ..case_sensitive()
        };

        check(&filter, &["file-linux-x86_64", "file-darwin-x86_64"], &[]);
    }

    #[test]
    fn test_matches_keywords_empty_alternatives() {
        let filter = Filter {
            include: owned(&["linux,,darwin"]),
            ..case_sensitive()
        };

        // The empty alternative between the two commas is filtered out.
        check(&filter, &["file-linux-x86_64", "file-darwin-x86_64"], &[]);
    }

    #[test]
    fn test_glob_wildcard_patterns() {
        let filter = Filter {
            globs: owned(&["file-*-x86_64"]),
            ..case_sensitive()
        };

        check(
            &filter,
            &[
                "file-linux-x86_64",
                "file-darwin-x86_64",
                "file-windows-x86_64",
            ],
            &["file-linux-arm64"],
        );
    }

    #[test]
    fn test_glob_question_mark() {
        let filter = Filter {
            globs: owned(&["file-?.tar.gz"]),
            ..case_sensitive()
        };

        check(
            &filter,
            &["file-1.tar.gz", "file-a.tar.gz"],
            &["file-10.tar.gz", "file-.tar.gz"],
        );
    }
}