simple_string_patterns/
simple_match.rs

1use crate::{enums::StringBounds, utils::{pairs_to_string_bounds, strs_to_string_bounds}, BoundsBuilder, BoundsPosition, CaseMatchMode, CharType, StripCharacters};
2
/// Regex-free matcher methods for common case-insensitive use cases.
///
/// There are no plain or `_cs`-suffixed variants because the standard
/// `starts_with(pat: &str)`, `contains(pat: &str)` and `ends_with(pat: &str)`
/// methods already meet those needs.
pub trait SimpleMatch {

  /// Matches the whole string in case-insensitive mode
  fn equals_ci(&self, pattern: &str) -> bool;

  /// Matches only the plain Latin letters [a-z] and numerals [0-9] in the string
  /// in case-insensitive mode
  fn equals_ci_alphanum(&self, pattern: &str) -> bool;

  /// Starts with a case-insensitive sequence
  fn starts_with_ci(&self, pattern: &str) -> bool;

  /// Starts with a case-insensitive alphanumeric sequence
  fn starts_with_ci_alphanum(&self, pattern: &str) -> bool;

  /// Ends with a case-insensitive sequence
  fn ends_with_ci(&self, pattern: &str) -> bool;

  /// Ends with a case-insensitive alphanumeric sequence
  fn ends_with_ci_alphanum(&self, pattern: &str) -> bool;

  /// Contains a case-insensitive sequence
  fn contains_ci(&self, pattern: &str) -> bool;

  /// Contains a case-insensitive alphanumeric sequence
  fn contains_ci_alphanum(&self, pattern: &str) -> bool;
}
32
33/// Implementation for &str/String 
34impl SimpleMatch for str {
35
36   /// Starts with a case-insensitive sequence
37  fn equals_ci(&self, pattern: &str) -> bool {
38    self.to_lowercase() == pattern.to_lowercase()
39  }
40  
41  /// Starts with a case-insensitive alphanumeric sequence
42  fn equals_ci_alphanum(&self, pattern: &str) -> bool {
43    self.to_lowercase().strip_non_alphanum() ==  pattern.to_lowercase().strip_non_alphanum()
44  }
45
46  /// Starts with a case-insensitive sequence
47  fn starts_with_ci(&self, pattern: &str) -> bool {
48    self.to_lowercase().starts_with(&pattern.to_lowercase())
49  }
50  
51  /// Starts with a case-insensitive alphanumeric sequence
52  fn starts_with_ci_alphanum(&self, pattern: &str) -> bool {
53    self.to_lowercase().strip_non_alphanum().starts_with(&pattern.to_lowercase())
54  }
55  
56  /// Ends with a case-insensitive sequence
57  fn ends_with_ci(&self, pattern: &str) -> bool {
58    self.to_lowercase().ends_with(&pattern.to_lowercase())
59  }
60  
61  /// Ends with a case-insensitive alphanumeric sequence
62  fn ends_with_ci_alphanum(&self, pattern: &str) -> bool {
63    self.to_lowercase().strip_non_alphanum().ends_with(&pattern.to_lowercase())
64  }
65
66  /// Contains a case-insensitive sequence
67  fn contains_ci(&self, pattern: &str) -> bool {
68    self.to_lowercase().contains(&pattern.to_lowercase())
69  }
70  
71  /// Contains a case-insensitive alphanumeric sequence
72  fn contains_ci_alphanum(&self, pattern: &str) -> bool {
73    self.to_lowercase().strip_non_alphanum().contains(&pattern.to_lowercase())
74  }
75}
76
/// Return the indices of all occurrences of a plain pattern within a string
pub trait MatchOccurrences {
  /// Return the start indices only of all matches of a given literal string pattern
  /// (not a regular expression).
  /// Builds on `match_indices` in the Rust standard library, so the indices are byte
  /// offsets of non-overlapping matches, in ascending order.
  fn find_matched_indices(&self, pat: &str) -> Vec<usize>;

  /// Match occurrences of a single character, returning the byte offset of each one
  fn find_char_indices(&self, pat: char) -> Vec<usize>;
}


impl MatchOccurrences for str {
  /// Return the start indices only of all matches of a literal string pattern
  fn find_matched_indices(&self, pat: &str) -> Vec<usize> {
    // match_indices yields (index, matched slice) pairs; only the index is kept
    self.match_indices(pat).map(|(index, _)| index).collect()
  }

  /// As above, but with a character pattern so the caller need not build a string
  fn find_char_indices(&self, pat: char) -> Vec<usize> {
    self.match_indices(pat).map(|(index, _)| index).collect()
  }
}
99
100
101/// Test multiple patterns and return vector of booleans with the results for each item
102pub trait SimpleMatchesMany where Self:SimpleMatch {
103
104  /// test for multiple conditions. All other trait methods are derived from this
105  fn matched_conditional(&self, pattern_sets: &[StringBounds]) -> Vec<bool>;
106
107  /// test for multiple conditions with simple tuple pairs of pattern + case-insenitive flag
108  fn contains_conditional(&self, pattern_sets: &[(&str, bool)]) -> Vec<bool> {
109    let pattern_sets: Vec<StringBounds> = pairs_to_string_bounds(pattern_sets, BoundsPosition::Contains);
110    self.matched_conditional(&pattern_sets)
111   }
112
113  /// Test for presecnce of simple patterns in case-insensitive mode
114  fn contains_conditional_ci(&self, patterns: &[&str]) -> Vec<bool> {
115    let pattern_sets: Vec<StringBounds> = strs_to_string_bounds(patterns, CaseMatchMode::Insensitive, BoundsPosition::Contains);
116    self.matched_conditional(&pattern_sets)
117  }
118
119  /// Test for presecnce of simple patterns in case-sensitive mode
120  fn contains_conditional_cs(&self, patterns: &[&str]) -> Vec<bool> {
121    let pattern_sets: Vec<StringBounds> = strs_to_string_bounds(patterns, CaseMatchMode::Sensitive, BoundsPosition::Contains);
122    self.matched_conditional(&pattern_sets)
123  }
124  
125}
126
127/*
128* Common function to match scalar StringBounds rules
129*/
130pub(crate) fn match_bounds_rule(txt: &str, item: &StringBounds) -> bool {
131  let cm = item.case_mode();
132  let ci = item.case_insensitive();
133  // cast the sample string to lowercase for case-insenitive matches
134  let base = if ci {
135    match cm {
136      CaseMatchMode::AlphanumInsensitive => txt.to_lowercase().strip_non_alphanum(),
137      _ => txt.to_lowercase()
138    }
139  } else {
140    txt.to_owned()
141  };
142  // cast the simple pattern to lowercase for case-insenitive matches
143  let pattern = if ci {
144    item.pattern().to_lowercase()
145  } else {
146    item.pattern().to_owned()
147  };
148  // check if outcome of starts_with, ends_with or contains test matches the positivity value
149  let is_matched = if item.starts_with() {
150    base.starts_with(&pattern)
151  } else if item.ends_with() {
152    base.ends_with(&pattern)
153  } else if item.matches_whole() {
154    base == pattern
155  } else {
156    base.contains(&pattern)
157  } == item.is_positive();
158  is_matched
159}
160
161/*
162* Common function to match StringBounds rule sets handling  both and/or sub  rules and scalar rules
163*/
164pub(crate) fn match_bounds_rule_set(txt: &str, item: &StringBounds) -> bool {
165  match item {
166    StringBounds::And(inner_rules) => txt.matched_conditional(&inner_rules).into_iter().all(|result| result),
167    StringBounds::Or(inner_rules) => txt.matched_conditional(&inner_rules).into_iter().any(|result| result),
168    _ => match_bounds_rule(txt, item)
169  }
170}
171
172impl SimpleMatchesMany for str {
173
174  // test for multiple conditions. All other trait methods are derived from this
175  fn matched_conditional(&self, pattern_sets: &[StringBounds]) -> Vec<bool> {
176    let mut matched_items: Vec<bool> = Vec::with_capacity(pattern_sets.len());
177    for item in pattern_sets {
178       matched_items.push(match_bounds_rule_set(self, item));
179     }
180     matched_items
181   }
182}
183
184/// Test multiple patterns and return boolean
185pub trait SimpleMatchAll where Self:SimpleMatchesMany {
186
187  /// test for multiple conditions. All other trait methods are derived from this
188  fn match_all_conditional(&self, pattern_sets: &[StringBounds]) -> bool;
189
190  /// test for multiple conditions with simple tuple pairs of pattern + case-insenitive flag
191  fn contains_all_conditional(&self, pattern_sets: &[(&str, bool)]) -> bool {
192    let pattern_sets: Vec<StringBounds> = pairs_to_string_bounds(pattern_sets, BoundsPosition::Contains);
193    self.match_all_conditional(&pattern_sets)
194  }
195
196  /// Test for presecnce of simple patterns in case-insensitive mode
197  fn contains_all_conditional_ci(&self, patterns: &[&str]) -> bool {
198    let pattern_sets: Vec<StringBounds> = strs_to_string_bounds(patterns, CaseMatchMode::Insensitive, BoundsPosition::Contains);
199    self.match_all_conditional(&pattern_sets)
200  }
201
202  /// Test for presecnce of simple patterns in case-sensitive mode
203  fn contains_all_conditional_cs(&self, patterns: &[&str]) -> bool {
204    let pattern_sets: Vec<StringBounds> = strs_to_string_bounds(patterns, CaseMatchMode::Sensitive, BoundsPosition::Contains);
205    self.match_all_conditional(&pattern_sets)
206  }
207  
208}
209
210impl SimpleMatchAll for str {
211
212  // test for multiple conditions. All other 'many' trait methods are derived from this
213  fn match_all_conditional(&self, pattern_sets: &[StringBounds]) -> bool {
214    // self.matched_conditional(pattern_sets).into_iter().all(|matched| matched)
215    if pattern_sets.len() > 0 {
216      for item in pattern_sets {
217        // do not evaluate more rules one is not matched
218        if !match_bounds_rule_set(self, item) {
219          return false;
220        }
221      }
222      // return true if one or rules are matched
223      true
224    } else {
225      // return false if no rules are provided
226      false
227    }
228  }
229
230}
231
232/// Test for any of multiple pattern rules and return boolean
233pub trait SimpleMatchAny where Self:SimpleMatchesMany {
234
235  /// test for multiple conditions. All other trait methods are derived from this
236  fn match_any_conditional(&self, pattern_sets: &[StringBounds]) -> bool;
237
238  /// test for multiple conditions with simple tuple pairs of pattern + case-insenitive flag
239  fn contains_any_conditional(&self, pattern_sets: &[(&str, bool)]) -> bool {
240    let pattern_sets: Vec<StringBounds> = pairs_to_string_bounds(pattern_sets, BoundsPosition::Contains);
241    self.match_any_conditional(&pattern_sets)
242  }
243
244  /// Test for presecnce of simple patterns in case-insensitive mode
245  fn contains_any_conditional_ci(&self, patterns: &[&str]) -> bool {
246    let pattern_sets: Vec<StringBounds> = strs_to_string_bounds(patterns, CaseMatchMode::Insensitive, BoundsPosition::Contains);
247    self.match_any_conditional(&pattern_sets)
248  }
249
250  /// Test for presecnce of simple patterns in case-sensitive mode
251  fn contains_any_conditional_cs(&self, patterns: &[&str]) -> bool {
252    let pattern_sets: Vec<StringBounds> = strs_to_string_bounds(patterns, CaseMatchMode::Sensitive, BoundsPosition::Contains);
253    self.match_any_conditional(&pattern_sets)
254  }
255  
256}
257
258impl SimpleMatchAny for str {
259
260  // Test for multiple conditions. All other 'any' trait methods are derived from this
261  fn match_any_conditional(&self, pattern_sets: &[StringBounds]) -> bool {
262    for item in pattern_sets {
263      // if one rule is matched, return true as other rules need not be evaluated
264      if match_bounds_rule_set(self, item) {
265        return true;
266      }
267    }
268    // return false if no rules are matched or provided
269    false
270  }
271
272}
273
274/// Test if character set (CharType) is in the string
275pub trait SimplContainsType where Self:SimpleMatch {
276
277  /// contains characters in the specified set
278  fn contains_type(&self, char_type: CharType) -> bool;
279
280  /// contains characters in the specified sets
281  fn contains_types(&self, char_types: &[CharType]) -> bool;
282
283  /// starts with one or more characters in the specified set
284  fn starts_with_type(&self, char_type: CharType) -> bool;
285
286  /// starts with one or more characters in the specified set
287  fn starts_with_types(&self, char_types: &[CharType]) -> bool;
288
289  /// ends with one or more characters in the specified sets
290  fn ends_with_type(&self, char_type: CharType) -> bool;
291
292  /// ends with one or more characters in the specified sets
293  fn ends_with_types(&self, char_types: &[CharType]) -> bool;
294  
295}
296
297/// Implement character-set matching on &str/String
298impl SimplContainsType for str {
299
300  // test for multiple conditions. All other 'many' trait methods are derived from this
301  fn contains_type(&self, char_type: CharType) -> bool {
302    self.chars().any(|ch| char_type.is_in_range(&ch))
303  }
304
305  fn contains_types(&self, char_types: &[CharType]) -> bool {
306    self.chars().any(|ch| char_types.into_iter().any(|ct| ct.is_in_range(&ch)))
307  }
308
309   /// starts with one or more characters in the specified set
310   fn starts_with_type(&self, char_type: CharType) -> bool {
311    if let Some(first) = self.chars().nth(0) {
312      char_type.is_in_range(&first)
313    } else {
314      false
315    }
316   }
317
318   /// starts with one or more characters in the specified set
319   fn starts_with_types(&self, char_types: &[CharType]) -> bool {
320    if let Some(first) = self.chars().nth(0) {
321      char_types.into_iter().any(|ct| ct.is_in_range(&first))
322    } else {
323      false
324    }
325   }
326 
327   /// ends with one or more characters in the specified sets
328   fn ends_with_type(&self, char_type: CharType) -> bool {
329    if let Some(first) = self.chars().last() {
330      char_type.is_in_range(&first)
331    } else {
332      false
333    }
334   }
335 
336   /// ends with one or more characters in the specified sets
337   fn ends_with_types(&self, char_types: &[CharType]) -> bool {
338    if let Some(first) = self.chars().last() {
339      char_types.into_iter().any(|ct| ct.is_in_range(&first))
340    } else {
341      false
342    }
343   }
344   
345
346}
347
348
349/// Test multiple patterns and return a filtered vector of string slices by all pattern rules
350pub trait SimpleFilterAll<'a, T> {
351
352  /// test for multiple conditions. All other trait methods are derived from this
353  fn filter_all_conditional(&'a self, pattern_sets: &[StringBounds]) -> Vec<T>;
354
355  fn filter_all_rules(&'a self, rules: &BoundsBuilder) -> Vec<T> {
356    self.filter_all_conditional(&rules.as_vec())
357  }
358  
359}
360
361/// Filter strings by one or more StringBounds rules
362impl<'a> SimpleFilterAll<'a, &'a str> for [&str] {
363
364  // filter string slices by multiple conditions
365  fn filter_all_conditional(&'a self, pattern_sets: &[StringBounds]) -> Vec<&'a str> {
366    self.into_iter().map(|s| s.to_owned()).filter(|s| s.match_all_conditional(pattern_sets)).collect::<Vec<&'a str>>()
367  }
368
369}
370
371/// Variant implementation for owned strings
372impl<'a> SimpleFilterAll<'a, String> for [String] {
373  // filter strings by multiple conditions
374  fn filter_all_conditional(&'a self, pattern_sets: &[StringBounds]) -> Vec<String> {
375    self.into_iter().filter(|s| s.match_all_conditional(pattern_sets)).map(|s| s.to_owned()).collect::<Vec<String>>()
376  }
377
378}
379
380/// Test multiple patterns and return a filtered vector of string slices by any of the pattern rules
381pub trait SimpleFilterAny<'a, T> {
382
383  /// test for multiple conditions. All other trait methods are derived from this
384  fn filter_any_conditional(&'a self, pattern_sets: &[StringBounds]) -> Vec<T>;
385
386  fn filter_any_rules(&'a self, rules: &BoundsBuilder) -> Vec<T> {
387    self.filter_any_conditional(&rules.as_vec())
388  }
389  
390}
391
392/// Filter strings by one or more StringBounds rules
393impl<'a> SimpleFilterAny<'a, &'a str> for [&str] {
394
395  // filter string slices by multiple conditions
396  fn filter_any_conditional(&'a self, pattern_sets: &[StringBounds]) -> Vec<&'a str> {
397    self.into_iter().map(|s| s.to_owned()).filter(|s| s.match_any_conditional(pattern_sets)).collect::<Vec<&'a str>>()
398  }
399
400}
401
402/// Variant implementation for owned strings
403impl<'a> SimpleFilterAny<'a, String> for [String] {
404  // filter strings by multiple conditions
405  fn filter_any_conditional(&'a self, pattern_sets: &[StringBounds]) -> Vec<String> {
406    self.into_iter().filter(|s| s.match_any_conditional(pattern_sets)).map(|s| s.to_owned()).collect::<Vec<String>>()
407  }
408
409}