string_patterns/
pattern_capture.rs

1use regex::{Captures, Match, Regex};
2
3use crate::utils::{build_regex, build_whole_word_pattern};
4
5/// Set of methods to capture groups or match objects derived from Regex::captures.
6pub trait PatternCapture<'a> {
7
8  /// Yields an option with Regex::Captures as returned from re.captures, Accepts a boolean case_insensitive flag
9  fn pattern_captures(&self, pattern: &str, case_insensitive: bool) -> Option<Captures>;
10
11  /// Yields a vector of Match objects with two modes, outer will whole groups only, otherwise uniqe matched groups and subgroups
12  /// Use either pattern_matches_vec or pattern_matches_outer
13  fn pattern_matches_as_vec(&'a self, pattern: &str, case_insensitive: bool, outer: bool) -> Vec<Match>;
14
15  /// Yields a vector of Match objects with start and end index + the captured string. Accepts a boolean case_insensitive flag
16  /// Unlike pattern_captures, this method will only return unique matches including subgroups
17  fn pattern_matches_vec(&'a self, pattern: &str, case_insensitive: bool) -> Vec<Match<'a>> {
18    self.pattern_matches_as_vec(pattern, case_insensitive, false)
19  }
20
21   /// Yields a vector of Match objects with start and end index + the captured string. Accepts a boolean case_insensitive flag
22  /// Unlike pattern_captures, this method will only outer matches for whole pattern
23  fn pattern_matches_outer(&'a self, pattern: &str, case_insensitive: bool) -> Vec<Match<'a>> {
24    self.pattern_matches_as_vec(pattern, case_insensitive, true)
25  }
26
27  /// Yields an option with first match object if available with a boolean case_insensitive flag
28  /// As this uses re.find it will be fast than the matching last_match method
29  fn pattern_first_match(&'a self, pattern: &str, case_insensitive: bool) -> Option<Match<'a>>;
30
31 /// Yields an option with last match object if available with a boolean case_insensitive flag
32 fn pattern_last_match(&'a self, pattern: &str, case_insensitive: bool) -> Option<Match> {
33   let matched_segments = self.pattern_matches_vec(pattern, case_insensitive);
34   matched_segments.last().map(|m| *m)
35 }
36
37 /// returns an option with a pair of match objects
38 /// If there is only one match the match objects will have the same indices
39 fn pattern_first_last_matches(&'a self, pattern: &str, case_insensitive: bool) -> Option<(Match, Match)> {
40   let matched_segments = self.pattern_matches_vec(pattern, case_insensitive);
41   if let Some(first) = matched_segments.get(0) {
42     if let Some(last) = matched_segments.last() {
43       return Some((*first, *last));
44     }
45   }
46   None
47 }
48
49 /// Yields an option with an unsigned integer for the index of the start of the last match
50 /// with a boolean case_insensitive flag
51 fn pattern_first_index(&'a self, pattern: &str, case_insensitive: bool) -> Option<usize> {
52   if let Some(first) = self.pattern_first_match(pattern, case_insensitive) {
53     Some(first.start())
54   } else {
55     None
56   }
57 }
58
59 /// Yields an option with an unsigned integer for the index of the end of the first match
60 /// with a boolean case_insensitive flag
61 fn pattern_first_end_index(&'a self, pattern: &str, case_insensitive: bool) -> Option<usize> {
62   if let Some(first) = self.pattern_first_match(pattern, case_insensitive) {
63     Some(first.end())
64   } else {
65     None
66   }
67 }
68
69 /// Yields an option with an unsigned integer for the index of the start of the last match
70 /// with a boolean case_insensitive flag
71 fn pattern_last_start_index(&'a self, pattern: &str, case_insensitive: bool) -> Option<usize> {
72   if let Some(first) = self.pattern_first_match(pattern, case_insensitive) {
73     Some(first.start())
74   } else {
75     None
76   }
77 }
78
79 // Yields an option with an unsigned integer for the index of the end of the last match
80 /// with a boolean case_insensitive flag
81 fn pattern_last_index(&'a self, pattern: &str, case_insensitive: bool) -> Option<usize> {
82   if let Some(first) = self.pattern_first_match(pattern, case_insensitive) {
83     Some(first.end())
84   } else {
85     None
86   }
87 }
88
89 // Counts the number of matches with a boolean case_insensitive flag
90 fn count_pattern(&'a self, pattern: &'a str, case_insensitive: bool) -> usize {
91   self.pattern_matches_vec(pattern, case_insensitive).len()
92 }
93
94 // Counts the number of matches with a boolean case_insensitive flag
95 fn count_word(&'a self, word: &'a str, case_insensitive: bool) -> usize {
96   let pattern = build_whole_word_pattern(word);
97   self.pattern_matches_vec(&pattern, case_insensitive).len()
98 }
99}
100
101
102/// This function is the basis for both pattern_matches_vec() and pattern_matches_outer()
103/// and will be used with string-patterns-extras to replicate look-ahead and look-behind behaviour
104/// It returns a flattened vector of Match objects
105/// The outer options limits the matches to the whole matched sequence and excludes inner groups
106pub fn find_matches_within_haystack<'a>(haystack: &'a str, pattern: &str, case_insensitive: bool, outer: bool) -> (Vec<Match<'a>>, Option<Regex>) {
107  let mut matched_items: Vec<Match<'a>> = Vec::new();
108  if let Ok(re) = build_regex(pattern, case_insensitive) {
109    let mut item_keys: Vec<(&str, usize, usize)> = Vec::new();
110    for inner_captures in re.captures_iter(haystack) {
111      for capture_opt in inner_captures.iter() {
112        if let Some(matched_item) = capture_opt {
113          let item_str = matched_item.as_str();
114          
115          let item_key = (item_str, matched_item.start(), matched_item.end());
116          let is_matched = if outer { 
117            true
118          } else {
119            item_keys.contains(&item_key) == false
120          };
121          if is_matched {
122            matched_items.push(matched_item.to_owned());
123            if !outer {
124              item_keys.push(item_key);
125            }
126          }
127          // if only capturing the first group of outer matches, break the inner loop here and move onto the next outer group
128          if outer {
129            break;
130          }
131        }
132      }
133    }
134    (matched_items, Some(re))
135  } else {
136    (matched_items, None)
137  }
138}
139
140/// Implementation for &str/String
141impl<'a> PatternCapture<'a> for str {
142
143  /// Yields an option with Regex::Captures as returned from re.captures, Accepts a boolean case_insensitive flag
144  fn pattern_captures(&self, pattern: &str, case_insensitive: bool) -> Option<Captures> {
145    if let Ok(re) = build_regex(pattern, case_insensitive) {
146      re.captures(self)
147    } else {
148      None
149    }
150  }
151
152  /// Returns vector of match objects. The outer options excludes inner match groups.
153  fn pattern_matches_as_vec(&'a self, pattern: &str, case_insensitive: bool, outer: bool) -> Vec<Match<'a>> {
154    let (matched_items, _rgx) = find_matches_within_haystack(self, pattern, case_insensitive, outer);
155    matched_items
156  }
157
158  /// Yields an option with first match object if available with a boolean case_insensitive flag
159  /// As this uses re.find it will be fast than the matching last_match method
160  /// Implemented here to shortcut the larger find_matches_within_haystack function
161  fn pattern_first_match(&'a self, pattern: &str, case_insensitive: bool) -> Option<Match<'a>> {
162    if let Ok(re) = build_regex(pattern, case_insensitive) {
163      re.find(self)
164    } else {
165      None
166    }
167  }
168
169}
170
171