1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
use regex::{Captures, Match, Regex};

use crate::utils::{build_regex, build_whole_word_pattern};

/// Set of methods to capture groups or match objects derived from Regex::captures.
///
/// Every method takes a regular expression `pattern` and a `case_insensitive` flag.
/// Implementors supply `pattern_captures`, `pattern_matches_as_vec` and
/// `pattern_first_match`; all other methods are provided and built on those three.
pub trait PatternCapture<'a> {

  /// Yields an option with Regex::Captures as returned from re.captures.
  /// Accepts a boolean case_insensitive flag.
  fn pattern_captures(&self, pattern: &str, case_insensitive: bool) -> Option<Captures>;

  /// Yields a vector of Match objects in one of two modes:
  /// - `outer == true`: whole-pattern matches only
  /// - `outer == false`: unique matched groups and sub-groups
  ///
  /// Prefer the wrappers `pattern_matches_vec` or `pattern_matches_outer`.
  fn pattern_matches_as_vec(&'a self, pattern: &str, case_insensitive: bool, outer: bool) -> Vec<Match>;

  /// Yields a vector of Match objects with start and end index + the captured string.
  /// Unlike pattern_captures, this method only returns unique matches, including sub-groups.
  /// Equivalent to `pattern_matches_as_vec(pattern, case_insensitive, false)`.
  fn pattern_matches_vec(&'a self, pattern: &str, case_insensitive: bool) -> Vec<Match<'a>> {
    self.pattern_matches_as_vec(pattern, case_insensitive, false)
  }

  /// Yields a vector of Match objects with start and end index + the captured string.
  /// Unlike pattern_matches_vec, this method only returns the outer matches for the whole pattern.
  /// Equivalent to `pattern_matches_as_vec(pattern, case_insensitive, true)`.
  fn pattern_matches_outer(&'a self, pattern: &str, case_insensitive: bool) -> Vec<Match<'a>> {
    self.pattern_matches_as_vec(pattern, case_insensitive, true)
  }

  /// Yields an option with the first match object if available.
  /// Implementations may use re.find here, which is faster than collecting every
  /// match as pattern_last_match has to do.
  fn pattern_first_match(&'a self, pattern: &str, case_insensitive: bool) -> Option<Match<'a>>;

  /// Yields an option with the last match object if available.
  ///
  /// This is the final element of `pattern_matches_vec`, so for a pattern with
  /// capture groups it may be the last captured sub-group rather than a whole match.
  fn pattern_last_match(&'a self, pattern: &str, case_insensitive: bool) -> Option<Match> {
    self.pattern_matches_vec(pattern, case_insensitive).last().copied()
  }

  /// Returns an option with a pair of match objects: the first and last elements
  /// of `pattern_matches_vec`, or None when nothing matched.
  /// If the list holds a single element, both match objects have the same indices.
  fn pattern_first_last_matches(&'a self, pattern: &str, case_insensitive: bool) -> Option<(Match, Match)> {
    let matched_segments = self.pattern_matches_vec(pattern, case_insensitive);
    let first = *matched_segments.first()?;
    let last = *matched_segments.last()?;
    Some((first, last))
  }

  /// Yields an option with an unsigned integer for the index of the start of the first match
  /// with a boolean case_insensitive flag
  fn pattern_first_index(&'a self, pattern: &str, case_insensitive: bool) -> Option<usize> {
    self.pattern_first_match(pattern, case_insensitive).map(|first| first.start())
  }

  /// Yields an option with an unsigned integer for the index of the end of the first match
  /// with a boolean case_insensitive flag
  fn pattern_first_end_index(&'a self, pattern: &str, case_insensitive: bool) -> Option<usize> {
    self.pattern_first_match(pattern, case_insensitive).map(|first| first.end())
  }

  /// Yields an option with an unsigned integer for the index of the start of the last match
  /// (as defined by pattern_last_match) with a boolean case_insensitive flag
  fn pattern_last_start_index(&'a self, pattern: &str, case_insensitive: bool) -> Option<usize> {
    // Must go through the last match; using the first match here would make this
    // method indistinguishable from pattern_first_index.
    self.pattern_last_match(pattern, case_insensitive).map(|last| last.start())
  }

  /// Yields an option with an unsigned integer for the index of the end of the last match
  /// (as defined by pattern_last_match) with a boolean case_insensitive flag
  fn pattern_last_index(&'a self, pattern: &str, case_insensitive: bool) -> Option<usize> {
    // Must go through the last match; using the first match here would make this
    // method indistinguishable from pattern_first_end_index.
    self.pattern_last_match(pattern, case_insensitive).map(|last| last.end())
  }

  /// Counts the number of matches with a boolean case_insensitive flag.
  /// The count is the length of `pattern_matches_vec`, so unique captured
  /// sub-groups are counted alongside whole matches.
  fn count_pattern(&'a self, pattern: &'a str, case_insensitive: bool) -> usize {
    self.pattern_matches_vec(pattern, case_insensitive).len()
  }

  /// Counts the number of matches of `word`, using the pattern produced by
  /// `build_whole_word_pattern(word)`, with a boolean case_insensitive flag.
  fn count_word(&'a self, word: &'a str, case_insensitive: bool) -> usize {
    let pattern = build_whole_word_pattern(word);
    self.pattern_matches_vec(&pattern, case_insensitive).len()
  }
}


/// This function is the basis for both pattern_matches_vec() and pattern_matches_outer()
/// and will be used with string-patterns-extras to replicate look-ahead and look-behind behaviour.
///
/// Returns a flattened vector of Match objects together with the compiled Regex:
/// - `outer == true`: one Match per whole-pattern match (capture group 0 only).
/// - `outer == false`: every participating group of every match (group 0 and
///   sub-groups), in order of discovery, skipping any group whose byte span has
///   already been recorded.
///
/// If `build_regex` fails for `pattern`, the result is an empty vector and `None`.
pub fn find_matches_within_haystack<'a>(haystack: &'a str, pattern: &str, case_insensitive: bool, outer: bool) -> (Vec<Match<'a>>, Option<Regex>) {
  let re = match build_regex(pattern, case_insensitive) {
    Ok(re) => re,
    Err(_) => return (Vec::new(), None),
  };
  let matched_items: Vec<Match<'a>> = if outer {
    // Group 0 of each capture set is the overall match, which is exactly what
    // find_iter yields, without the cost of resolving the sub-groups.
    re.find_iter(haystack).collect()
  } else {
    // Every Match is a slice of the same haystack, so its (start, end) span
    // identifies it uniquely; a hash set keeps the duplicate check constant-time
    // while the vector preserves first-seen order.
    let mut seen_spans: std::collections::HashSet<(usize, usize)> = std::collections::HashSet::new();
    let mut unique_items: Vec<Match<'a>> = Vec::new();
    for inner_captures in re.captures_iter(haystack) {
      // Groups that did not participate in the match are None and are skipped.
      for matched_item in inner_captures.iter().flatten() {
        if seen_spans.insert((matched_item.start(), matched_item.end())) {
          unique_items.push(matched_item);
        }
      }
    }
    unique_items
  };
  (matched_items, Some(re))
}

impl<'a> PatternCapture<'a> for str {

  /// Compiles `pattern` (honouring `case_insensitive`) and returns the captures
  /// of its first match in this string.
  /// Yields None when the pattern cannot be built or nothing matches.
  fn pattern_captures(&self, pattern: &str, case_insensitive: bool) -> Option<Captures> {
    let compiled = build_regex(pattern, case_insensitive).ok()?;
    compiled.captures(self)
  }

  /// Delegates to find_matches_within_haystack and keeps only the list of
  /// matches; the compiled regex it also returns is discarded.
  fn pattern_matches_as_vec(&'a self, pattern: &str, case_insensitive: bool, outer: bool) -> Vec<Match<'a>> {
    find_matches_within_haystack(self, pattern, case_insensitive, outer).0
  }

  /// Yields an option with the first match object if available, with a boolean case_insensitive flag.
  /// Calls re.find directly rather than going through the larger
  /// find_matches_within_haystack function, so only the first match is searched for.
  /// Yields None when the pattern cannot be built or nothing matches.
  fn pattern_first_match(&'a self, pattern: &str, case_insensitive: bool) -> Option<Match<'a>> {
    let compiled = build_regex(pattern, case_insensitive).ok()?;
    compiled.find(self)
  }

}