simple_string_patterns/
segments.rs

1use crate::{simple_match::*, utils::extract_string_element_by_index};
2
3/// Methods to split a longer strong on a separator and return a vector of strings,
4/// a tuple of two strings or single optional string segment
5/// Note some methods may return empty segments in the case of leading, trailing or repeated separators
6/// See notes below
7pub trait ToSegments {
8
9  /// Extract a vector of non-empty strings from a string-like object with a given separator
10  /// excluding leading, trailing or double separators
11  fn to_segments(&self, separator: &str) -> Vec<String>;
12
13  /// Extract a vector of strings from a string-like object with a given separator
14  fn to_parts(&self, separator: &str) -> Vec<String>;
15
16  /// Extract only the head before the first occurrence of a separator
17  fn to_head(&self, separator: &str) -> String;
18
19  /// Extract only the first segment before the first occurrence of a non-initial separator
20  fn to_first(&self, separator: &str) -> String;
21
22  /// Extract only the remainder after the first occurrence of a non-initial separator
23  fn to_remainder_end(&self, separator: &str) -> String;
24
25  /// Extract only the last segment after the last occurrence of a non-final separator
26  fn to_last(&self, separator: &str) -> String;
27
28  /// Extract only the beginning before the last segment following the last occurrence of a non-final separator
29  fn to_remainder_start(&self, separator: &str) -> String;
30
31  /// Extract only the last segment
32  fn to_end(&self, separator: &str) -> String;
33
34  /// Extract the start before the last occurrence of the separator
35  /// or the whole string if the separator is absent
36  fn to_start(&self, separator: &str) -> String;
37
38  /// Extract a non-empty segment identified by its index from the components of a string with a given separator
39  /// e.g. String::from("/User/maria/Documents") .to_segment(1) yields "maria"
40  /// with the leading slash separator ignored
41  /// A negative index parameter will start from the end ignoring trailing separators
42  fn to_segment(&self, separator: &str, index: i32) -> Option<String> {
43    let parts = self.to_segments(separator);
44    extract_string_element_by_index(parts, index)
45  }
46
47  /// Extract a part identified by its index from the components of a string with a given separator
48  /// e.g. String::from("10/11/2024") .to_parts(1) yields "11"
49  /// A negative index parameter will start from the end 
50  fn to_part(&self, separator: &str, index: i32) -> Option<String> {
51    let parts = self.to_parts(separator);
52    extract_string_element_by_index(parts, index)
53  }
54
55  /// Extract an inner segment via a set of separator + index tuples
56  fn to_inner_segment(&self, groups: &[(&str, i32)]) -> Option<String>;
57
58  /// extract the remainder after the head
59  fn to_tail(&self, separator: &str) -> String;
60
61  /// extract the first and last parts after the first occurrence of the separator
62  fn to_head_tail(&self, separator: &str) -> (String, String);
63
64  /// extract the first and last parts after the last occurrence of the separator
65  fn to_start_end(&self, separator: &str) -> (String, String);
66
67}
68
69/// Implement string segment split and capture method for String
70impl ToSegments for str {
71
72  /// Splits a string on the exact separator, whether initial, final or repeated.
73  /// May yield empty segments
74  fn to_parts(&self, separator: &str) -> Vec<String> {
75    let splitter = self.split(separator);
76    splitter.into_iter().map(|s| s.to_string()).collect::<Vec<String>>()
77  }
78
79  /// Splits a string on a separator, but only returns an array of non-empty strings
80  /// skipping leading, trailing or repeated separators that may otherwise yield empty strings
81  fn to_segments(&self, separator: &str) -> Vec<String> {
82    let splitter = self.split(separator);
83    splitter.into_iter().map(|s| s.to_string()).filter(|s| s.len() > 0).collect::<Vec<String>>()
84  }
85
86  /// Extract only the head as a string. If the separator is absent return the whole string
87  fn to_head(&self, separator: &str) -> String {
88    if let Some((head, _tail)) = self.split_once(separator) {
89      head.to_string()
90    } else {
91      self.to_owned()
92    }
93  }
94
95  /// Extract only the last segment after the last occurrence of a non-final separator
96  fn to_last(&self, separator: &str) -> String {
97    let separator_len = separator.len();
98    if self.ends_with(separator) && self.len() > separator_len {
99      let end_index = self.len() - separator_len;
100      self[0..end_index].to_string().to_end(separator)
101    } else {
102      self.to_end(separator)
103    }
104  }
105
106  /// extract the last segment whether empty or not
107  fn to_end(&self, separator: &str) -> String {
108    let (_start, end) = self.to_start_end(separator);
109    end
110  }
111
112  /// extract the start before last occurrence of the separator
113  /// or, if absent, return the whole string
114  fn to_start(&self, separator: &str) -> String {
115    let (start, _end) = self.to_start_end(separator);
116    start
117  }
118
119  /// extract the remainder after the first split 
120  /// or the whole string if the separator is absent
121  fn to_tail(&self, separator: &str) -> String {
122    let parts = self.to_parts(separator);
123    let num_parts = parts.len();
124    if num_parts > 0 {
125      parts[1..num_parts].join(separator)
126    } else {
127      self.to_owned()
128    }
129  }
130
131  /// Extract only the first segment before the first occurrence of a non-initial separator
132  fn to_first(&self, separator: &str) -> String {
133    let separator_len = separator.len();
134    if self.starts_with(separator) && self.len() > separator_len {
135      self[separator_len..self.len()].to_string().to_head(separator)
136    } else {
137      self.to_head(separator)
138    }
139  }
140
141  /// Extract only the remainder after the first occurrence of a non-initial separator
142  fn to_remainder_end(&self, separator: &str) -> String {
143    let separator_len = separator.len();
144    if self.starts_with(separator) && self.len() > separator_len {
145      self[separator_len..].to_string().to_tail(separator)
146    } else {
147      self.to_tail(separator)
148    }
149  }
150  
151  /// Extract only the beginning before the last segment following the last occurrence of a non-final separator
152  fn to_remainder_start(&self, separator: &str) -> String {
153    let separator_len = separator.len();
154    if self.ends_with(separator) && self.len() > separator_len {
155      let end_index = self.len() - separator_len;
156      self[0..end_index].to_string().to_start(separator)
157    } else {
158      self.to_start(separator)
159    }
160  }
161
162  /// extract an inner segment via a set of tuples with separators and indices.
163  /// e.g. [("/", 1), ("-", 2)] applied to "pictures/holiday-france-1983/originals" 
164  /// would match "1983" as an optional string
165  fn to_inner_segment(&self, groups: &[(&str, i32)]) -> Option<String> {
166    if groups.len() > 0 {
167      let mut matched: Option<String> = None;
168      let mut current_string = self.to_string();
169      for group in groups {
170        if current_string.len() > 0 {
171          let (separator, index) = group;
172          matched = current_string.to_segment(*separator, *index);
173          current_string = matched.clone().unwrap_or("".to_string());
174        }
175      }
176      matched
177    } else {
178      None
179    }
180  }
181
182  /// Extract a tuple of the head and remainder
183  /// like split_once but returninga tuple of strings
184  /// If the separator is absent or at the start, the first part will be empty
185  fn to_head_tail(&self, separator: &str) -> (String, String) {
186    if let Some((head, tail)) = self.split_once(separator) {
187      (head.to_string(), tail.to_string())
188    } else {
189      ("".to_owned(), self.to_owned())
190    }
191  }
192
193  /// Extract a tuple of the start and the last part
194  /// like split_once in reverse and returning a tuple of strings
195  /// If the separator is absent or at the end, the second part will be empty
196  fn to_start_end(&self, separator: &str) -> (String, String) {
197    if let Some((start, end)) = self.rsplit_once(separator) {
198      (start.to_string(), end.to_string())
199    } else {
200      (self.to_owned(), "".to_string())
201    }
202  }
203
204}
205
206
/// Methods to split a &str/String on the first matched separator character
pub trait ToSegmentsFromChars {

  /// Break a string into parts at every occurrence of any of the given characters
  fn split_on_any_char(&self, separators: &[char]) -> Vec<String>;

  /// Break a string into a head and a tail at the first instance of the first matching separator.
  /// When none of the separators match, the head is an empty string
  /// and the tail is the whole string
  fn to_head_tail_on_any_char(&self, separators: &[char]) -> (String, String);

  /// Break a string into a start and an end at the last instance of the first matching separator.
  /// When none of the separators match, the start is the whole string
  /// and the end is an empty string
  fn to_start_end_on_any_char(&self, separators: &[char]) -> (String, String);
}
223
224impl ToSegmentsFromChars for str {
225
226  /// Split a string on any of the referenced characters
227  fn split_on_any_char(&self, separators: &[char]) -> Vec<String> {
228    let mut parts: Vec<String> = Vec::new();
229    let mut has_match = false;
230    let mut indices: Vec<usize> = Vec::new();
231    for separator in separators {
232      for matched_index in self.find_char_indices(*separator) {
233        indices.push(matched_index);
234      }
235    }
236    indices.sort_by(|a, b| a.cmp(b));
237    let mut prev_start = 0;
238    for index in indices {
239      let segment = self[prev_start..index].to_string();
240      parts.push(segment);
241      has_match = true;
242      prev_start = index + 1;
243    }
244    if has_match {
245      parts.push(self[prev_start..].to_string());
246      parts
247    } else {
248      vec![self.to_owned()]
249    }
250  }
251
252  /// Split into head and tail components on the first occurrence of any of the referenced characters
253  fn to_head_tail_on_any_char(&self, separators: &[char]) -> (String, String) {
254    for ch in separators {
255      if self.contains(*ch) {
256        if let Some ((first, second)) = self.split_once(*ch) {
257          return (first.to_string(), second.to_string());
258        }
259      }
260    }
261    ("".to_owned(), self.to_string())
262  }
263
264  /// Split into start and end components on the last occurrence of any of the referenced characters
265  fn to_start_end_on_any_char(&self, separators: &[char]) -> (String, String) {
266    for ch in separators {
267      if self.contains(*ch) {
268        if let Some ((first, second)) = self.rsplit_once(*ch) {
269          return (first.to_string(), second.to_string());
270        }
271      }
272    }
273    (self.to_string(), "".to_owned())
274  }
275
276}