simple_string_patterns/
segments.rs

1use crate::{simple_match::*, utils::extract_string_element_by_index};
2
3/// Methods to split a longer strong on a separator and return a vector of strings,
4/// a tuple of two strings or single optional string segment
5/// Note some methods may return empty segments in the case of leading, trailing or repeated separators
6/// See notes below
7pub trait ToSegments {
8
9  /// Extract a vector of non-empty strings from a string-like object with a given separator
10  /// excluding leading, trailing or double separators
11  fn to_segments(&self, separator: &str) -> Vec<String>;
12
13  /// Extract a vector of strings from a string-like object with a given separator
14  fn to_parts(&self, separator: &str) -> Vec<String>;
15
16  /// Extract only the head before the first occurrence of a separator
17  fn to_head(&self, separator: &str) -> String;
18
19  /// Extract only the first segment before the first occurrence of a non-initial separator
20  fn to_first(&self, separator: &str) -> String;
21
22  /// Extract only the remainder after the first occurrence of a non-initial separator
23  fn to_remainder_end(&self, separator: &str) -> String;
24
25  /// Extract only the last segment after the last occurrence of a non-final separator
26  fn to_last(&self, separator: &str) -> String;
27
28  /// Extract only the beginning before the last segment following the last occurrence of a non-final separator
29  fn to_remainder_start(&self, separator: &str) -> String;
30
31  /// Extract only the last segment
32  fn to_end(&self, separator: &str) -> String;
33
34  /// Extract a non-empty segment identified by its index from the components of a string with a given separator
35  /// e.g. String::from("/User/maria/Documents") .to_segment(1) yields "maria"
36  /// with the leading slash separator ignored
37  /// A negative index parameter will start from the end ignoring trailing separators
38  fn to_segment(&self, separator: &str, index: i32) -> Option<String> {
39    let parts = self.to_segments(separator);
40    extract_string_element_by_index(parts, index)
41  }
42
43  /// Extract a part identified by its index from the components of a string with a given separator
44  /// e.g. String::from("10/11/2024") .to_parts(1) yields "11"
45  /// A negative index parameter will start from the end 
46  fn to_part(&self, separator: &str, index: i32) -> Option<String> {
47    let parts = self.to_parts(separator);
48    extract_string_element_by_index(parts, index)
49  }
50
51  /// Extract an inner segment via a set of separator + index tuples
52  fn to_inner_segment(&self, groups: &[(&str, i32)]) -> Option<String>;
53
54  /// extract the remainder after the head
55  fn to_tail(&self, separator: &str) -> String;
56
57  /// extract the first and last parts after the first occurrence of the separator
58  fn to_head_tail(&self, separator: &str) -> (String, String);
59
60  /// extract the first and last parts after the last occurrence of the separator
61  fn to_start_end(&self, separator: &str) -> (String, String);
62
63}
64
65/// Implement string segment split and capture method for String
66impl ToSegments for str {
67
68  /// Splits a string on the exact separator, whether initial, final or repeated.
69  /// May yield empty segments
70  fn to_parts(&self, separator: &str) -> Vec<String> {
71    let splitter = self.split(separator);
72    splitter.into_iter().map(|s| s.to_string()).collect::<Vec<String>>()
73  }
74
75  /// Splits a string on a separator, but only returns an array of non-empty strings
76  /// skipping leading, trailing or repeated separators that may otherwise yield empty strings
77  fn to_segments(&self, separator: &str) -> Vec<String> {
78    let splitter = self.split(separator);
79    splitter.into_iter().map(|s| s.to_string()).filter(|s| s.len() > 0).collect::<Vec<String>>()
80  }
81
82  /// Extract only the head as a string. If the separator is absent return the whole string
83  fn to_head(&self, separator: &str) -> String {
84    if let Some((head, _tail)) = self.split_once(separator) {
85      head.to_string()
86    } else {
87      self.to_owned()
88    }
89  }
90
91  /// Extract only the last segment after the last occurrence of a non-final separator
92  fn to_last(&self, separator: &str) -> String {
93    let separator_len = separator.len();
94    if self.ends_with(separator) && self.len() > separator_len {
95      let end_index = self.len() - separator_len;
96      self[0..end_index].to_string().to_end(separator)
97    } else {
98      self.to_end(separator)
99    }
100  }
101
102  /// extract the last segment whether empty or not
103  fn to_end(&self, separator: &str) -> String {
104    let parts = self.to_parts(separator);
105    if let Some(end) = parts.last() {
106      end.to_owned()
107    } else {
108      self.to_owned()
109    }
110  }
111
112  /// extract the remainder after the first split 
113  /// or the whole string if the separator is absent
114  fn to_tail(&self, separator: &str) -> String {
115    let parts = self.to_parts(separator);
116    let num_parts = parts.len();
117    if num_parts > 0 {
118      parts[1..num_parts].join(separator)
119    } else {
120      self.to_owned()
121    }
122  }
123
124  /// Extract only the first segment before the first occurrence of a non-initial separator
125  fn to_first(&self, separator: &str) -> String {
126    let separator_len = separator.len();
127    if self.starts_with(separator) && self.len() > separator_len {
128      self[separator_len..self.len()].to_string().to_head(separator)
129    } else {
130      self.to_head(separator)
131    }
132  }
133
134  /// Extract only the remainder after the first occurrence of a non-initial separator
135  fn to_remainder_end(&self, separator: &str) -> String {
136    let separator_len = separator.len();
137    if self.starts_with(separator) && self.len() > separator_len {
138      self[separator_len..].to_string().to_tail(separator)
139    } else {
140      self.to_tail(separator)
141    }
142  }
143  
144  /// Extract only the beginning before the last segment following the last occurrence of a non-final separator
145  fn to_remainder_start(&self, separator: &str) -> String {
146    let separator_len = separator.len();
147    if self.ends_with(separator) && self.len() > separator_len {
148      let end_index = self.len() - separator_len;
149      self[0..end_index].to_string().to_tail(separator)
150    } else {
151      self.to_tail(separator)
152    }
153  }
154
155  /// extract an inner segment via a set of tuples with separators and indices.
156  /// e.g. [("/", 1), ("-", 2)] applied to "pictures/holiday-france-1983/originals" 
157  /// would match "1983" as an optional string
158  fn to_inner_segment(&self, groups: &[(&str, i32)]) -> Option<String> {
159    if groups.len() > 0 {
160      let mut matched: Option<String> = None;
161      let mut current_string = self.to_string();
162      for group in groups {
163        if current_string.len() > 0 {
164          let (separator, index) = group;
165          matched = current_string.to_segment(*separator, *index);
166          current_string = matched.clone().unwrap_or("".to_string());
167        }
168      }
169      matched
170    } else {
171      None
172    }
173  }
174
175  /// Extract a tuple of the head and remainder
176  /// like split_once but returninga tuple of strings
177  /// If the separator is absent or at the start, the first part will be empty
178  fn to_head_tail(&self, separator: &str) -> (String, String) {
179    if let Some((head, tail)) = self.split_once(separator) {
180      (head.to_string(), tail.to_string())
181    } else {
182      ("".to_owned(), self.to_owned())
183    }
184  }
185
186  /// Extract a tuple of the start and the last part
187  /// like split_once in reverse and returning a tuple of strings
188  /// If the separator is absent or at the end, the second part will be empty
189  fn to_start_end(&self, separator: &str) -> (String, String) {
190    if let Some((start, end)) = self.rsplit_once(separator) {
191      (start.to_string(), end.to_string())
192    } else {
193      (self.to_owned(), "".to_string())
194    }
195  }
196
197}
198
199
/// Methods to split a &str/String on any one of a set of separator characters
pub trait ToSegmentsFromChars {

  /// Split a string into parts wherever any of the referenced characters occurs
  fn split_on_any_char(&self, separators: &[char]) -> Vec<String>;

  /// Split a string into a head and a tail at the first instance of the first matching separator.
  /// Separators are tried in the order given and the first one present in the string is used.
  /// If none of the separators is matched, the first element is
  /// an empty string and the second the whole string
  fn to_head_tail_on_any_char(&self, separators: &[char]) -> (String, String);

  /// Split a string into a start and an end at the last instance of the first matching separator.
  /// Separators are tried in the order given and the first one present in the string is used.
  /// If none of the separators is matched, the first element is
  /// the whole string and the second an empty string
  fn to_start_end_on_any_char(&self, separators: &[char]) -> (String, String);
}
216
217impl ToSegmentsFromChars for str {
218
219  /// Split a string on any of the referenced characters
220  fn split_on_any_char(&self, separators: &[char]) -> Vec<String> {
221    let mut parts: Vec<String> = Vec::new();
222    let mut has_match = false;
223    let mut indices: Vec<usize> = Vec::new();
224    for separator in separators {
225      for matched_index in self.find_char_indices(*separator) {
226        indices.push(matched_index);
227      }
228    }
229    indices.sort_by(|a, b| a.cmp(b));
230    let mut prev_start = 0;
231    for index in indices {
232      let segment = self[prev_start..index].to_string();
233      parts.push(segment);
234      has_match = true;
235      prev_start = index + 1;
236    }
237    if has_match {
238      parts.push(self[prev_start..].to_string());
239      parts
240    } else {
241      vec![self.to_owned()]
242    }
243  }
244
245  /// Split into head and tail components on the first occurrence of any of the referenced characters
246  fn to_head_tail_on_any_char(&self, separators: &[char]) -> (String, String) {
247    for ch in separators {
248      if self.contains(*ch) {
249        if let Some ((first, second)) = self.split_once(*ch) {
250          return (first.to_string(), second.to_string());
251        }
252      }
253    }
254    ("".to_owned(), self.to_string())
255  }
256
257  /// Split into start and end components on the last occurrence of any of the referenced characters
258  fn to_start_end_on_any_char(&self, separators: &[char]) -> (String, String) {
259    for ch in separators {
260      if self.contains(*ch) {
261        if let Some ((first, second)) = self.rsplit_once(*ch) {
262          return (first.to_string(), second.to_string());
263        }
264      }
265    }
266    (self.to_string(), "".to_owned())
267  }
268
269}