substring_replace/
lib.rs

1pub use to_offset::*;
2
3/// Trait with extension methods to manipulate substrings by character indices
4/// behaving like similar methods in other languages
5pub trait SubstringReplace where Self:ToString {
6
7    /// Return a substring by start and end character index
8    /// With multibyte characters this will not be the same as the byte indices
9    /// used by str slices
10    fn substring<T: ToOffset>(&self, start: usize, end: T) -> &str;
11
12    /// Return a substring from the start and to a specified end character index
13    fn substring_start(&self, end: i64) -> &str {
14        let end_index = if end < 0 { self.char_len().checked_sub(end.abs() as usize).unwrap_or(0) } else { end as usize };
15        self.substring(0, end_index)
16    }
17
18    /// Return a substring from a specified start character index to a specified end
19    /// A negative offset represents character index from the end, e.g. if character length is 15, -5 translates to 10
20    /// If start index is greater than the max character index, the function will yield an empty string
21    fn substring_end(&self, start: i64) -> &str {
22        let max_index = self.char_len();
23        let start_index = if start < 0 { max_index.checked_sub(start.abs() as usize).unwrap_or(0) } else { start as usize };
24        self.substring(start_index, max_index)
25    }
26
27    /// Return a substring by start and end character index
28    /// Unlike the default substring() method, the end index may be negative,
29    /// in which case it counts backwards from the end, e.g. if character length is 15, -5 translates to 10
30    #[deprecated(since = "0.2.1", note = "Use `substring` instead")]
31    fn substring_range(&self, start: usize, end: i64) -> &str {
32        self.substring(start, end)
33    }
34
35
36    // Replace substring delimited by start and end character index
37    // with any string (&str)
38    // To inject a string use substring_insert()
39    fn substring_replace<T: ToOffset>(&self, replacement: &str, start: usize, end: T) -> String;
40
41    /// Replace substring delimited by start and end character index
42    /// Unlike the default substring_replace() method, the end index may be negative,
43    /// in which case it counts backwards from the end, e.g. if character length is 15, -5 translates to 10
44    #[deprecated(since = "0.2.1", note = "Use `substring` instead")]
45    fn substring_replace_range(&self, replacement: &str, start: usize, end: i64) -> String {
46        self.substring_replace(replacement, start, end)
47    }
48
49
50    /// Replace the start of a string to specified end character index
51    /// e.g. "brown".substring_replace_start("d", 2);
52    /// will replace the first two characters with "d", yield "down"
53    /// A negative offset represents character index from the end, e.g. if character length is 15, -5 translates to 10
54    fn substring_replace_start(&self, replacement: &str, end: i64) -> String {
55        let end_index = if end < 0 { self.char_len().saturating_sub(end.abs() as usize) } else { end as usize };
56        self.substring_replace(replacement, 0, end_index)
57    }
58
59    /// Replace the remainder of string from a specified start character index
60    /// e.g. "blue".substring_replace_last("ack", 2);
61    /// will replace the last 2 characters with "ack", yielding "black"
62    /// A negative offset represents character index from the end, e.g. if character length is 15, -5 translates to 10
63    fn substring_replace_end(&self, replacement: &str, start: i64) -> String {
64        let end = self.char_len();
65        let start_index = if start < 0 { end.saturating_sub(start.abs() as usize) } else { start as usize };
66        self.substring_replace(replacement, start_index, end)
67    }
68
69    /// Remove a string delimited by a start and end character index
70    /// e.g. "abcde".substring_remove(2, 4);
71    /// will remove characters with indices of 2 and 3 (3rd and 4th or c and d)
72    /// resulting in "abe", i.e. the opposite behaviour to substring()
73    fn substring_remove(&self, start: usize, end: usize) -> String {
74        self.substring_replace("", start, end)
75    }
76
77    /// Extract a substring from a start index for n characters to the right
78    /// A negative length in the second parameter will start at the start index
79    fn substring_offset(&self, position: usize, length: i32) -> &str {
80        let (start, end) = position_and_offset_to_start_end(position, length);
81        self.substring(start, end)
82    }
83
84    /// Remove a string from a start position to given length
85    /// negative lengths will remove characters to the left
86    /// e.g. "abcde".substring_remove(3, -3);
87    /// will remove characters with indices of 1 and 2 (2nd and 3rd or b and c)
88    /// resulting in "ade", i.e. the opposite behaviour to substring_offset()
89    fn substring_pull(&self, position: usize, length: i32) -> String {
90        let (start, end) = position_and_offset_to_start_end(position, length);
91        self.substring_replace("", start, end)
92    }
93
94    /// Insert a string at a given character index
95    /// This differs from String::insert by using character rather than byte indices
96    /// to work better with multibyte characters
97    /// It's also implemented for &str, while returning a new owned string
98    fn substring_insert(&self, replacement: &str, start: usize) -> String {
99        self.substring_replace(replacement, start, start)
100    }
101
102    /// Convert character index to start byte index
103    fn to_start_byte_index(&self, start: usize) -> usize;
104
105    /// Convert character index to end byte index
106    fn to_end_byte_index(&self, start: usize) -> usize;
107
108    /// Return the character length rather than the byte length
109    fn char_len(&self) -> usize;
110
111    /// Return the character index rather than the byte index of the first match of a pattern
112    fn char_find(&self, pat: &str) -> Option<usize>;
113
114    /// Return the character index rather than the byte index of the last match of a pattern
115    /// this will be first index of the match
116    fn char_rfind(&self, pat: &str) -> Option<usize>;
117
118    /// Insert before or after the first or last occurrence
119    fn insert_adjacent(&self, insert: &str, pat: &str, before: bool, first: bool) -> String;
120
121    /// Insert before the first occurrence of a string
122    fn insert_before_first(&self, insert: &str, pat: &str) -> String {
123        self.insert_adjacent(insert, pat, true, true)
124    }
125
126    /// Insert before the first occurrence of a string
127    fn insert_before_last(&self, insert: &str, pat: &str) -> String {
128        self.insert_adjacent(insert, pat, true, false)
129    }
130
131    /// Insert after the last occurrence of a string
132    fn insert_after_first(&self, insert: &str, pat: &str) -> String {
133        self.insert_adjacent(insert, pat, false, true)
134    }
135
136    /// Insert after the last occurrence of a string
137    fn insert_after_last(&self, insert: &str, pat: &str) -> String {
138        self.insert_adjacent(insert, pat, false, false)
139    }
140
141    /// Insert between the first occurrence of a one string and the last occurrence of another
142    fn insert_between(&self, insert: &str, start_pat: &str, end_pat: &str) -> String {
143        if let Some(start_index) = self.char_find(start_pat) {
144            if let Some(end_index) = self.char_rfind(end_pat) {
145                return self.substring_replace(insert, start_index + 1, end_index);
146            }
147        }
148        self.to_string()
149    }
150
151    /// Insert between the first occurrence of a one string and the last occurrence of another
152    fn prepend(&self, insert: &str) -> String {
153        [insert.to_string(), self.to_string()].concat()
154    }
155
156    fn append(&self, insert: &str) -> String {
157        [self.to_string(), insert.to_string()].concat()
158    }
159
160}
161
162impl SubstringReplace for str {
163
164    /// Extract substring by character indices and hand overflow gracefully
165    /// if the end index is equal or greater than start index, the function will yield an empty string 
166    fn substring<T: ToOffset>(&self, start: usize, end: T) -> &str {
167        let end_index = end.to_offset(self.char_len());
168        if end_index > start {
169            &self[self.to_start_byte_index(start)..self.to_end_byte_index(end_index)]
170        } else {
171            ""
172        }
173    }
174
175    /// Replace a segment delimited by start and end characters indices with a string pattern (&str)
176    fn substring_replace<T: ToOffset>(&self, replacement: &str, start: usize, end: T) -> String {
177        let end_index = end.to_offset(self.char_len());
178        [&self[0..self.to_start_byte_index(start)], replacement, &self[self.to_end_byte_index(end_index)..]].concat()
179    }
180
181    /// Translate the character start index to the start byte index
182    /// to avoid boundary collisions with multibyte characters
183    fn to_start_byte_index(&self, start: usize) -> usize {
184        char_index_to_byte_index(self, start, false)
185    }
186
187    /// Translate the character end index to the end byte index
188    /// to avoid boundary collisions with multibyte characters
189    fn to_end_byte_index(&self, end: usize) -> usize {
190        char_index_to_byte_index(self, end, true)
191    }
192
193    /// Return the character length as opposed to the byte length
194    /// This will differ from len() only multibyte characters
195    fn char_len(&self) -> usize {
196        self.char_indices().count()
197    }
198
199    /// Return the character index of the first match of a given pattern
200    fn char_find(&self, pat: &str) -> Option<usize>{
201        extract_char_index(self, pat, false)
202    }
203
204    /// Return the character index rather than the byte index of the last match of a pattern
205    /// this will be first index of the match
206    fn char_rfind(&self, pat: &str) -> Option<usize>{
207        extract_char_index(self, pat, true)
208    }
209
210    /// Insert before or after the first or last occurrence
211    fn insert_adjacent(&self, insert: &str, pat: &str, before: bool, first: bool) -> String {
212        if let Some(index) = extract_char_index(self, pat, !first) {
213            let rel_index = if before {
214                index
215            } else {
216                index + 1
217            };
218            self.substring_insert(insert, rel_index)
219        } else {
220            self.to_string()
221        }
222    }
223}
224
/// Private function to convert a character index to the byte index required by `&str` slices.
///
/// Returns the byte offset at which the character with index `char_index` begins.
/// An index at or beyond the character length maps to `text.len()`, i.e. the end
/// of the text, for start and end positions alike. Mapping an out-of-range start
/// to 0 would make an insertion at the end discard the text before it.
///
/// The flag is kept so existing call sites compile unchanged; it no longer
/// alters the result.
fn char_index_to_byte_index(text: &str, char_index: usize, _to_end: bool) -> usize {
    text.char_indices()
        .nth(char_index)
        .map_or(text.len(), |(byte_index, _)| byte_index)
}
232
/// Private function to convert a position and a signed length into valid
/// start and end indices, where the end is never before the start.
///
/// A non-negative `length` spans `position..position + length`.
/// A negative `length` spans the characters to the left of `position`, i.e.
/// `position - |length|..position`, with the start clamped to 0.
fn position_and_offset_to_start_end(position: usize, length: i32) -> (usize, usize) {
    // unsigned_abs() cannot overflow on i32::MIN, unlike abs()
    let span = length.unsigned_abs() as usize;
    if length < 0 {
        // the range always ends at the position, even when the start was clamped
        (position.saturating_sub(span), position)
    } else {
        (position, position.saturating_add(span))
    }
}
254
/// Private function to extract the character index of a pattern (char sequence).
///
/// Returns the character index at which the first match of `pat` in `text`
/// begins, or the last match when `reverse` is true.
/// Returns `None` when the pattern does not occur or is empty.
fn extract_char_index(text: &str, pat: &str, reverse: bool) -> Option<usize> {
    // an empty pattern matches nothing meaningful, so report it as not found
    if pat.is_empty() {
        return None;
    }
    let byte_index = if reverse { text.rfind(pat) } else { text.find(pat) };
    // convert the byte offset of the match into a character index
    byte_index.map(|index| text[..index].chars().count())
}