indexable_str/
lib.rs

1#![crate_name = "indexable_str"]
2
3use std::{
4    fmt::Display,
5    ops::{Index, Range, RangeFrom, RangeTo},
6};
7
/// A single `char` of the source string paired with the byte offset at which
/// its UTF-8 encoding starts.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
struct CharOffset {
    /// The decoded character.
    chr: char,
    /// Byte offset of the character's first byte in the underlying string.
    offset: usize,
}

/// `IndexableStr` is a `struct` for creating immutable string objects that make text parsing with Rust a bit more elegant.
///
/// Positions are counted in `char`s rather than bytes: `s[i]` yields the `i`-th `char` and
/// `&s[a..b]` yields the string slice that starts at the `a`-th `char` and stops before the
/// `b`-th `char`.
///
/// # Examples
/// ```
/// // Gets a char from a specified index.
/// use indexable_str::IndexableStr;
///
/// let s = IndexableStr::new("0😀2345678😀");
///
/// assert_eq!(s[1], '😀');
/// ```
///
/// ```
/// // Gets a string slice from a specified range.
/// use indexable_str::IndexableStr;
///
/// let s = IndexableStr::new("0😀2345678😀");
///
/// assert_eq!(&s[1..9], "😀2345678");
/// ```
///
/// ```
/// // Parses a string of signed integers, which are separated by whitespace
/// use regex::Regex;
/// use indexable_str::IndexableStr;
///
/// let text = IndexableStr::new("0 1 2\n  -11  -12  -13\n");
/// // Anchored with `^` so that only a number starting exactly at `cursor` is accepted;
/// // anything else falls through to the `panic!` below instead of being skipped.
/// let signed_integer_pattern: Regex = Regex::new(r"^(?:0|-?[1-9]\d*)\b").unwrap();
/// let mut signed_integer_vec: Vec<i64> = Vec::new();
/// let mut cursor: usize = 0;
///
/// while cursor < text.len() {
///     let c = text[cursor];
///
///     match c {
///         ' ' | '\t' | '\r' | '\n' => {
///             cursor += 1;
///             continue;
///         },
///         _ => (),
///     }
///
///     if let Some(captures) = signed_integer_pattern.captures(&text[cursor..]) {
///         let num_string = captures[0].to_string();
///         let num = num_string.parse::<i64>();
///         signed_integer_vec.push(num.unwrap());
///
///         // The matched text is ASCII, so its byte length equals its `char` count.
///         cursor += num_string.len();
///
///         continue;
///     }
///
///     panic!("Unexpected character '{}' at position ({})!", c, cursor);
/// }
///
/// assert_eq!(signed_integer_vec.len(), 6);
/// assert_eq!(signed_integer_vec[0], 0);
/// assert_eq!(signed_integer_vec[1], 1);
/// assert_eq!(signed_integer_vec[2], 2);
/// assert_eq!(signed_integer_vec[3], -11);
/// assert_eq!(signed_integer_vec[4], -12);
/// assert_eq!(signed_integer_vec[5], -13);
/// ```
///
/// `IndexableStr` is designed to work well with all valid UTF-8 characters.
///
/// You should note that `IndexableStr` creates a vector of objects that each hold a `char` and the starting byte offset of that `char` in the underlying string as a `usize`. This requires additional memory resources. However, the convenience of `IndexableStr` should outweigh the additional memory requirements for most applications.
#[derive(Debug, Clone)]
pub struct IndexableStr<'a> {
    /// The borrowed source text.
    str: &'a str,
    /// Length of `str` in bytes.
    str_length: usize,
    /// One entry per `char` of `str`, in string order.
    chars_vec: Vec<CharOffset>,
    /// Number of `char`s in `str` (the length of `chars_vec`).
    chars_length: usize,
}
87
88impl<'a> IndexableStr<'a> {
89    /// Returns an indexable string.
90    /// # Arguments
91    /// 
92    /// * `str` - A string slice to be indexed.
93    /// # Examples
94    /// ```
95    /// use indexable_str::IndexableStr;
96    /// 
97    /// let s = IndexableStr::new("0😀2345678😀");
98    /// ```
99    pub fn new(str: &'a str) -> IndexableStr {
100        let mut current_offset: usize = 0;
101
102        let chars_vec: Vec<CharOffset> = str.chars().map(|c| {
103            let char_offset = CharOffset {
104                chr: c,
105                offset: current_offset,
106            };
107
108            let code_point: u32 = c as u32;
109
110            current_offset += (|| {
111                if code_point <= 0x7F {
112                    return 1;
113                }
114
115                if code_point <= 0x7FF {
116                    return 2;
117                }
118
119                if code_point <= 0xFFFF {
120                    return 3;
121                }
122
123                if code_point <= 0x10FFFF {
124                    return 4;
125                }
126
127                0
128            })();
129
130            char_offset
131        }).collect();
132
133        let chars_length: usize = chars_vec.len();
134
135        IndexableStr {
136            str,
137            str_length: str.len(),
138            chars_vec,
139            chars_length,
140        }
141    }
142
143    /// Returns the original string slice.
144    /// 
145    /// # Examples
146    /// ```
147    /// use indexable_str::IndexableStr;
148    /// 
149    /// let s = IndexableStr::new("0😀2345678😀");
150    /// 
151    /// assert_eq!(s.as_str(), "0😀2345678😀");
152    /// ```
153    pub fn as_str(&self) -> &'a str {
154        self.str
155    }
156
157    /// Returns a `usize` for the number of `char`s in the string.
158    /// 
159    /// # Examples
160    /// ```
161    /// use indexable_str::IndexableStr;
162    /// 
163    /// let s = IndexableStr::new("0😀2345678😀");
164    /// 
165    /// assert_eq!(s.len(), 10);
166    /// ```
167    pub fn len(&self) -> usize {
168        self.chars_length
169    }
170
171    fn create_str_from_range(&self, start_index: usize, end_index: usize) -> &str {
172        if end_index > self.chars_length {
173            panic!("Range end: ({end_index}) must be less than or equal to the number of UTF-8 characters in the string ({})!", self.chars_length);
174        }
175
176        if end_index < start_index {
177            panic!("Range end: ({end_index} must be greater than or equal to Range start: ({start_index})!")
178        }
179
180        let bytes_start: usize = self.chars_vec[start_index].offset;
181        let bytes_end: usize = match end_index {
182            _val if self.chars_length == end_index => self.str_length,
183            _ => self.chars_vec[end_index].offset,
184        };
185
186        &self.str[bytes_start..bytes_end]  
187    }
188}
189
190impl<'a> Display for IndexableStr<'a> {
191    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
192        write!(f, "{}", self.str)
193    }
194}
195
196impl<'a> Index<usize> for IndexableStr<'a> {
197    type Output = char;
198
199    fn index(&self, index: usize) -> &char {
200        &self.chars_vec[index].chr
201    }
202}
203
204/// # Panics
205/// * If the range end is greater than the number of characters in the string.
206/// * If the range end is less than the range start.
207impl<'a> Index<Range<usize>> for IndexableStr<'a> {
208    type Output = str;
209
210    fn index(&self, range: Range<usize>) -> &Self::Output {
211        self.create_str_from_range(range.start, range.end)
212    }
213}
214
215
216
217/// # Panics
218/// * If the range end is less than the range start.
219impl<'a> Index<RangeFrom<usize>> for IndexableStr<'a> {
220    type Output = str;
221
222    fn index(&self, index: RangeFrom<usize>) -> &Self::Output {
223        self.create_str_from_range(index.start, self.chars_length)
224    }
225}
226
227
228
229/// # Panics
230/// * If the range end is greater than the number of characters in the string.
231impl<'a> Index<RangeTo<usize>> for IndexableStr<'a> {
232    type Output = str;
233
234    fn index(&self, index: RangeTo<usize>) -> &Self::Output {
235        self.create_str_from_range(0, index.end)
236    }
237}
238
#[cfg(test)]
mod tests {
    use super::*;

    /// Ten `char`s; the second one is a four-byte emoji.
    const EMOJI_SECOND: &str = "0😀23456789";

    /// Ten `char`s; the second and the last one are four-byte emoji.
    const EMOJI_SECOND_AND_LAST: &str = "0😀2345678😀";

    #[test]
    fn test_as_str_works() {
        let s = IndexableStr::new(EMOJI_SECOND);

        assert_eq!(s.as_str(), "0😀23456789");
    }

    #[test]
    fn test_len_works() {
        let s = IndexableStr::new(EMOJI_SECOND);

        // Length is counted in `char`s, not bytes.
        assert_eq!(s.len(), 10);
    }

    #[test]
    fn test_to_string_works() {
        let s = IndexableStr::new(EMOJI_SECOND);

        assert_eq!(s.to_string(), "0😀23456789");
    }

    #[test]
    fn test_index_works() {
        let s = IndexableStr::new(EMOJI_SECOND);

        assert_eq!(s[1], '😀');
    }

    #[test]
    fn test_range_works() {
        let s = IndexableStr::new(EMOJI_SECOND);

        assert_eq!(&s[1..9], "😀2345678");
    }

    #[test]
    fn test_range_from_works() {
        let s = IndexableStr::new(EMOJI_SECOND);

        assert_eq!(&s[1..], "😀23456789");
    }

    #[test]
    fn test_range_to_works() {
        let s = IndexableStr::new(EMOJI_SECOND);

        assert_eq!(&s[..9], "0😀2345678");
    }

    #[test]
    fn test_range_when_last_character_is_multi_byte() {
        let s = IndexableStr::new(EMOJI_SECOND_AND_LAST);

        assert_eq!(&s[..10], "0😀2345678😀");
    }

    // `should_panic(expected = ..)` pins the reason for the panic, so an
    // unrelated panic (for example an out-of-bounds index) fails the test.
    #[test]
    #[should_panic(expected = "must be less than or equal to the number of UTF-8 characters")]
    fn test_range_with_ending_index_too_large() {
        let s = IndexableStr::new(EMOJI_SECOND_AND_LAST);

        let _ = s.create_str_from_range(0, 11);
    }

    #[test]
    #[should_panic(expected = "must be greater than or equal to Range start")]
    fn test_range_with_ending_index_is_less_than_the_starting_index() {
        let s = IndexableStr::new(EMOJI_SECOND_AND_LAST);

        let _ = s.create_str_from_range(20, 10);
    }
}
313}