stringslice/
lib.rs

1//! A collection of methods to slice strings based on character indices rather than bytes.
2//!
3//! This crate implements the [`StringSlice`] trait for [`&str`],
4//! containing the [`slice`], [`try_slice`], [`substring`], and [`try_substring`] methods.
5//!
6//! # Examples
7//!
8//! The [`slice`] method can be used to slice a [`&str`].
9//!
10//! ```
11//! use stringslice::StringSlice;
12//!
13//! assert_eq!("Ùníc😎de".slice(4..5), "😎");
14//! assert_eq!("世界こんにちは".slice(2..), "こんにちは");
15//! ```
16//!
17//! The [`substring`] method is provided for convenience and accepts
18//! separate parameters for the start and end of the slice.
19//!
20//! ```
21//! use stringslice::StringSlice;
22//!
23//! assert_eq!("Γεια σου κόσμε".substring(9, 14), "κόσμε");
24//! ```
25//!
26//! There are also equivalent [`try_slice`] and [`try_substring`] methods
27//! which return [`None`] for invalid input.
28//!
29//! ```
30//! use stringslice::StringSlice;
31//!
32//! assert_eq!("string".try_slice(4..2), None);
33//! ```
34//!
35//! [`StringSlice`]: trait.StringSlice.html
36//! [`&str`]: https://doc.rust-lang.org/std/primitive.str.html
37//! [`slice`]: trait.StringSlice.html#method.slice
38//! [`substring`]: trait.StringSlice.html#method.substring
39//! [`try_slice`]: trait.StringSlice.html#method.try_slice
40//! [`try_substring`]: trait.StringSlice.html#method.try_substring
41//! [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
42//!
43
44#![no_std]
45
46use core::ops::{Bound, RangeBounds};
47
/// Converts any `RangeBounds<usize>` into a half-open `(begin, end)` pair.
///
/// * An unbounded start becomes `0`.
/// * An unbounded end becomes `usize::MAX`.
/// * An excluded start and an included end are shifted by one so that the
///   result is always "inclusive begin, exclusive end".
///
/// The shift saturates at `usize::MAX` instead of overflowing, so bounds such
/// as `..=usize::MAX` behave like an unbounded end rather than panicking in
/// debug builds or wrapping to `0` in release builds.
#[inline]
fn range_to_begin_end(range: impl RangeBounds<usize>) -> (usize, usize) {
    let begin = match range.start_bound() {
        Bound::Included(&b) => b,
        // Saturating: an excluded start of usize::MAX has no successor.
        Bound::Excluded(&b) => b.saturating_add(1),
        Bound::Unbounded => 0,
    };

    let end = match range.end_bound() {
        // Saturating: an included end of usize::MAX is as good as unbounded.
        Bound::Included(&b) => b.saturating_add(1),
        Bound::Excluded(&b) => b,
        // Note: using core::usize::MAX rather than usize::MAX for compatibility with Rust < 1.43
        Bound::Unbounded => core::usize::MAX,
    };

    (begin, end)
}
65
/// Character-indexed slicing for string types.
///
/// Every index accepted by these methods counts characters (`char`s), not bytes.
/// [`slice`] and [`try_slice`] take a range, whereas [`substring`] and
/// [`try_substring`] take explicit begin and end indices.
///
/// In the implementation for `str`, indices that lie past the last character
/// are clamped to the end of the string.
///
/// [`slice`]: trait.StringSlice.html#method.slice
/// [`substring`]: trait.StringSlice.html#method.substring
/// [`try_slice`]: trait.StringSlice.html#method.try_slice
/// [`try_substring`]: trait.StringSlice.html#method.try_substring
pub trait StringSlice {
    /// Slices out the characters covered by `range`.
    ///
    /// # Panics
    ///
    /// Panics when the range is invalid, for instance when it begins after it ends.
    ///
    /// # Examples
    /// ```
    /// use stringslice::StringSlice;
    ///
    /// assert_eq!("Ùníc😎de".slice(4..5), "😎");
    /// ```
    fn slice(&self, range: impl RangeBounds<usize>) -> &str;

    /// Non-panicking counterpart of `slice`: yields an [`Option`] holding the
    /// characters covered by `range`.
    ///
    /// The result is [`None`] when the range is invalid, for instance when it
    /// begins after it ends.
    ///
    /// # Examples
    /// ```
    /// use stringslice::StringSlice;
    ///
    /// assert_eq!("Ùníc😎de".try_slice(4..5), Some("😎"));
    /// ```
    /// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
    /// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
    fn try_slice(&self, range: impl RangeBounds<usize>) -> Option<&str>;

    /// Slices out the characters from index `begin` (inclusive) up to index
    /// `end` (exclusive).
    ///
    /// # Panics
    ///
    /// Panics when the parameters are invalid, for instance when `begin` is
    /// greater than `end`.
    ///
    /// # Examples
    /// ```
    /// use stringslice::StringSlice;
    ///
    /// assert_eq!("Ùníc😎de".substring(4, 5), "😎");
    /// ```
    fn substring(&self, begin: usize, end: usize) -> &str;

    /// Non-panicking counterpart of `substring`: yields an [`Option`] holding
    /// the characters from index `begin` (inclusive) up to index `end` (exclusive).
    ///
    /// The result is [`None`] when the parameters are invalid, for instance
    /// when `begin` is greater than `end`.
    ///
    /// # Examples
    /// ```
    /// use stringslice::StringSlice;
    ///
    /// assert_eq!("Ùníc😎de".try_substring(4, 5), Some("😎"));
    /// ```
    /// [`Option`]: https://doc.rust-lang.org/std/option/enum.Option.html
    /// [`None`]: https://doc.rust-lang.org/std/option/enum.Option.html#variant.None
    fn try_substring(&self, begin: usize, end: usize) -> Option<&str>;
}
129
130impl StringSlice for str {
131    #[inline]
132    fn slice(&self, range: impl RangeBounds<usize>) -> &str {
133        let (begin, end) = range_to_begin_end(range);
134        self.substring(begin, end)
135    }
136
137    #[inline]
138    fn try_slice(&self, range: impl RangeBounds<usize>) -> Option<&str> {
139        let (begin, end) = range_to_begin_end(range);
140        self.try_substring(begin, end)
141    }
142
143    #[inline]
144    fn substring(&self, begin: usize, end: usize) -> &str {
145        self.try_substring(begin, end)
146            .expect("begin < end when slicing string")
147    }
148
149    fn try_substring(&self, begin: usize, end: usize) -> Option<&str> {
150        if begin > end {
151            None
152        } else {
153            let mut ch_idx = self.char_indices().map(|(i, _c)| i);
154
155            let len = self.len();
156            let begin_ch = ch_idx.nth(begin).unwrap_or(len);
157            let end_ch = if end > begin {
158                ch_idx.nth(end - begin - 1).unwrap_or(len)
159            } else {
160                begin_ch
161            };
162
163            // Note (unsafe): Since we iterate character indices we can be sure that `begin_ch` and
164            // `end_ch` are on UTF-8 boundaries. For performance we use get_unchecked rather than
165            // simply indexing.
166            unsafe { Some(self.get_unchecked(begin_ch..end_ch)) }
167        }
168    }
169}
170
#[cfg(test)]
mod tests {
    use core::ops::Bound;

    use super::StringSlice;

    /// Multi-byte characters are addressed one character at a time.
    #[test]
    fn test_utf8() {
        let sample = "🗻∈🌏";
        let expected = ["🗻", "∈", "🌏"];
        for (idx, want) in expected.iter().enumerate() {
            assert_eq!(*want, sample.slice(idx..idx + 1));
        }
    }

    /// Ranges that cover no characters yield the empty string.
    #[test]
    fn test_zero_len() {
        let sample = "test";
        let nothing = "";
        assert_eq!(nothing, sample.slice(0..0));
        assert_eq!(nothing, sample.slice(..0));
        assert_eq!(nothing, sample.slice(1..1));
    }

    /// A range whose start lies after its end makes `slice` panic.
    #[test]
    #[should_panic]
    fn test_bad_range() {
        let reversed = 4..1;
        "string".slice(reversed);
    }

    /// The same reversed range is reported as `None` by `try_slice`.
    #[test]
    fn test_try_bad_range() {
        let outcome = "string".try_slice(4..1);
        assert_eq!(outcome, None);
    }

    /// An end index far beyond the string is clamped to the string's end.
    #[test]
    fn test_large_range() {
        let sample = "test_string";
        assert_eq!(sample.slice(0..500), "test_string");
    }

    /// Every flavour of range bound is accepted.
    #[test]
    fn test_range_types() {
        let sample = "test_string";
        assert_eq!(sample.slice(..), "test_string");
        assert_eq!(sample.slice(5..), "string");
        assert_eq!(sample.slice(..8), "test_str");
        assert_eq!(sample.slice(5..8), "str");
        assert_eq!(sample.slice(5..=7), "str");

        // Explicit bound pair: exclusive start, inclusive end.
        let bounds = (Bound::Excluded(4), Bound::Included(7));
        assert_eq!(sample.slice(bounds), "str");
    }
}