utf8_slice/
lib.rs

//! # UTF8 Slice
//! A lightweight heapless way to do slicing on unicode strings in Rust.
//!
//! # What does the library provide
//! This library provides 4 utility functions to deal with unicode slices.
//! All indices and lengths are counted in characters rather than bytes.
//!
//! ## `utf8_slice::slice(s: &str, begin: usize, end: usize) -> &str`
//! This will do the same as `&s[begin..end]`, but indexing by utf8 characters instead of bytes.
//!
//! ## `utf8_slice::from(s: &str, begin: usize) -> &str`
//! This will do the same as `&s[begin..]`, but indexing by utf8 characters instead of bytes.
//!
//! ## `utf8_slice::till(s: &str, end: usize) -> &str`
//! This will do the same as `&s[..end]`, but indexing by utf8 characters instead of bytes.
//!
//! ## `utf8_slice::len(s: &str) -> usize`
//! This will do the same as `s.len()`, but counting utf8 characters instead of bytes.
//!
//! # License
//! MIT
//!
//! # Examples
//!
//! ```
//! let s = "The 🚀 goes to the 🌑!";
//!
//! let rocket = utf8_slice::slice(s, 4, 5);
//! # assert_eq!(utf8_slice::slice(s, 4, 5), "🚀");
//! // Will equal "🚀"
//! ```
30
/// Fetches a slice of a string from a begin to an end index
/// taking into account utf8/unicode character indices.
///
/// Indices count characters (Rust `char`s, i.e. Unicode scalar values)
/// rather than bytes. The range is half-open: the character at `begin` is
/// included, the character at `end` is not.
///
/// # Arguments
///
/// * `s` - An input string to take the slice from
/// * `begin` - Character index where the slice begins (inclusive)
/// * `end` - Character index where the slice ends (exclusive)
///
/// # Examples
///
/// ```
/// let s = "The 🚀 goes to the 🌑!";
///
/// let rocket = utf8_slice::slice(s, 4, 5);
/// # assert_eq!(utf8_slice::slice(s, 4, 5), "🚀");
/// // Will equal "🚀"
/// ```
///
/// # Note
///
/// Out-of-range indices do not panic:
///
/// * an empty string is returned when `end < begin`, or when `begin` is at
///   or past the end of the string;
/// * an `end` past the end of the string is clamped, so the slice runs to
///   the end of `s`.
pub fn slice(s: &str, begin: usize, end: usize) -> &str {
    if end < begin {
        return "";
    }

    // Byte offsets of the character boundaries, produced lazily so the walk
    // stops at character `end` instead of decoding the whole string.
    let mut boundaries = s.char_indices().map(|(pos, _)| pos);

    let start = match boundaries.nth(begin) {
        Some(pos) => pos,
        // `begin` is at or past the end of the string.
        None => return "",
    };

    let stop = if end == begin {
        start
    } else {
        // `nth(begin)` consumed characters `0..=begin`, so the iterator now
        // sits on character `begin + 1`; skipping `end - begin - 1` more
        // lands on character `end`. Running out of characters means `end`
        // is past the string, in which case the slice extends to its end.
        boundaries.nth(end - begin - 1).unwrap_or(s.len())
    };

    &s[start..stop]
}
71
/// Fetches a slice of a string from a starting index
/// taking into account utf8/unicode character indices.
///
/// `begin` counts characters (Rust `char`s, i.e. Unicode scalar values)
/// rather than bytes; the returned slice runs from that character to the
/// end of the string.
///
/// # Arguments
///
/// * `s` - An input string to take the slice from
/// * `begin` - Character index where the slice begins (inclusive)
///
/// # Examples
///
/// ```
/// let s = "The 🚀 goes to the 🌑!";
///
/// let rocket_goes_to_the_moon = utf8_slice::from(s, 4);
/// # assert_eq!(utf8_slice::from(s, 4), "🚀 goes to the 🌑!");
/// // Will equal "🚀 goes to the 🌑!"
/// ```
///
/// # Note
///
/// Will return an empty string when `begin` is at or past the end of the
/// string; it does not panic.
pub fn from(s: &str, begin: usize) -> &str {
    // A single walk to the `begin`-th character boundary is enough: there is
    // no need to count the whole string first just to name its end.
    match s.char_indices().nth(begin) {
        Some((start, _)) => &s[start..],
        None => "",
    }
}
95
/// Fetches a slice of a string until an ending index
/// taking into account utf8/unicode character indices.
///
/// `end` counts characters (Rust `char`s, i.e. Unicode scalar values)
/// rather than bytes; the returned slice holds the first `end` characters
/// of the string.
///
/// # Arguments
///
/// * `s` - An input string to take the slice from
/// * `end` - Character index where the slice ends (exclusive)
///
/// # Examples
///
/// ```
/// let s = "The 🚀 goes to the 🌑!";
///
/// let the_rocket = utf8_slice::till(s, 5);
/// # assert_eq!(utf8_slice::till(s, 5), "The 🚀");
/// // Will equal "The 🚀"
/// ```
///
/// # Note
///
/// An `end` past the end of the string is clamped: the whole string is
/// returned and no panic occurs. An `end` of `0` yields an empty string.
pub fn till(s: &str, end: usize) -> &str {
    // The byte offset of character `end` is exactly where the prefix stops.
    // If there is no such character, the prefix is the entire string.
    match s.char_indices().nth(end) {
        Some((stop, _)) => &s[..stop],
        None => s,
    }
}
119
/// Counts the characters of an utf8/unicode string.
///
/// A character here is a Rust `char` (a Unicode scalar value), so the
/// result may be smaller than the byte length reported by `str::len`, and
/// an emoji built from several scalar values counts once per scalar value.
///
/// # Arguments
///
/// * `s` - The string of which to fetch the length
pub fn len(s: &str) -> usize {
    s.chars().fold(0, |seen, _| seen + 1)
}
128
#[cfg(test)]
mod tests {
    use super::{from, len, slice, till};

    /// On pure-ASCII input every character is one byte, so character-based
    /// slicing must agree with the built-in byte-based slicing.
    #[test]
    fn test_same_as_std_slice() {
        let s = "xjfdlskfaj sdfjlkj";
        for begin in 0..s.len() {
            for end in begin..=s.len() {
                assert_eq!(&s[begin..end], slice(s, begin, end));
            }
        }
    }

    /// `slice` on strings mixing multi-byte characters with ASCII.
    #[test]
    fn test_slice() {
        // (input, begin, end, expected)
        let cases = [
            ("\u{345}ab\u{898}xyz", 1, 4, "ab\u{898}"),
            ("\u{345}ab\u{898}xyz", 0, 4, "\u{345}ab\u{898}"),
            ("\u{345}ab\u{898}xyz", 5, 4, ""),
            ("\u{345}ab   \u{898}xyz", 0, 1, "\u{345}"),
            ("abcdef", 0, 6, "abcdef"),
            ("\u{345}ab\u{898}xyz", 1, 7, "ab\u{898}xyz"),
        ];

        for &(input, begin, end, expected) in cases.iter() {
            assert_eq!(slice(input, begin, end), expected);
        }
    }

    /// `from` keeps everything starting at the given character index.
    #[test]
    fn test_from() {
        // (input, begin, expected)
        let cases = [
            ("\u{345}ab\u{898}xyz", 1, "ab\u{898}xyz"),
            ("\u{345}ab\u{898}xyz", 3, "\u{898}xyz"),
            ("\u{345}ab\u{898}xyz", 10, ""),
            ("\u{345}ab   \u{898}xyz", 0, "\u{345}ab   \u{898}xyz"),
        ];

        for &(input, begin, expected) in cases.iter() {
            assert_eq!(from(input, begin), expected);
        }
    }

    /// `till` keeps everything before the given character index.
    #[test]
    fn test_till() {
        // (input, end, expected)
        let cases = [
            ("\u{345}ab\u{898}xyz", 1, "\u{345}"),
            ("\u{345}ab\u{898}xyz", 3, "\u{345}ab"),
            ("\u{345}ab\u{898}xyz", 0, ""),
        ];

        for &(input, end, expected) in cases.iter() {
            assert_eq!(till(input, end), expected);
        }
    }

    /// `len` counts `char`s, so a joined emoji sequence counts once per
    /// scalar value it is built from.
    #[test]
    fn test_len() {
        // (input, expected character count)
        let cases = [("", 0), ("👨‍🚀", 3), ("abc", 3), ("abd👨‍🚀", 6)];

        for &(input, expected) in cases.iter() {
            assert_eq!(len(input), expected);
        }
    }
}