char_slice/
lib.rs

//! This crate provides the `CharSlice` trait, which is implemented for `str`
//! and provides the `char_slice` method for extracting a sequence of Unicode
//! code points from a string slice.
4
5use std::{ops, usize};
6
/// Adds the `char_slice` method to a type.
///
/// The type parameter `R` is the kind of range accepted by `char_slice`; it
/// is expected to be one of the range types found in `std::ops`.
pub trait CharSlice<R> {
    /// Returns the substring of `self` selected by the range `r`.
    ///
    /// Invalid input does not cause a failure: in that case the empty string
    /// is returned.
    fn char_slice(&self, r: R) -> &str;
}
16
17impl CharSlice<ops::RangeFull> for str {
18    #[inline]
19    fn char_slice(&self, _: ops::RangeFull) -> &str {
20        self
21    }
22}
23
24impl CharSlice<ops::RangeTo<usize>> for str {
25    #[inline]
26    fn char_slice(&self, r: ops::RangeTo<usize>) -> &str {
27        self.char_slice(0..r.end)
28    }
29}
30
31impl CharSlice<ops::RangeFrom<usize>> for str {
32    #[inline]
33    fn char_slice(&self, r: ops::RangeFrom<usize>) -> &str {
34        self.char_slice(r.start..usize::MAX)
35    }
36}
37
38impl CharSlice<ops::Range<usize>> for str {
39    #[inline]
40    fn char_slice(&self, r: ops::Range<usize>) -> &str {
41        char_slice(self, r.start, r.end)
42    }
43}
44
/// Returns `true` if `b` is the start of (or a complete) UTF-8 codepoint.
///
/// Only bytes in `128..192` (bit pattern `10xxxxxx`) are rejected; every
/// other byte value is accepted.
#[inline(always)]
fn utf8_start_byte(b: u8) -> bool {
    // Keep the two most significant bits and compare against the `10` prefix.
    (b & 0b1100_0000) != 0b1000_0000
}
50
51fn char_slice(s: &str, start: usize, end: usize) -> &str {
52    if end <= start { return "" }
53
54    let mut bidx = 0; // byte index
55    let mut cidx = 0; // char index
56
57    let mut start_idx = 0;
58
59    for b in s.bytes() {
60        if utf8_start_byte(b) {
61            if cidx == start {
62                start_idx = bidx;
63            }
64
65            if cidx == end {
66                return &s[start_idx..bidx];
67            }
68
69            cidx += 1;
70        }
71
72        bidx += 1;
73    }
74
75    // did not find start
76    if start >= cidx {
77        return ""
78    }
79
80    // did find start but not end
81    &s[start_idx..]
82}
83
/// Exercises `char_slice` with every supported range type on empty, ASCII and
/// multi-byte input.
#[test]
fn substr_test() {
    // Bounded ranges on empty and ASCII input.
    assert_eq!(  "".char_slice(0 .. 0), "");
    assert_eq!(  "".char_slice(0 .. 1), "");
    assert_eq!( "a".char_slice(1 .. 2), "");
    assert_eq!( "a".char_slice(0 .. 1), "a");
    assert_eq!( "a".char_slice(0 .. 2), "a");
    assert_eq!( "a".char_slice(0 .. 0), "");
    assert_eq!("ab".char_slice(0 .. 1), "a");
    assert_eq!("ab".char_slice(1 .. 2), "b");
    assert_eq!("ab".char_slice(0 .. 2), "ab");

    // Bounded ranges on multi-byte input: positions are characters, not bytes.
    assert_eq!("äöü".char_slice(0 .. 0), "");
    assert_eq!("äöü".char_slice(4 .. 5), "");
    assert_eq!("äöü".char_slice(0 .. 1), "ä");
    assert_eq!("äöü".char_slice(1 .. 2), "ö");
    assert_eq!("äöü".char_slice(2 .. 3), "ü");
    assert_eq!("äöü".char_slice(0 .. 2), "äö");
    assert_eq!("äöü".char_slice(1 .. 3), "öü");
    assert_eq!("äöü".char_slice(0 .. 3), "äöü");
    assert_eq!("äöü".char_slice(0 .. 4), "äöü");

    // Full range.
    assert_eq!(   "".char_slice(..), "");
    assert_eq!("äöü".char_slice(..), "äöü");

    // Ranges with only an upper bound.
    assert_eq!("äöü".char_slice(.. 0), "");
    assert_eq!("äöü".char_slice(.. 2), "äö");
    assert_eq!("äöü".char_slice(.. 4), "äöü");

    // Ranges with only a lower bound.
    assert_eq!("äöü".char_slice(0 ..), "äöü");
    assert_eq!("äöü".char_slice(1 ..), "öü");
    assert_eq!("äöü".char_slice(3 ..), "");
    assert_eq!("äöü".char_slice(4 ..), "");
}