indexing/
utf8.rs

1use {
2    crate::{
3        proof::{NonEmpty, Unknown},
4        traits::{Idx, TrustedContainer, TrustedItem},
5        Container, Index, IndexError,
6    },
7    core::ops,
8    debug_unreachable::debug_unreachable,
9};
10
/// Reports whether `byte` has the bit pattern of the first byte of a UTF-8
/// encoded code point.
///
/// The accepted patterns are `0xxx_xxxx`, `110x_xxxx`, `1110_xxxx` and
/// `1111_0xxx`. Continuation bytes (`10xx_xxxx`) and bytes of the form
/// `1111_1xxx` are rejected.
pub(crate) fn is_leading_byte(byte: u8) -> bool {
    match byte {
        0x00..=0x7F => true, // 0xxx_xxxx
        0xC0..=0xDF => true, // 110x_xxxx
        0xE0..=0xEF => true, // 1110_xxxx
        0xF0..=0xF7 => true, // 1111_0xxx
        _ => false,          // 10xx_xxxx or 1111_1xxx
    }
}
18
/// A UTF-8 string slice that holds exactly one code point.
///
/// The type is a `#[repr(transparent)]` wrapper around `str`; the `str`
/// implementation of `TrustedContainer::get_unchecked` in this file builds
/// `&Character` values by pointer-casting a `&str` that spans one code point.
/// Comparison, ordering and hashing are derived from the wrapped `str`.
#[repr(transparent)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Character(str);
23
24impl ops::Deref for Character {
25    type Target = str;
26
27    fn deref(&self) -> &Self::Target {
28        &self.0
29    }
30}
31
32impl Character {
33    pub fn as_char(&self) -> char {
34        self.chars()
35            .nth(0)
36            .unwrap_or_else(|| unsafe { debug_unreachable!() })
37    }
38}
39
40unsafe impl TrustedContainer for str {
41    type Item = Character;
42    type Slice = str;
43
44    fn unit_len(&self) -> usize {
45        self.len()
46    }
47
48    unsafe fn get_unchecked(&self, i: usize) -> &Self::Item {
49        debug_assert!(i < self.len());
50        debug_assert!(self.is_char_boundary(i));
51        let slice = self.get_unchecked(i..);
52        let byte_count = slice
53            .char_indices()
54            .map(|(i, _)| i)
55            .nth(1)
56            .unwrap_or_else(|| self.len() - i);
57        debug_assert!(slice.is_char_boundary(byte_count));
58        let code_point = slice.get_unchecked(..byte_count);
59        &*(code_point as *const str as *const Character)
60    }
61
62    unsafe fn slice_unchecked(&self, r: ops::Range<usize>) -> &Self::Slice {
63        debug_assert!(self.is_char_boundary(r.start));
64        debug_assert!(self.is_char_boundary(r.end));
65        debug_assert!(r.start < r.end);
66        self.get_unchecked(r)
67    }
68}
69
70unsafe impl TrustedItem<str> for Character {
71    type Unit = u8;
72
73    fn vet<'id, I: Idx>(
74        idx: I,
75        container: &Container<'id, str>,
76    ) -> Result<Index<'id, I, Unknown>, IndexError> {
77        match idx.as_usize() {
78            i if i < container.unit_len() => {
79                let leading_byte = unsafe {
80                    *container
81                        .untrusted()
82                        .as_bytes()
83                        .get_unchecked(idx.as_usize())
84                };
85                if is_leading_byte(leading_byte) {
86                    debug_assert!(container.untrusted().is_char_boundary(idx.as_usize()));
87                    unsafe { Ok(Index::new(idx)) }
88                } else {
89                    Err(IndexError::Invalid)
90                }
91            }
92            i if i == container.unit_len() => unsafe { Ok(Index::new(idx)) },
93            _ => Err(IndexError::OutOfBounds),
94        }
95    }
96
97    fn after<'id, I: Idx>(
98        this: Index<'id, I, NonEmpty>,
99        container: &Container<'id, str>,
100    ) -> Index<'id, I, Unknown> {
101        let len = container[this].len();
102        unsafe { Index::new(this.untrusted().add(len)) }
103    }
104
105    fn advance<'id, I: Idx>(
106        this: Index<'id, I, NonEmpty>,
107        container: &Container<'id, str>,
108    ) -> Option<Index<'id, I, NonEmpty>> {
109        let next = Self::after(this, container);
110        if next < container.end() {
111            unsafe { Some(Index::new_nonempty(next.untrusted())) }
112        } else {
113            None
114        }
115    }
116}
117
118#[cfg(feature = "std")]
119mod std_impls {
120    use super::*;
121    use std::string::String;
122
123    #[cfg_attr(feature = "doc", doc(cfg(feature = "std")))]
124    unsafe impl TrustedContainer for String {
125        type Item = Character;
126        type Slice = str;
127
128        fn unit_len(&self) -> usize {
129            self.len()
130        }
131
132        unsafe fn get_unchecked(&self, i: usize) -> &Self::Item {
133            <str as TrustedContainer>::get_unchecked(self, i)
134        }
135
136        unsafe fn slice_unchecked(&self, r: ops::Range<usize>) -> &Self::Slice {
137            <str>::get_unchecked(self, r)
138        }
139    }
140
141    #[cfg_attr(feature = "doc", doc(cfg(feature = "std")))]
142    unsafe impl TrustedItem<String> for Character {
143        type Unit = u8;
144
145        fn vet<'id, I: Idx>(
146            idx: I,
147            container: &Container<'id, String>,
148        ) -> Result<Index<'id, I, Unknown>, IndexError> {
149            Character::vet(idx, container.project())
150        }
151
152        fn after<'id, I: Idx>(
153            this: Index<'id, I, NonEmpty>,
154            container: &Container<'id, String>,
155        ) -> Index<'id, I, Unknown> {
156            Character::after(this, container.project())
157        }
158
159        fn advance<'id, I: Idx>(
160            this: Index<'id, I, NonEmpty>,
161            container: &Container<'id, String>,
162        ) -> Option<Index<'id, I, NonEmpty>> {
163            Character::advance(this, container.project())
164        }
165    }
166
167    #[cfg_attr(feature = "doc", doc(cfg(feature = "std")))]
168    impl<'id> Container<'id, String> {
169        pub fn project(&self) -> &Container<'id, str> {
170            unsafe { &*(&**self.untrusted() as *const str as *const Container<'id, str>) }
171        }
172    }
173}