Skip to main content

phpser/
str_trait.rs

1use std::str;
2
3/// Represents a string of data, either owned or referenced.
4///
5/// The data can either be a raw byte string (`[u8]`) or a UTF-8-checked string (`str`),
6/// and can be used either in the boxed type (`Vec<u8>`/`String`)
7/// or as a reference (`[u8]`/`str`).
8///
9/// # Safety
10/// See the safety sections in each method.
pub unsafe trait Str<'de>: 'de + Sized {
    /// Gets the length of the string.
    fn len(&self) -> usize;

    /// Expresses the string as a slice of bytes.
    fn as_bytes(&self) -> &[u8];

    /// Returns whether the string is empty, i.e. whether `len()` is zero.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Gets the character at offset `i`.
    /// Returns `None` if `i+1` is not a boundary.
    ///
    /// # Safety
    /// The offset `i` must be a position that
    /// the implementation previously inferred as a boundary
    /// and less than the result of `len()`.
    ///
    /// The implementation must not return `Some`
    /// unless `i+1` is also a boundary.
    unsafe fn get_u8_char(&self, i: usize) -> Option<u8>;

    /// Clones the characters from offset `i` (inclusive) to offset `j` (exclusive).
    /// Returns `None` if `j` is not a boundary.
    ///
    /// # Safety
    /// The offset `i` must be a position that
    /// the implementation previously inferred as a boundary.
    ///
    /// `i` and `j` must be less than the result of `len()`.
    ///
    /// The implementation must not return `Some`
    /// unless `j` is also a boundary.
    unsafe fn clone_slice(&self, i: usize, j: usize) -> Option<Self>;

    /// Finds the offset of the first occurrence of the ASCII character `char`
    /// after (but not including) offset `i`.
    ///
    /// # Safety
    /// The offset `i` should be a boundary,
    /// although this does not affect the implementation
    /// for UTF-8 strings.
    ///
    /// `i` must be less than the result of `len()`.
    ///
    /// `char` must be an ASCII character.
    ///
    /// If the returned value is `Some`,
    /// it must contain a boundary.
    unsafe fn find(&self, i: usize, char: u8) -> Option<usize>;

    /// Takes the subslice in bytes `i..`.
    ///
    /// # Safety
    /// The offset `i` must be a boundary,
    /// and hence must be `<= self.len()`.
    unsafe fn range_from(&self, i: usize) -> Self;

    /// Takes the subslice in bytes `i..j`.
    ///
    /// # Safety
    /// The offsets `i` and `j` must be boundaries,
    /// and hence must be `<= self.len()`.
    ///
    /// `i` must not be greater than `j`.
    unsafe fn range(&self, i: usize, j: usize) -> Self;
}
78
79unsafe impl<'de> Str<'de> for &'de str {
80    fn len(&self) -> usize {
81        str::len(*self)
82    }
83
84    fn as_bytes(&self) -> &[u8] {
85        str::as_bytes(*self)
86    }
87
88    unsafe fn get_u8_char(&self, i: usize) -> Option<u8> {
89        // safety assertions
90        debug_assert!(i < self.len());
91        debug_assert!(self.is_char_boundary(i));
92
93        if self.is_char_boundary(i + 1) {
94            Some(*self.as_bytes().get_unchecked(i))
95        } else {
96            None
97        }
98    }
99
100    unsafe fn clone_slice(&self, i: usize, j: usize) -> Option<Self> {
101        // safety assertions
102        debug_assert!(i < self.len());
103        debug_assert!(self.is_char_boundary(i));
104
105        if self.is_char_boundary(j) {
106            let bytes = str::as_bytes(*self).get_unchecked(i..j);
107            Some(str::from_utf8_unchecked(bytes)) // checked above
108        } else {
109            None
110        }
111    }
112
113    unsafe fn find(&self, i: usize, char: u8) -> Option<usize> {
114        self.as_bytes().find(i, char)
115    }
116
117    unsafe fn range_from(&self, i: usize) -> Self {
118        debug_assert!(i <= self.len());
119        debug_assert!(self.is_char_boundary(i));
120
121        self.get_unchecked(i..)
122    }
123
124    unsafe fn range(&self, i: usize, j: usize) -> Self {
125        debug_assert!(i <= j);
126        debug_assert!(j <= self.len());
127        debug_assert!(self.is_char_boundary(i));
128        debug_assert!(self.is_char_boundary(j));
129
130        self.get_unchecked(i..j)
131    }
132}
133
134unsafe impl<'de> Str<'de> for String {
135    fn len(&self) -> usize {
136        str::len(self.as_str())
137    }
138
139    fn as_bytes(&self) -> &[u8] {
140        str::as_bytes(self.as_str())
141    }
142
143    unsafe fn get_u8_char(&self, i: usize) -> Option<u8> {
144        self.as_str().get_u8_char(i)
145    }
146
147    unsafe fn clone_slice(&self, i: usize, j: usize) -> Option<Self> {
148        self.as_str().clone_slice(i, j).map(|s| s.to_string())
149    }
150
151    unsafe fn find(&self, i: usize, char: u8) -> Option<usize> {
152        self.as_bytes().find(i, char)
153    }
154
155    unsafe fn range_from(&self, i: usize) -> Self {
156        self.as_str().range_from(i).to_string()
157    }
158
159    unsafe fn range(&self, i: usize, j: usize) -> Self {
160        self.as_str().range(i, j).to_string()
161    }
162}
163
164unsafe impl<'de> Str<'de> for &'de [u8] {
165    fn len(&self) -> usize {
166        <[u8]>::len(*self)
167    }
168
169    fn as_bytes(&self) -> &[u8] {
170        *self
171    }
172
173    unsafe fn get_u8_char(&self, i: usize) -> Option<u8> {
174        // safety assertions
175        debug_assert!(i < self.len());
176
177        Some(*self.get_unchecked(i))
178    }
179
180    unsafe fn clone_slice(&self, i: usize, j: usize) -> Option<Self> {
181        // safety assertions
182        debug_assert!(i < self.len());
183
184        Some(self.get_unchecked(i..j))
185    }
186
187    unsafe fn find(&self, i: usize, char: u8) -> Option<usize> {
188        // safety assertions
189        debug_assert!(i < self.len());
190
191        let slice = self.get_unchecked((i + 1)..);
192        // It is safe to add 1 even for UTF-8 safety,
193        // provided that `char` is an ASCII character.
194
195        let index = slice.iter().position(|&other| char == other);
196        index.map(|index| i + 1 + index)
197    }
198
199    unsafe fn range_from(&self, i: usize) -> Self {
200        self.get_unchecked(i..)
201    }
202
203    unsafe fn range(&self, i: usize, j: usize) -> Self {
204        self.get_unchecked(i..j)
205    }
206}
207
208unsafe impl<'de> Str<'de> for Vec<u8> {
209    fn len(&self) -> usize {
210        <[u8]>::len(self.as_slice())
211    }
212
213    fn as_bytes(&self) -> &[u8] {
214        self.as_slice()
215    }
216
217    unsafe fn get_u8_char(&self, i: usize) -> Option<u8> {
218        self.as_slice().get_u8_char(i)
219    }
220
221    unsafe fn clone_slice(&self, i: usize, j: usize) -> Option<Self> {
222        self.as_slice().clone_slice(i, j).map(|s| s.to_vec())
223    }
224
225    unsafe fn find(&self, i: usize, char: u8) -> Option<usize> {
226        self.as_slice().find(i, char)
227    }
228
229    unsafe fn range_from(&self, i: usize) -> Self {
230        self.get_unchecked(i..).to_vec()
231    }
232
233    unsafe fn range(&self, i: usize, j: usize) -> Self {
234        self.get_unchecked(i..j).to_vec()
235    }
236}