1use std::borrow::Cow;
2use std::ops::{Bound, RangeBounds};
3use std::{fmt, slice};
4
5use crate::chars;
6
/// A borrowed, cheaply copyable string view with two storage forms.
///
/// Equality, ordering and hashing are derived, so they are variant-aware: an
/// `Ascii` value never compares equal to a `Unicode` value, and because
/// `Ascii` is declared first every `Ascii` value orders before every
/// `Unicode` value.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Utf32Str<'a> {
    /// Text stored as one byte per character.
    ///
    /// NOTE(review): users of this type cast these bytes directly to `char`,
    /// so presumably every byte is expected to be ASCII; confirm at the
    /// construction sites.
    Ascii(&'a [u8]),
    /// Text stored as one `char` per element.
    Unicode(&'a [char]),
}
41
42impl<'a> Utf32Str<'a> {
43 pub fn new(str: &'a str, buf: &'a mut Vec<char>) -> Self {
45 if str.is_ascii() {
46 Utf32Str::Ascii(str.as_bytes())
47 } else {
48 buf.clear();
49 buf.extend(crate::chars::graphemes(str));
50 if buf.iter().all(|c| c.is_ascii()) {
51 return Utf32Str::Ascii(str.as_bytes());
52 }
53 Utf32Str::Unicode(&*buf)
54 }
55 }
56
57 #[inline]
59 pub fn len(self) -> usize {
60 match self {
61 Utf32Str::Unicode(codepoints) => codepoints.len(),
62 Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
63 }
64 }
65
66 #[inline]
68 pub fn is_empty(self) -> bool {
69 match self {
70 Utf32Str::Unicode(codepoints) => codepoints.is_empty(),
71 Utf32Str::Ascii(ascii_bytes) => ascii_bytes.is_empty(),
72 }
73 }
74
75 #[inline]
78 pub fn slice(self, range: impl RangeBounds<usize>) -> Utf32Str<'a> {
79 let start = match range.start_bound() {
80 Bound::Included(&start) => start,
81 Bound::Excluded(&start) => start + 1,
82 Bound::Unbounded => 0,
83 };
84 let end = match range.end_bound() {
85 Bound::Included(&end) => end + 1,
86 Bound::Excluded(&end) => end,
87 Bound::Unbounded => self.len(),
88 };
89 match self {
90 Utf32Str::Ascii(bytes) => Utf32Str::Ascii(&bytes[start..end]),
91 Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
92 }
93 }
94
95 #[inline]
97 pub(crate) fn leading_white_space(self) -> usize {
98 match self {
99 Utf32Str::Ascii(bytes) => bytes
100 .iter()
101 .position(|b| !b.is_ascii_whitespace())
102 .unwrap_or(0),
103 Utf32Str::Unicode(codepoints) => codepoints
104 .iter()
105 .position(|c| !c.is_whitespace())
106 .unwrap_or(0),
107 }
108 }
109
110 #[inline]
112 pub(crate) fn trailing_white_space(self) -> usize {
113 match self {
114 Utf32Str::Ascii(bytes) => bytes
115 .iter()
116 .rev()
117 .position(|b| !b.is_ascii_whitespace())
118 .unwrap_or(0),
119 Utf32Str::Unicode(codepoints) => codepoints
120 .iter()
121 .rev()
122 .position(|c| !c.is_whitespace())
123 .unwrap_or(0),
124 }
125 }
126
127 #[inline]
130 pub fn slice_u32(self, range: impl RangeBounds<u32>) -> Utf32Str<'a> {
131 let start = match range.start_bound() {
132 Bound::Included(&start) => start as usize,
133 Bound::Excluded(&start) => start as usize + 1,
134 Bound::Unbounded => 0,
135 };
136 let end = match range.end_bound() {
137 Bound::Included(&end) => end as usize + 1,
138 Bound::Excluded(&end) => end as usize,
139 Bound::Unbounded => self.len(),
140 };
141 match self {
142 Utf32Str::Ascii(bytes) => Utf32Str::Ascii(&bytes[start..end]),
143 Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
144 }
145 }
146
147 pub fn is_ascii(self) -> bool {
149 matches!(self, Utf32Str::Ascii(_))
150 }
151
152 pub fn get(self, n: u32) -> char {
154 match self {
155 Utf32Str::Ascii(bytes) => bytes[n as usize] as char,
156 Utf32Str::Unicode(codepoints) => codepoints[n as usize],
157 }
158 }
159 pub(crate) fn last(self) -> char {
160 match self {
161 Utf32Str::Ascii(bytes) => bytes[bytes.len() - 1] as char,
162 Utf32Str::Unicode(codepoints) => codepoints[codepoints.len() - 1],
163 }
164 }
165
166 pub(crate) fn first(self) -> char {
167 match self {
168 Utf32Str::Ascii(bytes) => bytes[0] as char,
169 Utf32Str::Unicode(codepoints) => codepoints[0],
170 }
171 }
172
173 pub fn chars(self) -> Chars<'a> {
175 match self {
176 Utf32Str::Ascii(bytes) => Chars::Ascii(bytes.iter()),
177 Utf32Str::Unicode(codepoints) => Chars::Unicode(codepoints.iter()),
178 }
179 }
180}
181
182impl fmt::Debug for Utf32Str<'_> {
183 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
184 write!(f, "\"")?;
185 for c in self.chars() {
186 for c in c.escape_debug() {
187 write!(f, "{c}")?
188 }
189 }
190 write!(f, "\"")
191 }
192}
193
194impl fmt::Display for Utf32Str<'_> {
195 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
196 for c in self.chars() {
197 write!(f, "{c}")?
198 }
199 Ok(())
200 }
201}
202
/// Double-ended iterator over the characters of a `Utf32Str`, as returned by
/// `Utf32Str::chars`.
pub enum Chars<'a> {
    /// Iterates over bytes, yielding each one cast to `char`.
    Ascii(slice::Iter<'a, u8>),
    /// Iterates over `char`s, yielding copies.
    Unicode(slice::Iter<'a, char>),
}

impl<'a> Iterator for Chars<'a> {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Chars::Ascii(iter) => iter.next().map(|&c| c as char),
            Chars::Unicode(iter) => iter.next().copied(),
        }
    }

    /// Forwards the exact remaining length of the underlying slice iterator so
    /// that `collect`/`extend` can reserve capacity up front; the default
    /// `(0, None)` would hide it.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        match self {
            Chars::Ascii(iter) => iter.size_hint(),
            Chars::Unicode(iter) => iter.size_hint(),
        }
    }
}

impl DoubleEndedIterator for Chars<'_> {
    fn next_back(&mut self) -> Option<Self::Item> {
        match self {
            Chars::Ascii(iter) => iter.next_back().map(|&c| c as char),
            Chars::Unicode(iter) => iter.next_back().copied(),
        }
    }
}
226
/// An owned string with two storage forms: a boxed `str` (`Ascii`) or a boxed
/// slice of `char`s (`Unicode`).
///
/// Equality, ordering and hashing are derived and therefore variant-aware:
/// `Ascii` values never equal `Unicode` values and always order before them.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Utf32String {
    /// Text held as a boxed `str`.
    ///
    /// NOTE(review): the conversions in this file only build this variant from
    /// pure-ASCII input; presumably that is an invariant, so confirm before
    /// constructing it by hand.
    Ascii(Box<str>),
    /// Text held as one `char` per element.
    Unicode(Box<[char]>),
}

impl Default for Utf32String {
    /// Returns the empty string, stored in the `Ascii` variant.
    fn default() -> Self {
        Utf32String::Ascii(Box::default())
    }
}
242
243impl Utf32String {
244 #[inline]
246 pub fn len(&self) -> usize {
247 match self {
248 Utf32String::Unicode(codepoints) => codepoints.len(),
249 Utf32String::Ascii(ascii_bytes) => ascii_bytes.len(),
250 }
251 }
252
253 #[inline]
255 pub fn is_empty(&self) -> bool {
256 match self {
257 Utf32String::Unicode(codepoints) => codepoints.is_empty(),
258 Utf32String::Ascii(ascii_bytes) => ascii_bytes.is_empty(),
259 }
260 }
261
262 #[inline]
265 pub fn slice(&self, range: impl RangeBounds<usize>) -> Utf32Str {
266 let start = match range.start_bound() {
267 Bound::Included(&start) => start,
268 Bound::Excluded(&start) => start + 1,
269 Bound::Unbounded => 0,
270 };
271 let end = match range.end_bound() {
272 Bound::Included(&end) => end + 1,
273 Bound::Excluded(&end) => end,
274 Bound::Unbounded => self.len(),
275 };
276 match self {
277 Utf32String::Ascii(bytes) => Utf32Str::Ascii(&bytes.as_bytes()[start..end]),
278 Utf32String::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
279 }
280 }
281
282 #[inline]
285 pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {
286 let start = match range.start_bound() {
287 Bound::Included(&start) => start,
288 Bound::Excluded(&start) => start + 1,
289 Bound::Unbounded => 0,
290 };
291 let end = match range.end_bound() {
292 Bound::Included(&end) => end + 1,
293 Bound::Excluded(&end) => end,
294 Bound::Unbounded => self.len() as u32,
295 };
296 match self {
297 Utf32String::Ascii(bytes) => {
298 Utf32Str::Ascii(&bytes.as_bytes()[start as usize..end as usize])
299 }
300 Utf32String::Unicode(codepoints) => {
301 Utf32Str::Unicode(&codepoints[start as usize..end as usize])
302 }
303 }
304 }
305}
306
307impl From<&str> for Utf32String {
308 #[inline]
309 fn from(value: &str) -> Self {
310 if value.is_ascii() {
311 Self::Ascii(value.to_owned().into_boxed_str())
312 } else {
313 Self::Unicode(chars::graphemes(value).collect())
314 }
315 }
316}
317
318impl From<Box<str>> for Utf32String {
319 fn from(value: Box<str>) -> Self {
320 if value.is_ascii() {
321 Self::Ascii(value)
322 } else {
323 Self::Unicode(chars::graphemes(&value).collect())
324 }
325 }
326}
327
328impl From<String> for Utf32String {
329 #[inline]
330 fn from(value: String) -> Self {
331 value.into_boxed_str().into()
332 }
333}
334
335impl<'a> From<Cow<'a, str>> for Utf32String {
336 #[inline]
337 fn from(value: Cow<'a, str>) -> Self {
338 match value {
339 Cow::Borrowed(value) => value.into(),
340 Cow::Owned(value) => value.into(),
341 }
342 }
343}
344
345impl fmt::Debug for Utf32String {
346 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
347 write!(f, "{:?}", self.slice(..))
348 }
349}
350
351impl fmt::Display for Utf32String {
352 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
353 write!(f, "{}", self.slice(..))
354 }
355}