1use crate::error::{Result, Utf64Error};
2use std::{
3 borrow::{Borrow, BorrowMut},
4 fmt,
5 hash::{Hash, Hasher},
6 iter::{Extend, FromIterator},
7 ops::{Add, AddAssign, Deref, DerefMut, Index, Range, RangeFrom, RangeFull, RangeTo},
8 str::FromStr,
9};
10
/// A string stored as a vector of 64-bit units.
///
/// Equality and cloning operate directly on the stored units.
#[derive(Clone, Eq, PartialEq)]
pub struct String64 {
    /// The stored 64-bit units, in order.
    data: Vec<u64>,
}
33
34impl String64 {
35 pub fn new() -> Self {
37 Self { data: Vec::new() }
38 }
39
40 pub fn with_capacity(capacity: usize) -> Self {
42 Self {
43 data: Vec::with_capacity(capacity),
44 }
45 }
46
47 pub fn len(&self) -> usize {
51 self.data.len()
52 }
53
54 pub fn is_empty(&self) -> bool {
56 self.data.is_empty()
57 }
58
59 pub fn as_slice(&self) -> &[u64] {
61 &self.data
62 }
63
64 fn encode(s: &str) -> Result<Self> {
66 let mut data = Vec::with_capacity(s.chars().count());
67
68 for ch in s.chars() {
69 let mut utf8_buf = [0u8; 4];
70 let utf8_bytes = ch.encode_utf8(&mut utf8_buf).as_bytes();
71
72 let mut upper_bits: u32 = 0;
74 for (i, &byte) in utf8_bytes.iter().enumerate() {
75 upper_bits |= (byte as u32) << (24 - (i * 8));
76 }
77
78 let utf64_char = (upper_bits as u64) << 32;
80 data.push(utf64_char);
81 }
82
83 Ok(Self { data })
84 }
85
86 pub fn to_string(&self) -> Result<String> {
88 let mut utf8_bytes = Vec::new();
89
90 for &utf64_char in &self.data {
91 if (utf64_char & 0xFFFFFFFF) != 0 {
93 return Err(Utf64Error::NonZeroReservedBits);
94 }
95
96 let upper_bits = (utf64_char >> 32) as u32;
98
99 let bytes = [
101 ((upper_bits >> 24) & 0xFF) as u8,
102 ((upper_bits >> 16) & 0xFF) as u8,
103 ((upper_bits >> 8) & 0xFF) as u8,
104 (upper_bits & 0xFF) as u8,
105 ];
106
107 let len = if bytes[0] == 0 {
110 return Err(Utf64Error::InvalidUtf64);
111 } else if bytes[0] < 0x80 {
112 1
113 } else if bytes[0] < 0xE0 {
114 2
115 } else if bytes[0] < 0xF0 {
116 3
117 } else {
118 4
119 };
120
121 utf8_bytes.extend_from_slice(&bytes[..len]);
122 }
123
124 String::from_utf8(utf8_bytes).map_err(|_| Utf64Error::InvalidUtf8)
125 }
126}
127
128impl Default for String64 {
129 fn default() -> Self {
130 Self::new()
131 }
132}
133
134impl From<&str> for String64 {
135 fn from(s: &str) -> Self {
136 Self::encode(s).expect("valid UTF-8 &str should always encode to UTF64")
137 }
138}
139
140impl From<String> for String64 {
141 fn from(s: String) -> Self {
142 Self::encode(&s).expect("valid UTF-8 String should always encode to UTF64")
143 }
144}
145
146impl FromStr for String64 {
147 type Err = Utf64Error;
148
149 fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
150 Self::encode(s)
151 }
152}
153
154impl fmt::Display for String64 {
155 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156 match self.to_string() {
157 Ok(s) => write!(f, "{s}"),
158 Err(_) => write!(f, "<invalid UTF64>"),
159 }
160 }
161}
162
163impl fmt::Debug for String64 {
164 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165 match self.to_string() {
166 Ok(s) => write!(f, "String64({s:?})"),
167 Err(_) => write!(f, "String64(<invalid>)"),
168 }
169 }
170}
171
172impl Hash for String64 {
173 fn hash<H: Hasher>(&self, state: &mut H) {
174 self.data.hash(state);
175 }
176}
177
178impl PartialOrd for String64 {
179 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
180 Some(self.cmp(other))
181 }
182}
183
184impl Ord for String64 {
185 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
186 match (self.to_string(), other.to_string()) {
188 (Ok(s1), Ok(s2)) => s1.cmp(&s2),
189 (Ok(_), Err(_)) => std::cmp::Ordering::Greater,
190 (Err(_), Ok(_)) => std::cmp::Ordering::Less,
191 (Err(_), Err(_)) => std::cmp::Ordering::Equal,
192 }
193 }
194}
195
196impl Index<usize> for String64 {
197 type Output = u64;
198
199 fn index(&self, index: usize) -> &Self::Output {
200 &self.data[index]
201 }
202}
203
204impl Index<Range<usize>> for String64 {
205 type Output = [u64];
206
207 fn index(&self, range: Range<usize>) -> &Self::Output {
208 &self.data[range]
209 }
210}
211
212impl Index<RangeFrom<usize>> for String64 {
213 type Output = [u64];
214
215 fn index(&self, range: RangeFrom<usize>) -> &Self::Output {
216 &self.data[range]
217 }
218}
219
220impl Index<RangeTo<usize>> for String64 {
221 type Output = [u64];
222
223 fn index(&self, range: RangeTo<usize>) -> &Self::Output {
224 &self.data[range]
225 }
226}
227
228impl Index<RangeFull> for String64 {
229 type Output = [u64];
230
231 fn index(&self, range: RangeFull) -> &Self::Output {
232 &self.data[range]
233 }
234}
235
/// Iterator wrapping an owning iterator over `u64` units.
pub struct IntoIter {
    /// The underlying owning iterator over units.
    data: ::std::vec::IntoIter<u64>,
}
240
241impl Iterator for IntoIter {
242 type Item = char;
243
244 fn next(&mut self) -> Option<Self::Item> {
245 self.data.next().map(|utf64_char| {
246 let upper_bits = (utf64_char >> 32) as u32;
248 let bytes = [
249 ((upper_bits >> 24) & 0xFF) as u8,
250 ((upper_bits >> 16) & 0xFF) as u8,
251 ((upper_bits >> 8) & 0xFF) as u8,
252 (upper_bits & 0xFF) as u8,
253 ];
254
255 let len = if bytes[0] < 0x80 {
257 1
258 } else if bytes[0] < 0xE0 {
259 2
260 } else if bytes[0] < 0xF0 {
261 3
262 } else {
263 4
264 };
265
266 std::str::from_utf8(&bytes[..len])
267 .ok()
268 .and_then(|s| s.chars().next())
269 .expect("valid UTF64 should decode to valid char")
270 })
271 }
272
273 fn size_hint(&self) -> (usize, Option<usize>) {
274 self.data.size_hint()
275 }
276}
277
278impl ExactSizeIterator for IntoIter {
279 fn len(&self) -> usize {
280 self.data.len()
281 }
282}
283
284impl IntoIterator for String64 {
285 type Item = char;
286 type IntoIter = IntoIter;
287
288 fn into_iter(self) -> Self::IntoIter {
289 IntoIter {
290 data: self.data.into_iter(),
291 }
292 }
293}
294
/// Iterator wrapping a borrowing iterator over `u64` units.
pub struct Iter<'a> {
    /// The underlying slice iterator over borrowed units.
    data: ::std::slice::Iter<'a, u64>,
}
299
300impl<'a> Iterator for Iter<'a> {
301 type Item = char;
302
303 fn next(&mut self) -> Option<Self::Item> {
304 self.data.next().map(|&utf64_char| {
305 let upper_bits = (utf64_char >> 32) as u32;
307 let bytes = [
308 ((upper_bits >> 24) & 0xFF) as u8,
309 ((upper_bits >> 16) & 0xFF) as u8,
310 ((upper_bits >> 8) & 0xFF) as u8,
311 (upper_bits & 0xFF) as u8,
312 ];
313
314 let len = if bytes[0] < 0x80 {
316 1
317 } else if bytes[0] < 0xE0 {
318 2
319 } else if bytes[0] < 0xF0 {
320 3
321 } else {
322 4
323 };
324
325 std::str::from_utf8(&bytes[..len])
326 .ok()
327 .and_then(|s| s.chars().next())
328 .expect("valid UTF64 should decode to valid char")
329 })
330 }
331
332 fn size_hint(&self) -> (usize, Option<usize>) {
333 self.data.size_hint()
334 }
335}
336
337impl<'a> ExactSizeIterator for Iter<'a> {
338 fn len(&self) -> usize {
339 self.data.len()
340 }
341}
342
343impl<'a> IntoIterator for &'a String64 {
344 type Item = char;
345 type IntoIter = Iter<'a>;
346
347 fn into_iter(self) -> Self::IntoIter {
348 Iter {
349 data: self.data.iter(),
350 }
351 }
352}
353
354impl FromIterator<char> for String64 {
355 fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
356 let mut s = String64::new();
357 s.extend(iter);
358 s
359 }
360}
361
362impl Extend<char> for String64 {
363 fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
364 for ch in iter {
365 let mut utf8_buf = [0u8; 4];
366 let utf8_bytes = ch.encode_utf8(&mut utf8_buf).as_bytes();
367
368 let mut upper_bits: u32 = 0;
370 for (i, &byte) in utf8_bytes.iter().enumerate() {
371 upper_bits |= (byte as u32) << (24 - (i * 8));
372 }
373
374 let utf64_char = (upper_bits as u64) << 32;
375 self.data.push(utf64_char);
376 }
377 }
378}
379
380impl Add<&str> for String64 {
381 type Output = String64;
382
383 fn add(mut self, rhs: &str) -> Self::Output {
384 self.extend(rhs.chars());
385 self
386 }
387}
388
389impl AddAssign<&str> for String64 {
390 fn add_assign(&mut self, rhs: &str) {
391 self.extend(rhs.chars());
392 }
393}
394
395impl PartialEq<str> for String64 {
396 fn eq(&self, other: &str) -> bool {
397 match self.to_string() {
398 Ok(s) => s == other,
399 Err(_) => false,
400 }
401 }
402}
403
404impl PartialEq<&str> for String64 {
405 fn eq(&self, other: &&str) -> bool {
406 self.eq(*other)
407 }
408}
409
410impl PartialEq<String> for String64 {
411 fn eq(&self, other: &String) -> bool {
412 self.eq(other.as_str())
413 }
414}
415
416impl AsRef<[u64]> for String64 {
417 fn as_ref(&self) -> &[u64] {
418 &self.data
419 }
420}
421
422impl TryFrom<String64> for String {
423 type Error = Utf64Error;
424
425 fn try_from(value: String64) -> Result<Self> {
426 value.to_string()
427 }
428}
429
430impl TryFrom<&String64> for String {
431 type Error = Utf64Error;
432
433 fn try_from(value: &String64) -> Result<Self> {
434 value.to_string()
435 }
436}
437
438impl Deref for String64 {
439 type Target = [u64];
440
441 fn deref(&self) -> &Self::Target {
442 &self.data
443 }
444}
445
446impl DerefMut for String64 {
447 fn deref_mut(&mut self) -> &mut Self::Target {
448 &mut self.data
449 }
450}
451
452impl Borrow<[u64]> for String64 {
453 fn borrow(&self) -> &[u64] {
454 &self.data
455 }
456}
457
458impl BorrowMut<[u64]> for String64 {
459 fn borrow_mut(&mut self) -> &mut [u64] {
460 &mut self.data
461 }
462}