use crate::error::{Result, Utf64Error};
use std::{
borrow::{Borrow, BorrowMut},
fmt,
hash::{Hash, Hasher},
iter::{Extend, FromIterator},
ops::{Add, AddAssign, Deref, DerefMut, Index, Range, RangeFrom, RangeFull, RangeTo},
str::FromStr,
};
/// A string stored as a sequence of 64-bit units, one `u64` per character.
///
/// Units produced by this type hold the character's UTF-8 bytes left-aligned
/// in the upper 32 bits and leave the lower 32 bits zero. The derived
/// equality compares the raw units, not the decoded text.
#[derive(Clone, PartialEq, Eq)]
pub struct String64 {
// Encoded units, in character order.
data: Vec<u64>,
}
impl String64 {
    /// Creates an empty string.
    pub fn new() -> Self {
        let data = Vec::new();
        Self { data }
    }

    /// Creates an empty string with room for at least `capacity` units.
    pub fn with_capacity(capacity: usize) -> Self {
        let data = Vec::with_capacity(capacity);
        Self { data }
    }

    /// Returns the number of stored units (one per character).
    pub fn len(&self) -> usize {
        self.as_slice().len()
    }

    /// Returns `true` when no units are stored.
    pub fn is_empty(&self) -> bool {
        self.as_slice().is_empty()
    }

    /// Borrows the raw units.
    pub fn as_slice(&self) -> &[u64] {
        self.data.as_slice()
    }

    /// Encodes `s` into one unit per character.
    ///
    /// Each character's UTF-8 bytes are packed big-endian into the upper
    /// 32 bits of the unit (first byte in the most significant position);
    /// unused byte positions and the lower 32 bits stay zero.
    ///
    /// The current implementation has no failure path and always returns `Ok`.
    fn encode(s: &str) -> Result<Self> {
        let mut units = Vec::with_capacity(s.chars().count());
        units.extend(s.chars().map(|ch| {
            // The buffer starts zeroed, so bytes past the encoded length are
            // already the zero padding the unit layout requires.
            let mut buf = [0u8; 4];
            let _ = ch.encode_utf8(&mut buf);
            u64::from(u32::from_be_bytes(buf)) << 32
        }));
        Ok(Self { data: units })
    }

    /// Decodes the units back into a UTF-8 `String`.
    ///
    /// # Errors
    ///
    /// * `Utf64Error::NonZeroReservedBits` if a unit has any of its lower
    ///   32 bits set.
    /// * `Utf64Error::InvalidUtf64` if a unit's lead byte is zero.
    /// * `Utf64Error::InvalidUtf8` if the collected bytes are not valid UTF-8.
    ///
    /// NOTE(review): `encode` maps U+0000 to the all-zero unit, which is
    /// rejected here as `InvalidUtf64`, so text containing NUL does not
    /// round-trip — confirm this is intended.
    ///
    /// NOTE(review): bytes after the sequence length implied by the lead
    /// byte are ignored rather than required to be zero — confirm.
    pub fn to_string(&self) -> Result<String> {
        let mut utf8 = Vec::new();
        for unit in self.data.iter().copied() {
            if (unit & 0xFFFF_FFFF) != 0 {
                return Err(Utf64Error::NonZeroReservedBits);
            }
            let bytes = ((unit >> 32) as u32).to_be_bytes();
            // The lead byte alone decides how many bytes are taken; whether
            // they form valid UTF-8 is checked once at the end.
            let width = match bytes[0] {
                0x00 => return Err(Utf64Error::InvalidUtf64),
                0x01..=0x7F => 1,
                0x80..=0xDF => 2,
                0xE0..=0xEF => 3,
                0xF0..=0xFF => 4,
            };
            utf8.extend_from_slice(&bytes[..width]);
        }
        String::from_utf8(utf8).map_err(|_| Utf64Error::InvalidUtf8)
    }
}
impl Default for String64 {
    /// Returns an empty string.
    fn default() -> Self {
        Self { data: Vec::new() }
    }
}
impl From<&str> for String64 {
    /// Encodes a string slice.
    ///
    /// # Panics
    ///
    /// Panics if the internal encoder returns an error.
    fn from(s: &str) -> Self {
        let encoded = Self::encode(s);
        encoded.expect("valid UTF-8 &str should always encode to UTF64")
    }
}
impl From<String> for String64 {
    /// Encodes an owned `String`; the input is only read, then dropped.
    ///
    /// # Panics
    ///
    /// Panics if the internal encoder returns an error.
    fn from(s: String) -> Self {
        let encoded = Self::encode(s.as_str());
        encoded.expect("valid UTF-8 String should always encode to UTF64")
    }
}
impl FromStr for String64 {
type Err = Utf64Error;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
Self::encode(s)
}
}
impl fmt::Display for String64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.to_string() {
Ok(s) => write!(f, "{s}"),
Err(_) => write!(f, "<invalid UTF64>"),
}
}
}
impl fmt::Debug for String64 {
    /// Writes `String64("…")` with the decoded text in its `Debug` form, or
    /// `String64(<invalid>)` when decoding fails.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Ok(text) = self.to_string() {
            write!(f, "String64({text:?})")
        } else {
            f.write_str("String64(<invalid>)")
        }
    }
}
impl Hash for String64 {
    /// Hashes the raw units, matching the derived equality.
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.data, state);
    }
}
impl PartialOrd for String64 {
    /// Always returns `Some`, delegating to the total order from `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for String64 {
    /// Totally orders two values.
    ///
    /// A value that fails to decode sorts before every value that decodes.
    /// Two decodable values are ordered by their decoded text. Ties between
    /// decodable values, and every pair of undecodable values, are ordered
    /// by the raw units, so `cmp` returns `Equal` exactly when the derived
    /// `==` (which compares raw units) is true, as `Ord` requires.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        match (self.to_string(), other.to_string()) {
            // Different units can decode to the same text (bytes after the
            // sequence are ignored by the decoder), hence the tie-break.
            (Ok(lhs), Ok(rhs)) => lhs
                .cmp(&rhs)
                .then_with(|| self.data.cmp(&other.data)),
            (Ok(_), Err(_)) => std::cmp::Ordering::Greater,
            (Err(_), Ok(_)) => std::cmp::Ordering::Less,
            // Previously every undecodable pair compared `Equal`, even when
            // `==` said the two values differ.
            (Err(_), Err(_)) => self.data.cmp(&other.data),
        }
    }
}
impl Index<usize> for String64 {
    type Output = u64;

    /// Returns the raw unit at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is out of bounds.
    fn index(&self, index: usize) -> &Self::Output {
        &self.data.as_slice()[index]
    }
}
impl Index<Range<usize>> for String64 {
    type Output = [u64];

    /// Returns the raw units in `range`.
    ///
    /// # Panics
    ///
    /// Panics if `range` is out of bounds or its start exceeds its end.
    fn index(&self, range: Range<usize>) -> &Self::Output {
        &self.data.as_slice()[range]
    }
}
impl Index<RangeFrom<usize>> for String64 {
    type Output = [u64];

    /// Returns the raw units from `range.start` to the end.
    ///
    /// # Panics
    ///
    /// Panics if the start is past the end of the data.
    fn index(&self, range: RangeFrom<usize>) -> &Self::Output {
        &self.data.as_slice()[range]
    }
}
impl Index<RangeTo<usize>> for String64 {
    type Output = [u64];

    /// Returns the raw units before `range.end`.
    ///
    /// # Panics
    ///
    /// Panics if the end is past the end of the data.
    fn index(&self, range: RangeTo<usize>) -> &Self::Output {
        &self.data.as_slice()[range]
    }
}
impl Index<RangeFull> for String64 {
    type Output = [u64];

    /// Returns all raw units.
    fn index(&self, range: RangeFull) -> &Self::Output {
        &self.data.as_slice()[range]
    }
}
/// Owning iterator that decodes stored 64-bit units into `char`s.
pub struct IntoIter {
    data: std::vec::IntoIter<u64>,
}

impl IntoIter {
    /// Decodes one unit into a `char`.
    ///
    /// Only the upper 32 bits are read; the lead byte selects how many of
    /// the four bytes are decoded and the rest are ignored.
    ///
    /// # Panics
    ///
    /// Panics if the selected bytes are not valid UTF-8.
    fn decode_unit(unit: u64) -> char {
        let bytes = ((unit >> 32) as u32).to_be_bytes();
        let width = match bytes[0] {
            0x00..=0x7F => 1,
            0x80..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xFF => 4,
        };
        let first = match std::str::from_utf8(&bytes[..width]) {
            Ok(text) => text.chars().next(),
            Err(_) => None,
        };
        first.expect("valid UTF64 should decode to valid char")
    }
}

impl Iterator for IntoIter {
    type Item = char;

    /// Yields the next decoded character.
    ///
    /// # Panics
    ///
    /// Panics if the next unit does not decode to a character.
    fn next(&mut self) -> Option<Self::Item> {
        self.data.next().map(Self::decode_unit)
    }

    /// One character per remaining unit, so the underlying hint is exact.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.data.size_hint()
    }
}

impl ExactSizeIterator for IntoIter {
    /// Number of characters still to be yielded.
    fn len(&self) -> usize {
        self.data.len()
    }
}
impl IntoIterator for String64 {
type Item = char;
type IntoIter = IntoIter;
fn into_iter(self) -> Self::IntoIter {
IntoIter {
data: self.data.into_iter(),
}
}
}
/// Borrowing iterator that decodes stored 64-bit units into `char`s.
pub struct Iter<'a> {
    data: std::slice::Iter<'a, u64>,
}

impl<'a> Iterator for Iter<'a> {
    type Item = char;

    /// Yields the next decoded character.
    ///
    /// Only the upper 32 bits of a unit are read; the lead byte selects how
    /// many of the four bytes are decoded and the rest are ignored.
    ///
    /// # Panics
    ///
    /// Panics if the selected bytes are not valid UTF-8.
    fn next(&mut self) -> Option<Self::Item> {
        let unit = *self.data.next()?;
        let bytes = ((unit >> 32) as u32).to_be_bytes();
        let width = match bytes[0] {
            0x00..=0x7F => 1,
            0x80..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xFF => 4,
        };
        let decoded = std::str::from_utf8(&bytes[..width])
            .ok()
            .and_then(|text| text.chars().next());
        Some(decoded.expect("valid UTF64 should decode to valid char"))
    }

    /// One character per remaining unit, so the underlying hint is exact.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.data.size_hint()
    }
}

impl<'a> ExactSizeIterator for Iter<'a> {
    /// Number of characters still to be yielded.
    fn len(&self) -> usize {
        self.data.len()
    }
}
impl<'a> IntoIterator for &'a String64 {
    type Item = char;
    type IntoIter = Iter<'a>;

    /// Iterates over the decoded characters without consuming the string.
    fn into_iter(self) -> Self::IntoIter {
        let units = self.data.iter();
        Iter { data: units }
    }
}
impl FromIterator<char> for String64 {
fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
let mut s = String64::new();
s.extend(iter);
s
}
}
impl Extend<char> for String64 {
    /// Appends one unit per character yielded by `iter`.
    ///
    /// Each character's UTF-8 bytes are packed big-endian into the upper
    /// 32 bits of the unit; unused byte positions and the lower 32 bits
    /// are zero.
    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
        let units = iter.into_iter().map(|ch| {
            // The buffer starts zeroed, so bytes past the encoded length
            // are already the required zero padding.
            let mut buf = [0u8; 4];
            let _ = ch.encode_utf8(&mut buf);
            u64::from(u32::from_be_bytes(buf)) << 32
        });
        self.data.extend(units);
    }
}
impl Add<&str> for String64 {
    type Output = String64;

    /// Appends the characters of `rhs` and returns the extended string.
    fn add(mut self, rhs: &str) -> Self::Output {
        self += rhs;
        self
    }
}
impl AddAssign<&str> for String64 {
    /// Appends the characters of `rhs` in place.
    fn add_assign(&mut self, rhs: &str) {
        Extend::extend(self, rhs.chars());
    }
}
impl PartialEq<str> for String64 {
fn eq(&self, other: &str) -> bool {
match self.to_string() {
Ok(s) => s == other,
Err(_) => false,
}
}
}
impl PartialEq<&str> for String64 {
    /// Compares against a `&str` by delegating to the `str` comparison.
    fn eq(&self, other: &&str) -> bool {
        <Self as PartialEq<str>>::eq(self, *other)
    }
}
impl PartialEq<String> for String64 {
    /// Compares against a `String` by delegating to the `str` comparison.
    fn eq(&self, other: &String) -> bool {
        <Self as PartialEq<str>>::eq(self, other.as_str())
    }
}
impl AsRef<[u64]> for String64 {
    /// Borrows the raw units.
    fn as_ref(&self) -> &[u64] {
        self.data.as_slice()
    }
}
impl TryFrom<String64> for String {
type Error = Utf64Error;
fn try_from(value: String64) -> Result<Self> {
value.to_string()
}
}
impl TryFrom<&String64> for String {
type Error = Utf64Error;
fn try_from(value: &String64) -> Result<Self> {
value.to_string()
}
}
impl Deref for String64 {
    type Target = [u64];

    /// Dereferences to the raw units.
    fn deref(&self) -> &Self::Target {
        self.data.as_slice()
    }
}
impl DerefMut for String64 {
    /// Mutably dereferences to the raw units.
    ///
    /// Units written through this reference are not validated.
    fn deref_mut(&mut self) -> &mut Self::Target {
        self.data.as_mut_slice()
    }
}
impl Borrow<[u64]> for String64 {
    /// Borrows the raw units.
    fn borrow(&self) -> &[u64] {
        &self.data[..]
    }
}
impl BorrowMut<[u64]> for String64 {
    /// Mutably borrows the raw units.
    ///
    /// Units written through this reference are not validated.
    fn borrow_mut(&mut self) -> &mut [u64] {
        &mut self.data[..]
    }
}