use std::borrow::Cow;
use std::ops::{Bound, RangeBounds};
use std::{fmt, slice};
use crate::chars;
/// A borrowed, cheaply copyable string view with one element per character.
///
/// The text is held either as a byte slice or as a `char` slice, so `len`,
/// `get` and `slice` all work on character positions rather than UTF-8 byte
/// offsets.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Utf32Str<'a> {
/// Bytes, each read back as a single character via `byte as char`.
Ascii(&'a [u8]),
/// Characters stored directly as `char` values.
Unicode(&'a [char]),
}
impl<'a> Utf32Str<'a> {
    /// Builds a `Utf32Str` view of `str`.
    ///
    /// Pure-ASCII input is borrowed directly as bytes and `buf` is left
    /// untouched. Any other input clears `buf`, fills it with the chars
    /// yielded by `crate::chars::graphemes(str)` and returns a view that
    /// borrows from `buf`.
    pub fn new(str: &'a str, buf: &'a mut Vec<char>) -> Self {
        if str.is_ascii() {
            return Utf32Str::Ascii(str.as_bytes());
        }
        buf.clear();
        buf.extend(crate::chars::graphemes(str));
        // `str` contains non-ASCII bytes on this path, so its raw bytes must
        // never be handed out through the `Ascii` variant: they would be read
        // back one byte per character and disagree with the chars collected
        // above in both length and content. Always expose the collected chars.
        Utf32Str::Unicode(&*buf)
    }

    /// Returns the number of characters in this string.
    #[inline]
    pub fn len(self) -> usize {
        match self {
            Utf32Str::Unicode(codepoints) => codepoints.len(),
            Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
        }
    }

    /// Returns `true` if this string contains no characters.
    #[inline]
    pub fn is_empty(self) -> bool {
        match self {
            Utf32Str::Unicode(codepoints) => codepoints.is_empty(),
            Utf32Str::Ascii(ascii_bytes) => ascii_bytes.is_empty(),
        }
    }

    /// Returns the sub-string covered by `range` (character indices).
    ///
    /// # Panics
    ///
    /// Panics if the resolved range is out of bounds or its start lies after
    /// its end.
    #[inline]
    pub fn slice(self, range: impl RangeBounds<usize>) -> Utf32Str<'a> {
        let start = match range.start_bound() {
            Bound::Included(&start) => start,
            Bound::Excluded(&start) => start + 1,
            Bound::Unbounded => 0,
        };
        let end = match range.end_bound() {
            Bound::Included(&end) => end + 1,
            Bound::Excluded(&end) => end,
            Bound::Unbounded => self.len(),
        };
        match self {
            Utf32Str::Ascii(bytes) => Utf32Str::Ascii(&bytes[start..end]),
            Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
        }
    }

    /// Returns the index of the first non-whitespace character.
    ///
    /// Returns `0` when there is no such character, i.e. for an empty string
    /// and for a string made up entirely of whitespace.
    #[inline]
    pub(crate) fn leading_white_space(self) -> usize {
        match self {
            Utf32Str::Ascii(bytes) => bytes
                .iter()
                .position(|b| !b.is_ascii_whitespace())
                .unwrap_or(0),
            Utf32Str::Unicode(codepoints) => codepoints
                .iter()
                .position(|c| !c.is_whitespace())
                .unwrap_or(0),
        }
    }

    /// Returns how many whitespace characters follow the last non-whitespace
    /// character.
    ///
    /// Returns `0` when there is no non-whitespace character at all, i.e. for
    /// an empty string and for a string made up entirely of whitespace.
    #[inline]
    pub(crate) fn trailing_white_space(self) -> usize {
        match self {
            Utf32Str::Ascii(bytes) => bytes
                .iter()
                .rev()
                .position(|b| !b.is_ascii_whitespace())
                .unwrap_or(0),
            Utf32Str::Unicode(codepoints) => codepoints
                .iter()
                .rev()
                .position(|c| !c.is_whitespace())
                .unwrap_or(0),
        }
    }

    /// Same as [`slice`](Self::slice) but takes `u32` character indices.
    ///
    /// # Panics
    ///
    /// Panics if the resolved range is out of bounds or its start lies after
    /// its end.
    #[inline]
    pub fn slice_u32(self, range: impl RangeBounds<u32>) -> Utf32Str<'a> {
        // Widen the bounds first so that the `+ 1` adjustments made by
        // `slice` are carried out in `usize`.
        let widen = |bound: Bound<&u32>| match bound {
            Bound::Included(&idx) => Bound::Included(idx as usize),
            Bound::Excluded(&idx) => Bound::Excluded(idx as usize),
            Bound::Unbounded => Bound::Unbounded,
        };
        self.slice((widen(range.start_bound()), widen(range.end_bound())))
    }

    /// Returns `true` if this string uses the `Ascii` representation.
    pub fn is_ascii(self) -> bool {
        matches!(self, Utf32Str::Ascii(_))
    }

    /// Returns the character at index `n`.
    ///
    /// # Panics
    ///
    /// Panics if `n` is not smaller than [`len`](Self::len).
    pub fn get(self, n: u32) -> char {
        match self {
            Utf32Str::Ascii(bytes) => bytes[n as usize] as char,
            Utf32Str::Unicode(codepoints) => codepoints[n as usize],
        }
    }

    /// Returns the last character.
    ///
    /// # Panics
    ///
    /// Panics if the string is empty.
    pub(crate) fn last(self) -> char {
        match self {
            Utf32Str::Ascii(bytes) => bytes[bytes.len() - 1] as char,
            Utf32Str::Unicode(codepoints) => codepoints[codepoints.len() - 1],
        }
    }

    /// Returns the first character.
    ///
    /// # Panics
    ///
    /// Panics if the string is empty.
    pub(crate) fn first(self) -> char {
        match self {
            Utf32Str::Ascii(bytes) => bytes[0] as char,
            Utf32Str::Unicode(codepoints) => codepoints[0],
        }
    }

    /// Returns an iterator over the characters of this string.
    pub fn chars(self) -> Chars<'a> {
        match self {
            Utf32Str::Ascii(bytes) => Chars::Ascii(bytes.iter()),
            Utf32Str::Unicode(codepoints) => Chars::Unicode(codepoints.iter()),
        }
    }
}
impl fmt::Debug for Utf32Str<'_> {
    /// Writes the string wrapped in double quotes, with every character
    /// passed through `char::escape_debug`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("\"")?;
        self.chars()
            .flat_map(char::escape_debug)
            .try_for_each(|escaped| write!(f, "{escaped}"))?;
        f.write_str("\"")
    }
}
impl fmt::Display for Utf32Str<'_> {
    /// Writes every character of the string in order, without quoting or
    /// escaping.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.chars().try_for_each(|ch| write!(f, "{ch}"))
    }
}
/// Iterator over the characters of a `Utf32Str`, as returned by
/// `Utf32Str::chars`.
pub enum Chars<'a> {
/// Walks a byte slice; each byte is yielded as `byte as char`.
Ascii(slice::Iter<'a, u8>),
/// Walks a `char` slice and yields the characters by value.
Unicode(slice::Iter<'a, char>),
}
impl<'a> Iterator for Chars<'a> {
    type Item = char;

    /// Yields the next character from the front, or `None` once exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Chars::Ascii(iter) => iter.next().map(|&c| c as char),
            Chars::Unicode(iter) => iter.next().copied(),
        }
    }

    /// Reports the exact number of remaining characters.
    ///
    /// Both variants wrap a slice iterator that knows its remaining length;
    /// forwarding it lets `collect`/`extend` reserve the right capacity up
    /// front instead of falling back to the default `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        match self {
            Chars::Ascii(iter) => iter.size_hint(),
            Chars::Unicode(iter) => iter.size_hint(),
        }
    }
}
impl DoubleEndedIterator for Chars<'_> {
    /// Yields the next character from the back, or `None` once exhausted.
    fn next_back(&mut self) -> Option<Self::Item> {
        match self {
            Chars::Ascii(bytes) => {
                let byte = bytes.next_back().copied()?;
                Some(char::from(byte))
            }
            Chars::Unicode(codepoints) => codepoints.next_back().copied(),
        }
    }
}
/// An owned string with one element per character; the owning counterpart of
/// `Utf32Str`.
///
/// `slice` and `slice_u32` hand out borrowed `Utf32Str` views of the contents.
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub enum Utf32String {
/// Text kept as a boxed `str`; views expose its bytes one per character.
Ascii(Box<str>),
/// Text kept as a boxed slice of `char` values.
Unicode(Box<[char]>),
}
impl Default for Utf32String {
fn default() -> Self {
Self::Ascii(String::new().into_boxed_str())
}
}
impl Utf32String {
    /// Returns the number of characters in this string.
    #[inline]
    pub fn len(&self) -> usize {
        match self {
            Utf32String::Unicode(codepoints) => codepoints.len(),
            Utf32String::Ascii(ascii_bytes) => ascii_bytes.len(),
        }
    }

    /// Returns `true` if this string contains no characters.
    #[inline]
    pub fn is_empty(&self) -> bool {
        match self {
            Utf32String::Unicode(codepoints) => codepoints.is_empty(),
            Utf32String::Ascii(ascii_bytes) => ascii_bytes.is_empty(),
        }
    }

    /// Returns a borrowed view of the characters covered by `range`
    /// (character indices).
    ///
    /// # Panics
    ///
    /// Panics if the resolved range is out of bounds or its start lies after
    /// its end.
    #[inline]
    pub fn slice(&self, range: impl RangeBounds<usize>) -> Utf32Str<'_> {
        let start = match range.start_bound() {
            Bound::Included(&start) => start,
            Bound::Excluded(&start) => start + 1,
            Bound::Unbounded => 0,
        };
        let end = match range.end_bound() {
            Bound::Included(&end) => end + 1,
            Bound::Excluded(&end) => end,
            Bound::Unbounded => self.len(),
        };
        match self {
            Utf32String::Ascii(bytes) => Utf32Str::Ascii(&bytes.as_bytes()[start..end]),
            Utf32String::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
        }
    }

    /// Same as [`slice`](Self::slice) but takes `u32` character indices.
    ///
    /// # Panics
    ///
    /// Panics if the resolved range is out of bounds or its start lies after
    /// its end.
    #[inline]
    pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str<'_> {
        // Widen to `usize` before any arithmetic: adding 1 to a `u32` bound
        // overflows at `u32::MAX`, and narrowing `len()` to `u32` for an
        // unbounded end would silently cut off strings longer than `u32::MAX`.
        let widen = |bound: Bound<&u32>| match bound {
            Bound::Included(&idx) => Bound::Included(idx as usize),
            Bound::Excluded(&idx) => Bound::Excluded(idx as usize),
            Bound::Unbounded => Bound::Unbounded,
        };
        self.slice((widen(range.start_bound()), widen(range.end_bound())))
    }
}
impl From<&str> for Utf32String {
    /// Copies `value` into an owned string: ASCII text is stored as a boxed
    /// `str`, anything else as the chars yielded by `chars::graphemes`.
    #[inline]
    fn from(value: &str) -> Self {
        if !value.is_ascii() {
            return Self::Unicode(chars::graphemes(value).collect());
        }
        Self::Ascii(Box::from(value))
    }
}
impl From<Box<str>> for Utf32String {
    /// Converts an owned boxed `str`: ASCII text is kept as-is without
    /// copying, anything else is stored as the chars yielded by
    /// `chars::graphemes`.
    fn from(value: Box<str>) -> Self {
        if !value.is_ascii() {
            return Self::Unicode(chars::graphemes(&value).collect());
        }
        Self::Ascii(value)
    }
}
impl From<String> for Utf32String {
    /// Converts an owned `String` by boxing it and reusing the
    /// `From<Box<str>>` conversion.
    #[inline]
    fn from(value: String) -> Self {
        let boxed: Box<str> = value.into_boxed_str();
        Self::from(boxed)
    }
}
impl<'a> From<Cow<'a, str>> for Utf32String {
    /// Converts a `Cow<str>`, routing borrowed data through `From<&str>` and
    /// owned data through `From<String>` so an owned buffer is not re-copied.
    #[inline]
    fn from(value: Cow<'a, str>) -> Self {
        match value {
            Cow::Owned(owned) => Self::from(owned),
            Cow::Borrowed(borrowed) => Self::from(borrowed),
        }
    }
}
impl fmt::Debug for Utf32String {
    /// Formats the whole string using the `Debug` output of its borrowed
    /// `Utf32Str` view.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let whole = self.slice(..);
        write!(f, "{whole:?}")
    }
}
impl fmt::Display for Utf32String {
    /// Formats the whole string using the `Display` output of its borrowed
    /// `Utf32Str` view.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let whole = self.slice(..);
        write!(f, "{whole}")
    }
}