use crate::error::*;
use std::{
borrow::{Borrow, BorrowMut, Cow, ToOwned},
char,
cmp::Ordering,
fmt::{self, Write},
iter::{DoubleEndedIterator, FromIterator},
mem::size_of,
ops::{self, Deref, DerefMut},
str,
};
/// Returns `true` if `v` is well-formed modified UTF-8 (MUTF-8).
///
/// Accepted input consists of:
/// - single bytes in `0x01..=0x7F` (a raw `0x00` byte is rejected),
/// - two-byte sequences; overlong forms (lead byte `0xC0`/`0xC1`) are rejected,
///   with the single exception of `0xC0 0x80`, the MUTF-8 encoding of U+0000,
/// - three-byte sequences that are not overlong. Sequences that encode
///   surrogates are accepted, paired or not.
///
/// Four-byte UTF-8 sequences, stray continuation bytes and truncated
/// sequences are rejected.
fn is_mutf8_valid(v: &[u8]) -> bool {
    // The word masks below are `u64` literals, so the word-at-a-time fast path
    // is only usable when `usize` is no wider than 64 bits.
    const CONSTANTS_LARGE_ENOUGH: bool = size_of::<u64>() >= size_of::<usize>();

    // True if any byte of the word has its high bit set or is zero, i.e. the
    // word is not made up of non-NUL ASCII bytes only.
    macro_rules! is_block_non_ascii {
        ($block:expr) => {{
            let block = $block;
            let is_not_ascii = block & (0x80808080_80808080u64 as usize) != 0;
            // Classic "has zero byte" bit trick.
            let contains_zero = ((block.wrapping_sub(0x01010101_01010101u64 as usize))
                & (!block)
                & (0x80808080_80808080u64 as usize))
                != 0;
            is_not_ascii || contains_zero
        }};
    }

    let block_size = 2 * size_of::<usize>();
    let align_offset = v.as_ptr().align_offset(size_of::<usize>());

    let mut i = 0;
    while i < v.len() {
        let b1 = v[i];
        if b1 >= 0x80 {
            let width = if b1 & 0b1111_0000 == 0b1110_0000 {
                3
            } else if b1 & 0b1110_0000 == 0b1100_0000 {
                2
            } else {
                // Continuation byte in lead position, or a 4+ byte lead.
                return false;
            };
            if v.len() < i + width {
                return false;
            }
            match width {
                2 => {
                    if v[i + 1] & 0b1100_0000 != 0b1000_0000 {
                        return false;
                    }
                    // Lead bytes 0xC0 and 0xC1 only produce overlong encodings.
                    // The one overlong form MUTF-8 permits is 0xC0 0x80 (U+0000);
                    // every other combination must be rejected.
                    let is_overlong = b1 & 0b0001_1110 == 0;
                    let is_encoded_nul = b1 == 0b1100_0000 && v[i + 1] == 0b1000_0000;
                    if is_overlong && !is_encoded_nul {
                        return false;
                    }
                    i += 2;
                }
                3 => {
                    if v[i + 1] & 0b1100_0000 != 0b1000_0000 || v[i + 2] & 0b1100_0000 != 0b1000_0000 {
                        return false;
                    }
                    // 0xE0 followed by a byte below 0xA0 encodes a value below
                    // U+0800, which is overlong.
                    if b1 & 0b0000_1111 == 0 && v[i + 1] & 0b0010_0000 == 0 {
                        return false;
                    }
                    i += 3;
                }
                _ => return false,
            }
        } else {
            if !CONSTANTS_LARGE_ENOUGH || align_offset == usize::MAX || align_offset.wrapping_sub(i) % block_size != 0 {
                // Slow path: a single ASCII byte.
                if b1 == 0 {
                    return false;
                }
                i += 1;
            } else {
                // Fast path: skip two machine words of non-NUL ASCII per iteration.
                while i + block_size < v.len() {
                    // SAFETY: `i` differs from `align_offset` by a multiple of
                    // `block_size`, which is a multiple of `size_of::<usize>()`,
                    // so `ptr` is aligned for `usize`. The loop condition keeps
                    // the `block_size` bytes read here inside `v`.
                    unsafe {
                        let ptr = v.as_ptr().add(i).cast::<usize>();
                        if is_block_non_ascii!(*ptr) || is_block_non_ascii!(*ptr.offset(1)) {
                            break;
                        }
                    }
                    i += block_size;
                }
                // Finish the ASCII run byte by byte, up to the first non-ASCII byte.
                while i < v.len() && v[i] < 0x80 {
                    if v[i] == 0 {
                        return false;
                    }
                    i += 1;
                }
            }
        }
    }
    true
}
/// A borrowed, unsized modified UTF-8 (MUTF-8) string slice.
///
/// The derived comparison and hashing traits operate on the raw bytes.
/// `#[repr(transparent)]` gives the type the same layout as `[u8]`, which the
/// unchecked constructors rely on when converting `&[u8]` into `&MStr`.
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct MStr {
// The encoded bytes. The checked constructors only accept input for which
// `is_mutf8_valid` returns `true`.
inner: [u8],
}
impl MStr {
    /// Validates `v` as modified UTF-8 and borrows it as an [`MStr`].
    ///
    /// # Errors
    ///
    /// Returns a [`DecodeError`] of kind `InvalidMutf8` when validation fails.
    pub fn from_mutf8(v: &[u8]) -> Result<&MStr, DecodeError> {
        if !is_mutf8_valid(v) {
            return Err(DecodeError::new(DecodeErrorKind::InvalidMutf8));
        }
        // SAFETY: `v` has just been validated.
        Ok(unsafe { MStr::from_mutf8_unchecked(v) })
    }

    /// Mutable counterpart of [`MStr::from_mutf8`].
    ///
    /// # Errors
    ///
    /// Returns a [`DecodeError`] of kind `InvalidMutf8` when validation fails.
    pub fn from_mutf8_mut(v: &mut [u8]) -> Result<&mut MStr, DecodeError> {
        if !is_mutf8_valid(v) {
            return Err(DecodeError::new(DecodeErrorKind::InvalidMutf8));
        }
        // SAFETY: `v` has just been validated.
        Ok(unsafe { MStr::from_mutf8_unchecked_mut(v) })
    }

    /// Reinterprets `v` as an [`MStr`] without validating it.
    ///
    /// # Safety
    ///
    /// `v` must be valid modified UTF-8; the decoding routines in this module
    /// assume it and use unchecked conversions.
    #[must_use]
    pub const unsafe fn from_mutf8_unchecked(v: &[u8]) -> &MStr {
        // SAFETY: `MStr` is `#[repr(transparent)]` over `[u8]`, so both
        // references have the same layout.
        unsafe { std::mem::transmute::<&[u8], &MStr>(v) }
    }

    /// Mutable counterpart of [`MStr::from_mutf8_unchecked`].
    ///
    /// # Safety
    ///
    /// `v` must be valid modified UTF-8.
    #[must_use]
    pub unsafe fn from_mutf8_unchecked_mut(v: &mut [u8]) -> &mut MStr {
        // SAFETY: `MStr` is `#[repr(transparent)]` over `[u8]`; the pointer
        // comes from a live exclusive reference.
        unsafe { &mut *(v as *mut [u8] as *mut MStr) }
    }

    /// Length of the string in bytes (not characters).
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Returns `true` when the string contains no bytes.
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// The underlying modified UTF-8 bytes.
    #[inline]
    #[must_use]
    pub fn as_bytes(&self) -> &[u8] {
        &self.inner
    }

    /// Borrows the string as `&str` if its bytes also happen to be valid UTF-8,
    /// otherwise returns `None`.
    #[inline]
    #[must_use]
    pub fn to_str(&self) -> Option<&str> {
        match str::from_utf8(&self.inner) {
            Ok(utf8) => Some(utf8),
            Err(_) => None,
        }
    }

    /// Returns `true` if `index` is `0`, the length of the string, or the
    /// position of a byte that is not a continuation byte (`10xxxxxx`).
    /// Indices past the end yield `false`.
    #[inline]
    #[must_use]
    pub fn is_char_boundary(&self, index: usize) -> bool {
        match self.as_bytes().get(index) {
            _ if index == 0 => true,
            Some(&byte) => byte & 0b1100_0000 != 0b1000_0000,
            // `get` failed, so `index >= len`; only the end itself is a boundary.
            None => index == self.len(),
        }
    }

    /// Iterates over the characters; unpaired surrogates are yielded as
    /// `Err(code_unit)`.
    #[inline]
    #[must_use]
    pub fn chars(&self) -> Chars<'_> {
        let inner = &self.inner;
        Chars { inner }
    }

    /// Iterates over the characters, substituting U+FFFD for unpaired surrogates.
    #[inline]
    #[must_use]
    pub fn chars_lossy(&self) -> CharsLossy<'_> {
        let inner = &self.inner;
        CharsLossy { inner }
    }

    /// Returns an adapter that implements [`fmt::Display`] for this string.
    #[inline]
    #[must_use]
    pub fn display(&self) -> Display<'_> {
        let inner = &self.inner;
        Display { inner }
    }
}
/// The default `&MStr` is the empty string.
impl Default for &'static MStr {
fn default() -> &'static MStr {
// SAFETY: an empty slice contains no invalid sequence.
unsafe { MStr::from_mutf8_unchecked(&[]) }
}
}
/// The default `&mut MStr` is the empty string.
impl Default for &'static mut MStr {
fn default() -> &'static mut MStr {
// SAFETY: an empty slice contains no invalid sequence.
unsafe { MStr::from_mutf8_unchecked_mut(&mut []) }
}
}
/// `s[..]` yields the whole string unchanged.
impl ops::Index<ops::RangeFull> for MStr {
type Output = MStr;
#[inline]
fn index(&self, _: ops::RangeFull) -> &MStr {
self
}
}
impl ops::Index<ops::Range<usize>> for MStr {
    type Output = MStr;

    /// Returns the sub-string covering the byte range `index`.
    ///
    /// # Panics
    ///
    /// Panics if the range is reversed or if either end is not a character
    /// boundary (which includes being past the end of the string).
    #[inline]
    fn index(&self, index: ops::Range<usize>) -> &MStr {
        let ordered = index.start <= index.end;
        if !(ordered && self.is_char_boundary(index.start) && self.is_char_boundary(index.end)) {
            panic!("MUtf8 index out of bounds");
        }
        // SAFETY: both ends are character boundaries, hence within `0..=len`,
        // and `start <= end` was checked above.
        unsafe { MStr::from_mutf8_unchecked(self.inner.get_unchecked(index)) }
    }
}
impl ops::Index<ops::RangeInclusive<usize>> for MStr {
    type Output = MStr;

    /// Returns the sub-string covering the inclusive byte range `index`.
    ///
    /// # Panics
    ///
    /// Panics if the end is `usize::MAX`, or under the same conditions as
    /// indexing with the equivalent half-open range.
    #[inline]
    fn index(&self, index: ops::RangeInclusive<usize>) -> &MStr {
        let (first, last) = (*index.start(), *index.end());
        // `last + 1` only overflows when `last` is `usize::MAX`.
        match last.checked_add(1) {
            Some(past_last) => &self[first..past_last],
            None => panic!("cannot index mutf8 to maximum integer"),
        }
    }
}
impl ops::Index<ops::RangeTo<usize>> for MStr {
    type Output = MStr;

    /// Returns the prefix ending at byte offset `index.end`.
    ///
    /// # Panics
    ///
    /// Panics if `index.end` is not a character boundary.
    #[inline]
    fn index(&self, index: ops::RangeTo<usize>) -> &MStr {
        if !self.is_char_boundary(index.end) {
            panic!("MUtf8 index out of bounds");
        }
        // SAFETY: a character boundary is never past the end of the string.
        unsafe { MStr::from_mutf8_unchecked(self.inner.get_unchecked(index)) }
    }
}
impl ops::Index<ops::RangeToInclusive<usize>> for MStr {
    type Output = MStr;

    /// Returns the prefix up to and including byte offset `index.end`.
    ///
    /// # Panics
    ///
    /// Panics if the end is `usize::MAX`, or under the same conditions as
    /// indexing with the equivalent half-open range.
    #[inline]
    fn index(&self, index: ops::RangeToInclusive<usize>) -> &MStr {
        // `end + 1` only overflows when `end` is `usize::MAX`.
        match index.end.checked_add(1) {
            Some(past_last) => &self[..past_last],
            None => panic!("cannot index mutf8 to maximum integer"),
        }
    }
}
impl ops::Index<ops::RangeFrom<usize>> for MStr {
    type Output = MStr;

    /// Returns the suffix starting at byte offset `index.start`.
    ///
    /// # Panics
    ///
    /// Panics if `index.start` is not a character boundary.
    #[inline]
    fn index(&self, index: ops::RangeFrom<usize>) -> &MStr {
        if !self.is_char_boundary(index.start) {
            panic!("MUtf8 index out of bounds");
        }
        // SAFETY: a character boundary is never past the end of the string.
        unsafe { MStr::from_mutf8_unchecked(self.inner.get_unchecked(index)) }
    }
}
/// Compares a borrowed string with an owned one.
impl PartialEq<MString> for MStr {
#[inline]
fn eq(&self, other: &MString) -> bool {
// `**other` dereferences the `MString` to its `MStr`, so this is the
// derived `MStr == MStr` comparison.
*self == **other
}
}
/// Compares an owned string with a borrowed one.
impl PartialEq<MStr> for MString {
#[inline]
fn eq(&self, other: &MStr) -> bool {
// `**self` dereferences the `MString` to its `MStr`, so this is the
// derived `MStr == MStr` comparison.
**self == *other
}
}
impl PartialEq<str> for MStr {
    /// Compares character by character with a UTF-8 string.
    ///
    /// The strings are equal when both yield the same sequence of characters.
    /// An unpaired surrogate in `self` never equals anything in a `str`.
    #[inline]
    fn eq(&self, other: &str) -> bool {
        let mut expected = other.chars();
        for decoded in self.chars() {
            match (decoded, expected.next()) {
                (Ok(l), Some(r)) if l == r => {}
                // Mismatch, unpaired surrogate, or `other` ended first.
                _ => return false,
            }
        }
        // `self` is exhausted; equal only if `other` is exhausted as well.
        expected.next().is_none()
    }
}
/// Mirror of `MStr == str`; delegates with the operands swapped.
impl PartialEq<MStr> for str {
#[inline]
fn eq(&self, other: &MStr) -> bool {
*other == *self
}
}
/// Allows comparing an `MStr` directly with a `&str`, e.g. a string literal.
impl PartialEq<&'_ str> for MStr {
#[inline]
fn eq(&self, other: &&'_ str) -> bool {
// Strips both reference layers and uses the `MStr == str` comparison.
*self == **other
}
}
/// Mirror of `MStr == &str`; delegates with the operands swapped.
impl PartialEq<MStr> for &'_ str {
#[inline]
fn eq(&self, other: &MStr) -> bool {
// Strips both reference layers and uses the `MStr == str` comparison.
*other == **self
}
}
/// Compares a borrowed string with a clone-on-write string.
impl PartialEq<Cow<'_, MStr>> for MStr {
#[inline]
fn eq(&self, other: &Cow<'_, MStr>) -> bool {
// `**other` dereferences the `Cow` to its `MStr`.
*self == **other
}
}
/// Compares a clone-on-write string with a borrowed string.
impl PartialEq<MStr> for Cow<'_, MStr> {
#[inline]
fn eq(&self, other: &MStr) -> bool {
// `**self` dereferences the `Cow` to its `MStr`.
**self == *other
}
}
impl PartialOrd<str> for MStr {
    /// Orders `self` relative to a UTF-8 string by comparing the decoded
    /// characters pairwise; a string that is a strict prefix of the other
    /// orders first. An unpaired surrogate is compared by its numeric value
    /// against the code point on the other side.
    ///
    /// Always returns `Some`.
    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
        let mut theirs = other.chars();
        for ours in self.chars() {
            let Some(r) = theirs.next() else {
                // `other` ran out first, so `self` is the longer string.
                return Some(Ordering::Greater);
            };
            match ours {
                Ok(l) => match l.cmp(&r) {
                    Ordering::Equal => continue,
                    ord => return Some(ord),
                },
                Err(l) => return Some(l.cmp(&(r as u32))),
            }
        }
        let ord = if theirs.next().is_some() {
            Ordering::Less
        } else {
            Ordering::Equal
        };
        Some(ord)
    }
}
impl PartialOrd<MStr> for str {
    /// Orders a UTF-8 string relative to a modified UTF-8 string.
    ///
    /// This is the mirror image of `MStr::partial_cmp(&str)` and always
    /// returns `Some`.
    #[inline]
    fn partial_cmp(&self, other: &MStr) -> Option<Ordering> {
        // `other.partial_cmp(self)` tells how `other` relates to `self`; the
        // caller asked how `self` relates to `other`, so the result has to be
        // reversed (`Less` <-> `Greater`).
        other.partial_cmp(self).map(Ordering::reverse)
    }
}
/// An owned, growable modified UTF-8 (MUTF-8) string.
///
/// Dereferences to [`MStr`]. The derived comparison and hashing traits operate
/// on the raw bytes.
#[derive(Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct MString {
// The encoded bytes. Filled either from validated input (`from_mutf8`) or by
// appending encoded characters (`push`).
buf: Vec<u8>,
}
impl MString {
    /// Creates an empty string without allocating.
    #[inline]
    #[must_use]
    pub fn new() -> MString {
        let buf = Vec::new();
        MString { buf }
    }

    /// Creates an empty string with room for at least `cap` bytes.
    #[inline]
    #[must_use]
    pub fn with_capacity(cap: usize) -> MString {
        let buf = Vec::with_capacity(cap);
        MString { buf }
    }

    /// Takes ownership of `buf` after validating it as modified UTF-8.
    ///
    /// # Errors
    ///
    /// Returns a [`DecodeError`] of kind `InvalidMutf8` when validation fails.
    pub fn from_mutf8(buf: Vec<u8>) -> Result<MString, DecodeError> {
        if !is_mutf8_valid(&buf) {
            return Err(DecodeError::new(DecodeErrorKind::InvalidMutf8));
        }
        Ok(MString { buf })
    }

    /// Appends `ch` in its modified UTF-8 encoding.
    pub fn push(&mut self, ch: char) {
        // Six bytes is the longest encoding (a surrogate pair).
        let mut encoded = [0; 6];
        let written = encode_mutf8_char(ch, &mut encoded);
        self.buf.extend_from_slice(&encoded[..written]);
    }
}
/// Lets an `MString` be used wherever an `&MStr` is expected.
impl Deref for MString {
type Target = MStr;
#[inline]
fn deref(&self) -> &MStr {
// SAFETY: relies on `buf` holding valid modified UTF-8; the constructors and
// `push` visible in this file only ever store validated or freshly encoded bytes.
unsafe { MStr::from_mutf8_unchecked(&self.buf) }
}
}
/// Lets an `MString` be used wherever an `&mut MStr` is expected.
impl DerefMut for MString {
#[inline]
fn deref_mut(&mut self) -> &mut Self::Target {
// SAFETY: relies on `buf` holding valid modified UTF-8; the constructors and
// `push` visible in this file only ever store validated or freshly encoded bytes.
unsafe { MStr::from_mutf8_unchecked_mut(&mut self.buf) }
}
}
/// Borrows the owned string as an `MStr` (via deref coercion).
impl Borrow<MStr> for MString {
#[inline]
fn borrow(&self) -> &MStr {
self
}
}
/// Mutably borrows the owned string as an `MStr`.
impl BorrowMut<MStr> for MString {
fn borrow_mut(&mut self) -> &mut MStr {
// The explicit reborrow goes through `DerefMut`.
&mut *self
}
}
/// Exposes the encoded bytes of the string.
impl AsRef<[u8]> for MString {
fn as_ref(&self) -> &[u8] {
self.as_bytes()
}
}
/// Views the owned string as an `MStr` (via deref coercion).
impl AsRef<MStr> for MString {
fn as_ref(&self) -> &MStr {
self
}
}
impl ToOwned for MStr {
    type Owned = MString;

    /// Copies the bytes into a freshly allocated [`MString`].
    #[inline]
    fn to_owned(&self) -> MString {
        let buf = self.inner.to_vec();
        MString { buf }
    }
}
impl fmt::Debug for MStr {
    /// Writes the string in double quotes. Characters are escaped with
    /// `char::escape_debug`; an unpaired surrogate is written as `\s{hex}`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_char('"')?;
        self.chars().try_for_each(|decoded| match decoded {
            Ok(c) => write!(f, "{}", c.escape_debug()),
            Err(n) => write!(f, "\\s{{{n:x}}}"),
        })?;
        f.write_char('"')
    }
}
/// Formats exactly like the borrowed `MStr`.
impl fmt::Debug for MString {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// `self` coerces to `&MStr` through `Deref`.
MStr::fmt(self, f)
}
}
/// Adapter returned by `MStr::display`; its `fmt::Display` implementation
/// writes the string as regular text.
pub struct Display<'a> {
// Bytes of the `MStr` this adapter was created from.
inner: &'a [u8],
}
impl<'a> fmt::Display for Display<'a> {
    /// Writes the string as regular UTF-8 text.
    ///
    /// Runs of bytes that are identical in UTF-8 and modified UTF-8 are
    /// forwarded in bulk. Sequences that differ are decoded individually: the
    /// two-byte encoding of U+0000, surrogate pairs (written as the character
    /// they encode) and unpaired surrogates (written as U+FFFD).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let bytes = self.inner;
        // `start..i` is the pending run that can be written out verbatim.
        let mut start = 0;
        let mut i = 0;
        while i < bytes.len() {
            // 0xED starts every surrogate encoding. 0xC0 and 0xC1 never occur
            // in UTF-8 (0xC0 0x80 is the MUTF-8 form of U+0000), so a sequence
            // starting with them must not end up inside a `str` either.
            let needs_decoding = matches!(bytes[i], 0b1100_0000 | 0b1100_0001 | 0b1110_1101);
            if !needs_decoding {
                // Advancing one byte at a time is enough: the bytes skipped
                // this way inside a multi-byte character are continuation
                // bytes, which never match the special lead bytes above.
                i += 1;
            } else {
                if i != start {
                    // SAFETY: the run stops in front of a lead byte and
                    // contains none of the sequences that are invalid UTF-8.
                    f.write_str(unsafe { str::from_utf8_unchecked(&bytes[start..i]) })?;
                }
                // SAFETY: `bytes[i..]` starts at a lead byte of a valid
                // modified UTF-8 string.
                let (size, ch) = unsafe { decode_mutf8_char(&bytes[i..]) };
                i += size;
                start = i;
                f.write_char(ch.unwrap_or(char::REPLACEMENT_CHARACTER))?;
            }
        }
        if i != start {
            // SAFETY: same reasoning as for the runs flushed inside the loop.
            f.write_str(unsafe { str::from_utf8_unchecked(&bytes[start..i]) })?;
        }
        Ok(())
    }
}
/// Debug-formats the adapter like the `MStr` it wraps.
impl<'a> fmt::Debug for Display<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// SAFETY: `inner` was taken from an `MStr` in `MStr::display`.
unsafe { MStr::from_mutf8_unchecked(self.inner) }.fmt(f)
}
}
/// Iterator over the characters of an `MStr`, created by `MStr::chars`.
///
/// Yields `Ok(char)` for decodable characters and `Err(u32)` for unpaired
/// surrogates.
pub struct Chars<'a> {
// Bytes that have not been yielded yet.
inner: &'a [u8],
}
impl<'a> Chars<'a> {
/// Returns the part of the string that has not been yielded yet.
#[must_use]
pub fn as_mstr(&self) -> &'a MStr {
// SAFETY: `inner` starts out as the bytes of an `MStr` and is only ever
// shortened by the size of a decoded character at either end.
unsafe { MStr::from_mutf8_unchecked(self.inner) }
}
}
impl<'a> Iterator for Chars<'a> {
    type Item = Result<char, u32>;

    /// Decodes and removes the first character of the remaining bytes.
    fn next(&mut self) -> Option<Self::Item> {
        if self.inner.is_empty() {
            return None;
        }
        // SAFETY: the remaining bytes are non-empty and come from an `MStr`.
        let (consumed, decoded) = unsafe { decode_mutf8_char(self.inner) };
        self.inner = &self.inner[consumed..];
        Some(decoded)
    }
}
impl<'a> DoubleEndedIterator for Chars<'a> {
    /// Decodes and removes the last character of the remaining bytes.
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.inner.is_empty() {
            return None;
        }
        // SAFETY: the remaining bytes are non-empty and come from an `MStr`.
        let (consumed, decoded) = unsafe { decode_mutf8_char_reversed(self.inner) };
        let remaining = self.inner.len() - consumed;
        self.inner = &self.inner[..remaining];
        Some(decoded)
    }
}
/// Shows the not-yet-yielded part of the string as the `remaining` field.
impl<'a> fmt::Debug for Chars<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// SAFETY: `inner` is the unconsumed rest of an `MStr`.
let s = unsafe { MStr::from_mutf8_unchecked(self.inner) };
f.debug_struct("Chars").field("remaining", &s).finish()
}
}
/// Iterator over the characters of an `MStr`, created by `MStr::chars_lossy`.
///
/// Unpaired surrogates are replaced by U+FFFD REPLACEMENT CHARACTER.
pub struct CharsLossy<'a> {
// Bytes that have not been yielded yet.
inner: &'a [u8],
}
impl<'a> CharsLossy<'a> {
/// Returns the part of the string that has not been yielded yet.
#[must_use]
pub fn as_mstr(&self) -> &'a MStr {
// SAFETY: `inner` starts out as the bytes of an `MStr` and is only ever
// shortened by the size of a decoded character at either end.
unsafe { MStr::from_mutf8_unchecked(self.inner) }
}
}
impl<'a> Iterator for CharsLossy<'a> {
    type Item = char;

    /// Decodes and removes the first character of the remaining bytes,
    /// yielding U+FFFD for an unpaired surrogate.
    fn next(&mut self) -> Option<Self::Item> {
        if self.inner.is_empty() {
            return None;
        }
        // SAFETY: the remaining bytes are non-empty and come from an `MStr`.
        let (consumed, decoded) = unsafe { decode_mutf8_char(self.inner) };
        self.inner = &self.inner[consumed..];
        Some(decoded.unwrap_or(char::REPLACEMENT_CHARACTER))
    }
}
impl<'a> DoubleEndedIterator for CharsLossy<'a> {
    /// Decodes and removes the last character of the remaining bytes,
    /// yielding U+FFFD for an unpaired surrogate.
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.inner.is_empty() {
            return None;
        }
        // SAFETY: the remaining bytes are non-empty and come from an `MStr`.
        let (consumed, decoded) = unsafe { decode_mutf8_char_reversed(self.inner) };
        let remaining = self.inner.len() - consumed;
        self.inner = &self.inner[..remaining];
        Some(decoded.unwrap_or(char::REPLACEMENT_CHARACTER))
    }
}
/// Shows the not-yet-yielded part of the string as the `remaining` field.
impl<'a> fmt::Debug for CharsLossy<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// SAFETY: `inner` is the unconsumed rest of an `MStr`.
let s = unsafe { MStr::from_mutf8_unchecked(self.inner) };
f.debug_struct("CharsLossy").field("remaining", &s).finish()
}
}
/// Decodes the first character of `v`.
///
/// Returns the number of bytes consumed (1, 2, 3 or 6) together with either
/// the decoded `char` or, for an unpaired surrogate, its 16-bit value as `Err`.
/// A high surrogate directly followed by a low surrogate is combined into one
/// supplementary character and consumes six bytes.
///
/// # Safety
///
/// `v` must be non-empty and start with a complete, valid modified UTF-8
/// sequence. Slice accesses are bounds-checked (and panic when out of range),
/// but the computed code points are turned into `char` without validation.
unsafe fn decode_mutf8_char(v: &[u8]) -> (usize, Result<char, u32>) {
// One byte: plain ASCII.
if v[0] & 0b1000_0000 == 0b0000_0000 {
return (1, Ok(v[0] as char));
}
// Two bytes: 110xxxxx 10xxxxxx.
if v[0] & 0b1110_0000 == 0b1100_0000 {
let c1 = u32::from(v[0] & 0b0001_1111) << 6;
let c2 = u32::from(v[1] & 0b0011_1111);
let c = unsafe { char::from_u32_unchecked(c1 | c2) };
return (2, Ok(c));
}
// Lead byte 0xED covers U+D000..=U+DFFF and therefore all surrogates.
if v[0] == 0b1110_1101 {
// High surrogate (second byte 1010xxxx) followed by a low surrogate
// (0xED, then 1011xxxx): a six-byte surrogate pair.
if v.len() >= 6 && v[1] & 0b1111_0000 == 0b1010_0000 && v[3] == 0b1110_1101 && v[4] & 0b1111_0000 == 0b1011_0000
{
let c2 = u32::from(v[1] & 0b0000_1111) << 16;
let c3 = u32::from(v[2] & 0b0011_1111) << 10;
let c5 = u32::from(v[4] & 0b0000_1111) << 6;
let c6 = u32::from(v[5] & 0b0011_1111);
let c = unsafe { char::from_u32_unchecked(0x10000 + (c2 | c3 | c5 | c6)) };
return (6, Ok(c));
}
// Any other surrogate (second byte 101xxxxx) is unpaired and cannot be
// represented as a `char`.
if v[1] & 0b1110_0000 == 0b1010_0000 {
let c2 = u32::from(v[1] & 0b0011_1111) << 6;
let c3 = u32::from(v[2] & 0b0011_1111);
return (3, Err(0b1101_0000_0000_0000 | c2 | c3));
}
}
// Ordinary three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx.
let c1 = u32::from(v[0] & 0b0000_1111) << 12;
let c2 = u32::from(v[1] & 0b0011_1111) << 6;
let c3 = u32::from(v[2] & 0b0011_1111);
let c = unsafe { char::from_u32_unchecked(c1 | c2 | c3) };
(3, Ok(c))
}
/// Decodes the last character of `v`.
///
/// Returns the number of bytes consumed from the end (1, 2, 3 or 6) together
/// with either the decoded `char` or, for an unpaired surrogate, its 16-bit
/// value as `Err`. A low surrogate directly preceded by a high surrogate is
/// combined into one supplementary character and consumes six bytes.
///
/// # Safety
///
/// `v` must be non-empty and end with a complete, valid modified UTF-8
/// sequence. Slice accesses are bounds-checked (and panic when out of range),
/// but the computed code points are turned into `char` without validation.
unsafe fn decode_mutf8_char_reversed(v: &[u8]) -> (usize, Result<char, u32>) {
// `b1` is the last byte, `b2` the one before it, and so on.
let b1 = v[v.len() - 1];
// One byte: plain ASCII.
if b1 & 0b1000_0000 == 0b0000_0000 {
return (1, Ok(b1 as char));
}
let b2 = v[v.len() - 2];
// Two bytes: `b2` is a 110xxxxx lead byte.
if b2 & 0b1110_0000 == 0b1100_0000 {
let c1 = u32::from(b2 & 0b0001_1111) << 6;
let c2 = u32::from(b1 & 0b0011_1111);
let c = unsafe { char::from_u32_unchecked(c1 | c2) };
return (2, Ok(c));
}
let b3 = v[v.len() - 3];
// Lead byte 0xED covers U+D000..=U+DFFF and therefore all surrogates.
if b3 == 0b1110_1101 {
if v.len() >= 6 {
let b4 = v[v.len() - 4];
let b5 = v[v.len() - 5];
let b6 = v[v.len() - 6];
// Low surrogate (b3..b1) preceded by a high surrogate (b6..b4): a
// six-byte surrogate pair.
if b2 & 0b1111_0000 == 0b1011_0000 && b5 & 0b1111_0000 == 0b1010_0000 && b6 == 0b1110_1101 {
let c2 = u32::from(b5 & 0b0000_1111) << 16;
let c3 = u32::from(b4 & 0b0011_1111) << 10;
let c5 = u32::from(b2 & 0b0000_1111) << 6;
let c6 = u32::from(b1 & 0b0011_1111);
let c = unsafe { char::from_u32_unchecked(0x10000 + (c2 | c3 | c5 | c6)) };
return (6, Ok(c));
}
}
// Any other surrogate is unpaired and cannot be represented as a `char`.
if b2 & 0b1110_0000 == 0b1010_0000 {
let c2 = u32::from(b2 & 0b0011_1111) << 6;
let c3 = u32::from(b1 & 0b0011_1111);
return (3, Err(0b1101_0000_0000_0000 | c2 | c3));
}
}
// Ordinary three-byte sequence.
let c1 = u32::from(b3 & 0b0000_1111) << 12;
let c2 = u32::from(b2 & 0b0011_1111) << 6;
let c3 = u32::from(b1 & 0b0011_1111);
let c = unsafe { char::from_u32_unchecked(c1 | c2 | c3) };
(3, Ok(c))
}
/// Creates an owned copy of a borrowed string.
impl From<&MStr> for MString {
fn from(s: &MStr) -> MString {
s.to_owned()
}
}
/// Re-encodes a UTF-8 string as modified UTF-8, character by character.
impl From<&str> for MString {
fn from(s: &str) -> MString {
// `s.len()` is only the initial capacity; the buffer grows as needed while
// the characters are pushed.
let mut buf = MString::with_capacity(s.len());
buf.extend(s.chars());
buf
}
}
/// Collects characters into a new modified UTF-8 string.
impl FromIterator<char> for MString {
fn from_iter<I>(iter: I) -> MString
where
I: IntoIterator<Item = char>,
{
let mut buf = MString::new();
buf.extend(iter);
buf
}
}
impl Extend<char> for MString {
    /// Appends every character produced by `iter`.
    fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) {
        let chars = iter.into_iter();
        // Every character takes at least one byte, so the lower bound of the
        // size hint is a safe amount to reserve up front.
        self.buf.reserve(chars.size_hint().0);
        chars.for_each(|ch| self.push(ch));
    }
}
/// Appends the characters of every string slice produced by `iter`.
impl<'a> Extend<&'a str> for MString {
fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) {
// Flattens the slices into one stream of `char`s and reuses `Extend<char>`.
self.extend(iter.into_iter().flat_map(str::chars));
}
}
/// Writes the modified UTF-8 encoding of `ch` to the front of `buf` and
/// returns the number of bytes written (1, 2, 3 or 6).
///
/// U+0000 is encoded as the two bytes `0xC0 0x80`; characters above U+FFFF are
/// encoded as a surrogate pair of two three-byte sequences.
///
/// # Panics
///
/// Panics if `buf` is too short for the encoding; six bytes always suffice.
fn encode_mutf8_char(ch: char, buf: &mut [u8]) -> usize {
    const CONTINUATION: u32 = 0b1000_0000;
    const LOW_SIX: u32 = 0b0011_1111;
    const LOW_FOUR: u32 = 0b0000_1111;
    const SURROGATE_LEAD: u8 = 0b1110_1101;

    let code = ch as u32;
    if (0x01..=0x7F).contains(&code) {
        buf[0] = code as u8;
        1
    } else if code <= 0x7FF {
        // Also reached for U+0000, which must not be stored as a raw zero byte.
        buf[0] = (0b1100_0000 | (code >> 6)) as u8;
        buf[1] = (CONTINUATION | (code & LOW_SIX)) as u8;
        2
    } else if code <= 0xFFFF {
        buf[0] = (0b1110_0000 | (code >> 12)) as u8;
        buf[1] = (CONTINUATION | ((code >> 6) & LOW_SIX)) as u8;
        buf[2] = (CONTINUATION | (code & LOW_SIX)) as u8;
        3
    } else {
        // Supplementary plane: 20 payload bits split over a high and a low
        // surrogate, each written as a three-byte sequence.
        let payload = code - 0x10000;
        buf[0] = SURROGATE_LEAD;
        buf[1] = (0b1010_0000 | ((payload >> 16) & LOW_FOUR)) as u8;
        buf[2] = (CONTINUATION | ((payload >> 10) & LOW_SIX)) as u8;
        buf[3] = SURROGATE_LEAD;
        buf[4] = (0b1011_0000 | ((payload >> 6) & LOW_FOUR)) as u8;
        buf[5] = (CONTINUATION | (payload & LOW_SIX)) as u8;
        6
    }
}
// Wrapper around the literal passed to `mutf8!`. The inherent impls for
// `&'static str` and `&'static [u8; N]` let the macro tell the two literal
// kinds apart and obtain the literal's bytes in a const context.
#[doc(hidden)]
#[allow(non_camel_case_types, missing_debug_implementations)]
pub struct __private_MUtf8Literal<T>(pub T);
impl __private_MUtf8Literal<&'static str> {
/// A string literal: its bytes are UTF-8 and need converting.
pub const fn is_str(self) -> bool {
true
}
/// The UTF-8 bytes of the string literal.
pub const fn as_bytes(self) -> &'static [u8] {
self.0.as_bytes()
}
}
impl<const N: usize> __private_MUtf8Literal<&'static [u8; N]> {
/// A byte-string literal: `mutf8!` validates its bytes and uses them as they are.
pub const fn is_str(self) -> bool {
false
}
/// The bytes of the byte-string literal.
pub const fn as_bytes(self) -> &'static [u8] {
self.0
}
}
/// Const-evaluable validator for modified UTF-8, used by `mutf8!` on
/// byte-string literals.
///
/// Accepts single bytes `0x01..=0x7F`, non-overlong two- and three-byte
/// sequences (including those that encode surrogates) and `0xC0 0x80` as the
/// encoding of U+0000. Everything else is rejected, including raw `0x00`
/// bytes, four-byte sequences and truncated sequences.
#[doc(hidden)]
pub const fn __private_is_mutf8_valid(v: &[u8]) -> bool {
    let mut i = 0;
    while i < v.len() {
        let b1 = v[i];
        if b1 == 0 {
            return false;
        } else if b1 < 0x80 {
            i += 1;
        } else {
            let width = if b1 & 0b1111_0000 == 0b1110_0000 {
                3
            } else if b1 & 0b1110_0000 == 0b1100_0000 {
                2
            } else {
                // Continuation byte in lead position, or a 4+ byte lead.
                return false;
            };
            if v.len() < i + width {
                return false;
            }
            match width {
                2 => {
                    if v[i + 1] & 0b1100_0000 != 0b1000_0000 {
                        return false;
                    }
                    // Lead bytes 0xC0 and 0xC1 only produce overlong encodings.
                    // The one overlong form MUTF-8 permits is 0xC0 0x80 (U+0000);
                    // every other combination must be rejected.
                    let is_overlong = b1 & 0b0001_1110 == 0;
                    let is_encoded_nul = b1 == 0b1100_0000 && v[i + 1] == 0b1000_0000;
                    if is_overlong && !is_encoded_nul {
                        return false;
                    }
                    i += 2;
                }
                3 => {
                    if v[i + 1] & 0b1100_0000 != 0b1000_0000 || v[i + 2] & 0b1100_0000 != 0b1000_0000 {
                        return false;
                    }
                    // 0xE0 followed by a byte below 0xA0 encodes a value below
                    // U+0800, which is overlong.
                    if b1 & 0b0000_1111 == 0 && v[i + 1] & 0b0010_0000 == 0 {
                        return false;
                    }
                    i += 3;
                }
                _ => return false,
            }
        }
    }
    true
}
/// Computes how many bytes the UTF-8 input `v` occupies once converted to
/// modified UTF-8: NUL grows from one to two bytes, four-byte sequences grow
/// to six bytes, everything else keeps its length.
///
/// # Panics
///
/// Panics with "can't have invalid utf-8 here" on a byte that cannot start a
/// UTF-8 sequence.
#[doc(hidden)]
pub const fn __private_utf8_to_mutf8_length(v: &[u8]) -> usize {
    let mut pos = 0;
    let mut total = 0;
    while pos < v.len() {
        // (bytes consumed from the input, bytes needed in the output)
        let (consumed, produced) = match v[pos] {
            0b0000_0000 => (1, 2),
            0b0000_0001..=0b0111_1111 => (1, 1),
            0b1100_0000..=0b1101_1111 => (2, 2),
            0b1110_0000..=0b1110_1111 => (3, 3),
            0b1111_0000..=0b1111_0111 => (4, 6),
            _ => panic!("can't have invalid utf-8 here"),
        };
        pos += consumed;
        total += produced;
    }
    total
}
/// Converts the UTF-8 input `v` into modified UTF-8 at compile time.
///
/// `N` must be the converted length, as computed by
/// `__private_utf8_to_mutf8_length(v)`. NUL becomes `0xC0 0x80`, four-byte
/// sequences become a surrogate pair of two three-byte sequences, everything
/// else is copied unchanged.
///
/// # Panics
///
/// Panics with "can't have invalid utf-8 here" on a byte that cannot start a
/// UTF-8 sequence, and on out-of-bounds access if `v` is truncated or `N` is
/// too small.
#[doc(hidden)]
pub const fn __private_utf8_to_mutf8<const N: usize>(v: &[u8]) -> [u8; N] {
    let mut out = [0; N];
    let mut i = 0;
    let mut m = 0;
    while i < v.len() {
        match v[i] {
            0b0000_0000 => {
                out[m] = 0b1100_0000;
                out[m + 1] = 0b1000_0000;
                i += 1;
                m += 2;
            }
            0b0000_0001..=0b0111_1111 => {
                out[m] = v[i];
                i += 1;
                m += 1;
            }
            0b1100_0000..=0b1101_1111 => {
                out[m] = v[i];
                out[m + 1] = v[i + 1];
                i += 2;
                m += 2;
            }
            0b1110_0000..=0b1110_1111 => {
                out[m] = v[i];
                out[m + 1] = v[i + 1];
                out[m + 2] = v[i + 2];
                i += 3;
                m += 3;
            }
            0b1111_0000..=0b1111_0111 => {
                // UTF-8:     11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
                // High half: 11101101 1010wwww 10zzzzyy   (wwww = uuuuu - 1)
                // Low half:  11101101 1011yyyy 10xxxxxx
                //
                // `uuuuu` is the plane number (bits 16..=20 of the code point).
                // Subtracting 0x10000 from the code point, as the surrogate
                // encoding requires, is the same as subtracting one from the
                // plane. Valid UTF-8 guarantees a plane in 1..=16, so the
                // result fits into four bits.
                let plane = ((v[i] & 0b0000_0111) << 2) | ((v[i + 1] & 0b0011_0000) >> 4);
                out[m] = 0b1110_1101;
                out[m + 1] = 0b1010_0000 | (plane - 1);
                out[m + 2] = 0b1000_0000 | ((v[i + 1] & 0b0000_1111) << 2) | ((v[i + 2] & 0b0011_0000) >> 4);
                out[m + 3] = 0b1110_1101;
                out[m + 4] = 0b1011_0000 | (v[i + 2] & 0b0000_1111);
                out[m + 5] = v[i + 3];
                i += 4;
                m += 6;
            }
            _ => panic!("can't have invalid utf-8 here"),
        }
    }
    out
}
/// Creates a `&'static MStr` from a literal at compile time.
///
/// A string literal is converted from UTF-8 to modified UTF-8. A byte-string
/// literal is expected to already be modified UTF-8; it is validated, and
/// evaluation panics with "literal is not a valid modified UTF-8 string." if
/// it is not.
#[macro_export]
macro_rules! mutf8 {
($s:literal) => {
const {
const BYTES: &[u8] = $crate::mutf8::__private_MUtf8Literal($s).as_bytes();
if $crate::mutf8::__private_MUtf8Literal($s).is_str() {
// String literal: convert into an array whose length is the converted size.
let s = const {
&$crate::mutf8::__private_utf8_to_mutf8::<{ $crate::mutf8::__private_utf8_to_mutf8_length(BYTES) }>(
BYTES,
)
};
unsafe { $crate::mutf8::MStr::from_mutf8_unchecked(s) }
} else {
// Byte-string literal: validate, then use the bytes unchanged.
if !$crate::mutf8::__private_is_mutf8_valid(BYTES) {
panic!("literal is not a valid modified UTF-8 string.");
}
unsafe { $crate::mutf8::MStr::from_mutf8_unchecked(BYTES) }
}
}
};
}
#[cfg(test)]
mod tests {
use super::*;
// ASCII, two- and three-byte text, a surrogate pair and unpaired surrogates
// are all accepted by the validator.
#[test]
fn valid_mutf8() {
assert!(is_mutf8_valid(b"Hello World"));
assert!(is_mutf8_valid("Ich grüße die Welt".as_bytes()));
assert!(is_mutf8_valid("你好,世界".as_bytes()));
assert!(is_mutf8_valid(&[0xED, 0xA0, 0xBD, 0xED, 0xB0, 0x96]));
assert!(is_mutf8_valid(&[0xED, 0xBB, 0x8B]));
assert!(is_mutf8_valid(&[0xED, 0xA7, 0xAB]));
assert!(is_mutf8_valid(&[0xED, 0xAD, 0x9C, 0x26, 0x0A, 0x0A]));
}
// An invalid lead byte, a raw NUL byte and a truncated three-byte sequence
// are rejected.
#[test]
fn invalid_mutf8() {
assert!(!is_mutf8_valid(&[0xFF]));
assert!(!is_mutf8_valid(&[0x00]));
assert!(!is_mutf8_valid(&[0xED, 0xAD, 0xBD, 0xED, 0x25]));
}
// `display()` round-trips text and replaces an unpaired surrogate with U+FFFD.
#[test]
fn display() {
assert_eq!(mutf8!("Ich grüße die Welt").display().to_string(), "Ich grüße die Welt");
assert_eq!(mutf8!("Hello 🦀").display().to_string(), "Hello 🦀");
assert_eq!(mutf8!(b"Test \xED\xBB\x8B.").display().to_string(), "Test \u{FFFD}.");
}
// A lone surrogate pair decodes to the same character from both ends.
#[test]
fn iterate() {
let s = MStr::from_mutf8(&[0xED, 0xA0, 0xBD, 0xED, 0xB0, 0x96]).unwrap();
assert_eq!(s.chars().next_back(), s.chars().next());
}
// The compile-time conversion in `mutf8!` agrees with the runtime encoder.
#[test]
fn valid_mutf8_macro() {
assert_eq!(mutf8!("Hello World").to_str().unwrap(), "Hello World");
assert_eq!(mutf8!("Ich grüße die Welt").to_str().unwrap(), "Ich grüße die Welt");
assert_eq!(mutf8!("Hello 🦀").display().to_string(), "Hello 🦀");
assert!(is_mutf8_valid(
mutf8!(b"\xED\xA0\xBD\xED\xB0\x96 \xED\xBB\x8B \xED\xA7\xAB \xED\xAD\x9C \x26\x0A\x0A").as_bytes()
));
assert_eq!(
mutf8!("Ich grüße die 🦀.").as_bytes(),
MString::from("Ich grüße die 🦀.").as_bytes()
);
assert_eq!(
mutf8!("这里有一些三字节的案例").as_bytes(),
MString::from("这里有一些三字节的案例").as_bytes()
);
}
}