use crate::{
unicode::{BOM, WJ},
TextReader, TextWriter,
};
use layered_io::{LayeredReader, LayeredWriter};
#[cfg(try_reserve)]
use std::collections::TryReserveError;
#[cfg(pattern)]
use std::str::{
pattern::{Pattern, ReverseSearcher},
MatchIndices, Matches, RMatchIndices, RMatches,
};
use std::{
borrow::{Borrow, BorrowMut, Cow},
cmp::Ordering,
error::Error,
ffi::OsStr,
fmt::{self, Debug, Display, Formatter},
hash::Hash,
io::{self, Read},
net::{SocketAddr, ToSocketAddrs},
ops::{Add, AddAssign, Deref, DerefMut},
path::Path,
str::{self, Bytes, CharIndices, Chars, EncodeUtf16, FromStr, Lines, Utf8Error},
string::FromUtf8Error,
vec,
};
use utf8_io::{Utf8Reader, Utf8Writer, WriteStr};
/// An owned, growable text string.
///
/// This is a `#[repr(transparent)]` newtype around [`String`]. Values are
/// created either through the checked constructors (such as
/// `TextString::from_text`) or through the `unsafe` `*_unchecked`
/// constructors, where the caller vouches for the contents.
#[derive(Debug, Hash, Eq, PartialEq)]
#[repr(transparent)]
pub struct TextString(String);
/// A borrowed text string slice; the unsized counterpart of `TextString`.
///
/// This is a `#[repr(transparent)]` newtype around [`str`], which is what
/// allows the pointer casts between `&str` and `&TextStr` used by the
/// `*_unchecked` constructors.
#[derive(Debug, Hash, Eq, PartialEq)]
#[repr(transparent)]
pub struct TextStr(str);
/// Error returned when a string or byte sequence is rejected as text.
///
/// The type currently has no fields, so it carries no detail about what was
/// wrong with the input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TextError {}
/// Error returned by the owned conversions into `TextString`.
///
/// Unlike `TextError`, it also keeps the bytes of the rejected input so the
/// caller can recover them (see `FromTextError::into_bytes`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FromTextError {
    /// The bytes of the input that failed to convert.
    bytes: Vec<u8>,
    /// The underlying validation error.
    error: TextError,
}
impl TextString {
    /// Creates a new, empty `TextString`.
    #[inline]
    pub const fn new() -> Self {
        Self(String::new())
    }

    /// Creates a new, empty `TextString` whose backing `String` is built with
    /// `String::with_capacity(capacity)`.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        Self(String::with_capacity(capacity))
    }

    /// Converts a vector of bytes into a `TextString`.
    ///
    /// # Errors
    ///
    /// Returns a `FromTextError` if `vec` is not valid UTF-8, or if
    /// `TextString::from_text` rejects the decoded string.
    #[inline]
    pub fn from_text_vec(vec: Vec<u8>) -> Result<Self, FromTextError> {
        let utf8 = String::from_utf8(vec)?;
        Self::from_text(utf8)
    }

    /// Converts a `String` into a `TextString` by writing it through a
    /// `TextWriter` and keeping the bytes the writer produced.
    ///
    /// The result holds the writer's output, which is not required to be
    /// byte-identical to `s`; `TextStr::from_text` relies on this by comparing
    /// the two.
    ///
    /// # Errors
    ///
    /// Returns a `FromTextError` holding the bytes of `s` if the writer
    /// reports an error.
    ///
    /// # Panics
    ///
    /// Panics if unwrapping the result of the final `abandon_into_inner()`
    /// call fails.
    #[inline]
    pub fn from_text(s: String) -> Result<Self, FromTextError> {
        let sink: Vec<u8> = Vec::new();
        let mut writer = TextWriter::new(Utf8Writer::new(LayeredWriter::new(sink)));

        if writer.write_str(&s).is_err() {
            return Err(FromTextError {
                bytes: s.into_bytes(),
                error: TextError {},
            });
        }

        // Peel the three writer layers back off to recover the byte buffer.
        let utf8_writer = writer.abandon_into_inner();
        let layered_writer = utf8_writer.abandon_into_inner();
        let written = layered_writer.abandon_into_inner().unwrap();

        // SAFETY: the bytes were emitted by `TextWriter`, which is relied on
        // to produce only valid text.
        // NOTE(review): confirm against `TextWriter`'s documented contract.
        Ok(unsafe { Self::from_text_vec_unchecked(written) })
    }

    /// Converts a slice of bytes into text, lossily.
    ///
    /// The bytes are first decoded with `String::from_utf8_lossy` and then
    /// passed to `TextString::from_text_lossy`. The result is always
    /// `Cow::Owned`.
    #[inline]
    pub fn from_text_bytes_lossy(v: &[u8]) -> Cow<TextStr> {
        let utf8 = String::from_utf8_lossy(v);
        let text = Self::from_text_lossy(&utf8).into_owned();
        Cow::Owned(text)
    }

    /// Converts a string into text, lossily, by reading it through a
    /// `TextReader`.
    ///
    /// Two adjustments are applied to the reader's output:
    ///
    /// - if `v` starts with `BOM`, a `WJ` is inserted at the front;
    /// - if the remaining input is non-empty and does not end in `'\n'` or
    ///   `'\r'`, the final character of the output is removed.
    ///
    /// The result is always `Cow::Owned`.
    ///
    /// # Panics
    ///
    /// Panics if reading fails, or if the character removed in the second
    /// adjustment is not `'\n'`.
    #[inline]
    pub fn from_text_lossy(mut v: &str) -> Cow<TextStr> {
        let source = LayeredReader::new(v.as_bytes());
        let mut reader = TextReader::new(Utf8Reader::new(source));
        let mut text = String::new();
        reader.read_to_string(&mut text).unwrap();

        // NOTE(review): presumably `TextReader` drops a leading BOM, and this
        // puts a WJ in its place — confirm against `TextReader`.
        if let Some(after_bom) = v.strip_prefix(BOM) {
            text.insert(0, WJ);
            v = after_bom;
        }

        // NOTE(review): presumably `TextReader` appends a newline to input
        // that lacks one, and this removes it again — the assertion below
        // checks that the removed character really is '\n'.
        let ends_in_line_break = v.ends_with(|c| matches!(c, '\n' | '\r'));
        if !(v.is_empty() || ends_in_line_break) {
            assert_eq!(text.pop().unwrap(), '\n');
        }

        // SAFETY: `text` was produced by `TextReader` and then adjusted as
        // described above.
        // NOTE(review): confirm the adjustments keep the contents valid text.
        Cow::Owned(unsafe { Self::from_text_unchecked(text) })
    }

    /// Converts a vector of bytes into a `TextString` without any checks.
    ///
    /// # Safety
    ///
    /// The caller must ensure `vec` holds valid UTF-8 that is also valid
    /// text; nothing is validated here.
    #[inline]
    pub unsafe fn from_text_vec_unchecked(vec: Vec<u8>) -> Self {
        Self::from_text_unchecked(String::from_utf8_unchecked(vec))
    }

    /// Wraps a `String` as a `TextString` without any checks.
    ///
    /// # Safety
    ///
    /// The caller must ensure `s` is valid text; nothing is validated here.
    #[inline]
    pub const unsafe fn from_text_unchecked(s: String) -> Self {
        Self(s)
    }

    /// Consumes the `TextString` and returns the underlying `String`.
    #[inline]
    pub fn into_utf8(self) -> String {
        self.0
    }

    /// Consumes the `TextString` and returns its bytes.
    #[inline]
    pub fn into_bytes(self) -> Vec<u8> {
        self.0.into_bytes()
    }

    /// Borrows the contents as a plain `&str`.
    #[inline]
    pub fn as_utf8(&self) -> &str {
        &self.0
    }

    /// Borrows the contents as a `&TextStr`.
    #[inline]
    pub fn as_str(&self) -> &TextStr {
        self
    }

    /// Borrows the contents as a plain `&mut str`.
    #[inline]
    pub fn as_mut_utf8(&mut self) -> &mut str {
        &mut self.0
    }

    /// Borrows the contents as a `&mut TextStr`.
    #[inline]
    pub fn as_mut_str(&mut self) -> &mut TextStr {
        self
    }

    /// Appends the text slice `s` to the end of this string.
    #[inline]
    pub fn push_str(&mut self, s: &TextStr) {
        self.0.push_str(&s.0);
    }

    /// Returns the capacity of the underlying `String`, in bytes.
    #[inline]
    pub fn capacity(&self) -> usize {
        self.0.capacity()
    }

    /// Forwards to `String::reserve` on the underlying string.
    #[inline]
    pub fn reserve(&mut self, additional: usize) {
        self.0.reserve(additional)
    }

    /// Forwards to `String::reserve_exact` on the underlying string.
    #[inline]
    pub fn reserve_exact(&mut self, additional: usize) {
        self.0.reserve_exact(additional)
    }

    /// Forwards to `String::try_reserve` on the underlying string.
    ///
    /// # Errors
    ///
    /// Returns the `TryReserveError` reported by `String::try_reserve`.
    #[cfg(try_reserve)]
    #[inline]
    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
        self.0.try_reserve(additional)
    }

    /// Forwards to `String::try_reserve_exact` on the underlying string.
    ///
    /// # Errors
    ///
    /// Returns the `TryReserveError` reported by
    /// `String::try_reserve_exact`.
    #[cfg(try_reserve)]
    #[inline]
    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
        self.0.try_reserve_exact(additional)
    }

    /// Forwards to `String::shrink_to_fit` on the underlying string.
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        self.0.shrink_to_fit()
    }

    /// Forwards to `String::shrink_to` on the underlying string.
    #[cfg(shrink_to)]
    #[inline]
    pub fn shrink_to(&mut self, min_capacity: usize) {
        self.0.shrink_to(min_capacity)
    }

    /// Returns the contents as a byte slice.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        self.0.as_bytes()
    }

    /// Returns a mutable reference to the underlying byte vector.
    ///
    /// # Safety
    ///
    /// Same contract as `String::as_mut_vec`: the bytes must be valid UTF-8
    /// when the borrow ends. Presumably they must also remain valid text,
    /// since nothing re-validates them afterwards.
    #[inline]
    pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> {
        self.0.as_mut_vec()
    }

    /// Returns the length of the string in bytes.
    #[inline]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` if the string has a length of zero.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Removes all contents, as `String::clear` does.
    #[inline]
    pub fn clear(&mut self) {
        self.0.clear()
    }

    /// Consumes the `TextString` and returns a boxed plain `str`.
    #[inline]
    pub fn into_boxed_utf8(self) -> Box<str> {
        self.0.into_boxed_str()
    }

    /// Consumes the `TextString` and returns a boxed `TextStr`.
    #[inline]
    pub fn into_boxed_str(self) -> Box<TextStr> {
        let boxed_utf8 = self.0.into_boxed_str();
        // SAFETY: the contents come unchanged from a `TextString`.
        unsafe { TextStr::from_boxed_text_unchecked(boxed_utf8) }
    }
}
impl Default for TextString {
#[inline]
fn default() -> Self {
Self(String::default())
}
}
/// Lets a `TextString` be used wherever a `&TextStr` is expected.
impl Deref for TextString {
    type Target = TextStr;

    #[inline]
    fn deref(&self) -> &Self::Target {
        let utf8 = self.0.as_str();
        // SAFETY: the contents come unchanged from a `TextString`.
        unsafe { TextStr::from_text_unchecked(utf8) }
    }
}
/// Lets a `TextString` be used wherever a `&mut TextStr` is expected.
impl DerefMut for TextString {
    #[inline]
    fn deref_mut(&mut self) -> &mut Self::Target {
        let utf8 = self.0.as_mut_str();
        // SAFETY: the contents come unchanged from a `TextString`.
        unsafe { TextStr::from_text_unchecked_mut(utf8) }
    }
}
/// Borrows a `TextString` as a `TextStr`.
impl Borrow<TextStr> for TextString {
    #[inline]
    fn borrow(&self) -> &TextStr {
        self.as_str()
    }
}
/// Mutably borrows a `TextString` as a `TextStr`.
impl BorrowMut<TextStr> for TextString {
    #[inline]
    fn borrow_mut(&mut self) -> &mut TextStr {
        self.as_mut_str()
    }
}
/// Cheap mutable conversion from `TextString` to `TextStr`.
impl AsMut<TextStr> for TextString {
    #[inline]
    fn as_mut(&mut self) -> &mut TextStr {
        self.as_mut_str()
    }
}
/// `text_string + &text_str` appends the slice to the owned string and
/// returns the owned string.
impl Add<&TextStr> for TextString {
    type Output = Self;

    #[inline]
    fn add(mut self, other: &TextStr) -> Self::Output {
        self.0.push_str(&other.0);
        self
    }
}
/// `text_string += &text_str` appends the slice in place.
impl AddAssign<&TextStr> for TextString {
    #[inline]
    fn add_assign(&mut self, other: &TextStr) {
        self.0.push_str(&other.0)
    }
}
/// Compares a `TextString` with a `&TextStr` by their string contents.
impl<'a> PartialEq<&'a TextStr> for TextString {
    #[inline]
    fn eq(&self, other: &&'a TextStr) -> bool {
        self.0 == other.0
    }
}
/// Compares a `TextString` with a `Cow<TextStr>` by their string contents.
impl<'a> PartialEq<Cow<'a, TextStr>> for TextString {
    #[inline]
    fn eq(&self, other: &Cow<'a, TextStr>) -> bool {
        self.0 == other.0
    }
}
/// Compares a `Cow<TextStr>` with a `TextString` by their string contents.
impl<'a> PartialEq<TextString> for Cow<'a, TextStr> {
    #[inline]
    fn eq(&self, other: &TextString) -> bool {
        self.0 == other.0
    }
}
/// Compares a `TextString` with a plain `str` by their string contents.
// The impl previously declared a lifetime parameter that nothing used; it
// has been dropped, which does not change which types the impl applies to.
impl PartialEq<str> for TextString {
    #[inline]
    fn eq(&self, other: &str) -> bool {
        self.0 == *other
    }
}
/// Compares a `TextString` with a plain `String` by their string contents.
// The impl previously declared a lifetime parameter that nothing used; it
// has been dropped, which does not change which types the impl applies to.
impl PartialEq<String> for TextString {
    #[inline]
    fn eq(&self, other: &String) -> bool {
        self.0 == *other
    }
}
/// Compares a plain `String` with a `TextString` by their string contents.
// The impl previously declared a lifetime parameter that nothing used; it
// has been dropped, which does not change which types the impl applies to.
impl PartialEq<TextString> for String {
    #[inline]
    fn eq(&self, other: &TextString) -> bool {
        *self == other.0
    }
}
/// Compares a `&str` with a `TextString` by their string contents.
impl<'a> PartialEq<TextString> for &'a str {
    #[inline]
    fn eq(&self, other: &TextString) -> bool {
        *self == other.0
    }
}
impl TextStr {
    /// Converts a slice of bytes into a text string slice.
    ///
    /// # Errors
    ///
    /// Returns a `TextError` if `b` is not valid UTF-8 or if
    /// `TextStr::from_text` rejects the decoded string.
    #[inline]
    pub fn from_text_bytes(b: &[u8]) -> Result<&Self, TextError> {
        let utf8 = str::from_utf8(b)?;
        Self::from_text(utf8)
    }

    /// Converts a `&str` into a text string slice.
    ///
    /// The input is copied and run through `TextString::from_text`; it is
    /// accepted only if that conversion succeeds and its output is identical
    /// to `s`.
    ///
    /// # Errors
    ///
    /// Returns a `TextError` if the conversion fails or changes the contents.
    #[inline]
    pub fn from_text(s: &str) -> Result<&Self, TextError> {
        let round_tripped = TextString::from_text(String::from(s)).map_err(|e| e.text_error())?;
        if round_tripped.as_utf8() == s {
            // SAFETY: `s` passed through `TextString::from_text` unchanged.
            Ok(unsafe { Self::from_text_unchecked(s) })
        } else {
            Err(TextError {})
        }
    }

    /// Converts a mutable slice of bytes into a mutable text string slice.
    ///
    /// # Errors
    ///
    /// Returns a `TextError` if `b` is not valid UTF-8 or if
    /// `TextStr::from_text_mut` rejects the decoded string.
    #[inline]
    pub fn from_text_bytes_mut(b: &mut [u8]) -> Result<&mut Self, TextError> {
        let utf8 = str::from_utf8_mut(b)?;
        Self::from_text_mut(utf8)
    }

    /// Converts a `&mut str` into a mutable text string slice.
    ///
    /// Validation is the same as in `TextStr::from_text`.
    ///
    /// # Errors
    ///
    /// Returns a `TextError` if the conversion fails or changes the contents.
    #[inline]
    pub fn from_text_mut(s: &mut str) -> Result<&mut Self, TextError> {
        let round_tripped =
            TextString::from_text(String::from(&*s)).map_err(|e| e.text_error())?;
        if round_tripped.as_utf8() == &*s {
            // SAFETY: `s` passed through `TextString::from_text` unchanged.
            Ok(unsafe { Self::from_text_unchecked_mut(s) })
        } else {
            Err(TextError {})
        }
    }

    /// Converts a slice of bytes into a text string slice without any checks.
    ///
    /// # Safety
    ///
    /// The caller must ensure `b` is valid UTF-8 that is also valid text.
    #[inline]
    pub unsafe fn from_text_bytes_unchecked(b: &[u8]) -> &Self {
        Self::from_text_unchecked(str::from_utf8_unchecked(b))
    }

    /// Reinterprets a `&str` as a `&TextStr` without any checks.
    ///
    /// # Safety
    ///
    /// The caller must ensure `s` is valid text; nothing is validated here.
    #[inline]
    pub unsafe fn from_text_unchecked(s: &str) -> &Self {
        // `TextStr` is `#[repr(transparent)]` over `str`, so the pointer can
        // be cast directly.
        let raw = s as *const str as *const Self;
        &*raw
    }

    /// Converts a mutable slice of bytes into a mutable text string slice
    /// without any checks.
    ///
    /// # Safety
    ///
    /// The caller must ensure `b` is valid UTF-8 that is also valid text.
    #[inline]
    pub unsafe fn from_text_bytes_unchecked_mut(b: &mut [u8]) -> &mut Self {
        Self::from_text_unchecked_mut(str::from_utf8_unchecked_mut(b))
    }

    /// Reinterprets a `&mut str` as a `&mut TextStr` without any checks.
    ///
    /// # Safety
    ///
    /// The caller must ensure `s` is valid text; nothing is validated here.
    #[inline]
    pub unsafe fn from_text_unchecked_mut(s: &mut str) -> &mut Self {
        // `TextStr` is `#[repr(transparent)]` over `str`, so the pointer can
        // be cast directly.
        let raw = s as *mut str as *mut Self;
        &mut *raw
    }

    /// Converts a boxed slice of bytes into a boxed text string slice without
    /// any checks.
    ///
    /// # Safety
    ///
    /// The caller must ensure `v` is valid UTF-8 that is also valid text.
    #[inline]
    pub unsafe fn from_boxed_text_bytes_unchecked(v: Box<[u8]>) -> Box<Self> {
        let raw = Box::into_raw(v) as *mut Self;
        Box::from_raw(raw)
    }

    /// Converts a boxed `str` into a boxed text string slice without any
    /// checks.
    ///
    /// # Safety
    ///
    /// The caller must ensure `v` is valid text; nothing is validated here.
    #[inline]
    pub unsafe fn from_boxed_text_unchecked(v: Box<str>) -> Box<Self> {
        let raw = Box::into_raw(v) as *mut Self;
        Box::from_raw(raw)
    }

    /// Returns the length of the slice in bytes.
    #[inline]
    pub const fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` if the slice has a length of zero.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Returns the contents as a byte slice.
    #[inline]
    pub const fn as_bytes(&self) -> &[u8] {
        self.0.as_bytes()
    }

    /// Returns the contents as a mutable byte slice.
    ///
    /// # Safety
    ///
    /// Same contract as `str::as_bytes_mut`: the bytes must be valid UTF-8
    /// when the borrow ends. Presumably they must also remain valid text,
    /// since nothing re-validates them afterwards.
    #[inline]
    pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
        self.0.as_bytes_mut()
    }

    /// Returns a raw pointer to the first byte of the slice.
    #[inline]
    pub const fn as_ptr(&self) -> *const u8 {
        self.0.as_ptr()
    }

    /// Returns a raw mutable pointer to the first byte of the slice.
    #[inline]
    pub fn as_mut_ptr(&mut self) -> *mut u8 {
        self.0.as_mut_ptr()
    }

    /// Borrows the contents as a plain `&str`.
    #[inline]
    pub fn as_utf8(&self) -> &str {
        &self.0
    }

    /// Splits the slice in two at byte offset `mid`.
    ///
    /// Both halves are re-validated with `TextStr::from_text`.
    ///
    /// # Panics
    ///
    /// Panics if `str::split_at` panics for `mid`, or if either half is
    /// rejected by `TextStr::from_text`.
    #[inline]
    pub fn split_at(&self, mid: usize) -> (&Self, &Self) {
        let (head, tail) = self.0.split_at(mid);
        let head = Self::from_text(head).unwrap();
        let tail = Self::from_text(tail).unwrap();
        (head, tail)
    }

    /// Splits the mutable slice in two at byte offset `mid`.
    ///
    /// Both halves are re-validated with `TextStr::from_text_mut`.
    ///
    /// # Panics
    ///
    /// Panics if `str::split_at_mut` panics for `mid`, or if either half is
    /// rejected by `TextStr::from_text_mut`.
    #[inline]
    pub fn split_at_mut(&mut self, mid: usize) -> (&mut Self, &mut Self) {
        let (head, tail) = self.0.split_at_mut(mid);
        let head = Self::from_text_mut(head).unwrap();
        let tail = Self::from_text_mut(tail).unwrap();
        (head, tail)
    }

    /// Returns an iterator over the `char`s of the slice.
    #[inline]
    pub fn chars(&self) -> Chars {
        self.0.chars()
    }

    /// Returns an iterator over the `char`s of the slice and their byte
    /// positions.
    #[inline]
    pub fn char_indices(&self) -> CharIndices {
        self.0.char_indices()
    }

    /// Returns an iterator over the bytes of the slice.
    #[inline]
    pub fn bytes(&self) -> Bytes {
        self.0.bytes()
    }

    /// Returns an iterator over the lines of the slice, as `str::lines` does.
    #[inline]
    pub fn lines(&self) -> Lines {
        self.0.lines()
    }

    /// Returns an iterator of `u16` over the slice encoded as UTF-16.
    #[inline]
    pub fn encode_utf16(&self) -> EncodeUtf16<'_> {
        self.0.encode_utf16()
    }

    /// Forwards to `str::contains`.
    #[cfg(pattern)]
    #[inline]
    pub fn contains<'a, P>(&'a self, pat: P) -> bool
    where
        P: Pattern<'a>,
    {
        self.0.contains(pat)
    }

    /// Forwards to `str::starts_with`.
    #[cfg(pattern)]
    #[inline]
    pub fn starts_with<'a, P>(&'a self, pat: P) -> bool
    where
        P: Pattern<'a>,
    {
        self.0.starts_with(pat)
    }

    /// Forwards to `str::ends_with`.
    #[cfg(pattern)]
    #[inline]
    pub fn ends_with<'a, P>(&'a self, pat: P) -> bool
    where
        P: Pattern<'a>,
        <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
    {
        self.0.ends_with(pat)
    }

    /// Forwards to `str::find`.
    #[cfg(pattern)]
    #[inline]
    pub fn find<'a, P>(&'a self, pat: P) -> Option<usize>
    where
        P: Pattern<'a>,
    {
        self.0.find(pat)
    }

    /// Forwards to `str::rfind`.
    #[cfg(pattern)]
    #[inline]
    pub fn rfind<'a, P>(&'a self, pat: P) -> Option<usize>
    where
        P: Pattern<'a>,
        <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
    {
        self.0.rfind(pat)
    }

    /// Forwards to `str::matches`.
    #[cfg(pattern)]
    #[inline]
    pub fn matches<'a, P>(&'a self, pat: P) -> Matches<'a, P>
    where
        P: Pattern<'a>,
    {
        self.0.matches(pat)
    }

    /// Forwards to `str::rmatches`.
    #[cfg(pattern)]
    #[inline]
    pub fn rmatches<'a, P>(&'a self, pat: P) -> RMatches<'a, P>
    where
        P: Pattern<'a>,
        <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
    {
        self.0.rmatches(pat)
    }

    /// Forwards to `str::match_indices`.
    #[cfg(pattern)]
    #[inline]
    pub fn match_indices<'a, P>(&'a self, pat: P) -> MatchIndices<'a, P>
    where
        P: Pattern<'a>,
    {
        self.0.match_indices(pat)
    }

    /// Forwards to `str::rmatch_indices`.
    #[cfg(pattern)]
    #[inline]
    pub fn rmatch_indices<'a, P>(&'a self, pat: P) -> RMatchIndices<'a, P>
    where
        P: Pattern<'a>,
        <P as Pattern<'a>>::Searcher: ReverseSearcher<'a>,
    {
        self.0.rmatch_indices(pat)
    }

    /// Returns the slice with leading and trailing whitespace removed, as
    /// `str::trim` does.
    #[inline]
    pub fn trim(&self) -> &Self {
        let trimmed = self.0.trim();
        // SAFETY: `trimmed` is a sub-slice of a `TextStr`.
        // NOTE(review): this assumes trimming whitespace can never leave an
        // invalid text string — confirm.
        unsafe { Self::from_text_unchecked(trimmed) }
    }

    /// Returns the slice with leading whitespace removed, as
    /// `str::trim_start` does.
    #[inline]
    pub fn trim_start(&self) -> &Self {
        let trimmed = self.0.trim_start();
        // SAFETY: `trimmed` is a sub-slice of a `TextStr`.
        // NOTE(review): this assumes trimming whitespace can never leave an
        // invalid text string — confirm.
        unsafe { Self::from_text_unchecked(trimmed) }
    }

    /// Returns the slice with trailing whitespace removed, as
    /// `str::trim_end` does.
    #[inline]
    pub fn trim_end(&self) -> &Self {
        let trimmed = self.0.trim_end();
        // SAFETY: `trimmed` is a sub-slice of a `TextStr`.
        // NOTE(review): this assumes trimming whitespace can never leave an
        // invalid text string — confirm.
        unsafe { Self::from_text_unchecked(trimmed) }
    }

    /// Parses the slice into another type via `FromStr`.
    ///
    /// # Errors
    ///
    /// Returns `F`'s `FromStr::Err` if parsing fails.
    #[inline]
    pub fn parse<F>(&self) -> Result<F, <F as FromStr>::Err>
    where
        F: FromStr,
    {
        self.0.parse()
    }

    /// Returns `true` if every character in the slice is ASCII.
    #[inline]
    pub fn is_ascii(&self) -> bool {
        self.0.is_ascii()
    }

    /// Compares two slices for equality, ignoring ASCII case.
    #[inline]
    pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
        self.0.eq_ignore_ascii_case(&other.0)
    }

    /// Converts a boxed text string slice into a boxed slice of bytes.
    #[inline]
    pub fn into_boxed_bytes(self: Box<Self>) -> Box<[u8]> {
        let utf8: Box<str> = self.into_boxed_str();
        utf8.into_boxed_bytes()
    }

    /// Converts a boxed text string slice into a boxed plain `str`.
    #[inline]
    pub fn into_boxed_str(self: Box<Self>) -> Box<str> {
        Box::<str>::from(self)
    }

    /// Converts a boxed text string slice into a plain `String`.
    #[inline]
    pub fn into_utf8(self: Box<Self>) -> String {
        let bytes = Box::<[u8]>::from(self);
        // SAFETY: the bytes come unchanged from a `TextStr`, which wraps a
        // `str` and is therefore valid UTF-8.
        unsafe { String::from_utf8_unchecked(bytes.into_vec()) }
    }

    /// Converts a boxed text string slice into a `TextString`.
    #[inline]
    pub fn into_string(self: Box<Self>) -> TextString {
        let utf8 = self.into_utf8();
        // SAFETY: the contents come unchanged from a `TextStr`.
        unsafe { TextString::from_text_unchecked(utf8) }
    }
}
/// Views a `TextStr` as its bytes.
impl AsRef<[u8]> for TextStr {
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.0.as_bytes()
    }
}
/// Views a `TextStr` as an `OsStr`.
impl AsRef<OsStr> for TextStr {
    #[inline]
    fn as_ref(&self) -> &OsStr {
        OsStr::new(&self.0)
    }
}
/// Views a `TextStr` as a `Path`.
impl AsRef<Path> for TextStr {
    #[inline]
    fn as_ref(&self) -> &Path {
        Path::new(&self.0)
    }
}
/// Views a `TextStr` as a plain `str`.
impl AsRef<str> for TextStr {
    #[inline]
    fn as_ref(&self) -> &str {
        &self.0
    }
}
/// Identity conversion, so generic code bounded on `AsRef<TextStr>` also
/// accepts a `TextStr` itself.
impl AsRef<TextStr> for TextStr {
    #[inline]
    fn as_ref(&self) -> &Self {
        self
    }
}
/// The default `&TextStr` is the empty slice.
impl Default for &TextStr {
    #[inline]
    fn default() -> Self {
        let empty: &'static str = "";
        // SAFETY: the input is the empty string.
        // NOTE(review): assumes the empty string is valid text — consistent
        // with the `validate_string` test in this file.
        unsafe { TextStr::from_text_unchecked(empty) }
    }
}
/// The default `&mut TextStr` is an empty mutable slice.
impl Default for &mut TextStr {
    #[inline]
    fn default() -> Self {
        // SAFETY: the input is an empty byte slice.
        // NOTE(review): assumes the empty string is valid text — consistent
        // with the `validate_string` test in this file.
        unsafe { TextStr::from_text_bytes_unchecked_mut(&mut []) }
    }
}
/// Formats a `TextStr` exactly as its underlying `str` would be formatted.
impl Display for TextStr {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
        <str as Display>::fmt(&self.0, f)
    }
}
/// Orders `TextStr` values the same way their underlying `str`s are ordered.
impl Ord for TextStr {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        Ord::cmp(&self.0, &other.0)
    }
}
/// Compares a `TextStr` with a `Cow<TextStr>` by their string contents.
impl<'a> PartialEq<Cow<'a, TextStr>> for TextStr {
    #[inline]
    fn eq(&self, other: &Cow<'a, Self>) -> bool {
        self.0 == other.0
    }
}
/// Compares a `&TextStr` with a `Cow<TextStr>` by their string contents.
impl<'a, 'b> PartialEq<Cow<'a, TextStr>> for &'b TextStr {
    #[inline]
    fn eq(&self, other: &Cow<'a, TextStr>) -> bool {
        self.0 == other.0
    }
}
/// Compares a `TextStr` with a plain `str` by their string contents.
// The impl previously declared a lifetime parameter that nothing used; it
// has been dropped, which does not change which types the impl applies to.
impl PartialEq<str> for TextStr {
    #[inline]
    fn eq(&self, other: &str) -> bool {
        self.0 == *other
    }
}
/// Compares a `TextStr` with a plain `String` by their string contents.
// The impl previously declared a lifetime parameter that nothing used; it
// has been dropped, which does not change which types the impl applies to.
impl PartialEq<String> for TextStr {
    #[inline]
    fn eq(&self, other: &String) -> bool {
        self.0 == *other
    }
}
/// Compares a plain `String` with a `TextStr` by their string contents.
// The impl previously declared a lifetime parameter that nothing used; it
// has been dropped, which does not change which types the impl applies to.
impl PartialEq<TextStr> for String {
    #[inline]
    fn eq(&self, other: &TextStr) -> bool {
        *self == other.0
    }
}
/// Partial ordering for `TextStr`, defined in terms of its total ordering.
// Because `TextStr` also implements `Ord`, `partial_cmp` delegates to `cmp`
// so the two orderings cannot drift apart. The result is unchanged, since
// `Ord::cmp` for `TextStr` compares the underlying `str`s.
impl PartialOrd<TextStr> for TextStr {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
/// Copies a `TextStr` into a freshly allocated `TextString`.
impl ToOwned for TextStr {
    type Owned = TextString;

    #[inline]
    fn to_owned(&self) -> Self::Owned {
        TextString(String::from(&self.0))
    }
}
/// Resolves a `TextStr` to socket addresses exactly as its underlying `str`
/// would be resolved.
impl ToSocketAddrs for TextStr {
    type Iter = vec::IntoIter<SocketAddr>;

    #[inline]
    fn to_socket_addrs(&self) -> io::Result<Self::Iter> {
        ToSocketAddrs::to_socket_addrs(&self.0)
    }
}
// `TextError` has no fields, and no inherent methods are defined for it.
impl TextError {
}
impl From<Utf8Error> for TextError {
fn from(_err: Utf8Error) -> Self {
Self {}
}
}
/// Placeholder `Display` implementation: prints a fixed prefix followed by
/// the `Debug` representation of the error.
impl Display for TextError {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
        f.write_fmt(format_args!("TODO: Display for TextError: {:?}", self))
    }
}
// Marks `TextError` as a standard error type; every `Error` method keeps its
// default implementation.
impl Error for TextError {}
impl FromTextError {
    /// Returns the bytes stored in this error.
    ///
    /// For errors produced by `TextString::from_text` or converted from a
    /// `FromUtf8Error`, these are the bytes of the rejected input.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        self.bytes.as_slice()
    }

    /// Consumes the error and returns the bytes stored in it.
    #[inline]
    pub fn into_bytes(self) -> Vec<u8> {
        let Self { bytes, .. } = self;
        bytes
    }

    /// Returns a copy of the underlying `TextError`.
    #[inline]
    pub fn text_error(&self) -> TextError {
        self.error
    }
}
impl From<FromUtf8Error> for FromTextError {
#[inline]
fn from(err: FromUtf8Error) -> Self {
let error = err.utf8_error().into();
let bytes = err.into_bytes();
Self { bytes, error }
}
}
/// Placeholder `Display` implementation: prints a fixed prefix followed by
/// the `Debug` representation of the error.
impl Display for FromTextError {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), fmt::Error> {
        f.write_fmt(format_args!("TODO: Display for FromTextError: {:?}", self))
    }
}
// Marks `FromTextError` as a standard error type; every `Error` method keeps
// its default implementation.
impl Error for FromTextError {}
/// Converts a boxed `TextStr` into a boxed byte slice, reusing the
/// allocation.
impl From<Box<TextStr>> for Box<[u8]> {
    #[inline]
    fn from(s: Box<TextStr>) -> Self {
        let raw = Box::into_raw(s) as *mut [u8];
        // SAFETY: `raw` comes straight from `Box::into_raw`, and `TextStr`
        // is `#[repr(transparent)]` over `str`, whose representation is a
        // byte slice.
        unsafe { Box::from_raw(raw) }
    }
}
/// Converts a boxed `TextStr` into a boxed plain `str`, reusing the
/// allocation.
impl From<Box<TextStr>> for Box<str> {
    #[inline]
    fn from(s: Box<TextStr>) -> Self {
        let raw = Box::into_raw(s) as *mut str;
        // SAFETY: `raw` comes straight from `Box::into_raw`, and `TextStr`
        // is `#[repr(transparent)]` over `str`.
        unsafe { Box::from_raw(raw) }
    }
}
impl From<Box<TextStr>> for TextString {
#[inline]
fn from(s: Box<TextStr>) -> Self {
s.into_string()
}
}
/// Converts a `Cow<TextStr>` into a boxed `TextStr`.
///
/// A borrowed value is copied into a new box; an owned value is converted
/// with `TextString::into_boxed_str`.
impl From<Cow<'_, TextStr>> for Box<TextStr> {
    #[inline]
    fn from(cow: Cow<'_, TextStr>) -> Self {
        match cow {
            Cow::Owned(owned) => owned.into_boxed_str(),
            Cow::Borrowed(borrowed) => Box::<TextStr>::from(borrowed),
        }
    }
}
impl From<TextString> for Box<TextStr> {
#[inline]
fn from(s: TextString) -> Self {
s.into_boxed_str()
}
}
/// Clones a boxed `TextStr` by copying its bytes into a new allocation.
impl Clone for Box<TextStr> {
    #[inline]
    fn clone(&self) -> Self {
        let copy = Box::<[u8]>::from(self.as_bytes());
        // SAFETY: the bytes are an unmodified copy of an existing `TextStr`.
        unsafe { TextStr::from_boxed_text_bytes_unchecked(copy) }
    }
}
/// The default boxed `TextStr` is the empty slice.
impl Default for Box<TextStr> {
    #[inline]
    fn default() -> Self {
        let empty = Box::<[u8]>::default();
        // SAFETY: the input is an empty byte slice.
        // NOTE(review): assumes the empty string is valid text — consistent
        // with the `validate_string` test in this file.
        unsafe { TextStr::from_boxed_text_bytes_unchecked(empty) }
    }
}
/// Copies a `&TextStr` into a new boxed `TextStr`.
impl From<&TextStr> for Box<TextStr> {
    #[inline]
    fn from(s: &TextStr) -> Self {
        let bytes: Box<[u8]> = s.as_bytes().into();
        // SAFETY: the bytes are an unmodified copy of an existing `TextStr`.
        unsafe { TextStr::from_boxed_text_bytes_unchecked(bytes) }
    }
}
/// Strict conversion rejects a lone combining ring and the decomposed
/// `A` + ring sequence, while lossy conversion yields the precomposed
/// U+00C5 form.
#[test]
fn normalize_string() {
    let combining_ring = "\u{030a}";
    let decomposed = "A\u{30a}";
    let decomposed_nl = "A\u{30a}\n";
    let composed = TextStr::from_text("\u{c5}").unwrap();
    let composed_nl = TextStr::from_text("\u{c5}\n").unwrap();

    // Strict conversion rejects both inputs.
    assert!(TextStr::from_text(decomposed).is_err());
    assert!(TextStr::from_text(combining_ring).is_err());

    // Lossy conversion produces the composed form, with or without a
    // trailing newline.
    assert_eq!(composed, &TextString::from_text_lossy(decomposed));
    assert_eq!(composed_nl, &TextString::from_text_lossy(decomposed_nl));
}
/// The empty byte string is accepted; a lone `0xff` byte is rejected.
#[test]
fn validate_string() {
    let empty = TextStr::from_text_bytes(b"");
    assert!(empty.is_ok());

    let invalid = TextStr::from_text_bytes(b"\xff");
    assert!(invalid.is_err());
}
/// Byte strings beginning with ESC (`0x1b`), in each of the listed forms,
/// are rejected.
#[test]
fn split_escape() {
    let inputs: [&[u8]; 5] = [b"\x1b[!p", b"\x1b[p", b"\x1b[!", b"\x1b[", b"\x1b"];
    for input in inputs.iter() {
        TextStr::from_text_bytes(input).unwrap_err();
    }
}