use alloc::borrow::{Borrow, BorrowMut, Cow, ToOwned};
use alloc::string::String as StdString;
use alloc::vec::Vec;
use core::error::Error;
use core::fmt;
use core::hash::{Hash, Hasher};
use core::marker::PhantomData;
use core::ops::{Deref, DerefMut};
#[cfg(feature = "serde")]
use serde::{
de::{self, Unexpected},
Deserialize, Deserializer, Serialize, Serializer,
};
use crate::cstring::{CString, NulError};
use crate::encoding::{AlwaysValid, ArrayLike, Encoding, NullTerminable, Utf8, ValidateError};
use crate::str::Str;
mod chunks;
use chunks::EncodedChunks;
/// Error produced when a `char` has no representation in the target encoding.
///
/// Returned by [`String::try_push`]. Like [`OwnValidateError`], it can be cloned and
/// compared, so results carrying it can be checked with `assert_eq!`.
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub struct InvalidChar;

impl fmt::Display for InvalidChar {
    /// Writes a fixed, human-readable description of the failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Invalid character for output encoding")
    }
}

// No underlying cause to report, so the default `source()` (returning `None`) is used.
impl Error for InvalidChar {}
/// Validation failure that keeps ownership of the bytes that were rejected.
///
/// Returned by [`String::from_bytes`], so the caller can recover the buffer it passed in
/// through [`OwnValidateError::into_vec`].
#[derive(Clone, Debug, PartialEq)]
pub struct OwnValidateError {
    /// The buffer that failed validation.
    bytes: Vec<u8>,
    /// The validation error reported for `bytes`.
    cause: ValidateError,
}

impl OwnValidateError {
    /// Bundles a validation error together with the buffer it was reported for.
    pub(crate) fn new(cause: ValidateError, bytes: Vec<u8>) -> OwnValidateError {
        Self { cause, bytes }
    }

    /// Borrows the validation error that was recorded for the bytes.
    pub fn cause(&self) -> &ValidateError {
        let Self { cause, .. } = self;
        cause
    }

    /// Consumes the error, returning the bytes that failed validation.
    pub fn into_vec(self) -> Vec<u8> {
        let Self { bytes, .. } = self;
        bytes
    }
}
impl fmt::Display for OwnValidateError {
    /// Writes a fixed summary; the specific reason is available through `source()`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Error while validating data")
    }
}
impl Error for OwnValidateError {
    /// Reports the wrapped [`ValidateError`] as the underlying cause.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        let inner: &(dyn Error + 'static) = &self.cause;
        Some(inner)
    }
}
/// An owned, growable string whose bytes are encoded as `E`.
///
/// The contents live in a `Vec<u8>`; the `PhantomData<E>` only tags the value with its
/// encoding. The safe constructors check their input with `E::validate`.
#[derive(Clone)]
pub struct String<E>(PhantomData<E>, Vec<u8>);

impl<E: Encoding> String<E> {
    /// Creates a new, empty string.
    pub const fn new() -> String<E> {
        Self(PhantomData, Vec::new())
    }

    /// Creates an empty string whose buffer is created with `Vec::with_capacity(len)`.
    pub fn with_capacity(len: usize) -> String<E> {
        let buffer = Vec::with_capacity(len);
        Self(PhantomData, buffer)
    }

    /// Wraps `bytes` as a string without validating them.
    ///
    /// # Safety
    ///
    /// `bytes` must be valid for the encoding `E`, i.e. `E::validate(&bytes)` would
    /// succeed. Other code in this module hands the buffer to `Str` unchecked and relies
    /// on that.
    pub unsafe fn from_bytes_unchecked(bytes: Vec<u8>) -> String<E> {
        Self(PhantomData, bytes)
    }

    /// Validates `bytes` for `E` and, on success, takes ownership of them as a string.
    ///
    /// # Errors
    ///
    /// If `E::validate` rejects the input, returns an [`OwnValidateError`] carrying both
    /// the validation error and the original buffer.
    pub fn from_bytes(bytes: Vec<u8>) -> Result<String<E>, OwnValidateError> {
        if let Err(cause) = E::validate(&bytes) {
            return Err(OwnValidateError::new(cause, bytes));
        }
        // SAFETY: `E::validate` accepted `bytes` just above.
        Ok(unsafe { Self::from_bytes_unchecked(bytes) })
    }

    /// Converts `bytes` to a string, substituting `E::REPLACEMENT` for invalid sequences.
    ///
    /// When the first chunk reports no invalid bytes the input is returned borrowed and
    /// nothing is allocated; empty input yields the default (borrowed) `Str`. Otherwise an
    /// owned string is built from the valid pieces, with one replacement character pushed
    /// for each chunk that reports invalid bytes.
    ///
    /// # Panics
    ///
    /// Panics (via [`String::push`]) if `E::REPLACEMENT` cannot be encoded in `E`.
    pub fn from_bytes_lossy(bytes: &[u8]) -> Cow<'_, Str<E>> {
        let mut chunks = EncodedChunks::new(bytes);

        let Some(head) = chunks.next() else {
            return Cow::Borrowed(<&Str<E>>::default());
        };
        let head_valid = head.valid();
        if head.invalid().is_empty() {
            // No invalid tail on the first chunk: it is expected to span the whole input.
            debug_assert_eq!(head_valid.len(), bytes.len());
            return Cow::Borrowed(head_valid);
        }

        let mut lossy = Self::with_capacity(bytes.len());
        lossy.push_str(head_valid);
        lossy.push(E::REPLACEMENT);
        for rest in chunks {
            lossy.push_str(rest.valid());
            if rest.invalid().is_empty() {
                continue;
            }
            lossy.push(E::REPLACEMENT);
        }
        Cow::Owned(lossy)
    }

    /// Consumes the string and returns its underlying byte buffer.
    pub fn into_bytes(self) -> Vec<u8> {
        let Self(_, raw) = self;
        raw
    }

    /// Appends `c` to the end of the string.
    ///
    /// # Panics
    ///
    /// Panics if `c` cannot be encoded in `E`; use [`String::try_push`] to handle that
    /// case without panicking.
    pub fn push(&mut self, c: char) {
        if self.try_push(c).is_err() {
            panic!("Invalid character {:?} for encoding {}", c, E::shorthand());
        }
    }

    /// Appends `c` to the end of the string if `E` can encode it.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidChar`], leaving the string untouched, when `E::encode_char`
    /// yields `None` for `c`.
    pub fn try_push(&mut self, c: char) -> Result<(), InvalidChar> {
        match E::encode_char(c) {
            Some(encoded) => {
                self.1.extend(encoded.slice());
                Ok(())
            }
            None => Err(InvalidChar),
        }
    }

    /// Appends the bytes of `str` to the end of this string.
    pub fn push_str(&mut self, str: &Str<E>) {
        let raw = str.as_bytes();
        self.1.extend(raw);
    }
}
impl<E: Encoding + NullTerminable> String<E> {
    /// Converts this string into a [`CString`] of the same encoding.
    ///
    /// This is a named shorthand for the `TryInto<CString<E>>` conversion.
    ///
    /// # Errors
    ///
    /// Propagates the [`NulError`] reported by that conversion.
    // NOTE(review): presumably raised for interior nul data — confirm against the
    // `TryFrom` impl in `cstring`.
    pub fn into_cstring(self) -> Result<CString<E>, NulError> {
        <Self as TryInto<CString<E>>>::try_into(self)
    }
}
impl<E: AlwaysValid> String<E> {
    /// Wraps `bytes` as a string without running any validation.
    ///
    /// Only available for encodings marked [`AlwaysValid`].
    pub fn from_bytes_infallible(bytes: Vec<u8>) -> String<E> {
        // SAFETY: relies on the `AlwaysValid` bound.
        // NOTE(review): assumes that marker means every byte sequence is valid for `E` —
        // confirm against the trait's documentation.
        unsafe { Self::from_bytes_unchecked(bytes) }
    }
}
impl String<Utf8> {
    /// Converts a standard library string into a UTF-8 [`String`], reusing its buffer.
    pub fn from_std(value: StdString) -> Self {
        let utf8_bytes = value.into_bytes();
        // SAFETY: a standard library `String` always contains valid UTF-8.
        unsafe { Self::from_bytes_unchecked(utf8_bytes) }
    }

    /// Converts this string into a standard library string, reusing its buffer.
    pub fn into_std(self) -> StdString {
        let utf8_bytes = self.into_bytes();
        // SAFETY: a `String<Utf8>` is only built from validated bytes or under the
        // `from_bytes_unchecked` contract, so the buffer is UTF-8.
        unsafe { StdString::from_utf8_unchecked(utf8_bytes) }
    }
}
impl<E: Encoding> fmt::Debug for String<E> {
    /// Formats exactly like the borrowed [`Str`] view of this string.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let view: &Str<E> = self;
        fmt::Debug::fmt(view, f)
    }
}
impl<E: Encoding> fmt::Display for String<E> {
    /// Formats exactly like the borrowed [`Str`] view of this string.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let view: &Str<E> = self;
        fmt::Display::fmt(view, f)
    }
}
impl<E: Encoding> Default for String<E> {
    /// The default value is the empty string, same as [`String::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl<E: Encoding> PartialEq for String<E> {
    /// Two strings are equal when their underlying byte buffers are equal.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs) = (&self.1, &other.1);
        lhs == rhs
    }
}

// Equality is a plain byte comparison, so it is a total equivalence relation.
impl<E: Encoding> Eq for String<E> {}
impl<E: Encoding> Hash for String<E> {
    /// Hashes the underlying byte buffer, consistent with the byte-wise `PartialEq`.
    // NOTE(review): `Borrow<Str<E>>` is also implemented for this type, which requires
    // this hash to agree with `Str<E>`'s — confirm against the `Hash` impl in `str`.
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.1, state)
    }
}
impl<E: Encoding> Deref for String<E> {
    type Target = Str<E>;

    /// Borrows the owned buffer as a [`Str`] of the same encoding.
    fn deref(&self) -> &Self::Target {
        let raw = &self.1;
        // SAFETY: the buffer holds data valid for `E` — either checked by `E::validate`
        // or supplied under the `from_bytes_unchecked` contract.
        unsafe { Str::from_bytes_unchecked(raw) }
    }
}
impl<E: Encoding> DerefMut for String<E> {
    /// Mutably borrows the owned buffer as a [`Str`] of the same encoding.
    fn deref_mut(&mut self) -> &mut Self::Target {
        let raw = &mut self.1;
        // SAFETY: the buffer holds data valid for `E` — either checked by `E::validate`
        // or supplied under the `from_bytes_unchecked` contract.
        unsafe { Str::from_bytes_unchecked_mut(raw) }
    }
}
impl<E: Encoding> AsRef<Str<E>> for String<E> {
    /// Returns the borrowed [`Str`] view obtained through `Deref`.
    fn as_ref(&self) -> &Str<E> {
        &**self
    }
}
impl<E: Encoding> AsMut<Str<E>> for String<E> {
    /// Returns the mutable [`Str`] view obtained through `DerefMut`.
    fn as_mut(&mut self) -> &mut Str<E> {
        &mut **self
    }
}
impl<E: Encoding> Borrow<Str<E>> for String<E> {
fn borrow(&self) -> &Str<E> {
self
}
}
impl<E: Encoding> BorrowMut<Str<E>> for String<E> {
fn borrow_mut(&mut self) -> &mut Str<E> {
self
}
}
impl<E: Encoding> FromIterator<char> for String<E> {
    /// Builds a string by pushing every `char` of `iter` in order.
    ///
    /// # Panics
    ///
    /// Panics (via [`String::push`]) on the first character that `E` cannot encode.
    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
        let mut collected = Self::new();
        for c in iter {
            collected.push(c);
        }
        collected
    }
}
impl<E: NullTerminable> From<CString<E>> for String<E> {
    /// Converts a [`CString`] into a [`String`] by taking over `value.into_bytes()`.
    fn from(value: CString<E>) -> Self {
        let raw = value.into_bytes();
        // SAFETY: relies on `CString<E>` only ever holding data valid for `E`.
        // NOTE(review): that invariant lives in `cstring` and is not visible here — confirm.
        unsafe { Self::from_bytes_unchecked(raw) }
    }
}
#[cfg(feature = "serde")]
impl<E: Encoding> Serialize for String<E> {
    /// Serializes the string as its raw encoded bytes, using the `[u8]` implementation.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let raw: &[u8] = self.as_bytes();
        raw.serialize(serializer)
    }
}
#[cfg(feature = "serde")]
impl<'de, E: Encoding> Deserialize<'de> for String<E> {
    /// Deserializes a byte vector and accepts it only if it is valid for `E`.
    ///
    /// # Errors
    ///
    /// Propagates any error from deserializing the bytes. If `E::validate` rejects them,
    /// returns an `invalid_value` error that reports the offending bytes and names the
    /// expected encoding.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let bytes: Vec<u8> = Vec::deserialize(deserializer)?;

        if E::validate(&bytes).is_err() {
            let expected = alloc::format!("a valid string for the {} encoding", E::shorthand());
            return Err(de::Error::invalid_value(
                Unexpected::Bytes(&bytes),
                &expected.as_str(),
            ));
        }

        // SAFETY: `E::validate` accepted `bytes` just above.
        Ok(unsafe { String::from_bytes_unchecked(bytes) })
    }
}
impl From<&str> for String<Utf8> {
    /// Produces an owned UTF-8 [`String`] from a borrowed `str`.
    fn from(value: &str) -> Self {
        let borrowed = Str::from_std(value);
        borrowed.to_owned()
    }
}
impl From<StdString> for String<Utf8> {
fn from(value: StdString) -> Self {
Self::from_std(value)
}
}
impl From<String<Utf8>> for StdString {
fn from(value: String<Utf8>) -> Self {
value.into_std()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks lossy UTF-8 decoding on valid input, on a single invalid byte, and on
    /// several invalid bytes interleaved with valid text.
    #[test]
    fn test_from_lossy_utf8() {
        // Entirely valid input, including a four-byte sequence.
        let clean = String::<Utf8>::from_bytes_lossy(b"Ab\xF0\x90\x90\xB7def");
        assert_eq!(clean, Cow::Borrowed(Str::from_std("Ab𐐷def")));

        // A lone invalid byte directly followed by a valid four-byte sequence.
        let one_bad = String::<Utf8>::from_bytes_lossy(b"Abcd \xD8\xF0\x90\x90\xB7");
        assert_eq!(one_bad, Cow::Owned(Str::from_std("Abcd �𐐷").to_owned()));

        // Each invalid byte is replaced separately.
        let many_bad = String::<Utf8>::from_bytes_lossy(b"A\xD8B\xD9C\xDAD");
        assert_eq!(many_bad, Cow::Owned(Str::from_std("A�B�C�D").to_owned()));
    }
}