Struct LegacyCesu8Str

Source
pub struct LegacyCesu8Str<'s> { /* private fields */ }
👎Deprecated since 0.1.3: use one of the types most appropriate for your string format (cesu8str::{Cesu8Str, Cesu8String, Mutf8Str, Mutf8String, Mutf8CStr, Mutf8CString})
Expand description

A CESU-8 or Modified UTF-8 string.

The main difference between a CESU-8/MUTF-8 string and a regular UTF-8 string is in the handling of characters that are 4 bytes long in UTF-8. In CESU-8/MUTF-8, each of these characters is instead encoded as a UTF-16 surrogate pair, with each of the two surrogates written as a three-byte sequence.

CESU-8 and MUTF-8 strings are encoded the same way, except that MUTF-8 strings, as used by the JVM and JNI applications, encode a nul byte (hex 00) as a two-byte UTF-8-style sequence for zero (hex C0 80).

Implementations§

Source§

impl<'s> Cesu8Str<'s>

Source

pub const fn variant(&self) -> Variant

Returns the CESU8 variant this string is encoded in.

Source

pub const fn utf8_error(&self) -> Result<(), Utf8Error>

Returns Ok(()) if the string is also valid UTF-8. If the string is invalid UTF-8, this returns the UTF-8 error that would occur, i.e. the same error as str::from_utf8(cesu8.as_bytes()).unwrap_err().

§Examples
  • Example 1: A valid UTF8/Ascii string
const VALID_UTF8: &[u8] = b"my valid string";
let as_str = str::from_utf8(VALID_UTF8).map(|_| ());
let as_cesu8 = Cesu8Str::from_cesu8(VALID_UTF8, Variant::Standard).unwrap();
assert_eq!(as_str, as_cesu8.utf8_error());
assert!(as_str.is_ok());
  • Example 2: Embedded Nuls are invalid UTF8
const INVALID_UTF8: &[u8] = b"with embedded \xC0\x80 null";
let as_str = str::from_utf8(INVALID_UTF8).map(|_| ());
let as_mutf8 = Cesu8Str::from_cesu8(INVALID_UTF8, Variant::Java).unwrap();
assert_eq!(as_str, as_mutf8.utf8_error());
let utf8_err = as_str.unwrap_err();
assert_eq!(14, utf8_err.valid_up_to());
assert_eq!(Some(1), utf8_err.error_len());
Source

pub fn into_owned(self) -> Cesu8Str<'static>

Ensures the string is owned to alleviate any lifetime issues.

Source

pub fn from_cesu8( bytes: &[u8], variant: Variant, ) -> Result<Cesu8Str<'_>, Cesu8Error>

Validates a sequence of bytes as CESU-8. Will not allocate.

§Examples
§Valid CESU-8, Valid UTF-8, Valid ascii
use cesu8str::{LegacyCesu8Str as Cesu8Str, Variant};
const ASCII: &[u8] = b"normal ascii string";
let as_cesu8 = Cesu8Str::from_cesu8(ASCII, Variant::Standard).unwrap();

// There were no UTF-8 errors within the string
assert_eq!(from_utf8(ASCII).map(|_| ()), as_cesu8.utf8_error());
assert_eq!(as_cesu8.utf8_error(), Ok(()));
§Valid CESU-8, Invalid UTF-8
use cesu8str::{LegacyCesu8Str as Cesu8Str, Variant};
const VALID_CESU8: &[u8] = b"with embedded \xC0\x80 null";
let as_cesu8 = Cesu8Str::from_cesu8(VALID_CESU8, Variant::Java).unwrap();

// It's not valid UTF-8, check the utf8_error
assert_eq!(from_utf8(VALID_CESU8).map(|_| ()), as_cesu8.utf8_error());
let utf8_err = as_cesu8.utf8_error().unwrap_err();
assert_eq!(14, utf8_err.valid_up_to());
assert_eq!(Some(1), utf8_err.error_len());
§Invalid CESU-8, Invalid UTF-8
use cesu8str::{LegacyCesu8Str as Cesu8Str, Variant};
const INVALID_CESU8: &[u8] = b"with embedded \xC0\x80 null"; // is valid Java variant, but test with Standard so it's invalid
let as_cesu8_err = Cesu8Str::from_cesu8(INVALID_CESU8, Variant::Standard).unwrap_err();
assert_eq!(14, as_cesu8_err.valid_up_to());
assert_eq!(from_utf8(INVALID_CESU8).map(|_| ()), as_cesu8_err.utf8_error());


let valid = &INVALID_CESU8[..as_cesu8_err.valid_up_to()];
let as_cesu8 = Cesu8Str::from_cesu8(valid, Variant::Standard).unwrap();
assert_eq!(from_utf8(valid).map(|_| ()), as_cesu8.utf8_error());
assert_eq!(Ok(()), as_cesu8.utf8_error());
§Invalid CESU-8, Valid UTF-8
use cesu8str::{LegacyCesu8Str as Cesu8Str, Variant};
const VALID_UTF8: &str = "with literal \0 null";
let as_cesu8_err = Cesu8Str::from_cesu8(VALID_UTF8.as_bytes(), Variant::Java).unwrap_err();

assert_eq!(std::str::from_utf8(VALID_UTF8.as_bytes()).map(|_| ()), as_cesu8_err.utf8_error());
Source

pub fn from_cesu8_lossy(bytes: &[u8], variant: Variant) -> Cesu8Str<'_>

Creates a valid CESU-8 string, replacing invalid sequences with a replacement character.

If the string is already valid, it will not allocate. Otherwise, it will allocate a new buffer.

Note that if an invalid sequence is found at the end (such as an incomplete sequence), it will be replaced, even if more bytes in the buffer could have completed it.

Source

pub unsafe fn from_utf8_unchecked( bytes: Cow<'_, str>, variant: Variant, ) -> Cesu8Str<'_>

Creates a Cesu8Str from a UTF-8 string.

§Safety

The provided string must not contain sequences that are invalid CESU-8 for the given variant.

Namely, there must not be 4-byte UTF-8 supplementary characters, and, if this is the Java variant, there must not be any nul-bytes.

Source

pub fn from_utf8<C: Into<Cow<'s, str>>>( text: C, variant: Variant, ) -> Cesu8Str<'s>

Converts a UTF-8 string into a CESU-8 string, allocating if necessary.

use cesu8str::{LegacyCesu8Str as Cesu8Str, Variant};

// Encode a UTF-8 str (that is also valid CESU-8) into CESU-8 without allocating
let to_encode = "my string (valid CESU8)";
let as_cesu8 = Cesu8Str::from_utf8(to_encode, Variant::Standard);
assert!(matches!(as_cesu8.into_bytes(), Cow::Borrowed(_)));

// Encode a UTF-8 str into Java CESU-8. Will allocate since it has to encode the nul byte.
let to_encode_java = "my string (not valid Java CESU8)\0";
let as_jcesu8 = Cesu8Str::from_utf8(to_encode_java, Variant::Java);
assert!(matches!(as_jcesu8.into_bytes(), Cow::Owned(_)));

// Encode an owned UTF-8 String into CESU-8. Will not allocate since the string is already owned.
let to_encode = "my string (valid CESU8)".to_owned();
let as_cesu8 = Cesu8Str::from_utf8(to_encode, Variant::Standard);
assert!(matches!(as_cesu8.into_bytes(), Cow::Owned(_)));
Source

pub fn try_from_utf8<C: Into<Cow<'s, str>>>( text: C, variant: Variant, ) -> Result<Cesu8Str<'s>, Cesu8Error>

Validates a UTF-8 string as a CESU-8 string. Will return an error if it cannot do so without allocating.

See Cesu8Str::from_utf8 for a version that will convert (and allocate if necessary)

Source

pub fn from_utf8_inplace( text: &'s str, buf: &'s mut [u8], variant: Variant, ) -> Result<Cesu8Str<'s>>

Creates a Cesu8Str by encoding into a provided buffer. Alternatively, the resulting string may borrow from the original string if it is already valid CESU-8.

May return an io::Error if there is not enough space in the provided buffer, in which case the buffer’s contents are undefined.

Source

pub fn from_utf8_writer<W: Write>( text: &str, target: &mut W, variant: Variant, ) -> Result<()>

Converts a UTF-8 string directly into the provided io::Write-capable object. This allows writing directly into a preallocated Vec or byte slice stored on the stack, for example.

Source

pub fn as_bytes(&self) -> &[u8] ⓘ

Obtains the raw CESU8 bytes of the string

Source

pub fn as_str(&self) -> Result<&str, Utf8Error>

Returns the CESU-8 string as a UTF-8 string without allocating.

Source

pub fn to_str(&self) -> Cow<'_, str>

Returns the CESU-8 string as a UTF-8 string, may allocate.

Source

pub fn into_str(self) -> Cow<'s, str>

Returns the CESU-8 string as a UTF-8 string, preserving the allocation if possible.

Source

pub fn into_bytes(self) -> Cow<'s, [u8]>

Returns the underlying bytes that make up the CESU-8 string.

Source

pub fn to_variant(&self, variant: Variant) -> Cesu8Str<'_>

Converts between variants

Source

pub fn into_variant(self, variant: Variant) -> Cesu8Str<'s>

Encodes this string into the specified variant. No-op if already encoded in the variant.

Source

pub fn into_bytes0(self, var: Variant) -> Vec<u8> ⓘ

Returns a byte buffer of this CESU-8 string, converted to the specified variant with a null-terminator

Allocates if there is not enough capacity to store the terminator.

Source

pub fn reencode0<C: Into<Cow<'s, str>>>(s: C, var: Variant) -> Vec<u8> ⓘ

Convenience function to turn a UTF-8 string into a null-terminated CESU-8 string of the specified variant

Trait Implementations§

Source§

impl<'cs, 'us> Add<&'us Cesu8Str<'us>> for Cesu8Str<'cs>

Source§

type Output = Cesu8Str<'cs>

The resulting type after applying the + operator.
Source§

fn add(self, text: &'us Cesu8Str<'us>) -> Self::Output

Performs the + operation. Read more
Source§

impl<'cs, 'us> Add<&'us str> for Cesu8Str<'cs>

Source§

type Output = Cesu8Str<'cs>

The resulting type after applying the + operator.
Source§

fn add(self, text: &'us str) -> Self::Output

Performs the + operation. Read more
Source§

impl<'cs, 'us> AddAssign<&'us Cesu8Str<'us>> for Cesu8Str<'cs>

Source§

fn add_assign(&mut self, rhs: &'us Cesu8Str<'us>)

Performs the += operation. Read more
Source§

impl<'cs, 'us> AddAssign<&'us str> for Cesu8Str<'cs>

Source§

fn add_assign(&mut self, text: &'us str)

Performs the += operation. Read more
Source§

impl<'s> AsRef<[u8]> for Cesu8Str<'s>

Source§

fn as_ref(&self) -> &[u8] ⓘ

Converts this type into a shared reference of the (usually inferred) input type.
Source§

impl<'s> Borrow<[u8]> for Cesu8Str<'s>

Source§

fn borrow(&self) -> &[u8] ⓘ

Immutably borrows from an owned value. Read more
Source§

impl<'s> Clone for Cesu8Str<'s>

Source§

fn clone(&self) -> Cesu8Str<'s>

Returns a duplicate of the value. Read more
1.0.0 · Source§

const fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl<'s> Debug for Cesu8Str<'s>

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Display a debug representation of the string, escaping non-ascii characters to hex

Source§

impl<'s> Display for Cesu8Str<'s>

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl From<Cesu8Str<'_>> for String

Source§

fn from(c8s: Cesu8Str<'_>) -> String

Converts to this type from the input type.
Source§

impl From<Cesu8Str<'_>> for Vec<u8>

Source§

fn from(c8s: Cesu8Str<'_>) -> Vec<u8> ⓘ

Converts to this type from the input type.
Source§

impl Hash for Cesu8Str<'_>

Source§

fn hash<H: Hasher>(&self, state: &mut H)

Feeds this value into the given Hasher. Read more
1.3.0 · Source§

fn hash_slice<H>(data: &[Self], state: &mut H)
where H: Hasher, Self: Sized,

Feeds a slice of this type into the given Hasher. Read more
Source§

impl<'s> Ord for Cesu8Str<'s>

Source§

fn cmp(&self, other: &Self) -> Ordering

This method returns an Ordering between self and other. Read more
1.21.0 · Source§

fn max(self, other: Self) -> Self
where Self: Sized,

Compares and returns the maximum of two values. Read more
1.21.0 · Source§

fn min(self, other: Self) -> Self
where Self: Sized,

Compares and returns the minimum of two values. Read more
1.50.0 · Source§

fn clamp(self, min: Self, max: Self) -> Self
where Self: Sized,

Restrict a value to a certain interval. Read more
Source§

impl PartialEq<&Cesu8Str<'_>> for &str

Source§

fn eq(&self, other: &&Cesu8Str<'_>) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

const fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl<'a> PartialEq<&'a str> for Cesu8Str<'_>

Source§

fn eq(&self, other: &&'a str) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

const fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl PartialEq<Cesu8Str<'_>> for &str

Source§

fn eq(&self, other: &Cesu8Str<'_>) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

const fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl<'s> PartialEq<Cesu8Str<'_>> for Cesu8Str<'s>

Source§

fn eq(&self, other: &Cesu8Str<'_>) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

const fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl PartialEq<str> for Cesu8Str<'_>

Source§

fn eq(&self, other: &str) -> bool

Tests for self and other values to be equal, and is used by ==.
1.0.0 · Source§

const fn ne(&self, other: &Rhs) -> bool

Tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
Source§

impl<'s> PartialOrd<Cesu8Str<'_>> for Cesu8Str<'s>

Source§

fn partial_cmp(&self, other: &Cesu8Str<'_>) -> Option<Ordering>

This method returns an ordering between self and other values if one exists. Read more
1.0.0 · Source§

fn lt(&self, other: &Rhs) -> bool

Tests less than (for self and other) and is used by the < operator. Read more
1.0.0 · Source§

fn le(&self, other: &Rhs) -> bool

Tests less than or equal to (for self and other) and is used by the <= operator. Read more
1.0.0 · Source§

fn gt(&self, other: &Rhs) -> bool

Tests greater than (for self and other) and is used by the > operator. Read more
1.0.0 · Source§

fn ge(&self, other: &Rhs) -> bool

Tests greater than or equal to (for self and other) and is used by the >= operator. Read more
Source§

impl<'s> PartialOrd<str> for Cesu8Str<'s>

Source§

fn partial_cmp(&self, other: &str) -> Option<Ordering>

This method returns an ordering between self and other values if one exists. Read more
1.0.0 · Source§

fn lt(&self, other: &Rhs) -> bool

Tests less than (for self and other) and is used by the < operator. Read more
1.0.0 · Source§

fn le(&self, other: &Rhs) -> bool

Tests less than or equal to (for self and other) and is used by the <= operator. Read more
1.0.0 · Source§

fn gt(&self, other: &Rhs) -> bool

Tests greater than (for self and other) and is used by the > operator. Read more
1.0.0 · Source§

fn ge(&self, other: &Rhs) -> bool

Tests greater than or equal to (for self and other) and is used by the >= operator. Read more
Source§

impl<'s> Eq for Cesu8Str<'s>

Auto Trait Implementations§

§

impl<'s> Freeze for Cesu8Str<'s>

§

impl<'s> RefUnwindSafe for Cesu8Str<'s>

§

impl<'s> Send for Cesu8Str<'s>

§

impl<'s> Sync for Cesu8Str<'s>

§

impl<'s> Unpin for Cesu8Str<'s>

§

impl<'s> UnwindSafe for Cesu8Str<'s>

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T> ToString for T
where T: Display + ?Sized,

Source§

fn to_string(&self) -> String

Converts the given value to a String. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.