[][src]Struct encode_unicode::Utf8Char

pub struct Utf8Char { /* fields omitted */ }

A Unicode codepoint stored as UTF-8.

It can be borrowed as a str, and has the same size as char.

Methods

impl Utf8Char[src]

pub fn from_str_start(src: &str) -> Result<(Self, usize), EmptyStrError>[src]

Create a Utf8Char from the first codepoint in a str, and also return the length in bytes of that codepoint's UTF-8 sequence.

Returns an error if the str is empty.

Examples

use encode_unicode::Utf8Char;

assert_eq!(Utf8Char::from_str_start("a"), Ok((Utf8Char::from('a'),1)));
assert_eq!(Utf8Char::from_str_start("ab"), Ok((Utf8Char::from('a'),1)));
assert_eq!(Utf8Char::from_str_start("🂠 "), Ok((Utf8Char::from('🂠'),4)));
assert_eq!(Utf8Char::from_str_start("e\u{301}"), Ok((Utf8Char::from('e'),1)));// 'e'+u301 combining mark
assert!(Utf8Char::from_str_start("").is_err());

pub fn from_slice_start(src: &[u8]) -> Result<(Self, usize), InvalidUtf8Slice>[src]

Create a Utf8Char from the first codepoint in a UTF-8 slice.
Also returns the length of the UTF-8 sequence for the codepoint.

If the slice comes from a str, use Utf8Char::from_str_start() instead to skip UTF-8 validation.

Errors

Returns an Err if the slice is empty, doesn't start with a valid UTF-8 sequence or is too short for the sequence.

Examples

use encode_unicode::Utf8Char;
use encode_unicode::error::InvalidUtf8Slice::*;
use encode_unicode::error::InvalidUtf8::*;

assert_eq!(Utf8Char::from_slice_start(&[b'A', b'B', b'C']), Ok((Utf8Char::from('A'),1)));
assert_eq!(Utf8Char::from_slice_start(&[0xdd, 0xbb]), Ok((Utf8Char::from('\u{77b}'),2)));

assert_eq!(Utf8Char::from_slice_start(&[]), Err(TooShort(1)));
assert_eq!(Utf8Char::from_slice_start(&[0xf0, 0x99]), Err(TooShort(4)));
assert_eq!(Utf8Char::from_slice_start(&[0xee, b'F', 0x80]), Err(Utf8(NotAContinuationByte(1))));
assert_eq!(Utf8Char::from_slice_start(&[0xee, 0x99, 0x0f]), Err(Utf8(NotAContinuationByte(2))));

pub unsafe fn from_slice_start_unchecked(src: &[u8]) -> (Self, usize)[src]

A from_slice_start() that doesn't validate the codepoint.

Safety

The slice must be non-empty and start with a valid UTF-8 codepoint.
Invalid or incomplete values might cause reads of uninitialized memory.

pub fn from_array(utf8: [u8; 4]) -> Result<Self, InvalidUtf8Array>[src]

Create a Utf8Char from a byte array after validating it.

The codepoint must start at the first byte.
Bytes after the end of the codepoint are set to zero by this function, so their values in the input can be anything.

Errors

Returns an Err if the array doesn't start with a valid UTF-8 sequence.

Examples

use encode_unicode::Utf8Char;
use encode_unicode::error::InvalidUtf8Array::*;
use encode_unicode::error::InvalidUtf8::*;
use encode_unicode::error::InvalidCodepoint::*;

assert_eq!(Utf8Char::from_array([b'A', 0, 0, 0]), Ok(Utf8Char::from('A')));
assert_eq!(Utf8Char::from_array([0xf4, 0x8b, 0xbb, 0xbb]), Ok(Utf8Char::from('\u{10befb}')));
assert_eq!(Utf8Char::from_array([b'A', b'B', b'C', b'D']), Ok(Utf8Char::from('A')));
assert_eq!(Utf8Char::from_array([0, 0, 0xcc, 0xbb]), Ok(Utf8Char::from('\0')));

assert_eq!(Utf8Char::from_array([0xef, b'F', 0x80, 0x80]), Err(Utf8(NotAContinuationByte(1))));
assert_eq!(Utf8Char::from_array([0xc1, 0x80, 0, 0]), Err(Utf8(OverLong)));
assert_eq!(Utf8Char::from_array([0xf7, 0xaa, 0x99, 0x88]), Err(Codepoint(TooHigh)));

pub unsafe fn from_array_unchecked(utf8: [u8; 4]) -> Self[src]

Zero-cost constructor.

Safety

Must contain a valid codepoint starting at the first byte, with the unused bytes zeroed.
Bad values can easily lead to undefined behavior.

pub fn from_ascii(ascii: u8) -> Result<Self, NonAsciiError>[src]

Create a Utf8Char from a single byte.

The byte must be an ASCII character.

Errors

Returns NonAsciiError if the byte is greater than 127.

Examples

assert_eq!(Utf8Char::from_ascii(b'a').unwrap(), 'a');
assert!(Utf8Char::from_ascii(128).is_err());

pub unsafe fn from_ascii_unchecked(ascii: u8) -> Self[src]

Create a Utf8Char from a single byte without checking that it's a valid codepoint on its own, which is only true for ASCII characters.

Safety

The byte must be less than 128.

pub fn len(self) -> usize[src]

The number of bytes this character needs.

Is between 1 and 4 (inclusive) and identical to .as_ref().len() or .to_char().len_utf8().

pub fn is_ascii(&self) -> bool[src]

Checks that the codepoint is an ASCII character.

pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool[src]

Checks that two characters are an ASCII case-insensitive match.

Is equivalent to a.to_ascii_lowercase() == b.to_ascii_lowercase().

pub fn to_ascii_uppercase(&self) -> Self[src]

Converts the character to its ASCII upper case equivalent.

ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', but non-ASCII letters are unchanged.

pub fn to_ascii_lowercase(&self) -> Self[src]

Converts the character to its ASCII lower case equivalent.

ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', but non-ASCII letters are unchanged.

pub fn make_ascii_uppercase(&mut self)[src]

Converts the character to its ASCII upper case equivalent in-place.

ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', but non-ASCII letters are unchanged.

pub fn make_ascii_lowercase(&mut self)[src]

Converts the character to its ASCII lower case equivalent in-place.

ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', but non-ASCII letters are unchanged.

pub fn to_char(self) -> char[src]

Convert from UTF-8 to UTF-32.

pub fn to_slice(self, dst: &mut [u8]) -> usize[src]

Write the internal representation to a slice, and return the number of bytes written.

Panics

Will panic if the buffer is too small; you can get the required length from .len(), but a buffer of length four is always large enough.

pub fn to_array(self) -> ([u8; 4], usize)[src]

Expose the internal array and the number of used bytes.

pub fn as_str(&self) -> &str[src]

Return a str view of the array the codepoint is stored as.

Is an unambiguous version of .as_ref().

Trait Implementations

impl From<Utf16Char> for Utf8Char[src]

impl From<char> for Utf8Char[src]

impl From<Utf8Char> for char[src]

impl From<AsciiChar> for Utf8Char[src]

Requires the feature "ascii".

impl From<Utf8Char> for Utf8Iterator[src]

impl From<Utf8Char> for Utf16Char[src]

impl Ord for Utf8Char[src]

fn max(self, other: Self) -> Self1.21.0[src]

Compares and returns the maximum of two values. Read more

fn min(self, other: Self) -> Self1.21.0[src]

Compares and returns the minimum of two values. Read more

fn clamp(self, min: Self, max: Self) -> Self[src]

🔬 This is a nightly-only experimental API. (clamp)

Restrict a value to a certain interval. Read more

impl Extend<Utf8Char> for Vec<u8>[src]

impl<'a> Extend<&'a Utf8Char> for Vec<u8>[src]

impl Extend<Utf8Char> for String[src]

impl<'a> Extend<&'a Utf8Char> for String[src]

impl PartialOrd<Utf8Char> for Utf8Char[src]

impl PartialOrd<char> for Utf8Char[src]

#[must_use] fn lt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than (for self and other) and is used by the < operator. Read more

#[must_use] fn le(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than or equal to (for self and other) and is used by the <= operator. Read more

#[must_use] fn gt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than (for self and other) and is used by the > operator. Read more

#[must_use] fn ge(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than or equal to (for self and other) and is used by the >= operator. Read more

impl PartialOrd<Utf8Char> for char[src]

#[must_use] fn lt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than (for self and other) and is used by the < operator. Read more

#[must_use] fn le(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than or equal to (for self and other) and is used by the <= operator. Read more

#[must_use] fn gt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than (for self and other) and is used by the > operator. Read more

#[must_use] fn ge(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than or equal to (for self and other) and is used by the >= operator. Read more

impl PartialOrd<Utf16Char> for Utf8Char[src]

#[must_use] fn lt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than (for self and other) and is used by the < operator. Read more

#[must_use] fn le(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than or equal to (for self and other) and is used by the <= operator. Read more

#[must_use] fn gt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than (for self and other) and is used by the > operator. Read more

#[must_use] fn ge(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than or equal to (for self and other) and is used by the >= operator. Read more

impl PartialOrd<AsciiChar> for Utf8Char[src]

Utf8Chars that are not ASCII always compare greater.

#[must_use] fn lt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than (for self and other) and is used by the < operator. Read more

#[must_use] fn le(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than or equal to (for self and other) and is used by the <= operator. Read more

#[must_use] fn gt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than (for self and other) and is used by the > operator. Read more

#[must_use] fn ge(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than or equal to (for self and other) and is used by the >= operator. Read more

impl PartialOrd<Utf8Char> for AsciiChar[src]

Utf8Chars that are not ASCII always compare greater.

#[must_use] fn lt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than (for self and other) and is used by the < operator. Read more

#[must_use] fn le(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than or equal to (for self and other) and is used by the <= operator. Read more

#[must_use] fn gt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than (for self and other) and is used by the > operator. Read more

#[must_use] fn ge(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than or equal to (for self and other) and is used by the >= operator. Read more

impl PartialOrd<Utf8Char> for Utf16Char[src]

#[must_use] fn lt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than (for self and other) and is used by the < operator. Read more

#[must_use] fn le(&self, other: &Rhs) -> bool1.0.0[src]

This method tests less than or equal to (for self and other) and is used by the <= operator. Read more

#[must_use] fn gt(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than (for self and other) and is used by the > operator. Read more

#[must_use] fn ge(&self, other: &Rhs) -> bool1.0.0[src]

This method tests greater than or equal to (for self and other) and is used by the >= operator. Read more

impl PartialEq<Utf8Char> for Utf8Char[src]

impl PartialEq<char> for Utf8Char[src]

#[must_use] fn ne(&self, other: &Rhs) -> bool1.0.0[src]

This method tests for !=.

impl PartialEq<Utf8Char> for char[src]

#[must_use] fn ne(&self, other: &Rhs) -> bool1.0.0[src]

This method tests for !=.

impl PartialEq<Utf16Char> for Utf8Char[src]

#[must_use] fn ne(&self, other: &Rhs) -> bool1.0.0[src]

This method tests for !=.

impl PartialEq<u8> for Utf8Char[src]

Only considers the byte equal if both it and the Utf8Char represent ASCII characters.

There is no impl in the opposite direction, as this should only be used to compare Utf8Chars against constants.

Examples

assert!(Utf8Char::from('8') == b'8');
assert!(Utf8Char::from_array([0xf1,0x80,0x80,0x80]).unwrap() != 0xf1);
assert!(Utf8Char::from('\u{ff}') != 0xff);
assert!(Utf8Char::from('\u{80}') != 0x80);

#[must_use] fn ne(&self, other: &Rhs) -> bool1.0.0[src]

This method tests for !=.

impl PartialEq<AsciiChar> for Utf8Char[src]

Utf8Chars that are not ASCII never compare equal.

#[must_use] fn ne(&self, other: &Rhs) -> bool1.0.0[src]

This method tests for !=.

impl PartialEq<Utf8Char> for AsciiChar[src]

Utf8Chars that are not ASCII never compare equal.

#[must_use] fn ne(&self, other: &Rhs) -> bool1.0.0[src]

This method tests for !=.

impl PartialEq<Utf8Char> for Utf16Char[src]

#[must_use] fn ne(&self, other: &Rhs) -> bool1.0.0[src]

This method tests for !=.

impl Default for Utf8Char[src]

impl IntoIterator for Utf8Char[src]

type Item = u8

The type of the elements being iterated over.

type IntoIter = Utf8Iterator

Which kind of iterator are we turning this into?

Important traits for Utf8Iterator
fn into_iter(self) -> Utf8Iterator[src]

Iterate over the byte values.

impl Clone for Utf8Char[src]

fn clone_from(&mut self, source: &Self)1.0.0[src]

Performs copy-assignment from source. Read more

impl Eq for Utf8Char[src]

impl Copy for Utf8Char[src]

impl AsRef<[u8]> for Utf8Char[src]

impl AsRef<str> for Utf8Char[src]

impl Display for Utf8Char[src]

impl Debug for Utf8Char[src]

impl Deref for Utf8Char[src]

type Target = str

The resulting type after dereferencing.

impl Hash for Utf8Char[src]

fn hash_slice<H>(data: &[Self], state: &mut H) where
    H: Hasher
1.3.0[src]

Feeds a slice of this type into the given Hasher. Read more

impl FromIterator<Utf8Char> for String[src]

impl<'a> FromIterator<&'a Utf8Char> for String[src]

impl FromIterator<Utf8Char> for Vec<u8>[src]

impl<'a> FromIterator<&'a Utf8Char> for Vec<u8>[src]

impl FromStr for Utf8Char[src]

type Err = FromStrError

The associated error which can be returned from parsing.

fn from_str(s: &str) -> Result<Self, FromStrError>[src]

Create a Utf8Char from a string slice. The string must contain exactly one codepoint.

Examples

use encode_unicode::error::FromStrError::*;
use encode_unicode::Utf8Char;
use std::str::FromStr;

assert_eq!(Utf8Char::from_str("a"), Ok(Utf8Char::from('a')));
assert_eq!(Utf8Char::from_str("🂠"), Ok(Utf8Char::from('🂠')));
assert_eq!(Utf8Char::from_str(""), Err(Empty));
assert_eq!(Utf8Char::from_str("ab"), Err(MultipleCodepoints));
assert_eq!(Utf8Char::from_str("e\u{301}"), Err(MultipleCodepoints));// 'e'+u301 combining mark

impl Borrow<[u8]> for Utf8Char[src]

impl Borrow<str> for Utf8Char[src]

impl AsciiExt for Utf8Char[src]

type Owned = Utf8Char

Deprecated since 1.26.0:

use inherent methods instead

Container type for copied ASCII characters.

impl ToAsciiChar for Utf8Char[src]

Requires the feature "ascii".

Auto Trait Implementations

Blanket Implementations

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T> ToString for T where
    T: Display + ?Sized
[src]

impl<I> IntoIterator for I where
    I: Iterator
[src]

type Item = <I as Iterator>::Item

The type of the elements being iterated over.

type IntoIter = I

Which kind of iterator are we turning this into?

impl<T> ToOwned for T where
    T: Clone
[src]

type Owned = T

The resulting type after obtaining ownership.

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> Any for T where
    T: 'static + ?Sized
[src]