Trait encode_unicode::CharExt

pub trait CharExt: Sized {
    fn to_utf8(self) -> Utf8Char;
    fn to_utf16(self) -> Utf16Char;
    fn iter_utf8_bytes(self) -> Utf8Iterator;
    fn iter_utf16_units(self) -> Utf16Iterator;
    fn to_utf8_array(self) -> ([u8; 4], usize);
    fn to_utf16_array(self) -> [u16; 2];
    fn to_utf16_tuple(self) -> (u16, Option<u16>);
    fn from_utf8_slice_start(src: &[u8]) -> Result<(Self, usize), InvalidUtf8Slice>;
    fn from_utf16_slice_start(src: &[u16]) -> Result<(Self, usize), InvalidUtf16Slice>;
    fn from_utf8_array(utf8: [u8; 4]) -> Result<Self, InvalidUtf8Array>;
    fn from_utf16_array(utf16: [u16; 2]) -> Result<Self, InvalidUtf16Array>;
    fn from_utf16_tuple(utf16: (u16, Option<u16>)) -> Result<Self, InvalidUtf16Tuple>;
    unsafe fn from_utf8_exact_slice_unchecked(src: &[u8]) -> Self;
    fn from_utf16_array_unchecked(utf16: [u16; 2]) -> Self;
    unsafe fn from_utf16_tuple_unchecked(utf16: (u16, Option<u16>)) -> Self;
    fn from_u32_detailed(c: u32) -> Result<Self, InvalidCodepoint>;
}

Extension trait for char that adds methods for converting to and from UTF-8 or UTF-16.

Required methods

fn to_utf8(self) -> Utf8Char

Get the UTF-8 representation of this codepoint.

Utf8Char is to [u8;4] what char is to u32: a restricted type that cannot be mutated internally.
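
Examples

A minimal round-trip sketch; the From<Utf8Char> for char conversion is assumed from the crate's conversion impls, not documented on this page:

use encode_unicode::CharExt;

let u8c = 'ñ'.to_utf8();
// assumed: the crate provides From<Utf8Char> for char
assert_eq!(char::from(u8c), 'ñ');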

fn to_utf16(self) -> Utf16Char

Get the UTF-16 representation of this codepoint.

Utf16Char is to [u16;2] what char is to u32: a restricted type that cannot be mutated internally.
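
Examples

A minimal round-trip sketch; From<Utf16Char> for char is likewise assumed from the crate's conversion impls:

use encode_unicode::CharExt;

let u16c = '\u{abcde}'.to_utf16();
// assumed: the crate provides From<Utf16Char> for char
assert_eq!(char::from(u16c), '\u{abcde}');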

fn iter_utf8_bytes(self) -> Utf8Iterator

Iterate over or read the one to four bytes in the UTF-8 representation of this codepoint.

An identical alternative to the unstable char.encode_utf8(). That method somehow still exists on stable, so I have to use a different name.
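
Examples

A sketch assuming Utf8Iterator yields u8 items, as the description above implies:

use encode_unicode::CharExt;

// 'ñ' (U+00F1) encodes as the two bytes 0xc3 0xb1
let bytes: Vec<u8> = 'ñ'.iter_utf8_bytes().collect();
assert_eq!(bytes, [0xc3, 0xb1]);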

fn iter_utf16_units(self) -> Utf16Iterator

Iterate over the one or two units in the UTF-16 representation of this codepoint.

An identical alternative to the unstable char.encode_utf16(). That method somehow still exists on stable, so I have to use a different name.
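
Examples

A sketch assuming Utf16Iterator yields u16 items; the surrogate pair matches the to_utf16_array() example below:

use encode_unicode::CharExt;

let units: Vec<u16> = '\u{abcde}'.iter_utf16_units().collect();
assert_eq!(units, [0xda6f, 0xdcde]);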

fn to_utf8_array(self) -> ([u8; 4], usize)

Convert this char to a UTF-8 array, and also return how many bytes of the array are used.

The returned array is left-aligned with unused bytes set to zero.
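
Examples

A sketch of the left-aligned layout; the byte values follow from standard UTF-8 encoding:

use encode_unicode::CharExt;

let (bytes, len) = 'ñ'.to_utf8_array();
assert_eq!(len, 2); // U+00F1 needs two bytes
assert_eq!(bytes[..len], [0xc3, 0xb1]);
assert_eq!(bytes[len..], [0, 0]); // unused bytes are zero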

fn to_utf16_array(self) -> [u16; 2]

Convert this char to UTF-16.

The second element is non-zero when a surrogate pair is required.

Examples

use encode_unicode::CharExt;

assert_eq!('@'.to_utf16_array(), ['@' as u16, 0]);
assert_eq!('睷'.to_utf16_array(), ['睷' as u16, 0]);
assert_eq!('\u{abcde}'.to_utf16_array(), [0xda6f, 0xdcde]);

fn to_utf16_tuple(self) -> (u16, Option<u16>)

Convert this char to UTF-16. The second item is Some if a surrogate pair is required.

Examples

use encode_unicode::CharExt;

assert_eq!('@'.to_utf16_tuple(), ('@' as u16, None));
assert_eq!('睷'.to_utf16_tuple(), ('睷' as u16, None));
assert_eq!('\u{abcde}'.to_utf16_tuple(), (0xda6f, Some(0xdcde)));

fn from_utf8_slice_start(src: &[u8]) -> Result<(Self, usize), InvalidUtf8Slice>

Create a char from the start of a UTF-8 slice, and also return how many bytes were used.

Errors

Returns an Err if the slice is empty, doesn't start with a valid UTF-8 sequence, or is too short for the sequence.

Examples

use encode_unicode::CharExt;
use encode_unicode::error::InvalidUtf8Slice::*;
use encode_unicode::error::InvalidUtf8::*;

assert_eq!(char::from_utf8_slice_start(&[b'A', b'B', b'C']), Ok(('A',1)));
assert_eq!(char::from_utf8_slice_start(&[0xdd, 0xbb]), Ok(('\u{77b}',2)));

assert_eq!(char::from_utf8_slice_start(&[]), Err(TooShort(1)));
assert_eq!(char::from_utf8_slice_start(&[0xf0, 0x99]), Err(TooShort(4)));
assert_eq!(char::from_utf8_slice_start(&[0xee, b'F', 0x80]), Err(Utf8(NotAContinuationByte(1))));
assert_eq!(char::from_utf8_slice_start(&[0xee, 0x99, 0x0f]), Err(Utf8(NotAContinuationByte(2))));

fn from_utf16_slice_start(
    src: &[u16]
) -> Result<(Self, usize), InvalidUtf16Slice>

Create a char from the start of a UTF-16 slice, and also return how many units were used.

If you want to continue after an error, continue with the next u16 unit.
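
Examples

A sketch reusing values verified elsewhere on this page:

use encode_unicode::CharExt;

assert_eq!(char::from_utf16_slice_start(&['@' as u16, 'A' as u16]), Ok(('@', 1)));
// 0xda6f 0xdcde is the surrogate pair for '\u{abcde}' (see to_utf16_array())
assert_eq!(char::from_utf16_slice_start(&[0xda6f, 0xdcde]), Ok(('\u{abcde}', 2)));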

fn from_utf8_array(utf8: [u8; 4]) -> Result<Self, InvalidUtf8Array>

Convert a UTF-8 sequence as returned from .to_utf8_array() into a char.

The codepoint must start at the first byte, and leftover bytes are ignored.

Errors

Returns an Err if the array doesn't start with a valid UTF-8 sequence.

Examples

use encode_unicode::CharExt;
use encode_unicode::error::InvalidUtf8Array::*;
use encode_unicode::error::InvalidUtf8::*;
use encode_unicode::error::InvalidCodepoint::*;

assert_eq!(char::from_utf8_array([b'A', 0, 0, 0]), Ok('A'));
assert_eq!(char::from_utf8_array([0xf4, 0x8b, 0xbb, 0xbb]), Ok('\u{10befb}'));
assert_eq!(char::from_utf8_array([b'A', b'B', b'C', b'D']), Ok('A'));
assert_eq!(char::from_utf8_array([0, 0, 0xcc, 0xbb]), Ok('\0'));

assert_eq!(char::from_utf8_array([0xef, b'F', 0x80, 0x80]), Err(Utf8(NotAContinuationByte(1))));
assert_eq!(char::from_utf8_array([0xc1, 0x80, 0, 0]), Err(Utf8(OverLong)));
assert_eq!(char::from_utf8_array([0xf7, 0xaa, 0x99, 0x88]), Err(Codepoint(TooHigh)));

fn from_utf16_array(utf16: [u16; 2]) -> Result<Self, InvalidUtf16Array>

Convert a UTF-16 pair as returned from .to_utf16_array() into a char.

The second element is ignored when not required.

Examples

use encode_unicode::CharExt;
use encode_unicode::error::InvalidUtf16Array;

assert_eq!(char::from_utf16_array(['x' as u16, 'y' as u16]), Ok('x'));
assert_eq!(char::from_utf16_array(['睷' as u16, 0]), Ok('睷'));
assert_eq!(char::from_utf16_array([0xda6f, 0xdcde]), Ok('\u{abcde}'));
assert_eq!(char::from_utf16_array([0xf111, 0xdbad]), Ok('\u{f111}'));
assert_eq!(char::from_utf16_array([0xdaaf, 0xdaaf]), Err(InvalidUtf16Array::SecondIsNotTrailingSurrogate));
assert_eq!(char::from_utf16_array([0xdcac, 0x9000]), Err(InvalidUtf16Array::FirstIsTrailingSurrogate));

fn from_utf16_tuple(
    utf16: (u16, Option<u16>)
) -> Result<Self, InvalidUtf16Tuple>

Convert a UTF-16 tuple as returned from .to_utf16_tuple() into a char.
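
Examples

A sketch mirroring the to_utf16_tuple() examples above:

use encode_unicode::CharExt;

assert_eq!(char::from_utf16_tuple(('@' as u16, None)), Ok('@'));
assert_eq!(char::from_utf16_tuple((0xda6f, Some(0xdcde))), Ok('\u{abcde}'));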

unsafe fn from_utf8_exact_slice_unchecked(src: &[u8]) -> Self

Convert a UTF-8 sequence into a char.

The length of the slice is taken as the length of the sequence; it should be 1, 2, 3, or 4.

Safety

The slice must contain exactly one, valid, UTF-8 sequence.

Passing a slice that produces an invalid codepoint is always undefined behavior; later checks that the codepoint is valid can be removed by the compiler.

Panics

If the slice is empty.
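
Examples

A sketch with a known-valid two-byte sequence (0xc3 0xb1 encodes 'ñ'):

use encode_unicode::CharExt;

// SAFETY: the slice is exactly one valid UTF-8 sequence
let c = unsafe { char::from_utf8_exact_slice_unchecked(&[0xc3, 0xb1]) };
assert_eq!(c, 'ñ');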

fn from_utf16_array_unchecked(utf16: [u16; 2]) -> Self

Convert a UTF-16 array as returned from .to_utf16_array() into a char.

This function is safe because it avoids creating invalid codepoints, but the returned value might not be what one expected.

Examples

use encode_unicode::CharExt;

// the second unit is not a trailing surrogate, but it's combined
// as if it completed a valid surrogate pair anyway.
assert_eq!(char::from_utf16_array_unchecked([0xdbad, 0xf19e]), '\u{fb59e}');
// missing trailing surrogate - ditto
assert_eq!(char::from_utf16_array_unchecked([0xd802, 0]), '\u{10800}');

unsafe fn from_utf16_tuple_unchecked(utf16: (u16, Option<u16>)) -> Self

Convert a UTF-16 tuple as returned from .to_utf16_tuple() into a char.
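
Examples

A sketch using the valid surrogate pair from the to_utf16_tuple() examples above:

use encode_unicode::CharExt;

// SAFETY: (0xda6f, Some(0xdcde)) is a valid surrogate pair
let c = unsafe { char::from_utf16_tuple_unchecked((0xda6f, Some(0xdcde))) };
assert_eq!(c, '\u{abcde}');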

fn from_u32_detailed(c: u32) -> Result<Self, InvalidCodepoint>

Produces more detailed errors than char::from_u32().

Errors

This function will return an error if:

  • the value is greater than 0x10ffff
  • the value is between 0xd800 and 0xdfff (inclusive)

Examples

use encode_unicode::CharExt;
use encode_unicode::error::InvalidCodepoint;

assert_eq!(char::from_u32_detailed(0x41), Ok('A'));
assert_eq!(char::from_u32_detailed(0x40_00_00), Err(InvalidCodepoint::TooHigh));
assert_eq!(char::from_u32_detailed(0xd951), Err(InvalidCodepoint::Utf16Reserved));
assert_eq!(char::from_u32_detailed(0xdddd), Err(InvalidCodepoint::Utf16Reserved));
assert_eq!(char::from_u32_detailed(0xdd), Ok('Ý'));
assert_eq!(char::from_u32_detailed(0x1f331), Ok('🌱'));

Implementations on Foreign Types

impl CharExt for char
