Skip to main content

encode

Function encode 

Source
pub fn encode(value: &str) -> Cow<'_, [u8]>
Expand description

Encodes a string to CESU-8.

The algorithm is as follows:

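  • If the input, as UTF-8, is also valid CESU-8 (that is, it contains no code points above U+FFFF, which are the only ones the two encodings represent differently), the function will return Cow::Borrowed(&[u8]).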
  • If the input, as UTF-8, is not valid CESU-8, the function will re-encode it and return Cow::Owned(Vec<u8>). Because this case needs a newly allocated buffer, it has the potential to panic (see Panics below).

§Panics

This function will panic if the buffer required to encode the input exceeds isize::MAX bytes.

§Examples

// The `Cow` variant written in each assertion documents whether `encode` is
// expected to borrow the input bytes or return a newly built buffer.
// NOTE: `Cow`'s `PartialEq` compares the underlying bytes, so `assert_eq!`
// checks the encoded bytes, not which variant was actually returned.
use alloc::borrow::Cow;

// One-byte UTF-8 sequence (U+0045): the expected CESU-8 bytes are identical
// to the UTF-8 bytes, so the result is shown as borrowed.
let single_byte = "\u{0045}";
assert_eq!(single_byte, "E");
assert_eq!(single_byte.len(), 1);
assert_eq!(single_byte.as_bytes(), &[0x45]);
assert_eq!(simd_cesu8::encode(single_byte), Cow::Borrowed(&[0x45]));

// Two-byte UTF-8 sequence (U+0205): again identical in both encodings.
let two_bytes = "\u{0205}";
assert_eq!(two_bytes, "ȅ");
assert_eq!(two_bytes.len(), 2);
assert_eq!(two_bytes.as_bytes(), &[0xc8, 0x85]);
assert_eq!(simd_cesu8::encode(two_bytes), Cow::Borrowed(&[0xc8, 0x85]));

// Three-byte UTF-8 sequence (U+20AC): still identical in both encodings.
let three_bytes = "\u{20ac}";
assert_eq!(three_bytes, "€");
assert_eq!(three_bytes.len(), 3);
assert_eq!(three_bytes.as_bytes(), &[0xe2, 0x82, 0xac]);
assert_eq!(
    simd_cesu8::encode(three_bytes),
    Cow::Borrowed(&[0xe2, 0x82, 0xac])
);

// Four-byte UTF-8 sequence (U+10400): this is where the encodings diverge.
// CESU-8 encodes the UTF-16 surrogate pair (U+D801, U+DC00) as two
// three-byte sequences, so the expected result is a new six-byte buffer
// rather than a borrow of the four input bytes.
let four_bytes = "\u{10400}";
assert_eq!(four_bytes, "𐐀");
assert_eq!(four_bytes.len(), 4);
assert_eq!(four_bytes.as_bytes(), &[0xf0, 0x90, 0x90, 0x80]);
assert_eq!(
    simd_cesu8::encode(four_bytes),
    Cow::<[u8]>::Owned(vec![0xed, 0xa0, 0x81, 0xed, 0xb0, 0x80])
);