pub fn encode(value: &str) -> Cow<'_, [u8]>
Encodes a string to CESU-8.
The algorithm is as follows:
- If the input, as UTF-8, is also valid CESU-8, the function will return
  Cow::Borrowed(&[u8]).
- If the input, as UTF-8, is not valid CESU-8, the function will return
  Cow::Owned(Vec<u8>). This case has the potential to panic.
§Panics
This function will panic if the buffer required to encode the input exceeds
isize::MAX bytes.
§Examples
use alloc::borrow::Cow;
let single_byte = "\u{0045}";
assert_eq!(single_byte, "E");
assert_eq!(single_byte.len(), 1);
assert_eq!(single_byte.as_bytes(), &[0x45]);
assert_eq!(simd_cesu8::encode(single_byte), Cow::Borrowed(&[0x45]));
let two_bytes = "\u{0205}";
assert_eq!(two_bytes, "ȅ");
assert_eq!(two_bytes.len(), 2);
assert_eq!(two_bytes.as_bytes(), &[0xc8, 0x85]);
assert_eq!(simd_cesu8::encode(two_bytes), Cow::Borrowed(&[0xc8, 0x85]));
let three_bytes = "\u{20ac}";
assert_eq!(three_bytes, "€");
assert_eq!(three_bytes.len(), 3);
assert_eq!(three_bytes.as_bytes(), &[0xe2, 0x82, 0xac]);
assert_eq!(
simd_cesu8::encode(three_bytes),
Cow::Borrowed(&[0xe2, 0x82, 0xac])
);
let four_bytes = "\u{10400}";
assert_eq!(four_bytes, "𐐀");
assert_eq!(four_bytes.len(), 4);
assert_eq!(four_bytes.as_bytes(), &[0xf0, 0x90, 0x90, 0x80]);
assert_eq!(
simd_cesu8::encode(four_bytes),
Cow::<[u8]>::Owned(vec![0xed, 0xa0, 0x81, 0xed, 0xb0, 0x80])
);