//! Compile-time UTF-16 encoding of string constants.
//!
//! The `encode!` macro turns a `&'static str` constant into a
//! `&'static [u16; N]` holding its UTF-16 code units, and
//! `encode_null_terminated!` does the same with a trailing `0` code unit
//! appended. All work happens during const evaluation, and the crate does not
//! depend on `std`.
#![no_std]
#![deny(missing_docs)]
/// Encodes a string constant as UTF-16 during const evaluation.
///
/// `encode!(s)` evaluates to a `&'static [u16; N]` containing the UTF-16 code
/// units of `s`, where `N` is exactly the number of code units produced. `s`
/// must be an expression usable as the initializer of a
/// `const _: &'static str`.
///
/// The two-argument form selects the termination mode:
///
/// * `encode!(s, non_null_terminated)` is the same as `encode!(s)`.
/// * `encode!(s, null_terminated)` appends one trailing `0` code unit, and
///   const evaluation fails (a compile error) if `s` itself contains a NUL
///   character.
///
/// The `@@ ...` arms are internal and map the mode identifier to the number of
/// extra code units (`1` or `0`).
#[macro_export]
macro_rules! encode {
// One-argument form: defaults to no terminator.
($s:expr) => {{
$crate::encode!($s, non_null_terminated)
}};
// Two-argument form: `$null_terminated` is `null_terminated` or
// `non_null_terminated`; anything else matches no `@@` arm below.
($s:expr, $null_terminated:ident) => {{
const __STRING: &'static str = $s;
// 1 when a terminator is requested, 0 otherwise.
const __EXTRA_BYTE: usize = $crate::encode!(@@ $null_terminated);
// Upper bound on the output length: the UTF-8 byte length of the input plus
// the optional terminator. The buffer is sized with this bound and the
// number of code units actually written is tracked alongside it.
const __STRING_LEN: usize = __STRING.len() + __EXTRA_BYTE;
const __BUFFER_AND_LEN: (&[u16; __STRING_LEN], usize) = {
let mut result = [0; __STRING_LEN];
let mut utf16_offset = 0;
let mut iterator = $crate::CodePointIterator::new(__STRING.as_bytes());
// `next` consumes the iterator and hands back its successor, so the
// binding is replaced on every pass.
while let Some((next, mut code)) = iterator.next() {
iterator = next;
if code == 0 && __EXTRA_BYTE == 1 {
// Indexing a one-element array with `usize::MAX` is always out of
// bounds, so reaching this line aborts const evaluation. The lint is
// silenced because the out-of-bounds access is deliberate; the string
// presumably exists so the message appears in the diagnostic.
#[allow(unconditional_panic)]
let _ =
["Found a null byte in string which should have no null bytes"][usize::MAX];
}
if (code & 0xFFFF) == code {
// Fits in 16 bits: emitted as a single code unit.
result[utf16_offset] = code as u16;
utf16_offset += 1;
} else {
// Above U+FFFF: emitted as a high/low surrogate pair carrying the
// upper and lower 10 bits of `code - 0x1_0000`.
code -= 0x1_0000;
result[utf16_offset] = 0xD800 | ((code >> 10) as u16);
result[utf16_offset + 1] = 0xDC00 | ((code as u16) & 0x3FF);
utf16_offset += 2;
}
}
// The terminator slot (if any) is already zero from the array initializer;
// it only needs to be counted in the reported length.
(&{ result }, utf16_offset + __EXTRA_BYTE)
};
// SAFETY: this reinterprets a `&&[u16; __STRING_LEN]` as a
// `&&[u16; __BUFFER_AND_LEN.1]`, i.e. it views a prefix of the same `u16`
// buffer. `__BUFFER_AND_LEN.1` counts the code units written above (plus
// the terminator), and every write was bounds-checked against
// `__STRING_LEN`, so the target array is never longer than the source.
const __OUT: &[u16; __BUFFER_AND_LEN.1] = unsafe {
::core::mem::transmute::<
&'static &[u16; __STRING_LEN],
&'static &[u16; __BUFFER_AND_LEN.1],
>(&__BUFFER_AND_LEN.0)
};
__OUT
}};
// Internal: number of extra code units for each termination mode.
(@@ null_terminated) => {
1
};
(@@ non_null_terminated) => {
0
};
}
/// Encodes a string constant as NUL-terminated UTF-16 during const evaluation.
///
/// Shorthand for `encode!(text, null_terminated)`: the result is a
/// `&'static [u16; N]` whose last element is a `0` code unit. Const evaluation
/// fails (a compile error) if the input itself contains a NUL character.
#[macro_export]
macro_rules! encode_null_terminated {
    ($text:expr) => {{
        $crate::encode!($text, null_terminated)
    }};
}
/// Cursor over a byte slice that yields one decoded code point per step.
///
/// This type is an implementation detail of the `encode!` macro, which
/// constructs it from `str::as_bytes`; it is public only because the macro
/// expansion refers to it through `$crate`.
#[doc(hidden)]
pub struct CodePointIterator<'a> {
/// The bytes being decoded.
buffer: &'a [u8],
/// Index into `buffer` of the first byte of the next code point.
offset: usize,
}
impl<'a> CodePointIterator<'a> {
    /// Creates an iterator positioned at the first byte of `buffer`.
    #[doc(hidden)]
    pub const fn new(buffer: &'a [u8]) -> Self {
        Self { buffer, offset: 0 }
    }

    /// Creates an iterator that starts decoding `buffer` at byte index `offset`.
    #[doc(hidden)]
    pub const fn new_with_offset(buffer: &'a [u8], offset: usize) -> Self {
        Self { buffer, offset }
    }

    /// Decodes the code point at the current position.
    ///
    /// Returns `None` once the offset has reached the end of the buffer.
    /// Otherwise returns the iterator advanced past the decoded bytes together
    /// with the code point. The iterator is consumed and its successor is
    /// returned; the `encode!` macro rebinds it on every loop pass.
    #[doc(hidden)]
    pub const fn next(self) -> Option<(Self, u32)> {
        match next_code_point(self.buffer, self.offset) {
            Some((code_point, width)) => Some((
                Self::new_with_offset(self.buffer, self.offset + width),
                code_point,
            )),
            None => None,
        }
    }
}
/// Decodes the code point whose first byte is `bytes[start]`.
///
/// Returns `None` when `start` equals `bytes.len()`. Otherwise returns the
/// decoded value together with the number of bytes consumed. No validation is
/// performed; the input is expected to be valid UTF-8 (the `encode!` macro
/// passes `str::as_bytes`). A continuation byte that is missing from the slice
/// is read as `0` and is not counted as consumed.
const fn next_code_point(bytes: &[u8], start: usize) -> Option<(u32, usize)> {
    if start == bytes.len() {
        return None;
    }

    let lead = bytes[start];
    if lead < 0x80 {
        // Single-byte (ASCII) sequence.
        return Some((lead as u32, 1));
    }

    // Multi-byte sequence. Start from the low five bits of the lead byte,
    // which is the full payload of a two-byte lead.
    let lead_bits = utf8_first_byte(lead, 2);
    let second = unwrap_or_0(bytes, start + 1);
    let mut consumed = 1 + (second != 0) as usize;
    let mut code_point = utf8_acc_cont_byte(lead_bits, second);

    if lead >= 0xE0 {
        // Three bytes or more: the payload of bytes two and three sits below
        // the lead bits shifted up by twelve.
        let third = unwrap_or_0(bytes, start + 2);
        consumed += (third != 0) as usize;
        let tail = utf8_acc_cont_byte((second & CONT_MASK) as u32, third);
        code_point = (lead_bits << 12) | tail;

        if lead >= 0xF0 {
            // Four bytes: only the low three bits of the lead byte carry
            // payload, so the extra bit kept by the five-bit mask is dropped.
            let fourth = unwrap_or_0(bytes, start + 3);
            consumed += (fourth != 0) as usize;
            code_point = ((lead_bits & 7) << 18) | utf8_acc_cont_byte(tail, fourth);
        }
    }

    Some((code_point, consumed))
}
/// Extracts the payload bits of a UTF-8 lead byte.
///
/// Keeps the bits of `byte` selected by `0x7F >> width` and widens the result
/// to `u32`; with `width == 2` that is the low five bits.
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask: u8 = 0x7F >> width;
    (byte & payload_mask) as u32
}
/// Returns `slice[index]`, or `0` when `index` is past the end of `slice`.
const fn unwrap_or_0(slice: &[u8], index: usize) -> u8 {
    if index < slice.len() {
        return slice[index];
    }
    0
}
/// Appends the payload of a continuation byte to an accumulated value.
///
/// Shifts `ch` left by six bits and ORs in the bits of `byte` selected by
/// `CONT_MASK`.
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
    let payload = (byte & CONT_MASK) as u32;
    (ch << 6) | payload
}
/// Mask selecting the low six bits of a byte, used to extract the payload of a
/// UTF-8 continuation byte.
const CONT_MASK: u8 = 0b0011_1111;
#[cfg(test)]
mod tests {
    use super::*;
    use core::iter::once;

    /// Mixed ASCII, an embedded NUL, and two- and three-byte UTF-8 sequences
    /// must match the standard library's encoder.
    #[test]
    fn encode_utf16_works() {
        const TEXT: &str = "Hello \0ä日本 語";
        let expected = TEXT.encode_utf16();
        const RESULT: &[u16] = encode!(TEXT);
        assert!(RESULT.iter().cloned().eq(expected));
    }

    /// The null-terminated variant appends exactly one trailing zero.
    #[test]
    fn encode_utf16_with_null_byte_works() {
        const TEXT: &str = "Hello ä日本 語";
        let expected = TEXT.encode_utf16().chain(once(0));
        const RESULT: &[u16] = encode_null_terminated!(TEXT);
        assert!(RESULT.iter().cloned().eq(expected));
    }

    /// Code points above U+FFFF take the surrogate-pair branch, which also
    /// makes the output shorter than the UTF-8 byte length.
    #[test]
    fn encode_utf16_handles_surrogate_pairs() {
        const TEXT: &str = "a\u{1D11E}b\u{1F600}";
        let expected = TEXT.encode_utf16();
        const RESULT: &[u16] = encode!(TEXT);
        assert_eq!(RESULT.len(), 6);
        assert!(RESULT.iter().cloned().eq(expected));

        const TERMINATED: &[u16] = encode_null_terminated!(TEXT);
        assert_eq!(TERMINATED.len(), 7);
        assert!(TERMINATED
            .iter()
            .cloned()
            .eq(TEXT.encode_utf16().chain(once(0))));
    }

    /// An empty input yields an empty array, or just the terminator.
    #[test]
    fn encode_utf16_handles_empty_input() {
        const RESULT: &[u16] = encode!("");
        assert!(RESULT.is_empty());

        const TERMINATED: &[u16] = encode_null_terminated!("");
        assert!(TERMINATED.iter().cloned().eq(once(0)));
    }
}