#![no_std]
//! Compile-time conversion of constant UTF-8 strings into UTF-16 code units.
//!
//! Two macros are exported:
//!
//! * `encode!` turns a constant `&str` expression into a reference to a
//!   fixed-size `[u16; N]` array holding the UTF-16 encoding of the text.
//! * `encode_null_terminated!` does the same but appends a trailing `0` code
//!   unit and rejects, at compile time, text that contains a NUL character.
//!
//! All of the work happens during constant evaluation; the crate itself only
//! depends on `core`.

// Undocumented public items are a hard error for this crate.
#![deny(missing_docs)]
13
/// Encodes a constant string as UTF-16 during constant evaluation.
///
/// `encode!(s)` takes a constant expression `s` of type `&str` and evaluates to
/// a `&[u16; N]`, where `N` is the number of UTF-16 code units needed for `s`.
/// Interior NUL characters are passed through unchanged in this form.
///
/// `encode!(s, null_terminated)` additionally appends one `0` code unit, so
/// `N` is one larger. In this form a NUL character anywhere in `s` aborts
/// constant evaluation, which surfaces as a compile error.
/// `encode!(s, non_null_terminated)` is the explicit spelling of the
/// one-argument form.
///
/// The `@@ ...` rules are internal helpers and not meant to be invoked directly.
#[macro_export]
macro_rules! encode {
    // Internal: how many extra `u16` slots the selected mode appends.
    (@@ null_terminated) => {
        1
    };
    (@@ non_null_terminated) => {
        0
    };
    // Public one-argument form: no terminator.
    ($s:expr) => {{
        $crate::encode!($s, non_null_terminated)
    }};
    ($s:expr, $null_terminated:ident) => {{
        // These items are not hygienic, so their names are part of what the
        // caller's expression can collide with; keep them stable.
        const __STRING: &'static str = $s;
        const __EXTRA_BYTE: usize = $crate::encode!(@@ $null_terminated);
        // The UTF-8 byte count is used as the buffer capacity: a code point
        // never needs more UTF-16 units than it needs UTF-8 bytes.
        const __STRING_LEN: usize = __STRING.len() + __EXTRA_BYTE;
        // Pair of (zero-initialised buffer with the text written at the front,
        // number of slots actually in use including any terminator).
        const __BUFFER_AND_LEN: (&[u16; __STRING_LEN], usize) = {
            let mut units = [0; __STRING_LEN];
            let mut written = 0;

            let mut cursor = $crate::CodePointIterator::new(__STRING.as_bytes());
            while let Some((rest, scalar)) = cursor.next() {
                cursor = rest;
                if __EXTRA_BYTE == 1 && scalar == 0 {
                    // Deliberate out-of-bounds index: it makes constant
                    // evaluation fail, and the message text shows up in the
                    // offending expression of the resulting compile error.
                    #[allow(unconditional_panic)]
                    let _ =
                        ["Found a null byte in string which should have no null bytes"][usize::MAX];
                }
                if scalar > 0xFFFF {
                    // Outside the BMP: emit a high/low surrogate pair.
                    let offset = scalar - 0x1_0000;
                    units[written] = 0xD800 | ((offset >> 10) as u16);
                    units[written + 1] = 0xDC00 | ((offset as u16) & 0x3FF);
                    written += 2;
                } else {
                    // Fits in a single 16-bit code unit.
                    units[written] = scalar as u16;
                    written += 1;
                }
            }
            // Slots past `written` are still zero, which supplies the
            // terminator when one was requested.
            (&{ units }, written + __EXTRA_BYTE)
        };
        // SAFETY: source and target are both references to a thin reference to
        // a `u16` array, so they have the same size. The target array length
        // `__BUFFER_AND_LEN.1` counts only slots of the `__STRING_LEN`-sized
        // buffer that the loop above indexed (plus the reserved terminator
        // slot), so the shortened view stays within the original allocation.
        const __OUT: &[u16; __BUFFER_AND_LEN.1] = unsafe {
            ::core::mem::transmute::<
                &'static &[u16; __STRING_LEN],
                &'static &[u16; __BUFFER_AND_LEN.1],
            >(&__BUFFER_AND_LEN.0)
        };
        __OUT
    }};
}
64
/// Encodes a constant string as UTF-16 with a trailing `0` code unit.
///
/// Shorthand for `encode!(text, null_terminated)`: the result is a reference
/// to a `u16` array holding the encoded text followed by a single `0`. A NUL
/// character inside the text makes constant evaluation fail, i.e. it is
/// reported as a compile error.
#[macro_export]
macro_rules! encode_null_terminated {
    ($text:expr) => {{
        $crate::encode!($text, null_terminated)
    }};
}
76
/// Cursor over the code points encoded in a UTF-8 byte buffer.
///
/// The type is public but hidden from the rendered documentation; the
/// exported `encode!` macro reaches it through `$crate::CodePointIterator`.
/// Advancing is done by value (see `next`), which keeps it usable inside
/// `const` evaluation.
#[doc(hidden)]
pub struct CodePointIterator<'a> {
    // Bytes being decoded.
    // NOTE(review): the decoder does not validate its input, so this is
    // presumably always the bytes of a `&str` — confirm against callers.
    buffer: &'a [u8],
    // Index into `buffer` at which the next code point starts.
    offset: usize,
}
82
83impl<'a> CodePointIterator<'a> {
84 #[doc(hidden)]
85 pub const fn new(buffer: &'a [u8]) -> Self {
86 Self::new_with_offset(buffer, 0)
87 }
88
89 #[doc(hidden)]
90 pub const fn new_with_offset(buffer: &'a [u8], offset: usize) -> Self {
91 Self { buffer, offset }
92 }
93
94 #[doc(hidden)]
95 pub const fn next(self) -> Option<(Self, u32)> {
96 if let Some((codepont, num_utf8_bytes)) = next_code_point(self.buffer, self.offset) {
97 Some((
98 Self::new_with_offset(self.buffer, self.offset + num_utf8_bytes),
99 codepont,
100 ))
101 } else {
102 None
103 }
104 }
105}
106
107const fn next_code_point(bytes: &[u8], start: usize) -> Option<(u32, usize)> {
109 if bytes.len() == start {
110 return None;
111 }
112 let mut num_bytes = 1;
113 let x = bytes[start + 0];
114 if x < 128 {
115 return Some((x as u32, num_bytes));
116 }
117 let init = utf8_first_byte(x, 2);
121 let y = unwrap_or_0(bytes, start + 1);
122 if y != 0 {
123 num_bytes += 1;
124 }
125 let mut ch = utf8_acc_cont_byte(init, y);
126 if x >= 0xE0 {
127 let z = unwrap_or_0(bytes, start + 2);
130 if z != 0 {
131 num_bytes += 1;
132 }
133 let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
134 ch = init << 12 | y_z;
135 if x >= 0xF0 {
136 let w = unwrap_or_0(bytes, start + 3);
139 if w != 0 {
140 num_bytes += 1;
141 }
142 ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
143 }
144 }
145
146 Some((ch, num_bytes))
147}
148
/// Extracts the payload bits of a UTF-8 lead byte.
///
/// Keeps the bits of `byte` selected by `0x7F >> width` and widens the result
/// to `u32`.
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask: u8 = 0x7F >> width;
    (byte & payload_mask) as u32
}
155
/// Returns `slice[index]`, or `0` when `index` is out of range.
const fn unwrap_or_0(slice: &[u8], index: usize) -> u8 {
    if index >= slice.len() {
        return 0;
    }
    slice[index]
}
163
164const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
165 (ch << 6) | (byte & CONT_MASK) as u32
166}
167
/// Mask selecting the low six bits (`0b0011_1111`) of a byte, i.e. the payload
/// bits that the decoder takes from a continuation byte.
const CONT_MASK: u8 = 0x3F;
170
#[cfg(test)]
mod tests {
    use super::*;
    use core::iter::once;

    /// Without a terminator the output must equal `str::encode_utf16`, even
    /// when the text contains an interior NUL.
    #[test]
    fn encode_utf16_works() {
        const TEXT: &str = "Hello \0ä日本 語";
        const RESULT: &[u16] = encode!(TEXT);

        let expected = TEXT.encode_utf16();
        assert!(expected.eq(RESULT.iter().copied()));
    }

    /// With a terminator the output must equal `str::encode_utf16` followed by
    /// a single `0` code unit.
    #[test]
    fn encode_utf16_with_null_byte_works() {
        const TEXT: &str = "Hello ä日本 語";
        const RESULT: &[u16] = encode_null_terminated!(TEXT);

        let expected = TEXT.encode_utf16().chain(once(0));
        assert!(expected.eq(RESULT.iter().copied()));
    }
}