// utf16_lit/lib.rs

#![no_std]
//! Provides `macro_rules!` macros for making UTF-16 literals.
//!
//! Outputs are arrays of the correct size. Prefix the macro with `&` to make
//! slices.
//!
//! ```rust
//! use utf16_lit::{utf16, utf16_null};
//!
//! const EXAMPLE: &[u16] = &utf16!("example");
//!
//! const EXAMPLE_NULL: &[u16] = &utf16_null!("example");
//!
//! fn main() {
//!   let v: Vec<u16> = "example".encode_utf16().collect();
//!   assert_eq!(v, EXAMPLE);
//!
//!   let v: Vec<u16> = "example".encode_utf16().chain(Some(0)).collect();
//!   assert_eq!(v, EXAMPLE_NULL);
//!   let v: Vec<u16> = "example\0".encode_utf16().collect();
//!   assert_eq!(v, EXAMPLE_NULL);
//!
//!   // You don't even need to assign the output to a const.
//!   assert_eq!(utf16!("This works")[0], 'T' as u8 as u16);
//! }
//! ```

// Generates the crate's exported macros.
//
// Every `<name> has <n> trailing zeroes` entry (optionally preceded by
// attributes / doc comments) becomes a `#[macro_export]`ed `macro_rules!`
// called `<name>`. That macro takes one constant `&str` expression and
// evaluates, entirely at compile time, to a `[u16; LEN + n]` array holding the
// UTF-16 encoding of the text followed by `n` zero code units.
macro_rules! imp {
  ($(
    $(#[$m:meta])*
    $name:ident has $n:literal trailing zeroes
  )*) => {
    $(
      $(#[$m])*
      #[macro_export]
      macro_rules! $name {
        ($text:expr) => {{
          // Here we pick a name highly unlikely to exist in the scope
          // that $text came from, which prevents a potential const eval cycle error.
          const ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF8: &str = $text;
          // Number of UTF-16 code units needed for the text, plus the
          // requested number of trailing zero code units.
          const ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_LEN: usize =
            $crate::internals::length_as_utf16(ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF8) + $n;
          const ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF16: [u16; ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_LEN] = {
            // The buffer starts zero-filled, so any trailing slots the loop
            // below never writes stay `0`.
            let mut buffer = [0u16; ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_LEN];
            let mut bytes = ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF8.as_bytes();
            let mut i = 0;
            while let Some((ch, rest)) = $crate::internals::next_code_point(bytes) {
              bytes = rest;
              // https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf16
              if ch & 0xFFFF == ch {
                // The code point fits in 16 bits: a single code unit.
                buffer[i] = ch as u16;
                i += 1;
              } else {
                // Anything larger is split into a high/low surrogate pair.
                let code = ch - 0x1_0000;
                buffer[i] = 0xD800 | ((code >> 10) as u16);
                buffer[i + 1] = 0xDC00 | ((code as u16) & 0x3FF);
                i += 2;
              }
            }
            buffer
          };
          ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF16
        }};
      }
    )*
  }
}

69imp! {
70  /// Turns a string literal into a `u16` array literal (`[u16; N]`).
71  ///
72  /// If you want to have a "null terminated" string (such as for some parts of
73  /// Windows FFI) then you should use [`utf16_null!`](utf16_null!).
74  utf16 has 0 trailing zeroes
75
76  /// Turns a string literal into a `u16` array literal (`[u16; N]`) with a trailing `0`.
77  ///
78  /// If you do **not** want to have a null terminator added to the string then
79  /// you should use [`utf16!`](utf16!).
80  utf16_null has 1 trailing zeroes
81}
82
#[doc(hidden)]
pub mod internals {
  /// Decodes the first code point of `utf8`.
  ///
  /// A const implementation of
  /// <https://github.com/rust-lang/rust/blob/d902752866cbbdb331e3cf28ff6bba86ab0f6c62/library/core/src/str/mod.rs#L509-L537>
  ///
  /// Returns the code point together with the bytes that follow it, or `None`
  /// when `utf8` is empty or too short to match any of the arms below.
  ///
  /// Assumes `utf8` is the content of a valid `&str`: continuation bytes are
  /// masked but never validated, and any lead byte that is not a 1-, 2- or
  /// 3-byte lead is treated as the start of a 4-byte sequence.
  pub const fn next_code_point(utf8: &[u8]) -> Option<(u32, &[u8])> {
    // Payload bits carried by a continuation byte (`0b10xx_xxxx`).
    const CONT_MASK: u8 = 0b0011_1111;
    match utf8 {
      // 1 byte: `0xxx_xxxx`
      [one @ 0..=0b0111_1111, rest @ ..] => Some((*one as u32, rest)),
      // 2 bytes: `110x_xxxx 10xx_xxxx`
      [one @ 0b1100_0000..=0b1101_1111, two, rest @ ..] => Some((
        (((*one & 0b0001_1111) as u32) << 6) | ((*two & CONT_MASK) as u32),
        rest,
      )),
      // 3 bytes: `1110_xxxx 10xx_xxxx 10xx_xxxx`
      [one @ 0b1110_0000..=0b1110_1111, two, three, rest @ ..] => Some((
        (((*one & 0b0000_1111) as u32) << 12)
          | (((*two & CONT_MASK) as u32) << 6)
          | ((*three & CONT_MASK) as u32),
        rest,
      )),
      // 4 bytes: `1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx`
      [one, two, three, four, rest @ ..] => Some((
        (((*one & 0b0000_0111) as u32) << 18)
          | (((*two & CONT_MASK) as u32) << 12)
          | (((*three & CONT_MASK) as u32) << 6)
          | ((*four & CONT_MASK) as u32),
        rest,
      )),
      // Nothing left to decode.
      [..] => None,
    }
  }

  /// Counts the `u16` code units needed to encode `s` as UTF-16.
  ///
  /// A const implementation of `s.chars().map(|ch| ch.len_utf16()).sum()`
  pub const fn length_as_utf16(s: &str) -> usize {
    let mut bytes = s.as_bytes();
    let mut len = 0;
    while let Some((ch, rest)) = next_code_point(bytes) {
      bytes = rest;
      // One unit when the code point fits in 16 bits, otherwise a surrogate pair.
      len += if (ch & 0xFFFF) == ch { 1 } else { 2 };
    }
    len
  }
}
121}