1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#![no_std]
//! Provides `macro_rules!` macros for making UTF-16 literals.
//!
//! Outputs are arrays of the correct size. Prefix the macro with `&` to make
//! slices.
//!
//! ```rust
//! use utf16_lit::{utf16, utf16_null};
//!
//! const EXAMPLE: &[u16] = &utf16!("example");
//!
//! const EXAMPLE_NULL: &[u16] = &utf16_null!("example");
//!
//! fn main() {
//!   let v: Vec<u16> = "example".encode_utf16().collect();
//!   assert_eq!(v, EXAMPLE);
//!
//!   let v: Vec<u16> = "example".encode_utf16().chain(Some(0)).collect();
//!   assert_eq!(v, EXAMPLE_NULL);
//!   let v: Vec<u16> = "example\0".encode_utf16().collect();
//!   assert_eq!(v, EXAMPLE_NULL);
//!
//!   // You don't even need to assign the output to a const.
//!   assert_eq!(utf16!("This works")[0], 'T' as u8 as u16);
//! }
//! ```

// Generates the exported literal macros.
//
// Every `<name> has <n> trailing zeroes` entry, optionally preceded by
// attributes (doc comments included), becomes a `#[macro_export]` macro with
// that name. The generated macro expands to a `[u16; N]` containing the UTF-16
// encoding of its argument followed by `<n>` zero units.
macro_rules! imp {
  ($(
    $(#[$attr:meta])*
    $mac:ident has $zeroes:literal trailing zeroes
  )*) => {
    $(
      $(#[$attr])*
      #[macro_export]
      macro_rules! $mac {
        ($source:expr) => {{
          // Items are not hygienic, so these consts use names that the caller's
          // expression is very unlikely to mention; a shared name would make the
          // const refer to itself and trigger a const eval cycle error.
          const ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF8: &str = $source;
          const ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_LEN: usize =
            $crate::internals::length_as_utf16(ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF8) + $zeroes;
          const ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF16: [u16; ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_LEN] = {
            // Starts zero-filled: whatever lies past the encoded text stays 0
            // and forms the requested trailing zeroes.
            let mut units = [0u16; ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_LEN];
            let mut remaining = ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF8.as_bytes();
            let mut at = 0;
            loop {
              match $crate::internals::next_code_point(remaining) {
                None => break,
                Some((scalar, tail)) => {
                  remaining = tail;
                  // Same scheme as `char::encode_utf16`:
                  // https://doc.rust-lang.org/std/primitive.char.html#method.encode_utf16
                  if scalar > 0xFFFF {
                    // Outside the BMP: emit a high/low surrogate pair.
                    let offset = scalar - 0x1_0000;
                    units[at] = 0xD800 | ((offset >> 10) as u16);
                    units[at + 1] = 0xDC00 | ((offset as u16) & 0x3FF);
                    at += 2;
                  } else {
                    // Fits in a single code unit.
                    units[at] = scalar as u16;
                    at += 1;
                  }
                }
              }
            }
            units
          };
          ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF16
        }};
      }
    )*
  }
}

// Instantiates the two public macros. The doc comments on each entry are
// forwarded by `imp!` onto the generated `#[macro_export]` macro.
imp! {
  /// Turns a string literal into a `u16` array literal (`[u16; N]`).
  ///
  /// `N` is the number of UTF-16 code units needed to encode the text. The
  /// conversion is done in `const` items, so the argument has to be usable as
  /// a constant `&str` expression.
  ///
  /// If you want to have a "null terminated" string (such as for some parts of
  /// Windows FFI) then you should use [`utf16_null!`](utf16_null!).
  utf16 has 0 trailing zeroes

  /// Turns a string literal into a `u16` array literal (`[u16; N]`) with a trailing `0`.
  ///
  /// `N` is the number of UTF-16 code units needed to encode the text, plus
  /// one for the terminator. The conversion is done in `const` items, so the
  /// argument has to be usable as a constant `&str` expression.
  ///
  /// If you do **not** want to have a null terminator added to the string then
  /// you should use [`utf16!`](utf16!).
  utf16_null has 1 trailing zeroes
}

#[doc(hidden)]
pub mod internals {
  // Returns the six payload bits carried by a UTF-8 continuation byte.
  const fn payload(byte: u8) -> u32 {
    (byte & 0x3F) as u32
  }

  // Decodes the code point at the front of `utf8` and returns it together with
  // the bytes that follow it, or `None` when there are not enough bytes left.
  //
  // Const-evaluable take on core's `next_code_point`:
  // https://github.com/rust-lang/rust/blob/d902752866cbbdb331e3cf28ff6bba86ab0f6c62/library/core/src/str/mod.rs#L509-L537
  //
  // The input is assumed to come from a valid `&str`; no validation is done.
  pub const fn next_code_point(utf8: &[u8]) -> Option<(u32, &[u8])> {
    match utf8 {
      // 0xxxxxxx: a single ASCII byte.
      [lead @ 0x00..=0x7F, tail @ ..] => Some((*lead as u32, tail)),
      // 110xxxxx 10xxxxxx
      [lead @ 0xC0..=0xDF, b1, tail @ ..] => {
        let scalar = (((*lead & 0x1F) as u32) << 6) | payload(*b1);
        Some((scalar, tail))
      }
      // 1110xxxx 10xxxxxx 10xxxxxx
      [lead @ 0xE0..=0xEF, b1, b2, tail @ ..] => {
        let scalar = (((*lead & 0x0F) as u32) << 12) | (payload(*b1) << 6) | payload(*b2);
        Some((scalar, tail))
      }
      // Every other lead byte is read as the start of a four byte sequence
      // (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx for valid input).
      [lead, b1, b2, b3, tail @ ..] => {
        let scalar = (((*lead & 0x07) as u32) << 18)
          | (payload(*b1) << 12)
          | (payload(*b2) << 6)
          | payload(*b3);
        Some((scalar, tail))
      }
      // Empty, or too short for the sequence the lead byte announces.
      _ => None,
    }
  }

  // Counts the UTF-16 code units needed to encode `s`; the const equivalent of
  // `s.chars().map(|ch| ch.len_utf16()).sum()`.
  pub const fn length_as_utf16(s: &str) -> usize {
    let mut remaining = s.as_bytes();
    let mut units = 0;
    loop {
      match next_code_point(remaining) {
        None => break,
        Some((scalar, tail)) => {
          remaining = tail;
          // Code points above the BMP take a surrogate pair.
          units += if scalar > 0xFFFF { 2 } else { 1 };
        }
      }
    }
    units
  }
}