1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#![no_std]
// Generator for the crate's exported string-literal macros.
//
// Each entry of the form `<name> has <n> trailing zeroes` (optionally preceded
// by attributes, which are forwarded verbatim) produces one `#[macro_export]`
// macro called `<name>`. That macro takes a single expression usable as a
// `const &str` and evaluates to a `[u16; N]` array containing the UTF-16
// encoding of the string followed by `<n>` zero code units.
macro_rules! imp {
    ($(
        $(#[$m:meta])*
        $name:ident has $n:literal trailing zeroes
    )*) => {
        $(
            $(#[$m])*
            #[macro_export]
            macro_rules! $name {
                ($text:expr) => {{
                    // Constants are items and therefore not hygienic: the deliberately
                    // unusual prefix keeps them from colliding with anything `$text`
                    // might name in the caller's scope.
                    const ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF8: &str = $text;
                    // Output length: UTF-16 code units of the text plus the trailing zeroes.
                    const ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_LEN: usize =
                        $crate::internals::length_as_utf16(
                            ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF8,
                        ) + $n;
                    const ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF16:
                        [u16; ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_LEN] = {
                        // Zero-initialised, so slots past the encoded text are the
                        // requested trailing zeroes.
                        let mut units = [0u16; ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_LEN];
                        let mut remaining =
                            ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF8.as_bytes();
                        let mut cursor = 0;
                        loop {
                            let (scalar, tail) = match $crate::internals::next_code_point(remaining) {
                                Some(decoded) => decoded,
                                None => break,
                            };
                            remaining = tail;
                            if scalar > 0xFFFF {
                                // Does not fit in one unit: emit a high/low surrogate pair.
                                let offset = scalar - 0x1_0000;
                                units[cursor] = 0xD800 | ((offset >> 10) as u16);
                                units[cursor + 1] = 0xDC00 | ((offset as u16) & 0x3FF);
                                cursor += 2;
                            } else {
                                // Fits in a single 16-bit code unit.
                                units[cursor] = scalar as u16;
                                cursor += 1;
                            }
                        }
                        units
                    };
                    ABC678_PREFIX_THAT_SHOULD_NEVER_CLASH_WITH_OUTER_SCOPE_UTF16
                }};
            }
        )*
    }
}
// Instantiate the exported macros. The doc comments below are forwarded by
// `imp!` as attributes onto the generated `macro_rules!` items.
imp! {
/// Encodes a constant `&str` expression as UTF-16 at compile time.
///
/// Evaluates to a `[u16; N]` array, where `N` is the number of UTF-16 code
/// units in the text. No trailing zero is appended.
utf16 has 0 trailing zeroes
/// Encodes a constant `&str` expression as UTF-16 at compile time, followed
/// by one trailing `0` code unit.
///
/// Evaluates to a `[u16; N + 1]` array, where `N` is the number of UTF-16
/// code units in the text.
utf16_null has 1 trailing zeroes
}
/// Support functions called by the expansions of this crate's macros through
/// `$crate::internals::…`. Hidden from the rendered documentation.
#[doc(hidden)]
pub mod internals {
    /// Decodes one code point from the front of `utf8`.
    ///
    /// Returns the decoded scalar value together with the remaining bytes, or
    /// `None` when `utf8` is empty, is too short for the sequence announced by
    /// its first byte, or starts with a byte that cannot begin a UTF-8 sequence
    /// (a continuation byte `0x80..=0xBF` or `0xF8..=0xFF`).
    ///
    /// Continuation bytes are only masked, not validated; callers are expected
    /// to pass bytes that originate from a `str`.
    pub const fn next_code_point(utf8: &[u8]) -> Option<(u32, &[u8])> {
        // Payload bits carried by a continuation byte (`10xx_xxxx`).
        const CONT_MASK: u8 = 0b0011_1111;
        match utf8 {
            // 0xxx_xxxx: single-byte (ASCII) sequence.
            [one @ 0..=0b0111_1111, rest @ ..] => Some((*one as u32, rest)),
            // 110x_xxxx 10xx_xxxx
            [one @ 0b1100_0000..=0b1101_1111, two, rest @ ..] => Some((
                (((*one & 0b0001_1111) as u32) << 6) | ((*two & CONT_MASK) as u32),
                rest,
            )),
            // 1110_xxxx 10xx_xxxx 10xx_xxxx
            [one @ 0b1110_0000..=0b1110_1111, two, three, rest @ ..] => Some((
                (((*one & 0b0000_1111) as u32) << 12)
                    | (((*two & CONT_MASK) as u32) << 6)
                    | ((*three & CONT_MASK) as u32),
                rest,
            )),
            // 1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx
            // The lead byte is constrained like in the arms above, so a stray
            // continuation byte or an invalid lead is rejected rather than
            // decoded into a bogus code point.
            [one @ 0b1111_0000..=0b1111_0111, two, three, four, rest @ ..] => Some((
                (((*one & 0b0000_0111) as u32) << 18)
                    | (((*two & CONT_MASK) as u32) << 12)
                    | (((*three & CONT_MASK) as u32) << 6)
                    | ((*four & CONT_MASK) as u32),
                rest,
            )),
            // Empty, truncated, or malformed input.
            [..] => None,
        }
    }

    /// Returns the number of UTF-16 code units needed to encode `s`.
    ///
    /// Code points up to `0xFFFF` count as one unit; anything larger needs a
    /// surrogate pair and counts as two.
    pub const fn length_as_utf16(s: &str) -> usize {
        let mut bytes = s.as_bytes();
        let mut len = 0;
        while let Some((ch, rest)) = next_code_point(bytes) {
            bytes = rest;
            len += if ch <= 0xFFFF { 1 } else { 2 };
        }
        len
    }
}