// solar_parse/lexer/utf8.rs

// Marker bits OR-ed into the bytes of an encoded sequence.
const TAG_CONT: u8 = 0x80; // 0b1000_0000: continuation byte
const TAG_TWO_B: u8 = 0xC0; // 0b1100_0000: lead byte of a two-byte sequence
const TAG_THREE_B: u8 = 0xE0; // 0b1110_0000: lead byte of a three-byte sequence
const TAG_FOUR_B: u8 = 0xF0; // 0b1111_0000: lead byte of a four-byte sequence

// Exclusive upper bounds on the values that fit in one, two and three bytes.
const MAX_ONE_B: u32 = 1 << 7; // 0x80
const MAX_TWO_B: u32 = 1 << 11; // 0x800
const MAX_THREE_B: u32 = 1 << 16; // 0x10000

10#[inline]
11const fn len_utf8(code: u32) -> usize {
12 if code < MAX_ONE_B {
13 1
14 } else if code < MAX_TWO_B {
15 2
16 } else if code < MAX_THREE_B {
17 3
18 } else {
19 4
20 }
21}
22
23#[inline]
25#[allow(clippy::precedence)]
26pub(super) fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
27 let len = len_utf8(code);
28 match (len, &mut dst[..]) {
29 (1, [a, ..]) => {
30 *a = code as u8;
31 }
32 (2, [a, b, ..]) => {
33 *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
34 *b = (code & 0x3F) as u8 | TAG_CONT;
35 }
36 (3, [a, b, c, ..]) => {
37 *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
38 *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
39 *c = (code & 0x3F) as u8 | TAG_CONT;
40 }
41 (4, [a, b, c, d, ..]) => {
42 *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
43 *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
44 *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
45 *d = (code & 0x3F) as u8 | TAG_CONT;
46 }
47 _ => panic!(
48 "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
49 len,
50 code,
51 dst.len(),
52 ),
53 };
54 &mut dst[..len]
55}