solar_parse/lexer/
utf8.rs

// Byte tags and code-point thresholds used when encoding characters as UTF-8.

/// Tag bits of a continuation byte (`0b10xx_xxxx`).
const TAG_CONT: u8 = 0x80;
/// Tag bits of the leading byte of a two-byte sequence (`0b110x_xxxx`).
const TAG_TWO_B: u8 = 0xC0;
/// Tag bits of the leading byte of a three-byte sequence (`0b1110_xxxx`).
const TAG_THREE_B: u8 = 0xE0;
/// Tag bits of the leading byte of a four-byte sequence (`0b1111_0xxx`).
const TAG_FOUR_B: u8 = 0xF0;
/// Exclusive upper bound of the code points that fit in one byte (7 payload bits).
const MAX_ONE_B: u32 = 1 << 7;
/// Exclusive upper bound of the code points that fit in two bytes (11 payload bits).
const MAX_TWO_B: u32 = 1 << 11;
/// Exclusive upper bound of the code points that fit in three bytes (16 payload bits).
const MAX_THREE_B: u32 = 1 << 16;
9
10#[inline]
11const fn len_utf8(code: u32) -> usize {
12    if code < MAX_ONE_B {
13        1
14    } else if code < MAX_TWO_B {
15        2
16    } else if code < MAX_THREE_B {
17        3
18    } else {
19        4
20    }
21}
22
23/// Copied from [`core::char::encode_utf8_raw`].
24#[inline]
25#[allow(clippy::precedence)]
26pub(super) fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
27    let len = len_utf8(code);
28    match (len, &mut dst[..]) {
29        (1, [a, ..]) => {
30            *a = code as u8;
31        }
32        (2, [a, b, ..]) => {
33            *a = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
34            *b = (code & 0x3F) as u8 | TAG_CONT;
35        }
36        (3, [a, b, c, ..]) => {
37            *a = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
38            *b = (code >> 6 & 0x3F) as u8 | TAG_CONT;
39            *c = (code & 0x3F) as u8 | TAG_CONT;
40        }
41        (4, [a, b, c, d, ..]) => {
42            *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
43            *b = (code >> 12 & 0x3F) as u8 | TAG_CONT;
44            *c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
45            *d = (code & 0x3F) as u8 | TAG_CONT;
46        }
47        _ => panic!(
48            "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
49            len,
50            code,
51            dst.len(),
52        ),
53    };
54    &mut dst[..len]
55}