obeli_sk_boa_parser/source/
utf16.rs1use super::ReadChar;
2use std::io;
3
/// Cursor over a borrowed buffer of UTF-16 code units.
///
/// `input` is the buffer being read and `index` is the offset of the next
/// code unit that has not been consumed yet.
#[derive(Debug)]
pub struct UTF16Input<'src> {
    /// Borrowed code units to read from.
    input: &'src [u16],
    /// Offset into `input` of the next unit to read.
    index: usize,
}
10
11impl<'a> UTF16Input<'a> {
12 #[must_use]
16 pub const fn new(input: &'a [u16]) -> Self {
17 Self { input, index: 0 }
18 }
19
20 #[cold]
22 fn handle_surrogate_pair(&mut self, u1: u16) -> u32 {
23 let Some(u2) = self.input.get(self.index).copied() else {
24 return u1.into();
25 };
26
27 if !is_low_surrogate(u2) {
29 return u1.into();
30 }
31
32 self.index += 1;
33
34 code_point_from_surrogates(u1, u2)
35 }
36}
37
38impl ReadChar for UTF16Input<'_> {
39 fn next_char(&mut self) -> io::Result<Option<u32>> {
41 let Some(u1) = self.input.get(self.index).copied() else {
42 return Ok(None);
43 };
44
45 self.index += 1;
46
47 if !is_high_surrogate(u1) {
49 return Ok(Some(u1.into()));
50 }
51
52 Ok(Some(self.handle_surrogate_pair(u1)))
53 }
54}
55
/// First code unit of the high (leading) surrogate range.
const SURROGATE_HIGH_START: u16 = 0xD8_00;
/// Last code unit of the high (leading) surrogate range.
const SURROGATE_HIGH_END: u16 = 0xDB_FF;
/// First code unit of the low (trailing) surrogate range.
const SURROGATE_LOW_START: u16 = 0xDC_00;
/// Last code unit of the low (trailing) surrogate range.
const SURROGATE_LOW_END: u16 = 0xDF_FF;
60
61fn is_high_surrogate(b: u16) -> bool {
62 (SURROGATE_HIGH_START..=SURROGATE_HIGH_END).contains(&b)
63}
64
65fn is_low_surrogate(b: u16) -> bool {
66 (SURROGATE_LOW_START..=SURROGATE_LOW_END).contains(&b)
67}
68
/// Combines two UTF-16 code units into a single code point value.
///
/// Only the low ten bits of each argument are used: those of `high` become
/// bits 10..20 of the offset and those of `low` become bits 0..10. The offset
/// is then added to `0x1_0000`. The arguments are not checked for actually
/// being surrogates.
fn code_point_from_surrogates(high: u16, low: u16) -> u32 {
    let upper_bits = u32::from(high & 0x3ff) << 10;
    let lower_bits = u32::from(low & 0x3ff);
    0x1_0000 + (upper_bits | lower_bits)
}