boa_parser/source/
utf8.rs1use super::ReadChar;
2use std::io::{self, Bytes, Read};
3
/// A character source that reads UTF-8 encoded text from a byte-oriented reader.
///
/// The wrapped reader `R` is consumed one byte at a time through
/// [`std::io::Read::bytes`]; code points are assembled from those bytes by the
/// `ReadChar` implementation for this type.
#[derive(Debug)]
pub struct UTF8Input<R> {
    /// Byte-by-byte iterator over the underlying reader.
    input: Bytes<R>,
}
9
10impl<R: Read> UTF8Input<R> {
11 pub(crate) fn new(iter: R) -> Self {
13 Self {
14 #[allow(clippy::unbuffered_bytes)]
15 input: iter.bytes(),
16 }
17 }
18}
19
20impl<R: Read> UTF8Input<R> {
21 fn next_byte(&mut self) -> io::Result<Option<u8>> {
23 self.input.next().transpose()
24 }
25}
26
27impl<R: Read> ReadChar for UTF8Input<R> {
28 fn next_char(&mut self) -> io::Result<Option<u32>> {
30 let x = match self.next_byte()? {
32 Some(b) if b >= 128 => b, b => return Ok(b.map(u32::from)), };
35
36 let init = utf8_first_byte(x, 2);
40 let y = self.next_byte()?.unwrap_or(0);
41 let mut ch = utf8_acc_cont_byte(init, y);
42 if x >= 0xE0 {
43 let z = self.next_byte()?.unwrap_or(0);
46 let y_z = utf8_acc_cont_byte(u32::from(y & CONT_MASK), z);
47 ch = (init << 12) | y_z;
48 if x >= 0xF0 {
49 let w = self.next_byte()?.unwrap_or(0);
52 ch = ((init & 7) << 18) | utf8_acc_cont_byte(y_z, w);
53 }
54 }
55
56 Ok(Some(ch))
57 }
58}
59
/// Mask selecting the low six bits (`0b0011_1111`) of a byte, i.e. the payload
/// bits that `utf8_acc_cont_byte` folds into the code point being assembled.
const CONT_MASK: u8 = 0x3F;
62
/// Extracts the payload bits carried by the leading byte of a multi-byte sequence.
///
/// The byte is masked with `0x7F >> width`, so a `width` of 2 keeps the low five
/// bits, 3 keeps the low four bits and 4 keeps the low three bits.
///
/// A `width` of 8 or more shifts every mask bit out and yields `0`. The shift is
/// done with `checked_shr` so that such a width neither panics in debug builds
/// nor wraps the shift amount in release builds.
fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let mask = 0x7F_u8.checked_shr(width).unwrap_or(0);
    u32::from(byte & mask)
}
69
70fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
72 (ch << 6) | u32::from(byte & CONT_MASK)
73}