wood_parse 0.2.0

A library for lexing
Documentation
use std::io::Read;

use anyhow::Result;

#[inline]
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    (byte & (0x7F >> width)) as u32
}

fn read_byte(reader: &mut impl Read) -> Option<Result<u8>> {
    let mut buf = [0u8; 1];
    let l = match reader.read(&mut buf) {
        Ok(l) => l,
        Err(err) => return Some(Err(err.into())),
    };
    if l == 0 {
        return None;
    }
    Some(Ok(buf[0]))
}

const CONT_MASK: u8 = 0b0011_1111;

#[inline]
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
    (ch << 6) | (byte & CONT_MASK) as u32
}

pub fn read_char(reader: &mut impl Read) -> Option<Result<char>> {
    let x = match read_byte(reader) {
        Some(Ok(x)) => x,
        Some(Err(err)) => return Some(Err(err)),
        None => return None,
    };
    if x < 128 {
        return Some(Ok(x as char));
    }

    let init = utf8_first_byte(x, 2);

    let y = match read_byte(reader) {
        Some(Ok(x)) => x,
        Some(Err(err)) => return Some(Err(err)),
        None => return None,
    };
    let mut ch = utf8_acc_cont_byte(init, y);
    if x >= 0xE0 {
        let z = match read_byte(reader) {
            Some(Ok(x)) => x,
            Some(Err(err)) => return Some(Err(err)),
            None => return None,
        };
        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
        ch = init << 12 | y_z;
        if x >= 0xF0 {
            let w = match read_byte(reader) {
                Some(Ok(x)) => x,
                Some(Err(err)) => return Some(Err(err)),
                None => return None,
            };
            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
        }
    }

    char::from_u32(ch)
        .ok_or(anyhow::anyhow!("invalid utf-8"))
        .into()
}