quake-util 0.4.0

A utility library for using Quake file formats
Documentation
#[cfg(feature = "std")]
extern crate std;

use std::{
    cell::{Cell, RefCell},
    convert::TryInto,
    fmt, io,
    num::{NonZeroU64, NonZeroU8},
    string::String,
    vec::Vec,
};

use crate::error;

/// Result of one lexer step.
///
/// * `Ok(Some(token))` - the step completed a token.
/// * `Ok(None)` - the step consumed input without completing a token (or, at
///   end of input, there was nothing left to flush).
/// * `Err(cell)` - reading or lexing failed; the error is carried as a
///   `Cell<Option<error::TextParse>>`.
pub type LexResult = Result<Option<Token>, Cell<Option<error::TextParse>>>;

/// Initial capacity, in bytes, reserved for the buffer of each newly started
/// token.
const TEXT_CAPACITY: usize = 32;

/// A lexed token: its raw (non-zero) bytes plus the line number attached to
/// it when it was produced.
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct Token {
    /// Raw bytes of the token; a zero byte cannot be represented.
    pub text: Vec<NonZeroU8>,
    /// Line number associated with the token.
    pub line_number: NonZeroU64,
}

impl Token {
    /// Returns `true` when the token consists of exactly the single byte
    /// `byte`.
    pub fn match_byte(&self, byte: u8) -> bool {
        match self.text.first() {
            Some(only) if self.text.len() == 1 => only.get() == byte,
            _ => false,
        }
    }

    /// Returns `true` when the token is at least two bytes long and both its
    /// first and last bytes are double quotes.
    pub fn match_quoted(&self) -> bool {
        let quote = b'"';

        match (self.text.first(), self.text.last()) {
            (Some(head), Some(tail)) => {
                self.text.len() >= 2
                    && head.get() == quote
                    && tail.get() == quote
            }
            _ => false,
        }
    }

    /// Returns `true` when the token is non-empty and begins with `-` or an
    /// ASCII digit.
    pub fn starts_numeric(&self) -> bool {
        self.text.first().map_or(false, |lead| {
            let lead = lead.get();
            lead == b'-' || lead.is_ascii_digit()
        })
    }

    /// Builds a `String` by converting every byte of the token directly to
    /// the `char` with the same numeric value.
    pub fn text_as_string(&self) -> String {
        let mut rendered = String::with_capacity(self.text.len());

        for byte in &self.text {
            rendered.push(char::from(byte.get()));
        }

        rendered
    }
}

impl fmt::Display for Token {
    /// Formats the token as `<text>: line <line_number>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rendered = self.text_as_string();
        write!(f, "{}: line {}", rendered, self.line_number)
    }
}

/// Iterator that lexes tokens from a byte reader, one byte at a time, using a
/// state function that is swapped as lexing progresses.
#[derive(Debug)]
pub struct TokenIterator<R: io::Read> {
    /// Bytes of the token currently being accumulated; `None` while no token
    /// is in progress.
    text: RefCell<Option<Vec<NonZeroU8>>>,
    /// Current lexer state; invoked once for every byte read.
    state: fn(iter: &mut TokenIterator<R>) -> Option<Token>,
    /// Most recently read byte (`None` before the first byte is read).
    byte: Option<NonZeroU8>,
    /// The byte read before `byte`; consulted for line counting.
    last_byte: Option<NonZeroU8>,
    /// Current line number, starting at 1.
    line_number: NonZeroU64,
    /// Source of input bytes.
    input: io::Bytes<R>,
}

impl<R: io::Read> TokenIterator<R> {
    pub fn new(reader: R) -> TokenIterator<R> {
        TokenIterator {
            text: RefCell::new(None),
            state: lex_default,
            byte: None,
            last_byte: None,
            line_number: NonZeroU64::new(1).unwrap(),
            input: reader.bytes(),
        }
    }

    fn byte_read(&mut self, b: io::Result<u8>) -> LexResult {
        let byte = b.map_err(|e| Cell::new(Some(e.into())))?;

        self.byte = Some(
            byte.try_into()
                .map_err(|_| {
                    error::TextParse::from_lexer(
                        String::from("Null byte"),
                        self.line_number,
                    )
                })
                .map_err(Some)
                .map_err(Cell::new)?,
        );

        let maybe_token = (self.state)(self);

        if self.byte == NonZeroU8::new(b'\n')
            || self.last_byte == NonZeroU8::new(b'\r')
        {
            let next_line = self.line_number.get().saturating_add(1);
            unsafe {
                self.line_number = NonZeroU64::new_unchecked(next_line);
            }
        }

        self.last_byte = self.byte;

        Ok(maybe_token)
    }

    fn eof_read(&mut self) -> LexResult {
        if let Some(last_text) = self.text.replace(None) {
            if last_text[0] == NonZeroU8::new(b'"').unwrap()
                && (last_text.last() != NonZeroU8::new(b'"').as_ref()
                    || last_text.len() == 1)
            {
                Err(Cell::new(Some(error::TextParse::from_lexer(
                    String::from("Missing closing quote"),
                    self.line_number,
                ))))
            } else {
                Ok(Some(Token {
                    text: last_text,
                    line_number: self.line_number,
                }))
            }
        } else {
            Ok(None)
        }
    }
}

impl<R: io::Read> Iterator for TokenIterator<R> {
    type Item = Result<Token, Cell<Option<error::TextParse>>>;

    /// Pulls bytes from the input until a token is completed or an error
    /// occurs; once the input is exhausted, flushes any pending token and
    /// then yields `None`.
    fn next(&mut self) -> Option<Self::Item> {
        while let Some(read) = self.input.next() {
            match self.byte_read(read) {
                // Byte consumed without finishing a token: keep reading.
                Ok(None) => continue,
                Ok(Some(token)) => return Some(Ok(token)),
                Err(failure) => return Some(Err(failure)),
            }
        }

        self.eof_read().transpose()
    }
}

fn lex_default<R: io::Read>(iterator: &mut TokenIterator<R>) -> Option<Token> {
    if !iterator.byte.unwrap().get().is_ascii_whitespace() {
        if iterator.byte == NonZeroU8::new(b'"') {
            iterator.state = lex_quoted;
            let mut text_bytes = Vec::with_capacity(TEXT_CAPACITY);
            text_bytes.push(iterator.byte.unwrap());
            *iterator.text.borrow_mut() = Some(text_bytes);
        } else if iterator.byte == NonZeroU8::new(b'/') {
            iterator.state = lex_maybe_comment;
        } else {
            iterator.state = lex_unquoted;
            let mut text_bytes = Vec::with_capacity(TEXT_CAPACITY);
            text_bytes.push(iterator.byte.unwrap());
            *iterator.text.borrow_mut() = Some(text_bytes);
        }
    }

    None
}

/// Comment state: discards every byte up to and including the first `\r` or
/// `\n`, then hands control back to the default state.
///
/// Always returns `None`, since a comment never produces a token.
fn lex_comment<R: io::Read>(iterator: &mut TokenIterator<R>) -> Option<Token> {
    match iterator.byte.map(NonZeroU8::get) {
        Some(b'\r') | Some(b'\n') => iterator.state = lex_default,
        _ => {}
    }

    None
}

fn lex_maybe_comment<R: io::Read>(
    iterator: &mut TokenIterator<R>,
) -> Option<Token> {
    if iterator.byte == NonZeroU8::new(b'/') {
        iterator.state = lex_comment;
    } else {
        let mut text_bytes: Vec<NonZeroU8> = Vec::with_capacity(TEXT_CAPACITY);
        text_bytes.push(NonZeroU8::new(b'/').unwrap());
        text_bytes.push(iterator.byte.unwrap());
        *iterator.text.borrow_mut() = Some(text_bytes);
        iterator.state = lex_unquoted;
    }

    None
}

/// Quoted state: appends every byte to the token in progress, and completes
/// the token when that byte is a double quote.
///
/// The closing quote is part of the emitted text. On completion the lexer
/// returns to the default state and the token is stamped with the current
/// line number.
fn lex_quoted<R: io::Read>(iterator: &mut TokenIterator<R>) -> Option<Token> {
    let buffer = iterator.text.get_mut().as_mut().unwrap();
    buffer.push(iterator.byte.unwrap());

    if iterator.byte != NonZeroU8::new(b'"') {
        return None;
    }

    iterator.state = lex_default;
    let text = iterator.text.get_mut().take().unwrap();

    Some(Token {
        text,
        line_number: iterator.line_number,
    })
}

/// Unquoted state: accumulates bytes until ASCII whitespace is seen.
///
/// A non-whitespace byte is appended to the token in progress and `None` is
/// returned. A whitespace byte is not stored; it completes the token, which
/// is emitted with the current line number while the lexer returns to the
/// default state.
fn lex_unquoted<R: io::Read>(iterator: &mut TokenIterator<R>) -> Option<Token> {
    let current = iterator.byte.unwrap();

    if !current.get().is_ascii_whitespace() {
        iterator.text.get_mut().as_mut().unwrap().push(current);
        return None;
    }

    let text = iterator.text.get_mut().take().unwrap();
    iterator.state = lex_default;

    Some(Token {
        text,
        line_number: iterator.line_number,
    })
}