// quake-map 0.6.1
//
// Parser for Quake and Quake II .map files
// Documentation
#[cfg(feature = "std")]
extern crate std;

use std::{
    cell::Cell,
    convert::TryInto,
    ffi::CString,
    fmt, io,
    iter::once,
    mem::transmute,
    num::{NonZeroU64, NonZeroU8},
    string::String,
    vec::Vec,
};

use fmt::{Display, Formatter};

use crate::error;

/// Outcome of feeding the lexer one input byte, or the end of input.
///
/// `Ok(Some(token))` carries a token that was just completed, `Ok(None)`
/// means no token was completed by this step, and `Err` carries the lexing
/// failure wrapped in a `Cell<Option<_>>`.
pub type LexResult = Result<Option<LineToken>, Cell<Option<error::TextParse>>>;

/// Initial capacity, in bytes, reserved for the buffer of each newly started
/// token.
const TEXT_CAPACITY: usize = 32;

/// A single lexical token.
///
/// String payloads are sequences of non-zero bytes. A `QuotedString` payload
/// does not include quote characters; they are added back when the token is
/// rendered as text.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum Token {
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `{`
    OpenCurly,
    /// `}`
    CloseCurly,
    /// `[`
    OpenSquare,
    /// `]`
    CloseSquare,
    /// Text that is rendered between double quotes.
    QuotedString(Vec<NonZeroU8>),
    /// Text that is rendered as-is.
    BareString(Vec<NonZeroU8>),
}

impl Token {
    /// Borrows the text of a bare string for numeric parsing.
    ///
    /// Returns the empty string for every other kind of token, and for bare
    /// strings whose bytes are not valid UTF-8.
    pub fn as_number_text(&self) -> &str {
        let Token::BareString(bytes) = self else {
            return "";
        };

        // SAFETY: `NonZeroU8` has the same layout as `u8` and every
        // `NonZeroU8` value is a valid `u8`, so the slice may be viewed as
        // plain bytes; the view is read-only and keeps the original lifetime.
        let raw = unsafe { transmute::<&[NonZeroU8], &[u8]>(bytes.as_slice()) };

        match str::from_utf8(raw) {
            Ok(text) => text,
            Err(_) => "",
        }
    }

    /// Renders the token as an owned `String`.
    ///
    /// Each payload byte becomes the `char` with the same numeric value (no
    /// UTF-8 decoding is attempted); quoted strings are wrapped in `"`.
    pub fn to_string_fast(&self) -> String {
        /// Maps every byte to the `char` with the same code point.
        fn widen(bytes: &[NonZeroU8]) -> impl Iterator<Item = char> + '_ {
            bytes.iter().map(|byte| char::from(byte.get()))
        }

        match self {
            Token::OpenParen => String::from("("),
            Token::CloseParen => String::from(")"),
            Token::OpenCurly => String::from("{"),
            Token::CloseCurly => String::from("}"),
            Token::OpenSquare => String::from("["),
            Token::CloseSquare => String::from("]"),
            Token::QuotedString(bytes) => {
                once('"').chain(widen(bytes)).chain(once('"')).collect()
            }
            Token::BareString(bytes) => widen(bytes).collect(),
        }
    }
}

impl Display for Token {
    /// Writes the same text that [`Token::to_string_fast`] produces.
    fn fmt(&self, fmtr: &mut Formatter) -> fmt::Result {
        let symbol = match self {
            Token::OpenParen => "(",
            Token::CloseParen => ")",
            Token::OpenCurly => "{",
            Token::CloseCurly => "}",
            Token::OpenSquare => "[",
            Token::CloseSquare => "]",
            // String tokens need their payload converted first.
            Token::QuotedString(_) | Token::BareString(_) => {
                return fmtr.write_str(&self.to_string_fast());
            }
        };

        fmtr.write_str(symbol)
    }
}

/// A [`Token`] paired with the line number it was reported on.
#[derive(PartialEq, Eq, Debug, Clone)]
pub struct LineToken {
    /// The token itself.
    pub token: Token,
    /// One-based line number attached to the token.
    pub line_number: NonZeroU64,
}

impl LineToken {
    /// Classifies raw token bytes and attaches `line_number` to the result.
    ///
    /// A single punctuation byte becomes the matching punctuation token.
    /// Text of at least two bytes that starts and ends with `"` becomes a
    /// `QuotedString` with both quotes removed. Anything else is kept
    /// unchanged as a `BareString`.
    pub fn from_text(text: Vec<NonZeroU8>, line_number: NonZeroU64) -> Self {
        // Copy out the bytes that drive classification so `text` can be moved
        // into the resulting token.
        let first = text.first().map(|byte| byte.get());
        let last = text.last().map(|byte| byte.get());

        let token = match (text.len(), first, last) {
            (1, Some(b'('), _) => Token::OpenParen,
            (1, Some(b')'), _) => Token::CloseParen,
            (1, Some(b'{'), _) => Token::OpenCurly,
            (1, Some(b'}'), _) => Token::CloseCurly,
            (1, Some(b'['), _) => Token::OpenSquare,
            (1, Some(b']'), _) => Token::CloseSquare,
            (len, Some(b'"'), Some(b'"')) if len >= 2 => {
                Token::QuotedString(text[1..len - 1].to_vec())
            }
            _ => Token::BareString(text),
        };

        Self { token, line_number }
    }

    /// Consumes the token and returns its string payload as a `CString`.
    ///
    /// Both bare and quoted strings yield their payload bytes; punctuation
    /// tokens yield an empty `CString`.
    pub fn into_bare_cstring(self) -> CString {
        if let Token::BareString(bytes) | Token::QuotedString(bytes) = self.token {
            CString::from(bytes)
        } else {
            CString::default()
        }
    }

    /// Returns `true` when the token is a `QuotedString`.
    pub fn is_quoted(&self) -> bool {
        matches!(self.token, Token::QuotedString(_))
    }
}

impl fmt::Display for LineToken {
    /// Formats as `<token>: line <line number>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "{token}: line {line}",
            token = self.token,
            line = self.line_number
        )
    }
}

pub struct TokenIterator<R: io::Read> {
    text: Cell<Option<Vec<NonZeroU8>>>,
    state: fn(iter: &mut TokenIterator<R>) -> Option<LineToken>,
    byte: Option<NonZeroU8>,
    last_byte: Option<NonZeroU8>,
    line_number: NonZeroU64,
    input: io::Bytes<R>,
}

impl<R: io::Read> TokenIterator<R> {
    #[allow(
        clippy::unbuffered_bytes,
        reason = "Read implementation expected to be buffered"
    )]
    pub fn new(reader: R) -> TokenIterator<R> {
        TokenIterator {
            text: Cell::new(None),
            state: lex_default,
            byte: None,
            last_byte: None,
            line_number: NonZeroU64::new(1).unwrap(),
            input: reader.bytes(),
        }
    }

    fn byte_read(&mut self, b: io::Result<u8>) -> LexResult {
        let byte = b.map_err(|e| Cell::new(Some(e.into())))?;

        self.byte = Some(
            byte.try_into()
                .map_err(|_| {
                    error::TextParse::from_lexer(
                        String::from("Null byte"),
                        self.line_number,
                    )
                })
                .map_err(Some)
                .map_err(Cell::new)?,
        );

        let maybe_token = (self.state)(self);

        if self.byte == NonZeroU8::new(b'\n')
            || self.last_byte == NonZeroU8::new(b'\r')
        {
            let next_line = self.line_number.get().saturating_add(1);
            unsafe {
                self.line_number = NonZeroU64::new_unchecked(next_line);
            }
        }

        self.last_byte = self.byte;

        Ok(maybe_token)
    }

    fn eof_read(&mut self) -> LexResult {
        if let Some(last_text) = self.text.replace(None) {
            if last_text[0] == NonZeroU8::new(b'"').unwrap()
                && (last_text.last() != NonZeroU8::new(b'"').as_ref()
                    || last_text.len() == 1)
            {
                Err(Cell::new(Some(error::TextParse::from_lexer(
                    String::from("Missing closing quote"),
                    self.line_number,
                ))))
            } else {
                Ok(Some(LineToken::from_text(last_text, self.line_number)))
            }
        } else {
            Ok(None)
        }
    }
}

impl<R: io::Read> Iterator for TokenIterator<R> {
    type Item = Result<LineToken, Cell<Option<error::TextParse>>>;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if let Some(b) = self.input.next() {
                if let token @ Some(_) = self.byte_read(b).transpose() {
                    break token;
                }
            } else {
                break self.eof_read().transpose();
            }
        }
    }
}

/// State handler used between tokens.
///
/// Whitespace is skipped. A `/` moves to the "maybe comment" state without
/// buffering anything. Any other byte starts a new token buffer and moves to
/// the quoted state (for `"`) or the unquoted state. Never completes a token.
fn lex_default<R: io::Read>(
    iterator: &mut TokenIterator<R>,
) -> Option<LineToken> {
    let current = iterator.byte.unwrap();

    match current.get() {
        b if b.is_ascii_whitespace() => {}
        b'/' => iterator.state = lex_maybe_comment,
        other => {
            let handler: fn(&mut TokenIterator<R>) -> Option<LineToken> =
                if other == b'"' {
                    lex_quoted::<R>
                } else {
                    lex_unquoted::<R>
                };
            iterator.state = handler;

            // The byte that starts the token is its first buffered byte.
            let mut buffer = Vec::with_capacity(TEXT_CAPACITY);
            buffer.push(current);
            iterator.text.set(Some(buffer));
        }
    }

    None
}

/// State handler used inside a `//` comment.
///
/// Every byte is discarded; a `\r` or `\n` returns the lexer to the default
/// state. Never completes a token.
fn lex_comment<R: io::Read>(
    iterator: &mut TokenIterator<R>,
) -> Option<LineToken> {
    let ends_line = matches!(
        iterator.byte.map(|byte| byte.get()),
        Some(b'\r' | b'\n')
    );

    if ends_line {
        iterator.state = lex_default;
    }

    None
}

fn lex_maybe_comment<R: io::Read>(
    iterator: &mut TokenIterator<R>,
) -> Option<LineToken> {
    if iterator.byte == NonZeroU8::new(b'/') {
        iterator.state = lex_comment;
    } else {
        let mut text_bytes: Vec<NonZeroU8> = Vec::with_capacity(TEXT_CAPACITY);
        text_bytes.push(NonZeroU8::new(b'/').unwrap());
        text_bytes.push(iterator.byte.unwrap());
        iterator.text.replace(Some(text_bytes));
        iterator.state = lex_unquoted;
    }

    None
}

/// State handler used inside a quoted string.
///
/// Every byte, including the closing `"`, is appended to the token buffer.
/// The closing `"` completes the token and returns the lexer to the default
/// state.
fn lex_quoted<R: io::Read>(
    iterator: &mut TokenIterator<R>,
) -> Option<LineToken> {
    let buffer = iterator.text.get_mut().as_mut().unwrap();
    buffer.push(iterator.byte.unwrap());

    if iterator.byte != NonZeroU8::new(b'"') {
        return None;
    }

    let finished = iterator.text.take().unwrap();
    iterator.state = lex_default;

    Some(LineToken::from_text(finished, iterator.line_number))
}

/// State handler used inside an unquoted token.
///
/// Non-whitespace bytes are appended to the token buffer. The first
/// whitespace byte completes the token (without being added to it) and
/// returns the lexer to the default state.
fn lex_unquoted<R: io::Read>(
    iterator: &mut TokenIterator<R>,
) -> Option<LineToken> {
    let byte = iterator.byte.unwrap();

    if !byte.get().is_ascii_whitespace() {
        iterator.text.get_mut().as_mut().unwrap().push(byte);
        return None;
    }

    let finished = iterator.text.take().unwrap();
    iterator.state = lex_default;

    Some(LineToken::from_text(finished, iterator.line_number))
}