kbvm 0.1.5

An implementation of the XKB specification
Documentation
#[cfg(test)]
mod tests;

use {
    crate::xkb::{
        code::Code,
        code_slice::CodeSlice,
        diagnostic::DiagnosticKind,
        interner::Interner,
        rmlvo::token::Token,
        span::{SpanExt, SpanUnit, Spanned},
        whitespace::consume_whitespace,
    },
    std::{path::PathBuf, sync::Arc},
    thiserror::Error,
};

#[derive(Debug)]
pub(crate) struct Lexer {
    path: Arc<PathBuf>,
    code: Code,
    span_lo: SpanUnit,
    pos: usize,
}

struct LineLexer<'a> {
    code: CodeSlice<'a>,
    interner: &'a mut Interner,
    span_lo: SpanUnit,
    pos: usize,
}

#[derive(Debug, Error, Eq, PartialEq)]
pub(crate) enum LexerError {
    #[error("empty macro name")]
    EmptyMacroName,
    #[error("Unexpected byte {:?}", *.0 as char)]
    UnexpectedByte(u8),
}

impl LexerError {
    pub(crate) fn diagnostic_kind(&self) -> DiagnosticKind {
        match self {
            LexerError::EmptyMacroName => DiagnosticKind::EmptyMacroName,
            LexerError::UnexpectedByte(_) => DiagnosticKind::UnlexableByte,
        }
    }
}

enum One {
    Eof,
    Eol,
    Token(Spanned<Token>),
}

impl Lexer {
    pub(crate) fn new(path: &Arc<PathBuf>, code: &Code, span_lo: SpanUnit) -> Self {
        Self {
            path: path.clone(),
            code: code.clone(),
            span_lo,
            pos: 0,
        }
    }

    pub(crate) fn lex_line(
        &mut self,
        interner: &mut Interner,
        output: &mut Vec<Spanned<Token>>,
    ) -> Result<(), Spanned<LexerError>> {
        let mut lexer = LineLexer {
            code: self.code.to_slice(),
            interner,
            span_lo: self.span_lo,
            pos: self.pos,
        };
        lexer.lex_line(output)?;
        self.pos = lexer.pos;
        Ok(())
    }

    pub(crate) fn path(&self) -> &Arc<PathBuf> {
        &self.path
    }
}

impl LineLexer<'_> {
    fn lex_line(&mut self, output: &mut Vec<Spanned<Token>>) -> Result<(), Spanned<LexerError>> {
        let mut lexed_any = false;
        loop {
            match self.lex_one()? {
                One::Eof => break,
                One::Eol if lexed_any => break,
                One::Eol => {}
                One::Token(t) => {
                    lexed_any = true;
                    output.push(t);
                }
            }
        }
        Ok(())
    }

    fn lex_one(&mut self) -> Result<One, Spanned<LexerError>> {
        use LexerError::*;
        let mut b;
        loop {
            consume_whitespace(&mut self.pos, &self.code, true);
            b = match self.code.get(self.pos) {
                Some(c) => *c,
                _ => return Ok(One::Eof),
            };
            match b {
                b'/' if self.code.get(self.pos + 1) == Some(&b'/') => {
                    self.pos += 2;
                    while self.pos < self.code.len() {
                        b = self.code[self.pos];
                        self.pos += 1;
                        if b == b'\n' {
                            break;
                        }
                    }
                    return Ok(One::Eol);
                }
                b'\\' => {
                    self.pos += 1;
                    if self.code.get(self.pos) == Some(&b'\r') {
                        self.pos += 1;
                    }
                    if let Some(&b) = self.code.get(self.pos) {
                        if b != b'\n' {
                            let lo = self.span_lo + self.pos as SpanUnit;
                            return Err(UnexpectedByte(b).spanned(lo, lo + 1));
                        }
                        self.pos += 1;
                    }
                }
                _ => break,
            }
        }
        let mut start = self.pos;
        let lo = self.span_lo + start as SpanUnit;
        self.pos += 1;
        'punctuation: {
            let t = match b {
                b'\n' => return Ok(One::Eol),
                b'=' => token![=],
                b'*' => token![*],
                b'!' => token![!],
                _ => break 'punctuation,
            };
            return Ok(One::Token(t.spanned(lo, lo + 1)));
        }
        let mut is_macro = false;
        if b == b'$' {
            b = match self.code.get(self.pos) {
                Some(b) => *b,
                _ => return Err(EmptyMacroName.spanned(lo, lo + 1)),
            };
            is_macro = true;
            self.pos += 1;
            start += 1;
        }
        self.pos -= 1;
        while b >= b'!' && b <= b'~' && b != b'\\' {
            self.pos += 1;
            b = match self.code.get(self.pos) {
                None => break,
                Some(b) => *b,
            };
        }
        if start == self.pos {
            let lo = self.span_lo.saturating_add(self.pos as SpanUnit);
            return Err(UnexpectedByte(b).spanned(lo, lo.saturating_add(1)));
        }
        let end = self.pos;
        let value = self.interner.intern(&self.code.slice(start..end));
        let token = match is_macro {
            true => Token::GroupName(value),
            false => Token::Ident(value),
        };
        let hi = self.span_lo.saturating_add(self.pos as SpanUnit);
        Ok(One::Token(token.spanned(lo, hi)))
    }
}