kbvm 0.1.5

An implementation of the XKB specification
Documentation
#[cfg(test)]
mod tests;

use {
    crate::{
        from_bytes::FromBytes,
        xkb::{
            code::Code,
            code_slice::CodeSlice,
            diagnostic::DiagnosticKind,
            interner::Interner,
            kccgst::token::{
                Punctuation,
                Token::{self, Float, Ident, Integer, KeyName, String},
            },
            span::{Span, SpanExt, SpanUnit, Spanned},
            whitespace::consume_whitespace,
        },
    },
    std::{num::ParseFloatError, path::PathBuf, str::FromStr, sync::Arc},
    thiserror::Error,
};

/// Lexer over a single piece of code that hands out tokens one item at a time.
///
/// The lexer owns its own handle to the code and remembers how far it has lexed,
/// so consecutive calls to `lex_item` continue where the previous call stopped.
#[derive(Debug)]
pub(crate) struct Lexer {
    /// Path associated with the code, if any. Only stored and returned by `path()`.
    path: Option<Arc<PathBuf>>,
    /// The code being lexed.
    code: Code,
    /// Span offset of the first byte of `code`; byte positions are added to this
    /// value to form token spans.
    span_lo: SpanUnit,
    /// Byte offset into `code` at which the next `lex_item` call starts.
    pos: usize,
}

/// Borrowed lexing state that `Lexer::lex_item` builds for the duration of one call.
struct ItemLexer<'a> {
    /// Borrowed view of the code being lexed.
    code: CodeSlice<'a>,
    /// Interner that receives the text of key names, strings, identifiers and
    /// numeric literals.
    interner: &'a mut Interner,
    /// Span offset of the first byte of `code`.
    span_lo: SpanUnit,
    /// Byte offset into `code` of the next byte to inspect.
    pos: usize,
}

/// Errors produced while turning code into tokens.
#[derive(Debug, Error, Eq, PartialEq)]
pub(crate) enum LexerError {
    /// A `<...>` key name reached the end of the code, or a byte outside the
    /// `'!'..='~'` range, before its closing `>`.
    #[error("unterminated key name")]
    UnterminatedKeyName,
    /// A `"..."` string reached the end of the code before its closing quote.
    #[error("unterminated string")]
    UnterminatedString,
    /// A literal containing a `.` could not be parsed as an `f64`.
    #[error("invalid float literal")]
    InvalidFloatLiteral(#[source] ParseFloatError),
    /// A decimal or `0x`-prefixed literal could not be parsed as an `i64`.
    #[error("invalid integer literal")]
    InvalidIntegerLiteral,
    /// The byte cannot start any token.
    #[error("unexpected byte {:?}", *.0 as char)]
    UnexpectedByte(u8),
}

impl LexerError {
    pub(crate) fn diagnostic_kind(&self) -> DiagnosticKind {
        match self {
            LexerError::UnterminatedKeyName => DiagnosticKind::UnterminatedKeyName,
            LexerError::UnterminatedString => DiagnosticKind::UnterminatedString,
            LexerError::InvalidFloatLiteral(_) => DiagnosticKind::InvalidFloatLiteral,
            LexerError::InvalidIntegerLiteral => DiagnosticKind::InvalidIntegerLiteral,
            LexerError::UnexpectedByte(_) => DiagnosticKind::UnlexableByte,
        }
    }
}

impl Lexer {
    /// Creates a lexer for `code` that starts lexing at byte offset 0.
    ///
    /// `path` and `code` are cloned into the lexer. `span_lo` is the span offset
    /// assigned to the first byte of `code`; it is added to byte positions when
    /// token spans are computed.
    pub(crate) fn new(path: Option<&Arc<PathBuf>>, code: &Code, span_lo: SpanUnit) -> Self {
        Self {
            path: path.cloned(),
            code: code.clone(),
            span_lo,
            pos: 0,
        }
    }

    /// Lexes the next item and appends its tokens to `output`.
    ///
    /// Lexing resumes at the position where the previous call stopped. If the end of
    /// the code has already been reached, nothing is appended and `Ok(())` is returned.
    ///
    /// # Errors
    ///
    /// Returns the first lexing error encountered. Tokens lexed before the error stay
    /// in `output`, and the position reached by the failed attempt is kept, so that a
    /// later call continues from there instead of re-lexing (and re-appending) the
    /// same input and failing at the same place again.
    pub(crate) fn lex_item(
        &mut self,
        interner: &mut Interner,
        output: &mut Vec<Spanned<Token>>,
    ) -> Result<(), Spanned<LexerError>> {
        let mut lexer = ItemLexer {
            code: self.code.to_slice(),
            interner,
            span_lo: self.span_lo,
            pos: self.pos,
        };
        let result = lexer.lex_item(output);
        // Store the position on both the success and the error path: `output` may
        // already contain tokens from this call, so the position must stay in sync.
        self.pos = lexer.pos;
        result
    }

    /// Returns the path this lexer was created with, if any.
    pub(crate) fn path(&self) -> Option<&Arc<PathBuf>> {
        self.path.as_ref()
    }

    /// Returns the code this lexer operates on.
    pub(crate) fn code(&self) -> &Code {
        &self.code
    }

    /// Returns the span covering the entire code, from `span_lo` up to `span_lo`
    /// plus the length of the code in bytes.
    pub(crate) fn span(&self) -> Span {
        Span {
            lo: self.span_lo,
            hi: self.span_lo + self.code.len() as SpanUnit,
        }
    }
}

impl ItemLexer<'_> {
    /// Lexes tokens into `output` until one item is complete.
    ///
    /// An item ends with the first `;` that is not nested inside braces, or when
    /// the end of the code is reached. A closing brace without a matching opening
    /// brace leaves the nesting depth at zero. Every token lexed, including the
    /// terminating `;`, is appended to `output`.
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by `lex_one`; tokens appended before the
    /// error remain in `output`.
    fn lex_item(&mut self, output: &mut Vec<Spanned<Token>>) -> Result<(), Spanned<LexerError>> {
        let mut nesting = 0u64;
        loop {
            let Some(token) = self.lex_one()? else {
                return Ok(());
            };
            output.push(token);
            let Token::Punctuation(punct) = token.val else {
                continue;
            };
            match punct {
                Punctuation::Obrace => nesting += 1,
                // Saturating: an unbalanced `}` at depth zero is ignored here.
                Punctuation::Cbrace => nesting = nesting.saturating_sub(1),
                punctuation![;] if nesting == 0 => return Ok(()),
                _ => {}
            }
        }
    }

    /// Lexes a single token and advances `self.pos` past it.
    ///
    /// Input consumed by `consume_whitespace` and line comments (starting with `#`
    /// or `//` and running up to, but not including, the next newline) are skipped
    /// first. Returns `Ok(None)` if the end of the code is reached before a token
    /// starts. All spans are offset by `self.span_lo`.
    ///
    /// # Errors
    ///
    /// - `UnterminatedKeyName` if a `<...>` key name reaches the end of the code or
    ///   a byte outside `'!'..='~'` before its closing `>`.
    /// - `UnterminatedString` if a string reaches the end of the code before its
    ///   closing `"`.
    /// - `InvalidFloatLiteral` / `InvalidIntegerLiteral` if a numeric literal does
    ///   not parse.
    /// - `UnexpectedByte` if the byte cannot start any token.
    fn lex_one(&mut self) -> Result<Option<Spanned<Token>>, Spanned<LexerError>> {
        use LexerError::*;
        // Find the first byte that belongs to a token.
        let first = loop {
            consume_whitespace(&mut self.pos, &self.code, false);
            let Some(&candidate) = self.code.get(self.pos) else {
                return Ok(None);
            };
            let starts_comment = candidate == b'#'
                || (candidate == b'/' && self.code.get(self.pos + 1) == Some(&b'/'));
            if !starts_comment {
                break candidate;
            }
            // Skip the comment body; the newline itself is left for the next round.
            while self.pos < self.code.len() && self.code[self.pos] != b'\n' {
                self.pos += 1;
            }
        };
        let start = self.pos;
        let lo = self.span_lo + start as SpanUnit;
        self.pos += 1;

        // Tokens that consist of exactly one byte.
        let single: Option<Token> = match first {
            b';' => Some(token![;]),
            b'{' => Some(Punctuation::Obrace.into()),
            b'}' => Some(Punctuation::Cbrace.into()),
            b'=' => Some(token![=]),
            b'[' => Some(Punctuation::Obracket.into()),
            b']' => Some(Punctuation::Cbracket.into()),
            b'(' => Some(Punctuation::Oparen.into()),
            b')' => Some(Punctuation::Cparen.into()),
            b'.' => Some(token![.]),
            b',' => Some(token![,]),
            b'+' => Some(token![+]),
            b'-' => Some(token![-]),
            b'*' => Some(token![*]),
            b'/' => Some(token![/]),
            b'!' => Some(token![!]),
            b'~' => Some(token![~]),
            _ => None,
        };
        if let Some(token) = single {
            return Ok(Some(token.spanned(lo, lo.saturating_add(1))));
        }

        // Reads the byte at `at`, or fails with `err` spanning from the token start
        // to `at` if the code ends there.
        let byte_at = |err: LexerError, at: usize| {
            if let Some(&found) = self.code.get(at) {
                Ok(found)
            } else {
                Err(err.spanned(lo, self.span_lo.saturating_add(at as SpanUnit)))
            }
        };

        // Key name: `<` followed by printable ASCII up to the closing `>`.
        if first == b'<' {
            loop {
                let c = byte_at(UnterminatedKeyName, self.pos)?;
                self.pos += 1;
                if c == b'>' {
                    break;
                }
                if !matches!(c, b'!'..=b'~') {
                    // The span stops just before the offending byte.
                    let hi = self.span_lo + self.pos as SpanUnit - 1;
                    return Err(UnterminatedKeyName.spanned(lo, hi));
                }
            }
            // Intern the text between the angle brackets.
            let name = self.code.slice(start + 1..self.pos - 1);
            let interned = self.interner.intern(&name);
            let hi = self.span_lo + self.pos as SpanUnit;
            return Ok(Some(KeyName(interned).spanned(lo, hi)));
        }

        // String: runs to the first unescaped `"`; escapes are kept verbatim.
        if first == b'"' {
            let mut escaped = false;
            loop {
                let c = byte_at(UnterminatedString, self.pos)?;
                self.pos += 1;
                if escaped {
                    escaped = false;
                } else if c == b'"' {
                    break;
                } else {
                    escaped = c == b'\\';
                }
            }
            // Intern the text between the quotes.
            let contents = self.code.slice(start + 1..self.pos - 1);
            let interned = self.interner.intern(&contents);
            let hi = self.span_lo + self.pos as SpanUnit;
            return Ok(Some(String(interned).spanned(lo, hi)));
        }

        // Identifier: `[_a-zA-Z][_a-zA-Z0-9]*`.
        if first == b'_' || first.is_ascii_alphabetic() {
            while let Some(&c) = self.code.get(self.pos) {
                if c != b'_' && !c.is_ascii_alphanumeric() {
                    break;
                }
                self.pos += 1;
            }
            let end = self.pos;
            let text = self.code.slice(start..end);
            let interned = self.interner.intern(&text);
            let hi = self.span_lo + end as SpanUnit;
            return Ok(Some(Ident(interned).spanned(lo, hi)));
        }

        // Number: decimal integer, `0x` hexadecimal integer, or decimal float.
        if first.is_ascii_digit() {
            let hex = first == b'0' && self.code.get(self.pos) == Some(&b'x');
            let digits_start = if hex {
                // Step over the `x`; the digits begin after the two-byte prefix.
                self.pos += 1;
                start + 2
            } else {
                start
            };
            let mut have_dot = false;
            while let Some(&c) = self.code.get(self.pos) {
                let accepted = if c.is_ascii_digit() {
                    true
                } else if hex {
                    c.is_ascii_hexdigit()
                } else if c == b'.' && !have_dot {
                    // Only the first dot of a decimal literal belongs to it.
                    have_dot = true;
                    true
                } else {
                    false
                };
                if !accepted {
                    break;
                }
                self.pos += 1;
            }
            let end = self.pos;
            // The interned text includes any `0x` prefix.
            let text = self.code.slice(start..end);
            let interned = self.interner.intern(&text);
            let hi = self.span_lo + end as SpanUnit;
            let token = if have_dot {
                let digits = std::str::from_utf8(&self.code[digits_start..end]).unwrap();
                match f64::from_str(digits) {
                    Ok(value) => Float(interned, value),
                    Err(e) => return Err(InvalidFloatLiteral(e).spanned(lo, hi)),
                }
            } else {
                let Some(value) = i64::from_bytes(&self.code[digits_start..end], hex) else {
                    return Err(InvalidIntegerLiteral.spanned(lo, hi));
                };
                Integer(interned, value)
            };
            return Ok(Some(token.spanned(lo, hi)));
        }

        // Nothing matched: report the byte that could not start a token.
        let hi = self.span_lo + self.pos as SpanUnit;
        Err(UnexpectedByte(first).spanned(lo, hi))
    }
}