//! toktok_macros 0.2.1
//!
//! toktok - WIP
//! Documentation: lexer for the toktok grammar language.
use logos::Logos;
use std::fmt;

/// Tokens of the toktok grammar language, produced by the [`Logos`]-derived
/// lexer over a grammar source string.
///
/// Whitespace (spaces, tabs, CR/LF, form feed) and `//` line comments are
/// skipped between tokens. Two variants carry no lexer pattern and are
/// produced out-of-band by `lex`: [`Token::RustExpression`] (synthesized for
/// the raw text following `=>` / `=>?`) and [`Token::Error`] (substituted
/// when the lexer reports an unrecognized input).
#[allow(clippy::enum_variant_names)]
#[derive(Logos, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[logos(skip r"[ \t\r\n\f]+")]
// Skip `//` line comments. NOTE(review): `allow_greedy` presumably relaxes
// logos' check on the greedy trailing `[^\r\n]*` — confirm against the logos
// version this crate pins.
#[logos(skip(r"//[^\r\n]*", allow_greedy = true))]
pub enum Token {
    // --- Punctuation ---
    // Note on overlapping patterns: logos prefers the longest match, so
    // `::` wins over `:`, and `=>` / `=>?` win over `=` and `?`.
    #[token(";")]
    Semicolon,
    #[token("::")]
    DoubleColon,
    #[token(":")]
    Colon,
    #[token(",")]
    Comma,
    #[token("{")]
    LeftBrace,
    #[token("}")]
    RightBrace,
    #[token("[")]
    LeftBracket,
    #[token("]")]
    RightBracket,
    #[token("(")]
    LeftParen,
    #[token(")")]
    RightParen,
    #[token("<")]
    LeftAngle,
    #[token(">")]
    RightAngle,
    #[token("'")]
    SingleQuote,
    #[token("=")]
    Assign,
    #[token("?")]
    QuestionMark,
    #[token("@")]
    At,

    // --- Keywords ---
    #[token("pub")]
    KeywordPublic,
    #[token("use")]
    KeywordUse,

    // --- Rule arrows: `=>` introduces an infallible action, `=>?` a
    // fallible one. Both trigger raw Rust-expression capture in `lex`. ---
    #[token("=>")]
    FatArrow,
    #[token("=>?")]
    FatArrowFallible,

    // --- Grammar combinator operators ---
    #[token("~")]
    OperatorSeq,
    #[token("|")]
    OperatorChoice,
    // `?` doubles as QuestionMark above, so OperatorOpt stays disabled to
    // avoid two variants competing for the same single-character token.
    // #[token("?")]
    // OperatorOpt,
    #[token("*")]
    OperatorMany0,
    #[token("+")]
    OperatorMany1,

    // --- Token classes ---
    #[regex("[a-zA-Z_][a-zA-Z0-9_]*")]
    Identifier,

    // Double-quoted literal; `(?:[^"\\]|\\.)*` allows escaped quotes inside.
    #[regex(r#""(?:[^"\\]|\\.)*""#)]
    TokenLit,
    // Same shape prefixed with `r`, marking a regex token definition.
    #[regex(r#"r"(?:[^"\\]|\\.)*""#)]
    TokenRegex,

    // Synthesized by `lex` for the raw text after `=>` / `=>?`; no pattern.
    RustExpression,

    // Substituted by `lex` for lexer errors; no pattern.
    Error,
}

impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use Token::*;

        let s = match self {
            Semicolon => "`;`",
            DoubleColon => "`::`",
            Colon => "`:`",
            Comma => "`,`",
            LeftBrace => "`{`",
            RightBrace => "`}`",
            LeftBracket => "`[`",
            RightBracket => "`]`",
            LeftParen => "`(`",
            RightParen => "`)`",
            LeftAngle => "`<`",
            RightAngle => "`>`",
            SingleQuote => "`'`",
            Assign => "`=`",
            QuestionMark => "`?`",
            At => "`@`",

            KeywordPublic => "`pub`",
            KeywordUse => "`use`",

            FatArrow => "`=>`",
            FatArrowFallible => "`=>?`",

            OperatorSeq => "`~`",
            OperatorChoice => "`|`",
            OperatorMany0 => "`*`",
            OperatorMany1 => "`+`",

            Identifier => "<ID>",

            TokenLit => "<TOKEN_LITERAL>",
            TokenRegex => "<TOKEN_REGEX>",

            RustExpression => "<RUST_EXPRESSION>",

            Error => "<ERROR>",
        };
        write!(f, "{}", s)
    }
}

pub fn lex(source: &str) -> Vec<toktok_core::SpannedToken<Token>> {
    let mut lexer = Token::lexer(source);
    let mut tokens = Vec::new();

    while let Some(token_result) = lexer.next() {
        let token = match token_result {
            Ok(token) => token,
            Err(()) => Token::Error,
        };

        tokens.push(toktok_core::SpannedToken { token, span: lexer.span() });
        if token == Token::FatArrow || token == Token::FatArrowFallible {
            let span_start = lexer.span().end;
            parse_rust_expression(&mut lexer);
            let span = span_start..lexer.span().end;

            tokens.push(toktok_core::SpannedToken { token: Token::RustExpression, span });
        }
    }

    tokens
}

/// Advances `lexer` past the raw Rust expression that follows a `=>` / `=>?`
/// arrow, by bumping the current token's end over it. The terminator
/// (`,`, `;`, or `}` at bracket depth zero) is left unconsumed for the
/// regular lexer.
fn parse_rust_expression(lexer: &mut logos::Lexer<'_, Token>) {
    let bump = rust_expression_len(lexer.remainder());
    lexer.bump(bump);
}

/// Returns the byte length of the leading Rust expression in `src`.
///
/// The expression ends at the first `,`, `;`, or `}` that occurs outside all
/// `{}`/`[]`/`()` pairs; that terminator is not counted. Double-quoted string
/// literals (with `\"` escapes) are skipped opaquely, so terminators or
/// bracket characters inside them neither end the expression nor disturb the
/// bracket balance — previously `=> ";"` truncated the expression and
/// `=> "("` corrupted the balance and swallowed the rest of the file.
///
/// Limitations (unchanged semantics otherwise): raw strings are scanned with
/// ordinary escape rules, and char literals / lifetimes are not treated
/// specially, so a bare `','` or `';'` char literal still terminates early.
fn rust_expression_len(src: &str) -> usize {
    // Open-bracket depth for `{}`, `[]`, `()` respectively.
    let mut depth = (0, 0, 0);
    let mut len = 0;
    let mut chars = src.chars();

    while let Some(c) = chars.next() {
        match c {
            ',' | ';' | '}' if depth == (0, 0, 0) => break,

            '{' => depth.0 += 1,
            '}' => depth.0 -= 1,
            '[' => depth.1 += 1,
            ']' => depth.1 -= 1,
            '(' => depth.2 += 1,
            ')' => depth.2 -= 1,

            // Skip a string literal wholesale; its contents must not affect
            // the terminator check or the bracket balance.
            '"' => {
                len += c.len_utf8(); // opening quote
                while let Some(sc) = chars.next() {
                    len += sc.len_utf8();
                    match sc {
                        // Consume the escaped character (covers `\"`, `\\`).
                        '\\' => {
                            if let Some(esc) = chars.next() {
                                len += esc.len_utf8();
                            }
                        }
                        '"' => break, // closing quote
                        _ => {}
                    }
                }
                continue; // quote bytes already counted above
            }

            _ => (),
        }
        len += c.len_utf8();
    }

    len
}