Crate regex_lexer_lalrpop


A regex-based lexer (tokenizer).

use regex_lexer_lalrpop::LexerBuilder;

#[derive(Debug, PartialEq, Eq)]
enum Token {
    Num(u32),
    Add,
    Sub,
    Mul,
    Div,
    Open,
    Close,
}

let lexer = LexerBuilder::new()
    .token(r"[0-9]+", |_, tok, _| Some(Token::Num(tok.parse().unwrap())))
    .token(r"\+", |_, _, _| Some(Token::Add))
    .token(r"-", |_, _, _| Some(Token::Sub))
    .token(r"\*", |_, _, _| Some(Token::Mul))
    .token(r"/", |_, _, _| Some(Token::Div))
    .token(r"\(", |_, _, _| Some(Token::Open))
    .token(r"\)", |_, _, _| Some(Token::Close))
    .token(r"\s+", |_, _, _| None) // skip whitespace
    .build()?; // `build` returns a `Result`, so `?` propagates any build error

let source = "(1 + 2) * 3";
assert_eq!(
    lexer.tokens(source).collect::<Vec<_>>(),
    vec![
        Ok(Token::Open), Ok(Token::Num(1)), Ok(Token::Add), Ok(Token::Num(2)), Ok(Token::Close),
        Ok(Token::Mul), Ok(Token::Num(3))
    ],
);
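
Besides collecting everything eagerly as above, the iterator returned by tokens can be consumed item by item. A minimal sketch, reusing the lexer and Token definitions from the example; the concrete error type is crate-specific and is only shown here via its Debug representation (which the assert_eq above already relies on):

for result in lexer.tokens("10 * (2 + 3)") {
    match result {
        Ok(tok) => println!("token: {:?}", tok),       // a successfully matched token
        Err(err) => eprintln!("lex error: {:?}", err), // a lexing error (e.g. unmatched input)
    }
}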

Structs