A regex-based lexer (tokenizer).
```rust
use regex_lexer_lalrpop::LexerBuilder;

#[derive(Debug, PartialEq, Eq)]
enum Token {
    Num(u32),
    Add,
    Sub,
    Mul,
    Div,
    Open,
    Close,
}

// Each pattern is paired with a callback; the callback receives the matched
// text as its second argument and either produces a token or returns `None`
// to skip the match (as done for whitespace below).
let lexer = LexerBuilder::new()
    .token(r"[0-9]+", |_, tok, _| Some(Token::Num(tok.parse().unwrap())))
    .token(r"\+", |_, _, _| Some(Token::Add))
    .token(r"-", |_, _, _| Some(Token::Sub))
    .token(r"\*", |_, _, _| Some(Token::Mul))
    .token(r"/", |_, _, _| Some(Token::Div))
    .token(r"\(", |_, _, _| Some(Token::Open))
    .token(r"\)", |_, _, _| Some(Token::Close))
    .token(r"\s+", |_, _, _| None) // skip whitespace
    .build()?;

let source = "(1 + 2) * 3";
assert_eq!(
    lexer.tokens(source).collect::<Vec<_>>(),
    vec![
        Ok(Token::Open), Ok(Token::Num(1)), Ok(Token::Add), Ok(Token::Num(2)), Ok(Token::Close),
        Ok(Token::Mul), Ok(Token::Num(3)),
    ],
);
```
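As the assertion shows, each item yielded by `Lexer::tokens` is a `Result`, so lexing errors can be handled while iterating instead of collecting everything up front. The sketch below continues from the example above; it assumes only that the iterator's error type implements `Debug`, since its concrete name is not shown in this description.

```rust
// Continuation of the example above: walk the token stream one item at a
// time and stop at the first lexing error. Handling the error via `Debug`
// is an assumption made for this sketch; the concrete error type is not
// named here.
for item in lexer.tokens("(1 + 2) * 3") {
    match item {
        Ok(tok) => println!("lexed {:?}", tok),
        Err(err) => {
            eprintln!("lexing failed: {:?}", err);
            break;
        }
    }
}
```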
Structs
- `Lexer` - A regex-based lexer.
- `LexerBuilder` - Builder struct for `Lexer`.
- Location in the text file being lexed.
- The type returned by `Lexer::tokens`.