use crate::{CalcToken, SymTab, TokenID, TokenValue};
use lexer_data::{LexData, Mode, Rule};
use parlex::{Lexer, LexerData, LexerDriver, LexerStats, ParlexError};
use std::marker::PhantomData;
use try_next::TryNextWithContext;
/// Lexer definitions generated at build time.
///
/// The module body is textually included from `lexer_data.rs` inside the
/// directory named by the `OUT_DIR` environment variable (presumably written
/// by the crate's build script; confirm against `build.rs`). This file
/// imports `LexData`, `Mode` and `Rule` from it.
pub mod lexer_data {
include!(concat!(env!("OUT_DIR"), "/lexer_data.rs"));
}
/// Driver state for the calculator lexer.
///
/// Its `LexerDriver` implementation maps each matched `Rule` to the
/// `CalcToken` (if any) that should be yielded.
pub struct CalcLexerDriver<I> {
/// Number of block comments currently open. Incremented on
/// `Rule::CommentBegin` and decremented on `Rule::CommentEnd`; a comment
/// token is only yielded once this returns to zero, which is what allows
/// comments to nest.
comment_level: i32,
/// Zero-sized marker that ties the driver to the input type `I` without
/// storing a value of that type.
_marker: PhantomData<I>,
}
impl<I> LexerDriver for CalcLexerDriver<I>
where
I: TryNextWithContext<SymTab, Item = u8, Error: std::fmt::Display + 'static>,
{
type LexerData = LexData;
type Token = CalcToken;
type Lexer = Lexer<I, Self, Self::Context>;
type Context = SymTab;
fn action(
&mut self,
lexer: &mut Self::Lexer,
context: &mut Self::Context,
rule: <Self::LexerData as LexerData>::LexerRule,
) -> Result<(), ParlexError> {
match rule {
Rule::Empty => {
unreachable!()
}
Rule::Ident => {
let index = context.intern(lexer.take_str()?);
lexer.yield_token(CalcToken {
token_id: TokenID::Ident,
span: Some(lexer.span()),
value: TokenValue::Ident(index),
});
}
Rule::Number => {
let s = lexer.take_str()?;
lexer.yield_token(CalcToken {
token_id: TokenID::Number,
span: Some(lexer.span()),
value: TokenValue::Number(
s.as_str()
.parse::<i64>()
.map_err(|e| ParlexError::from_err(e, Some(lexer.span())))?,
),
});
}
Rule::Semicolon => {
lexer.yield_token(CalcToken {
token_id: TokenID::End,
span: Some(lexer.span()),
value: TokenValue::None,
});
}
Rule::Equals => {
lexer.yield_token(CalcToken {
token_id: TokenID::Equals,
span: Some(lexer.span()),
value: TokenValue::None,
});
}
Rule::Plus => {
lexer.yield_token(CalcToken {
token_id: TokenID::Plus,
span: Some(lexer.span()),
value: TokenValue::None,
});
}
Rule::Minus => {
lexer.yield_token(CalcToken {
token_id: TokenID::Minus,
span: Some(lexer.span()),
value: TokenValue::None,
});
}
Rule::Asterisk => {
lexer.yield_token(CalcToken {
token_id: TokenID::Asterisk,
span: Some(lexer.span()),
value: TokenValue::None,
});
}
Rule::Slash => {
lexer.yield_token(CalcToken {
token_id: TokenID::Slash,
span: Some(lexer.span()),
value: TokenValue::None,
});
}
Rule::LeftParen => {
lexer.yield_token(CalcToken {
token_id: TokenID::LeftParen,
span: Some(lexer.span()),
value: TokenValue::None,
});
}
Rule::RightParen => {
lexer.yield_token(CalcToken {
token_id: TokenID::RightParen,
span: Some(lexer.span()),
value: TokenValue::None,
});
}
Rule::CommentBegin => {
lexer.accum();
lexer.begin(Mode::Comment);
self.comment_level += 1;
}
Rule::CommentEnd => {
self.comment_level -= 1;
if self.comment_level == 0 {
lexer.begin(Mode::Expr);
let s = lexer.take_str()?;
lexer.yield_token(CalcToken {
token_id: TokenID::Comment,
span: Some(lexer.span()),
value: TokenValue::Comment(s),
});
}
}
Rule::CommentChar => { }
Rule::NewLine => {
}
Rule::WhiteSpace => { }
Rule::Error => {
lexer.yield_token(CalcToken {
token_id: TokenID::Error,
span: Some(lexer.span()),
value: TokenValue::None,
});
}
Rule::End => {
if lexer.mode() == Mode::Expr {
lexer.yield_token(CalcToken {
token_id: TokenID::End,
span: Some(lexer.span()),
value: TokenValue::None,
});
} else {
lexer.yield_token(CalcToken {
token_id: TokenID::Error,
span: Some(lexer.span()),
value: TokenValue::None,
});
}
}
}
Ok(())
}
}
/// Calculator lexer over a byte input `I`.
///
/// Wraps a `Lexer` that is driven by `CalcLexerDriver` and uses `SymTab` as
/// its context; tokens are pulled through the `TryNextWithContext`
/// implementation on this type.
pub struct CalcLexer<I>
where
I: TryNextWithContext<SymTab, Item = u8, Error: std::fmt::Display + 'static>,
{
/// The underlying lexer that all calls are delegated to.
lexer: Lexer<I, CalcLexerDriver<I>, SymTab>,
}
impl<I> CalcLexer<I>
where
    I: TryNextWithContext<SymTab, Item = u8, Error: std::fmt::Display + 'static>,
{
    /// Creates a calculator lexer that reads bytes from `input`.
    ///
    /// The driver starts with no open comments (`comment_level` of zero).
    ///
    /// # Errors
    ///
    /// Returns the error reported by `Lexer::try_new` if the underlying
    /// lexer cannot be constructed.
    pub fn try_new(input: I) -> Result<Self, ParlexError> {
        Ok(Self {
            lexer: Lexer::try_new(
                input,
                CalcLexerDriver {
                    comment_level: 0,
                    _marker: PhantomData,
                },
            )?,
        })
    }
}
impl<I> TryNextWithContext<SymTab, LexerStats> for CalcLexer<I>
where
    I: TryNextWithContext<SymTab, Item = u8, Error: std::fmt::Display + 'static>,
{
    type Item = CalcToken;
    type Error = ParlexError;

    /// Pulls the next token from the wrapped lexer, passing `context`
    /// through unchanged.
    ///
    /// Returns whatever the wrapped lexer returns: `Ok(Some(token))`,
    /// `Ok(None)`, or its error.
    fn try_next_with_context(
        &mut self,
        context: &mut SymTab,
    ) -> Result<Option<Self::Item>, Self::Error> {
        let inner = &mut self.lexer;
        inner.try_next_with_context(context)
    }

    /// Returns the statistics reported by the wrapped lexer.
    fn stats(&self) -> LexerStats {
        let inner = &self.lexer;
        inner.stats()
    }
}
#[cfg(test)]
mod tests {
    use crate::{CalcLexer, CalcToken, SymTab, TokenID, TokenValue};
    use parlex::span;
    use try_next::{IterInput, TryNextWithContext};

    /// Identifiers, an operator and a number separated by whitespace and many
    /// newlines are lexed with the expected spans, followed by a final `End`
    /// token and then `None`.
    #[test]
    fn lex_ident_plus_ident_number_end() {
        let _ = env_logger::builder().is_test(true).try_init();
        let source = "hello\n +\n\n\n\n\n\n\n\n\n\n world\n\n123";
        let mut symtab = SymTab::new();
        let mut lexer = CalcLexer::try_new(IterInput::from(source.bytes())).unwrap();
        // Pulls the next lexer result, panicking on a lexer error.
        let mut next = || lexer.try_next_with_context(&mut symtab).unwrap();

        assert!(matches!(
            next(),
            Some(CalcToken {
                token_id: TokenID::Ident,
                span: span!(0, 0, 0, 5),
                value: TokenValue::Ident(0)
            }),
        ));
        assert!(matches!(
            next(),
            Some(CalcToken {
                token_id: TokenID::Plus,
                span: span!(1, 1, 1, 2),
                value: TokenValue::None
            }),
        ));
        assert!(matches!(
            next(),
            Some(CalcToken {
                token_id: TokenID::Ident,
                span: span!(11, 1, 11, 6),
                value: TokenValue::Ident(1)
            }),
        ));
        assert!(matches!(
            next(),
            Some(CalcToken {
                token_id: TokenID::Number,
                span: span!(13, 0, 13, 3),
                value: TokenValue::Number(123)
            }),
        ));
        // End of input in expression mode produces an `End` token...
        assert!(matches!(
            next(),
            Some(CalcToken {
                token_id: TokenID::End,
                span: span!(13, 3, 13, 3),
                value: TokenValue::None
            }),
        ));
        // ...after which the lexer is exhausted.
        assert!(matches!(next(), None,));
    }

    /// A nested, multi-line block comment is delivered as one `Comment`
    /// token containing the full comment text, and lexing resumes normally
    /// after it.
    #[test]
    fn nested_block_comments_are_skipped() {
        let _ = env_logger::builder().is_test(true).try_init();
        let source = "a /* outer /* inner\n */ still\n comment */ + b;";
        let mut symtab = SymTab::new();
        let mut lexer = CalcLexer::try_new(IterInput::from(source.bytes())).unwrap();
        // Pulls the next token, panicking on a lexer error or on exhaustion.
        let mut next_token = || lexer.try_next_with_context(&mut symtab).unwrap().unwrap();

        let lhs = next_token();
        assert!(matches!(
            lhs,
            CalcToken {
                token_id: TokenID::Ident,
                span: span!(0, 0, 0, 1),
                value: TokenValue::Ident(0),
            }
        ));

        let comment = next_token();
        assert!(matches!(
            comment,
            CalcToken {
                token_id: TokenID::Comment,
                span: span!(0, 2, 2, 11),
                value: TokenValue::Comment(s),
            } if s == "/* outer /* inner\n */ still\n comment */"
        ));

        // `dbg!` prints the actual token, which is the only diagnostic
        // available when an `assert!(matches!(..))` fails.
        let plus = next_token();
        assert!(matches!(
            dbg!(plus),
            CalcToken {
                token_id: TokenID::Plus,
                span: span!(2, 12, 2, 13),
                value: TokenValue::None,
            }
        ));

        let rhs = next_token();
        assert!(matches!(
            dbg!(rhs),
            CalcToken {
                token_id: TokenID::Ident,
                span: span!(2, 14, 2, 15),
                value: TokenValue::Ident(1),
            }
        ));

        // The semicolon is reported as `End`...
        let semicolon = next_token();
        assert!(matches!(
            dbg!(semicolon),
            CalcToken {
                token_id: TokenID::End,
                span: span!(2, 15, 2, 16),
                value: TokenValue::None,
            }
        ));

        // ...and so is the end of input, with an empty span.
        let end_of_input = next_token();
        assert!(matches!(
            dbg!(end_of_input),
            CalcToken {
                token_id: TokenID::End,
                span: span!(2, 16, 2, 16),
                value: TokenValue::None,
            }
        ));

        assert!(lexer.try_next_with_context(&mut symtab).unwrap().is_none());
        // Only `a` and `b` were interned; comment text never reaches the
        // symbol table.
        assert_eq!(symtab.len(), 2);
    }

    /// Input that ends inside a block comment yields a single `Error` token
    /// and then nothing more.
    #[test]
    fn unterminated_block_comment_emits_error_at_eof() {
        let _ = env_logger::builder().is_test(true).try_init();
        let source = "/* unclosed";
        let mut symtab = SymTab::new();
        let mut lexer = CalcLexer::try_new(IterInput::from(source.bytes())).unwrap();
        // Pulls the next lexer result, panicking on a lexer error.
        let mut next = || lexer.try_next_with_context(&mut symtab).unwrap();

        let first = next().unwrap();
        assert!(matches!(
            first,
            CalcToken {
                token_id: TokenID::Error,
                ..
            }
        ));
        assert!(next().is_none());
    }
}