use crate::compile::parse::Keyword;
use crate::types::delimiter::Delimiter;
use crate::types::span::Span;
use crate::{Engine, Error, Result};
/// A stateful, on-demand lexer over a template source.
///
/// The lexer is a small state machine (see `State`): raw template text is
/// scanned for begin tags, and inside a tag individual tokens are produced
/// until the matching end tag is found.
#[cfg_attr(internal_debug, derive(Debug))]
pub struct Lexer<'engine, 'source> {
    /// The engine, used here for its delimiter `searcher`.
    engine: &'engine Engine<'engine>,
    /// The full template source being lexed.
    pub source: &'source str,
    /// Byte offset of the next character to lex.
    cursor: usize,
    /// Which lexing mode the cursor is currently in.
    state: State,
    /// Set when the previous end tag carried a trim marker, so the next raw
    /// token has its leading whitespace removed.
    left_trim: bool,
    /// A buffered token: lexing a tag adjacent to raw text yields two tokens
    /// at once, and the second is stashed here until the next call.
    next: Option<(Token, Span)>,
    /// Stack of currently open brackets, each with its span (for error
    /// reporting) and the token expected to close it.
    brackets: Vec<(Span, Token)>,
}
/// The lexer's current mode.
#[cfg_attr(internal_debug, derive(Debug))]
enum State {
    /// Scanning raw template text, looking for the next begin tag.
    Template,
    /// Inside an expression or block tag.
    Block {
        /// Span of the begin tag, for error reporting.
        begin: Span,
        /// The token expected to close this tag.
        end: Token,
    },
    /// Inside a tag, in the middle of a dotted path; digits lex as `Index`
    /// rather than `Number` here.
    BlockPath {
        /// Span of the begin tag, for error reporting.
        begin: Span,
        /// The token expected to close this tag.
        end: Token,
    },
    /// Inside a comment; the contents are emitted as raw tokens.
    Comment {
        /// Span of the begin tag, for error reporting.
        begin: Span,
        /// The token expected to close this comment.
        end: Token,
    },
}
/// Distinguishes, within a tag, whether the preceding tokens form a dotted
/// path. This affects how a run of digits is lexed (`Index` vs `Number`).
#[derive(Clone, Copy)]
#[cfg_attr(internal_debug, derive(Debug))]
enum BlockState {
    /// Not currently in a path.
    Unknown,
    /// In a path, e.g. just after an identifier, `.`, or `?.`.
    Path,
}
/// The kinds of token produced by the lexer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Token {
    /// A run of raw template text (or comment contents).
    Raw,
    /// A begin-expression tag, e.g. `{{` or `{{-`.
    BeginExpr,
    /// An end-expression tag, e.g. `}}` or `-}}`.
    EndExpr,
    /// A begin-block tag, e.g. `{%` or `{%-`.
    BeginBlock,
    /// An end-block tag, e.g. `%}` or `-%}`.
    EndBlock,
    /// A begin-comment tag, e.g. `{#` or `{#-`.
    BeginComment,
    /// An end-comment tag, e.g. `#}` or `-#}`.
    EndComment,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `.`
    Dot,
    /// `?.`
    QuestionDot,
    /// `|`
    Pipe,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// A run of whitespace inside a tag.
    Whitespace,
    /// An identifier that is a registered keyword.
    Keyword,
    /// An identifier.
    Ident,
    /// A numeric path segment, e.g. the `0` in `list.0`.
    Index,
    /// A number literal.
    Number,
    /// A double-quoted string literal, including the quotes.
    String,
}
impl<'engine, 'source> Lexer<'engine, 'source> {
    /// Creates a lexer over `source`, starting in the top-level template
    /// state with the cursor at the beginning of the input.
    pub fn new(engine: &'engine Engine<'engine>, source: &'source str) -> Self {
        Self {
            engine,
            source,
            cursor: 0,
            state: State::Template,
            left_trim: false,
            next: None,
            brackets: Vec::new(),
        }
    }

    /// Returns the next non-whitespace token and its span, or `Ok(None)` when
    /// the input is exhausted.
    pub fn next(&mut self) -> Result<Option<(Token, Span)>> {
        loop {
            match self.lex()? {
                Some((tk, sp)) if !tk.is_whitespace() => return Ok(Some((tk, sp))),
                None => return Ok(None),
                _ => continue,
            }
        }
    }

    /// Returns the next token (including whitespace), dispatching on the
    /// current state.
    fn lex(&mut self) -> Result<Option<(Token, Span)>> {
        // A previous call may have produced two tokens at once (a raw segment
        // plus the tag that follows it); emit the buffered one first.
        if let Some(next) = self.next.take() {
            return Ok(Some(next));
        }
        let i = self.cursor;
        if self.source[i..].is_empty() {
            return Ok(None);
        }
        match self.state {
            State::Template => self.lex_template(i),
            State::Block { begin, end } => self.lex_block(BlockState::Unknown, begin, end, i),
            State::BlockPath { begin, end } => self.lex_block(BlockState::Path, begin, end, i),
            State::Comment { begin, end } => self.lex_comment(begin, end, i),
        }
    }

    /// Lexes raw template text starting at `i` up to the next begin tag (or
    /// end of input), applying any whitespace trimming requested by the
    /// surrounding trim tags.
    fn lex_template(&mut self, i: usize) -> Result<Option<(Token, Span)>> {
        // Produces a raw token over `i..j`, trimming trailing whitespace when
        // the following begin tag has a trim marker, and leading whitespace
        // when the previous end tag requested it via `left_trim`.
        let mut trim_raw_token = |mut i, mut j, right_trim| {
            if right_trim {
                j = self.source[..j].trim_end().len();
            }
            if self.left_trim {
                self.left_trim = false;
                let s = &self.source[i..j];
                i += s.len() - s.trim_start().len();
            }
            Ok(Some((Token::Raw, Span::from(i..j))))
        };
        match self.engine.searcher.find_at(self.source, i) {
            Some((delimiter, j, k)) => {
                let (tk, trim) = Token::from_delimiter(delimiter);
                if !tk.is_begin_tag() {
                    // An end tag with no matching begin tag, e.g. a stray `}}`.
                    return Err(self.err_unexpected_token(tk, j..k));
                }
                // Emits the begin tag token and switches to the matching
                // comment or block state.
                let mut lex = |m, n| {
                    let begin = Span::from(m..n);
                    let end = tk.pair();
                    self.cursor = n;
                    self.state = if tk.is_begin_comment() {
                        State::Comment { begin, end }
                    } else {
                        State::Block { begin, end }
                    };
                    Ok(Some((tk, begin)))
                };
                if i == j {
                    // The tag starts exactly at the cursor: no raw segment.
                    lex(j, k)
                } else {
                    // Buffer the tag token and emit the raw segment first.
                    self.next = lex(j, k)?;
                    trim_raw_token(i, j, trim)
                }
            }
            None => {
                // No more tags: the rest of the source is one raw token.
                let j = self.source.len();
                self.cursor = j;
                trim_raw_token(i, j, false)
            }
        }
    }

    /// Lexes a single token inside an expression or block tag. `begin` is the
    /// span of the begin tag and `end` the token expected to close it.
    /// `block_state` tracks whether we are in the middle of a dotted path, in
    /// which case digits lex as an `Index` rather than a `Number`.
    fn lex_block(
        &mut self,
        block_state: BlockState,
        begin: Span,
        end: Token,
        i: usize,
    ) -> Result<Option<(Token, Span)>> {
        let (tk, j) = match self.engine.searcher.starts_with(self.source, i) {
            Some((delimiter, j)) => {
                let (tk, trim) = Token::from_delimiter(delimiter);
                if tk.is_begin_tag() {
                    // A new begin tag before the current one was closed.
                    return Err(self.err_unclosed(begin, end));
                }
                if tk != end {
                    // The wrong kind of end tag, e.g. `%}` closing `{{`.
                    return Err(self.err_unexpected_token(tk, i..j));
                }
                if let Some((open, end)) = self.brackets.pop() {
                    // The tag closed while a bracket was still open.
                    return Err(self.err_unclosed(open, end));
                }
                self.state = State::Template;
                self.left_trim = trim;
                (tk, j)
            }
            None => {
                let mut iter = self.source[i..].char_indices().map(|(d, c)| (i + d, c));
                // `lex` already checked that `source[i..]` is non-empty, so
                // there is at least one character here.
                let (i, c) = iter.next().unwrap();
                match c {
                    '.' => (Token::Dot, i + 1),
                    '|' => (Token::Pipe, i + 1),
                    ',' => (Token::Comma, i + 1),
                    ':' => (Token::Colon, i + 1),
                    '+' => (Token::Plus, i + 1),
                    '-' => (Token::Minus, i + 1),
                    '[' => self.lex_open(Token::OpenBracket, i, c),
                    ']' => self.lex_close(Token::CloseBracket, i, c)?,
                    '{' => self.lex_open(Token::OpenBrace, i, c),
                    '}' => self.lex_close(Token::CloseBrace, i, c)?,
                    '(' => self.lex_open(Token::OpenParen, i, c),
                    ')' => self.lex_close(Token::CloseParen, i, c)?,
                    '?' => self.lex_question_dot(iter, i)?,
                    '"' => self.lex_string(iter, i)?,
                    // Digits must be checked before identifier starts so that
                    // path indices like `list.0` never lex as identifiers.
                    c if c.is_ascii_digit() => match block_state {
                        BlockState::Path => self.lex_index(iter),
                        BlockState::Unknown => self.lex_number(iter),
                    },
                    c if is_whitespace(c) => self.lex_whitespace(iter),
                    c if is_ident_start(c) => self.lex_ident_or_keyword(iter, i),
                    _ => {
                        return Err(self.err_unexpected_character(i..(i + c.len_utf8())));
                    }
                }
            }
        };
        // Transition between `Block` and `BlockPath`: identifiers and member
        // accesses start a path (so following digits become indices); tokens
        // that cannot continue a path return us to the ordinary block state.
        match (block_state, tk) {
            (BlockState::Unknown, Token::Ident | Token::Dot | Token::QuestionDot) => {
                self.state = State::BlockPath { begin, end };
            }
            (BlockState::Path, Token::OpenParen | Token::Pipe | Token::Comma | Token::Colon) => {
                self.state = State::Block { begin, end };
            }
            _ => {}
        }
        self.cursor = j;
        Ok(Some((tk, Span::from(i..j))))
    }

    /// Lexes inside a comment: everything up to the matching end tag is
    /// emitted as a single raw token.
    fn lex_comment(&mut self, begin: Span, end: Token, i: usize) -> Result<Option<(Token, Span)>> {
        match self.engine.searcher.find_at(self.source, i) {
            Some((delimiter, j, k)) => {
                let (tk, trim) = Token::from_delimiter(delimiter);
                if tk.is_begin_tag() {
                    // A new begin tag inside an unterminated comment.
                    return Err(self.err_unclosed(begin, end));
                }
                if tk != end {
                    return Err(self.err_unexpected_token(tk, j..k));
                }
                // Emits the end tag token and returns to the template state.
                let mut lex = |m, n| {
                    self.cursor = n;
                    self.state = State::Template;
                    self.left_trim = trim;
                    let end = Span::from(m..n);
                    Ok(Some((tk, end)))
                };
                if i == j {
                    lex(j, k)
                } else {
                    // Buffer the end tag and emit the comment body first.
                    self.next = lex(j, k)?;
                    Ok(Some((Token::Raw, Span::from(i..j))))
                }
            }
            None => {
                // NOTE(review): an unterminated comment is not an error here;
                // the rest of the source is emitted as raw. Presumably the
                // parser reports the unclosed tag — confirm.
                let j = self.source.len();
                self.cursor = j;
                Ok(Some((Token::Raw, Span::from(i..j))))
            }
        }
    }

    /// Records an opening bracket (with its span and the token expected to
    /// close it) on the bracket stack and returns its token.
    fn lex_open(&mut self, tk: Token, i: usize, c: char) -> (Token, usize) {
        let sp = Span::from(i..(i + c.len_utf8()));
        self.brackets.push((sp, tk.pair()));
        (tk, i + 1)
    }

    /// Checks a closing bracket against the most recently opened one, erroring
    /// on a mismatch or when no bracket is open.
    fn lex_close(&mut self, tk: Token, i: usize, c: char) -> Result<(Token, usize)> {
        let j = i + c.len_utf8();
        let (_, close) = self
            .brackets
            .pop()
            .ok_or_else(|| self.err_unexpected_token(tk, i..j))?;
        if close != tk {
            return Err(self.err_unexpected_token(tk, i..j));
        }
        Ok((tk, j))
    }

    /// Lexes `?.`; `i` is the index of the `?`, which must be immediately
    /// followed by a `.`.
    fn lex_question_dot<I>(&mut self, mut iter: I, i: usize) -> Result<(Token, usize)>
    where
        I: Iterator<Item = (usize, char)> + Clone,
    {
        match iter.next() {
            // `?` and `.` are both one byte, so the token ends at `i + 2`.
            Some((_, '.')) => Ok((Token::QuestionDot, i + 2)),
            Some((j, c)) => Err(self.err_unexpected_character(i..j + c.len_utf8())),
            None => Err(self.err_unexpected_character(i..self.source.len())),
        }
    }

    /// Lexes a string literal; `i` is the index of the opening quote and
    /// `iter` is positioned just after it. A quote preceded by a backslash is
    /// escaped and does not terminate the string.
    fn lex_string<I>(&mut self, mut iter: I, i: usize) -> Result<(Token, usize)>
    where
        I: Iterator<Item = (usize, char)> + Clone,
    {
        // Whether the previous character was an *unescaped* backslash. Using
        // a flag that resets after each escape (instead of remembering only
        // the previous character) means an escaped backslash followed by a
        // quote — e.g. the literal `"\\"` — correctly terminates the string.
        let mut escaped = false;
        loop {
            match iter.next() {
                None => {
                    return Err(self.err_undelimited_string(i..self.source.len()));
                }
                Some((j, '\r' | '\n')) => {
                    return Err(self.err_undelimited_string(i..j));
                }
                Some((j, '"')) if !escaped => {
                    return Ok((Token::String, j + 1));
                }
                Some((_, '\\')) if !escaped => {
                    escaped = true;
                }
                Some(_) => {
                    escaped = false;
                }
            }
        }
    }

    /// Lexes a number literal. The accepted character set is deliberately
    /// loose (letters for hex like `0xff`, `+`/`-` for exponents, `.` for
    /// decimals); presumably the parser validates the actual numeric syntax.
    fn lex_number<I>(&mut self, iter: I) -> (Token, usize)
    where
        I: Iterator<Item = (usize, char)> + Clone,
    {
        (Token::Number, self.lex_while(iter, is_number))
    }

    /// Lexes a numeric path segment (digits only), e.g. the `0` in `list.0`.
    fn lex_index<I>(&mut self, iter: I) -> (Token, usize)
    where
        I: Iterator<Item = (usize, char)> + Clone,
    {
        (Token::Index, self.lex_while(iter, is_index))
    }

    /// Lexes a run of whitespace inside a tag.
    fn lex_whitespace<I>(&mut self, iter: I) -> (Token, usize)
    where
        I: Iterator<Item = (usize, char)> + Clone,
    {
        (Token::Whitespace, self.lex_while(iter, is_whitespace))
    }

    /// Lexes an identifier starting at `i`, classifying it as a keyword when
    /// it matches one of the engine's registered keywords.
    fn lex_ident_or_keyword<I>(&mut self, iter: I, i: usize) -> (Token, usize)
    where
        I: Iterator<Item = (usize, char)> + Clone,
    {
        let j = self.lex_while(iter, is_ident);
        let tk = match Keyword::all().contains(&&self.source[i..j]) {
            true => Token::Keyword,
            false => Token::Ident,
        };
        (tk, j)
    }

    /// Consumes characters while `pred` holds and returns the byte index just
    /// past the last accepted character. The iterator is cloned to peek, so
    /// the first rejected character is left unconsumed.
    fn lex_while<I, P>(&mut self, mut iter: I, pred: P) -> usize
    where
        I: Iterator<Item = (usize, char)> + Clone,
        P: Fn(char) -> bool,
    {
        loop {
            match iter.clone().next() {
                Some((_, c)) if pred(c) => {
                    iter.next().unwrap();
                }
                Some((j, _)) => return j,
                None => return self.source.len(),
            }
        }
    }

    /// Builds an error for a begin tag or bracket at `begin` that was never
    /// closed; `end` is the token that would have closed it.
    fn err_unclosed(&self, begin: Span, end: Token) -> Error {
        let end = end.pair().human();
        Error::syntax(format!("unclosed {end}"), self.source, begin)
    }

    /// Builds an error for a token that is not valid at `span`.
    fn err_unexpected_token(&self, tk: Token, span: impl Into<Span>) -> Error {
        let tk = tk.human();
        Error::syntax(format!("unexpected {tk}"), self.source, span)
    }

    /// Builds an error for a character that cannot start any token.
    fn err_unexpected_character(&self, span: impl Into<Span>) -> Error {
        Error::syntax("unexpected character", self.source, span)
    }

    /// Builds an error for a string literal with no closing quote on the
    /// same line.
    fn err_undelimited_string(&self, span: impl Into<Span>) -> Error {
        Error::syntax("undelimited string", self.source, span)
    }
}
impl Token {
pub fn human(&self) -> &'static str {
match self {
Self::Raw => "raw template",
Self::BeginExpr => "begin expression",
Self::EndExpr => "end expression",
Self::BeginBlock => "begin block",
Self::EndBlock => "end block",
Self::BeginComment => "begin comment",
Self::EndComment => "end comment",
Self::OpenBracket => "open bracket",
Self::CloseBracket => "close bracket",
Self::OpenBrace => "open brace",
Self::CloseBrace => "close brace",
Self::OpenParen => "open parenthesis",
Self::CloseParen => "close parenthesis",
Self::Dot => "member access operator",
Self::QuestionDot => "optional member access operator",
Self::Pipe => "pipe",
Self::Comma => "comma",
Self::Colon => "colon",
Self::Minus => "minus",
Self::Plus => "plus",
Self::Whitespace => "whitespace",
Self::Keyword => "keyword",
Self::Ident => "identifier",
Self::Index => "index",
Self::String => "string",
Self::Number => "number",
}
}
fn pair(&self) -> Self {
match self {
Self::BeginExpr => Self::EndExpr,
Self::EndExpr => Self::BeginExpr,
Self::BeginBlock => Self::EndBlock,
Self::EndBlock => Self::BeginBlock,
Self::BeginComment => Self::EndComment,
Self::EndComment => Self::BeginComment,
Self::OpenBracket => Self::CloseBracket,
Self::CloseBracket => Self::OpenBracket,
Self::OpenBrace => Self::CloseBrace,
Self::CloseBrace => Self::OpenBrace,
Self::OpenParen => Self::CloseParen,
Self::CloseParen => Self::OpenParen,
_ => panic!("not a tag or bracket"),
}
}
fn is_begin_tag(&self) -> bool {
matches!(
self,
Self::BeginExpr | Self::BeginBlock | Self::BeginComment
)
}
fn is_begin_comment(&self) -> bool {
matches!(self, Self::BeginComment)
}
fn is_whitespace(&self) -> bool {
matches!(self, Self::Whitespace)
}
fn from_delimiter(d: Delimiter) -> (Self, bool) {
match d {
Delimiter::BeginExpr => (Self::BeginExpr, false),
Delimiter::EndExpr => (Self::EndExpr, false),
Delimiter::BeginExprTrim => (Self::BeginExpr, true),
Delimiter::EndExprTrim => (Self::EndExpr, true),
Delimiter::BeginBlock => (Self::BeginBlock, false),
Delimiter::EndBlock => (Self::EndBlock, false),
Delimiter::BeginBlockTrim => (Self::BeginBlock, true),
Delimiter::EndBlockTrim => (Self::EndBlock, true),
Delimiter::BeginComment => (Self::BeginComment, false),
Delimiter::EndComment => (Self::EndComment, false),
Delimiter::BeginCommentTrim => (Self::BeginComment, true),
Delimiter::EndCommentTrim => (Self::EndComment, true),
}
}
}
/// Returns whether `c` is one of the whitespace characters the lexer accepts
/// inside a tag: tab, space, carriage return, or line feed.
fn is_whitespace(c: char) -> bool {
    c == '\t' || c == ' ' || c == '\r' || c == '\n'
}
/// Returns whether `c` can start an identifier: `_` or any Unicode XID start
/// character.
#[cfg(feature = "unicode")]
fn is_ident_start(c: char) -> bool {
    c == '_' || unicode_ident::is_xid_start(c)
}
/// Returns whether `c` can continue an identifier: any Unicode XID continue
/// character (which already includes `_` and the digits).
#[cfg(feature = "unicode")]
fn is_ident(c: char) -> bool {
    unicode_ident::is_xid_continue(c)
}
/// Returns whether `c` can start an identifier in ASCII-only mode: an ASCII
/// letter or `_`.
///
/// Digits were previously accepted here too, which was inconsistent with the
/// `unicode` variant of this function (XID start excludes digits) and was
/// unreachable in practice: `lex_block` tests for digits before it tests for
/// identifier starts.
#[cfg(not(feature = "unicode"))]
fn is_ident_start(c: char) -> bool {
    matches!(c, 'A'..='Z' | 'a'..='z' | '_')
}
/// Returns whether `c` can continue an identifier in ASCII-only mode: an
/// ASCII letter, an ASCII digit, or `_`.
#[cfg(not(feature = "unicode"))]
fn is_ident(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_'
}
/// Returns whether `c` can appear in a numeric path segment: ASCII digits
/// only.
fn is_index(c: char) -> bool {
    matches!(c, '0'..='9')
}
/// Returns whether `c` can appear in a number literal. Deliberately
/// permissive: letters allow hex forms like `0xff`, `+`/`-` allow exponent
/// signs, and `.` allows decimals; presumably stricter validation happens
/// when the number is actually parsed.
fn is_number(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '+' | '.')
}
#[cfg(test)]
mod tests {
    use super::*;

    // Each test lexes a template with the default engine and compares the
    // complete token stream — including whitespace and raw segments — against
    // the expected `(token, source text)` pairs.

    #[test]
    fn lex_empty() {
        let tokens = lex("").unwrap();
        assert_eq!(tokens, []);
    }

    #[test]
    fn lex_raw() {
        let tokens = lex("lorem ipsum").unwrap();
        assert_eq!(tokens, [(Token::Raw, "lorem ipsum")]);
    }

    #[test]
    fn lex_begin_expr() {
        let tokens = lex("lorem ipsum {{").unwrap();
        assert_eq!(
            tokens,
            [(Token::Raw, "lorem ipsum "), (Token::BeginExpr, "{{"),]
        );
    }

    #[test]
    fn lex_begin_expr_trickery() {
        let tokens = lex("lorem { ipsum {{").unwrap();
        assert_eq!(
            tokens,
            [(Token::Raw, "lorem { ipsum "), (Token::BeginExpr, "{{"),]
        );
    }

    #[test]
    fn lex_begin_expr_trim() {
        let tokens = lex("lorem ipsum \t\n{{-").unwrap();
        assert_eq!(
            tokens,
            [(Token::Raw, "lorem ipsum"), (Token::BeginExpr, "{{-"),]
        );
    }

    #[test]
    fn lex_begin_expr_eof() {
        let tokens = lex("lorem ipsum {{ dolor").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum "),
                (Token::BeginExpr, "{{"),
                (Token::Whitespace, " "),
                (Token::Ident, "dolor"),
            ]
        );
    }

    #[test]
    fn lex_end_expr_trim() {
        let tokens = lex("lorem ipsum {{ -}} \t\ndolor sit amet").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum "),
                (Token::BeginExpr, "{{"),
                (Token::Whitespace, " "),
                (Token::EndExpr, "-}}"),
                (Token::Raw, "dolor sit amet")
            ]
        );
    }

    #[test]
    fn lex_empty_expr() {
        let tokens = lex("lorem ipsum {{}}").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum "),
                (Token::BeginExpr, "{{"),
                (Token::EndExpr, "}}"),
            ]
        );
    }

    #[test]
    fn lex_expr_double_trim() {
        let tokens = lex("lorem {{ -}} {{- }} dolor").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem "),
                (Token::BeginExpr, "{{"),
                (Token::Whitespace, " "),
                (Token::EndExpr, "-}}"),
                (Token::Raw, ""),
                (Token::BeginExpr, "{{-"),
                (Token::Whitespace, " "),
                (Token::EndExpr, "}}"),
                (Token::Raw, " dolor")
            ]
        );
    }

    #[test]
    fn lex_expr_multiline() {
        let tokens = lex("lorem {{\n ipsum }} dolor").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem "),
                (Token::BeginExpr, "{{"),
                (Token::Whitespace, "\n "),
                (Token::Ident, "ipsum"),
                (Token::Whitespace, " "),
                (Token::EndExpr, "}}"),
                (Token::Raw, " dolor")
            ]
        );
    }

    #[test]
    fn lex_expr_literals() {
        let tokens = lex("lorem {{ [1, 3] {.} }} dolor").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem "),
                (Token::BeginExpr, "{{"),
                (Token::Whitespace, " "),
                (Token::OpenBracket, "["),
                (Token::Number, "1"),
                (Token::Comma, ","),
                (Token::Whitespace, " "),
                (Token::Number, "3"),
                (Token::CloseBracket, "]"),
                (Token::Whitespace, " "),
                (Token::OpenBrace, "{"),
                (Token::Dot, "."),
                (Token::CloseBrace, "}"),
                (Token::Whitespace, " "),
                (Token::EndExpr, "}}"),
                (Token::Raw, " dolor")
            ]
        );
    }

    #[cfg(feature = "unicode")]
    #[test]
    fn lex_expr() {
        let tokens = lex(
            "lorem ipsum {{ . ?. |\t (_aZ_0 привіт) :\"hello\\n\" 0.5 0xffee00 }} dolor sit amet",
        )
        .unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum "),
                (Token::BeginExpr, "{{"),
                (Token::Whitespace, " "),
                (Token::Dot, "."),
                (Token::Whitespace, " "),
                (Token::QuestionDot, "?."),
                (Token::Whitespace, " "),
                (Token::Pipe, "|"),
                (Token::Whitespace, "\t "),
                (Token::OpenParen, "("),
                (Token::Ident, "_aZ_0"),
                (Token::Whitespace, " "),
                (Token::Ident, "привіт"),
                (Token::CloseParen, ")"),
                (Token::Whitespace, " "),
                (Token::Colon, ":"),
                (Token::String, "\"hello\\n\""),
                (Token::Whitespace, " "),
                (Token::Number, "0.5"),
                (Token::Whitespace, " "),
                (Token::Number, "0xffee00"),
                (Token::Whitespace, " "),
                (Token::EndExpr, "}}"),
                (Token::Raw, " dolor sit amet"),
            ]
        );
    }

    #[test]
    fn lex_expr_path_with_index() {
        let tokens = lex("lorem {{ ipsum.123?.dolor }} sit amet").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem "),
                (Token::BeginExpr, "{{"),
                (Token::Whitespace, " "),
                (Token::Ident, "ipsum"),
                (Token::Dot, "."),
                (Token::Index, "123"),
                (Token::QuestionDot, "?."),
                (Token::Ident, "dolor"),
                (Token::Whitespace, " "),
                (Token::EndExpr, "}}"),
                (Token::Raw, " sit amet")
            ]
        )
    }

    #[test]
    fn lex_expr_trim() {
        let tokens = lex("lorem ipsum {{- .|\t aZ_0 -}} dolor sit amet").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum"),
                (Token::BeginExpr, "{{-"),
                (Token::Whitespace, " "),
                (Token::Dot, "."),
                (Token::Pipe, "|"),
                (Token::Whitespace, "\t "),
                (Token::Ident, "aZ_0"),
                (Token::Whitespace, " "),
                (Token::EndExpr, "-}}"),
                (Token::Raw, "dolor sit amet"),
            ]
        );
    }

    #[test]
    fn lex_begin_block() {
        let tokens = lex("lorem ipsum {%").unwrap();
        assert_eq!(
            tokens,
            [(Token::Raw, "lorem ipsum "), (Token::BeginBlock, "{%"),]
        );
    }

    #[test]
    fn lex_empty_block() {
        let tokens = lex("lorem ipsum {%%}").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum "),
                (Token::BeginBlock, "{%"),
                (Token::EndBlock, "%}"),
            ]
        );
    }

    #[test]
    fn lex_block_trim() {
        let tokens = lex("lorem ipsum {%- dolor -%} sit").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum"),
                (Token::BeginBlock, "{%-"),
                (Token::Whitespace, " "),
                (Token::Ident, "dolor"),
                (Token::Whitespace, " "),
                (Token::EndBlock, "-%}"),
                (Token::Raw, "sit"),
            ]
        );
    }

    #[test]
    fn lex_block_multiline() {
        let tokens = lex("lorem {%\n ipsum %} dolor").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem "),
                (Token::BeginBlock, "{%"),
                (Token::Whitespace, "\n "),
                (Token::Ident, "ipsum"),
                (Token::Whitespace, " "),
                (Token::EndBlock, "%}"),
                (Token::Raw, " dolor")
            ]
        );
    }

    #[test]
    fn lex_block_and_expr() {
        let tokens =
            lex("{% if cond %} lorem ipsum {{ path.segment }} dolor sit amet {% end %}").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::BeginBlock, "{%"),
                (Token::Whitespace, " "),
                (Token::Keyword, "if"),
                (Token::Whitespace, " "),
                (Token::Ident, "cond"),
                (Token::Whitespace, " "),
                (Token::EndBlock, "%}"),
                (Token::Raw, " lorem ipsum "),
                (Token::BeginExpr, "{{"),
                (Token::Whitespace, " "),
                (Token::Ident, "path"),
                (Token::Dot, "."),
                (Token::Ident, "segment"),
                (Token::Whitespace, " "),
                (Token::EndExpr, "}}"),
                (Token::Raw, " dolor sit amet "),
                (Token::BeginBlock, "{%"),
                (Token::Whitespace, " "),
                (Token::Ident, "end"),
                (Token::Whitespace, " "),
                (Token::EndBlock, "%}"),
            ]
        );
    }

    #[test]
    fn lex_begin_comment() {
        let tokens = lex("lorem ipsum {#").unwrap();
        assert_eq!(
            tokens,
            [(Token::Raw, "lorem ipsum "), (Token::BeginComment, "{#"),]
        );
    }

    #[test]
    fn lex_begin_comment_trim() {
        let tokens = lex("lorem ipsum \t\n{#-").unwrap();
        assert_eq!(
            tokens,
            [(Token::Raw, "lorem ipsum"), (Token::BeginComment, "{#-"),]
        );
    }

    #[test]
    fn lex_begin_comment_eof() {
        let tokens = lex("lorem ipsum {# dolor").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum "),
                (Token::BeginComment, "{#"),
                (Token::Raw, " dolor")
            ]
        );
    }

    #[test]
    fn lex_end_comment() {
        let tokens = lex("lorem ipsum {# dolor #} sit amet").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum "),
                (Token::BeginComment, "{#"),
                (Token::Raw, " dolor "),
                (Token::EndComment, "#}"),
                (Token::Raw, " sit amet"),
            ]
        );
    }

    #[test]
    fn lex_end_comment_trim() {
        let tokens = lex("lorem ipsum {# -#} \t\ndolor sit amet").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum "),
                (Token::BeginComment, "{#"),
                (Token::Raw, " "),
                (Token::EndComment, "-#}"),
                (Token::Raw, "dolor sit amet"),
            ]
        );
    }

    #[test]
    fn lex_empty_comment() {
        let tokens = lex("lorem ipsum {##}").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum "),
                (Token::BeginComment, "{#"),
                (Token::EndComment, "#}"),
            ]
        );
    }

    #[test]
    fn lex_comment() {
        let tokens = lex("lorem ipsum {# anything goes e.g. - # { #}").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum "),
                (Token::BeginComment, "{#"),
                (Token::Raw, " anything goes e.g. - # { "),
                (Token::EndComment, "#}"),
            ]
        );
    }

    #[test]
    fn lex_comment_trim() {
        // Unlike `lex_comment`, this uses trimmed comment delimiters, so the
        // raw whitespace on either side of the comment must be stripped while
        // the comment body itself is left untouched.
        let tokens = lex("lorem ipsum \t\n{#- anything goes e.g. - # { -#} \t\ndolor").unwrap();
        assert_eq!(
            tokens,
            [
                (Token::Raw, "lorem ipsum"),
                (Token::BeginComment, "{#-"),
                (Token::Raw, " anything goes e.g. - # { "),
                (Token::EndComment, "-#}"),
                (Token::Raw, "dolor"),
            ]
        );
    }

    /// Lexes the entire source into `(token, text)` pairs, including
    /// whitespace tokens, and checks that the lexer keeps returning `None`
    /// once the input is exhausted (i.e. it is fused).
    #[track_caller]
    fn lex(source: &str) -> Result<Vec<(Token, &str)>> {
        let engine = Engine::default();
        let mut lexer = Lexer::new(&engine, source);
        let mut tokens = Vec::new();
        while let Some((tk, sp)) = lexer.lex()? {
            tokens.push((tk, &source[sp]));
        }
        for _ in 0..3 {
            assert!(lexer.lex().unwrap().is_none());
        }
        Ok(tokens)
    }
}