mod test_utils;
use lexgen::lexer;
use lexgen_util::{LexerError, LexerErrorKind};
use test_utils::{loc, next};
// String-body lexer: characters are accumulated in the user state until a bare `"` is seen,
// at which point the accumulated text is returned as the token. The second scenario checks
// that the input `\"` followed by `"` yields a lone `"`, i.e. the two-character escape rule
// handled the backslash rather than the wildcard pushing it into the buffer.
#[test]
fn failure_confusion_1() {
    // User state: text collected so far for the string being lexed.
    #[derive(Debug, Default)]
    struct LexerState {
        buf: String,
    }

    lexer! {
        Lexer(LexerState) -> String;

        let whitespace = [' ' '\t' '\n'];

        // A bare double quote: return the buffer and leave an empty one behind.
        '"' => |lexer| {
            println!("matched a double quote");
            let finished = std::mem::take(&mut lexer.state().buf);
            lexer.return_(finished)
        },

        // Backslash followed by a double quote: record a literal quote and keep going.
        "\\\"" => |lexer| {
            println!("matched an escaped double quote");
            lexer.state().buf.push('"');
            lexer.continue_()
        },

        // Anything else: append the most recently matched character to the buffer.
        _ => |lexer| {
            let last_char = lexer.match_().chars().next_back().unwrap();
            println!("wildcard matched {:?}", last_char);
            lexer.state().buf.push(last_char);
            lexer.continue_()
        },
    }

    // (input, the single token expected before the lexer is exhausted)
    let scenarios = [("test\"", "test"), ("\\\"\"", "\"")];
    for &(input, expected) in &scenarios {
        let mut lexer = Lexer::new(input);
        assert_eq!(next(&mut lexer), Some(Ok(String::from(expected))));
        assert_eq!(next(&mut lexer), None);
    }
}
// Nested `(* ... *)` comment lexer driven by a depth counter kept in the user state.
// No rule returns a token, so the whole input is expected to be consumed with the
// iterator yielding `None` on the first call.
#[test]
fn failure_confusion_2() {
    // User state: how many comment openers are currently unclosed.
    #[derive(Debug, Default)]
    struct LexerState {
        comment_depth: usize,
    }

    lexer! {
        Lexer(LexerState) -> ();

        rule Init {
            ' ',

            // Opening a comment from the top level starts the counter at one.
            "(*" => |lexer| {
                lexer.state().comment_depth = 1;
                lexer.switch(LexerRule::Comment)
            },
        }

        rule Comment {
            // A nested opener goes one level deeper.
            "(*" => |lexer| {
                lexer.state().comment_depth += 1;
                lexer.continue_()
            },

            // A closer either pops one nesting level or, at depth one, leaves the
            // comment rule set (the counter itself is left untouched in that case).
            "*)" => |lexer| {
                if lexer.state().comment_depth != 1 {
                    lexer.state().comment_depth -= 1;
                    lexer.continue_()
                } else {
                    lexer.switch(LexerRule::Init)
                }
            },

            // Everything else inside a comment is skipped.
            _,
        }
    }

    let mut lexer = Lexer::new("(* * *) (* (* ** *) *)");
    assert!(lexer.next().is_none());
}
// Three rules: a space (0), the literal "ab" (1) and a single-character wildcard (2).
// The expected stream for "a ab abc" shows a lone `a` going to the wildcard, "ab" going to
// the literal rule, and the trailing `c` going to the wildcard again.
#[test]
fn failure_confusion_3_1() {
    lexer! {
        Lexer -> usize;

        ' ' = 0,
        "ab" = 1,
        _ = 2,
    }

    let mut lexer = Lexer::new("a ab abc");

    // Tokens in the order they must be produced.
    let expected_tokens: [usize; 6] = [2, 0, 1, 0, 1, 2];
    for &token in &expected_tokens {
        assert_eq!(next(&mut lexer), Some(Ok(token)));
    }
    assert_eq!(next(&mut lexer), None);
}
// A one-or-more `$$ascii_lowercase` rule (1) next to a comma rule (2): for the input "f,"
// the letter run must end at the comma and the comma must be lexed as its own token.
#[test]
fn failure_confusion_3_2() {
    lexer! {
        Lexer -> usize;

        $$ascii_lowercase+ = 1,
        ',' = 2,
    }

    let mut lexer = Lexer::new("f,");

    let expected_tokens: [usize; 2] = [1, 2];
    for &token in &expected_tokens {
        assert_eq!(next(&mut lexer), Some(Ok(token)));
    }
    assert_eq!(next(&mut lexer), None);
}
// Overlapping literals "aaa" (1) and "aa" (2) plus a wildcard (3); spaces are skipped.
// For "aaa aa a" each word must be classified by the longest rule that matches it fully.
#[test]
fn failure_confusion_4() {
    lexer! {
        Lexer -> u32;

        ' ',
        "aaa" = 1,
        "aa" = 2,
        _ = 3,
    }

    let mut lexer = Lexer::new("aaa aa a");

    let expected_tokens: [u32; 3] = [1, 2, 3];
    for &token in &expected_tokens {
        assert_eq!(next(&mut lexer), Some(Ok(token)));
    }
    assert_eq!(next(&mut lexer), None);
}
// A lexer whose only rule is an action-less wildcard: for empty, one-character and
// multi-character inputs alike, the very first `next()` call must yield `None`.
#[test]
fn continue_confusion_1() {
    lexer! {
        Lexer -> u32;

        _,
    }

    for &input in &["", "a", "aaa"] {
        let mut lexer = Lexer::new(input);
        assert_eq!(lexer.next(), None);
    }
}
// `Init` consumes one character and switches to `Test`, whose only rule is an action-less
// wildcard. For both inputs below, the first item produced by the lexer must be an error.
#[test]
fn continue_confusion_2() {
    lexer! {
        Lexer -> u32;

        rule Init {
            _ => |lexer| lexer.switch(LexerRule::Test),
        }

        rule Test {
            _,
        }
    }

    for &input in &["a", "aa"] {
        let mut lexer = Lexer::new(input);
        let first = lexer.next();
        assert!(matches!(first, Some(Err(_))));
    }
}
// Two rule sets that hand control to each other while returning the matched text.
// The second token must be exactly "bbb": the match reported after a return must not
// still contain the previously returned "aaa".
#[test]
fn return_should_reset_match() {
    lexer! {
        Lexer -> &'input str;

        rule Init {
            "aaa" => |lexer| {
                let matched = lexer.match_();
                lexer.switch_and_return(LexerRule::State1, matched)
            },
        }

        rule State1 {
            "bbb" => |lexer| {
                let matched = lexer.match_();
                lexer.switch_and_return(LexerRule::Init, matched)
            },
        }
    }

    let mut lexer = Lexer::new("aaabbb");

    let first = next(&mut lexer);
    assert_eq!(first, Some(Ok("aaa")));

    let second = next(&mut lexer);
    assert_eq!(second, Some(Ok("bbb")));

    assert_eq!(next(&mut lexer), None);
}
// Rules `'a'+ 'b'` and `'a'` share a prefix. With a trailing `b` the long rule must win;
// without it, the input "aaaa" must come out as four separate "a" tokens.
#[test]
fn issue_16_backtracking_1() {
    lexer! {
        Lexer -> &'input str;

        // One or more `a` followed by a `b`.
        'a'+ 'b' => |lexer| {
            let text = lexer.match_();
            lexer.return_(text)
        },

        // A single `a`.
        'a' => |lexer| {
            let text = lexer.match_();
            lexer.return_(text)
        },
    }

    // The long rule matches the whole input.
    let mut lexer = Lexer::new("aaaab");
    assert_eq!(next(&mut lexer), Some(Ok("aaaab")));
    assert_eq!(next(&mut lexer), None);

    // No `b` anywhere: every `a` is its own token.
    let mut lexer = Lexer::new("aaaa");
    for _ in 0..4 {
        assert_eq!(next(&mut lexer), Some(Ok("a")));
    }
    assert_eq!(next(&mut lexer), None);
}
// Literals "xyzxyz", "xyz" and "xya" all use the same semantic action. For "xyzxya" the
// longest literal does not match completely, so the expected tokens are "xyz" then "xya".
#[test]
fn issue_16_backtracking_2() {
    // Shared semantic action: return the text of the current match as the token.
    fn return_match<'input, I>(
        lexer: &mut Lexer<'input, I>,
    ) -> lexgen_util::SemanticActionResult<&'input str>
    where
        I: Iterator<Item = char> + Clone,
    {
        let text = lexer.match_();
        lexer.return_(text)
    }

    lexer! {
        Lexer -> &'input str;

        "xyzxyz" => return_match,
        "xyz" => return_match,
        "xya" => return_match,
    }

    let mut lexer = Lexer::new("xyzxya");

    for &token in &["xyz", "xya"] {
        assert_eq!(next(&mut lexer), Some(Ok(token)));
    }
    assert_eq!(next(&mut lexer), None);
}
// `Rule1` contains an action-less end-of-input rule (`$`) next to an `'a'` rule.
// For "aa" the second `a` must still be lexed by `Rule1` (tag 1), with the expected
// start/end locations, before the iterator finishes with `None`.
#[test]
fn end_of_input_handling() {
    lexer! {
        Lexer -> (usize, &'input str);

        rule Init {
            'a' => |lexer| {
                let text = lexer.match_();
                lexer.switch_and_return(LexerRule::Rule1, (0, text))
            },
        }

        rule Rule1 {
            $,

            'a' => |lexer| {
                let text = lexer.match_();
                lexer.return_((1, text))
            },
        }
    }

    let mut lexer = Lexer::new("aa");

    // First `a`, produced by `Init`.
    let from_init = (loc(0, 0, 0), (0, "a"), loc(0, 1, 1));
    assert_eq!(lexer.next(), Some(Ok(from_init)));

    // Second `a`, produced by `Rule1`.
    let from_rule1 = (loc(0, 1, 1), (1, "a"), loc(0, 2, 2));
    assert_eq!(lexer.next(), Some(Ok(from_rule1)));

    assert_eq!(lexer.next(), None);
}
// `Init` switches to the empty rule set `HexInt` on "0x" and to `DecInt` on a lone '0'.
// For the input "0xff" the first item must be an `InvalidToken` error located at the start
// of the input; in particular the token "wat" from `DecInt` must not be produced.
#[test]
fn empty_rule_simpification_issue_27() {
    lexer! {
        Lexer -> &'input str;

        rule Init {
            "0x" => |lexer| lexer.switch(LexerRule::HexInt),
            '0' => |lexer| lexer.switch(LexerRule::DecInt),
        }

        rule DecInt {
            _ => |lexer| lexer.return_("wat"),
        }

        // Deliberately has no rules at all.
        rule HexInt {}
    }

    let mut lexer = Lexer::new("0xff");

    let expected_error = LexerError {
        location: loc(0, 0, 0),
        kind: LexerErrorKind::InvalidToken,
    };
    assert_eq!(next(&mut lexer), Some(Err(expected_error)));
}
// Two rules that both start with a single quote: quote + any character + quote (1), and
// quote + one or more lowercase letters (2). The input `'a'` must be lexed as a single
// token 1 spanning all three characters.
#[test]
fn range_any_overlap_issue_31() {
    lexer! {
        Lexer -> usize;

        "'" _ "'" = 1,
        "'" ['a'-'z']+ = 2,
    }

    let mut lexer = Lexer::new("'a'");

    let whole_input = (loc(0, 0, 0), 1, loc(0, 3, 3));
    assert_eq!(lexer.next(), Some(Ok(whole_input)));
    assert_eq!(lexer.next(), None);
}
// `Init` accepts only 's' (and switches to `InString`); `InString` accepts only 'a' (and
// switches back). For "sxasa" the expected stream is: "s", an error at `x`, an error at the
// following `a`, then "s" and "a". The `a` right after the failure being rejected is what
// one would see if the lexer is back in `Init` after an error.
#[test]
fn failure_should_reset_state_issue_48() {
    lexer! {
        Lexer -> &'input str;

        rule Init {
            's' => |lexer| {
                let text = lexer.match_();
                lexer.switch_and_return(LexerRule::InString, text)
            },
        }

        rule InString {
            'a' => |lexer| {
                let text = lexer.match_();
                lexer.switch_and_return(LexerRule::Init, text)
            },
        }
    }

    let mut lexer = Lexer::new("sxasa");

    // "s" at offset 0.
    assert_eq!(lexer.next(), Some(Ok((loc(0, 0, 0), "s", loc(0, 1, 1)))));

    // `x` at offset 1 is not accepted.
    let error_at_x = LexerError {
        location: loc(0, 1, 1),
        kind: LexerErrorKind::InvalidToken,
    };
    assert_eq!(lexer.next(), Some(Err(error_at_x)));

    // Neither is the `a` at offset 2.
    let error_at_a = LexerError {
        location: loc(0, 2, 2),
        kind: LexerErrorKind::InvalidToken,
    };
    assert_eq!(lexer.next(), Some(Err(error_at_a)));

    // The remaining "sa" lexes normally.
    assert_eq!(lexer.next(), Some(Ok((loc(0, 3, 3), "s", loc(0, 4, 4)))));
    assert_eq!(lexer.next(), Some(Ok((loc(0, 4, 4), "a", loc(0, 5, 5)))));
    assert_eq!(lexer.next(), None);
}
// The user state here does not implement `Default`; the test only constructs lexers through
// the `*_with_state` constructors, once from a string and once from a character iterator.
// There are no assertions: constructing the lexers is the whole check.
#[test]
fn new_methods_no_default() {
    struct UserState {}

    lexer! {
        Lexer(UserState) -> ();

        $ = (),
    }

    let _ = Lexer::new_with_state("", UserState {});
    let _ = Lexer::new_from_iter_with_state(std::iter::empty(), UserState {});
}
// The user state here derives `Default`; the test only constructs lexers through the
// state-less constructors, once from a string and once from a character iterator.
// There are no assertions: constructing the lexers is the whole check.
#[test]
fn new_methods_default() {
    #[derive(Default)]
    struct UserState {}

    lexer! {
        Lexer(UserState) -> ();

        $ = (),
    }

    let _ = Lexer::new("");
    let _ = Lexer::new_from_iter(std::iter::empty());
}