use crate::lexer::char_source::StrSource;
use crate::lexer::{Error, StrTokenizer, Token, TokenType, Tokenizer};
use std::borrow::Cow;
/// Generates one `#[test]` function per entry; each test checks that an input
/// lexes to exactly one token followed by `EOF`.
///
/// Entry syntax: `name: input => token_type, start_byte, end_byte`.
/// Entries are separated by `;`, and a trailing `;` after the last entry is
/// accepted so new cases can be appended without touching the previous line.
///
/// Each generated test asserts that:
/// * the first token has the given type and byte offsets, and its `text`
///   equals the whole input;
/// * the following token is `TokenType::EOF` with empty text and both byte
///   offsets equal to `end_byte + 1`.
macro_rules! single_token_test {
    ($($name:ident : $input:expr => $tok:expr, $start:expr, $end:expr);* $(;)?) => {
        $(
            #[test]
            fn $name() {
                // The result of logger initialisation is deliberately ignored.
                let _ = env_logger::try_init();
                let mut tokenizer: StrTokenizer<'static> = Tokenizer::new($input);
                // The single token is expected to carry the entire input as its text.
                assert_eq!(
                    tokenizer.get_next_token().unwrap(),
                    Token {
                        token_type: $tok,
                        start_byte: $start,
                        end_byte: $end,
                        text: Cow::Owned($input.into()),
                    }
                );
                // Only EOF may follow, positioned one byte past the token's last byte.
                assert_eq!(
                    tokenizer.get_next_token().unwrap(),
                    Token {
                        token_type: TokenType::EOF,
                        start_byte: $end + 1,
                        end_byte: $end + 1,
                        text: Cow::Owned("".into()),
                    }
                );
            }
        )*
    };
}
/// Generates one `#[test]` function per entry; each test checks that an input
/// lexes to exactly one token followed by `EOF` when header names are allowed.
///
/// Entry syntax: `name: input => token_type, start_byte, end_byte`.
/// Entries are separated by `;`, and a trailing `;` after the last entry is
/// accepted so new cases can be appended without touching the previous line.
///
/// Each generated test calls `set_header_name_allowed(true)` on the tokenizer
/// before reading, then asserts that:
/// * the first token has the given type and byte offsets, and its `text`
///   equals the whole input;
/// * the following token is `TokenType::EOF` with empty text and both byte
///   offsets equal to `end_byte + 1`.
macro_rules! single_token_header_test {
    ($($name:ident : $input:expr => $tok:expr, $start:expr, $end:expr);* $(;)?) => {
        $(
            #[test]
            fn $name() {
                // The result of logger initialisation is deliberately ignored.
                let _ = env_logger::try_init();
                let mut tokenizer: StrTokenizer<'static> = Tokenizer::new($input);
                // Enable header-name lexing before the first token is read.
                tokenizer.set_header_name_allowed(true);
                // The single token is expected to carry the entire input as its text.
                assert_eq!(
                    tokenizer.get_next_token().unwrap(),
                    Token {
                        token_type: $tok,
                        start_byte: $start,
                        end_byte: $end,
                        text: Cow::Owned($input.into()),
                    }
                );
                // Only EOF may follow, positioned one byte past the token's last byte.
                assert_eq!(
                    tokenizer.get_next_token().unwrap(),
                    Token {
                        token_type: TokenType::EOF,
                        start_byte: $end + 1,
                        end_byte: $end + 1,
                        text: Cow::Owned("".into()),
                    }
                );
            }
        )*
    };
}
// Single-token cases. Each entry reads:
//     test_name: input => expected token type, start byte, end byte
single_token_test!(
    // --- Punctuators ---
    punct_left_bracket: "[" => TokenType::Punctuator, 0, 0;
    punct_right_bracket: "]" => TokenType::Punctuator, 0, 0;
    punct_left_paren: "(" => TokenType::Punctuator, 0, 0;
    punct_right_paren: ")" => TokenType::Punctuator, 0, 0;
    punct_left_brace: "{" => TokenType::Punctuator, 0, 0;
    punct_right_brace: "}" => TokenType::Punctuator, 0, 0;
    punct_dot: "." => TokenType::Punctuator, 0, 0;
    punct_push: "->" => TokenType::Punctuator, 0, 1;
    punct_increment: "++" => TokenType::Punctuator, 0, 1;
    punct_decrement: "--" => TokenType::Punctuator, 0, 1;
    punct_bit_and: "&" => TokenType::Punctuator, 0, 0;
    punct_mul: "*" => TokenType::Punctuator, 0, 0;
    punct_plus: "+" => TokenType::Punctuator, 0, 0;
    punct_minux: "-" => TokenType::Punctuator, 0, 0;
    punct_bit_not: "~" => TokenType::Punctuator, 0, 0;
    punct_bang: "!" => TokenType::Punctuator, 0, 0;
    punct_div: "/" => TokenType::Punctuator, 0, 0;
    punct_mod: "%" => TokenType::Punctuator, 0, 0;
    punct_lshift: "<<" => TokenType::Punctuator, 0, 1;
    punct_rshift: ">>" => TokenType::Punctuator, 0, 1;
    punct_lt: "<" => TokenType::Punctuator, 0, 0;
    punct_gt: ">" => TokenType::Punctuator, 0, 0;
    punct_lte: "<=" => TokenType::Punctuator, 0, 1;
    punct_gte: ">=" => TokenType::Punctuator, 0, 1;
    punct_equal: "==" => TokenType::Punctuator, 0, 1;
    punct_bang_eq: "!=" => TokenType::Punctuator, 0, 1;
    punct_xor: "^" => TokenType::Punctuator, 0, 0;
    punct_bit_or: "|" => TokenType::Punctuator, 0, 0;
    punct_logic_and: "&&" => TokenType::Punctuator, 0, 1;
    punct_logic_or: "||" => TokenType::Punctuator, 0, 1;
    punct_question_mark: "?" => TokenType::Punctuator, 0, 0;
    punct_colon: ":" => TokenType::Punctuator, 0, 0;
    punct_semi_colon: ";" => TokenType::Punctuator, 0, 0;
    punct_elipsis: "..." => TokenType::Punctuator, 0, 2;
    // Assignment forms.
    punct_assign: "=" => TokenType::Punctuator, 0, 0;
    punct_mul_eq: "*=" => TokenType::Punctuator, 0, 1;
    punct_div_eq: "/=" => TokenType::Punctuator, 0, 1;
    punct_mod_eq: "%=" => TokenType::Punctuator, 0, 1;
    punct_plus_eq: "+=" => TokenType::Punctuator, 0, 1;
    punct_minux_eq: "-=" => TokenType::Punctuator, 0, 1;
    punct_lshifteq: "<<=" => TokenType::Punctuator, 0, 2;
    punct_rshifteq: ">>=" => TokenType::Punctuator, 0, 2;
    punct_bit_and_eq: "&=" => TokenType::Punctuator, 0, 1;
    punct_xor_eq: "^=" => TokenType::Punctuator, 0, 1;
    punct_bit_or_eq: "|=" => TokenType::Punctuator, 0, 1;
    punct_comma: "," => TokenType::Punctuator, 0, 0;
    punct_pound: "#" => TokenType::Punctuator, 0, 0;
    punct_paste: "##" => TokenType::Punctuator, 0, 1;
    // Alternative spellings of brackets, braces, `#` and `##`.
    punct_alt_left_bracket: "<:" => TokenType::Punctuator, 0, 1;
    punct_alt_right_bracket: ":>" => TokenType::Punctuator, 0, 1;
    punct_alt_left_brace: "<%" => TokenType::Punctuator, 0, 1;
    punct_alt_right_brace: "%>" => TokenType::Punctuator, 0, 1;
    punct_alt_pound: "%:" => TokenType::Punctuator, 0, 1;
    punct_alt_paste: "%:%:" => TokenType::Punctuator, 0, 3;

    // --- Identifiers (including `\u` / `\U` escapes and string-prefix look-alikes) ---
    ident_simple: "a" => TokenType::Identifier, 0, 0;
    ident_underscore: "_" => TokenType::Identifier, 0, 0;
    ident_underscore2: "__FLORG" => TokenType::Identifier, 0, 6;
    ident_ucn: "a\\u1234" => TokenType::Identifier, 0, 6;
    ident_ucn_long: "a\\U12345678" => TokenType::Identifier, 0, 10;
    ident_ucn2: "a\\u1234g" => TokenType::Identifier, 0, 7;
    ident_ucn_long2: "a\\U12345678g" => TokenType::Identifier, 0, 11;
    ident_long: "FooBar_Baz2" => TokenType::Identifier, 0, 10;
    ident_that_might_be_string_specifier: "u8" => TokenType::Identifier, 0, 1;
    ident_that_might_be_string_specifier2: "U" => TokenType::Identifier, 0, 0;
    ident_that_might_be_string_specifier3: "L" => TokenType::Identifier, 0, 0;
    ident_that_might_be_string_specifier4: "u88" => TokenType::Identifier, 0, 2;
    ident_that_might_be_string_specifier5: "uy" => TokenType::Identifier, 0, 1;

    // --- String literals ---
    string_literal_simple: "\"Foo\"" => TokenType::StringLiteral, 0, 4;
    string_literal_with_simple_specifier: "u\"foo\"" => TokenType::StringLiteral, 0, 5;
    string_literal_with_escape: "\"FOO\\\"BAR\"" => TokenType::StringLiteral, 0, 9;
    string_literal_with_specifier: "u8\"foo\"" => TokenType::StringLiteral, 0, 6;

    // --- Preprocessing numbers ---
    ppnumber_zero: "0" => TokenType::PPNumber, 0, 0;
    ppnumber_exponent: "0e5" => TokenType::PPNumber, 0, 2;
    ppnumber_pos_exponent: "0e+" => TokenType::PPNumber, 0, 2;
    ppnumber_neg_exponent: "0e-" => TokenType::PPNumber, 0, 2;
    ppnumber_crazy: ".3OO.FOO.3.14159.ebar_" => TokenType::PPNumber, 0, 21;
    ppnumber_ucn: "3.\\u1234" => TokenType::PPNumber, 0, 7;
    ppnumber_ucn2: "3.\\U12345678" => TokenType::PPNumber, 0, 11;

    // --- Comments ---
    comment_line_empty: "//" => TokenType::Comment, 0, 1;
    comment_line_content: "//foo" => TokenType::Comment, 0, 4;
    comment_block_empty: "/**/" => TokenType::Comment, 0, 3;
    comment_block_content: "/*foo*/" => TokenType::Comment, 0, 6
);
// Header-name cases, lexed with header names allowed. Each entry reads:
//     test_name: input => expected token type, start byte, end byte
single_token_header_test!(
    header_simple_quoted: r#""foo""# => TokenType::HeaderName, 0, 4;
    header_braced_quoted: r#"<foo>"# => TokenType::HeaderName, 0, 4
);
/// A string literal with no closing quote (`"foo`) is expected to report
/// `StringLiteralMissingClosingQuote` on the first read, then yield the
/// partial literal as a `StringLiteral` token spanning bytes 0 through 3,
/// and finally `EOF` at byte 4.
#[test]
fn string_literal_with_no_ending() {
    let _ = env_logger::try_init();
    let source = r#""foo"#;
    let mut lexer: StrTokenizer<'static> = Tokenizer::new(source);

    // First read: the missing quote is reported as an error.
    let missing_quote = Error::StringLiteralMissingClosingQuote;
    assert_eq!(lexer.get_next_token(), Err(missing_quote));

    // Second read: the text seen so far comes back as a string literal.
    let partial_literal = Token {
        token_type: TokenType::StringLiteral,
        start_byte: 0,
        end_byte: 3,
        text: Cow::Owned(source.into()),
    };
    assert_eq!(lexer.get_next_token().unwrap(), partial_literal);

    // Third read: end of input.
    let end_of_input = Token {
        token_type: TokenType::EOF,
        start_byte: 4,
        end_byte: 4,
        text: Cow::Owned("".into()),
    };
    assert_eq!(lexer.get_next_token().unwrap(), end_of_input);
}
/// An unterminated string literal that ends in a lone backslash (`"foo\`) is
/// expected to report `StringLiteralMissingClosingQuote` first, then yield a
/// `StringLiteral` token whose text is `"foo` (bytes 0 through 3); the
/// trailing backslash is not part of the expected token text.
#[test]
fn string_literal_with_no_ending_and_escape() {
    let _ = env_logger::try_init();
    let mut lexer: StrTokenizer<'static> = Tokenizer::new(r#""foo\"#);

    // First read: the missing quote is reported as an error.
    let missing_quote = Error::StringLiteralMissingClosingQuote;
    assert_eq!(lexer.get_next_token(), Err(missing_quote));

    // Second read: the literal up to (not including) the backslash.
    let partial_literal = Token {
        token_type: TokenType::StringLiteral,
        start_byte: 0,
        end_byte: 3,
        text: Cow::Owned(r#""foo"#.into()),
    };
    assert_eq!(lexer.get_next_token().unwrap(), partial_literal);
}
/// An identifier followed by a lone trailing backslash (`foo\`) is expected
/// to lex as the `Identifier` token `foo` (bytes 0 through 2), without the
/// backslash.
#[test]
fn ident_that_ends_in_escape() {
    let _ = env_logger::try_init();
    let mut lexer: StrTokenizer<'static> = Tokenizer::new("foo\\");

    let identifier = Token {
        token_type: TokenType::Identifier,
        start_byte: 0,
        end_byte: 2,
        text: Cow::Owned("foo".into()),
    };
    assert_eq!(lexer.get_next_token().unwrap(), identifier);
}
/// A string literal cut off by a newline (`"foo` followed by `\n`) is
/// expected to report `StringLiteralMissingClosingQuote` first, then yield a
/// `StringLiteral` token whose text is `"foo` (bytes 0 through 3).
#[test]
fn string_literal_with_newline_ending() {
    let _ = env_logger::try_init();
    let mut lexer: StrTokenizer<'static> = Tokenizer::new("\"foo\n");

    // First read: the missing quote is reported as an error.
    let missing_quote = Error::StringLiteralMissingClosingQuote;
    assert_eq!(lexer.get_next_token(), Err(missing_quote));

    // Second read: the literal text that preceded the newline.
    let partial_literal = Token {
        token_type: TokenType::StringLiteral,
        start_byte: 0,
        end_byte: 3,
        text: Cow::Owned("\"foo".into()),
    };
    assert_eq!(lexer.get_next_token().unwrap(), partial_literal);
}
/// A backslash inside an identifier that is not followed by `u` or `U`
/// (`F\c`) is expected to produce `UnexpectedCharacter { found: 'c', .. }`,
/// and the next read is expected to report `CannotRecoverFromError`.
#[test]
fn bad_escape_in_ident() {
    let _ = env_logger::try_init();
    let mut lexer: StrTokenizer<'static> = Tokenizer::new("F\\c");

    let bad_escape = Error::UnexpectedCharacter {
        found: 'c',
        expected: vec!['u', 'U'],
    };
    assert_eq!(lexer.get_next_token(), Err(bad_escape));

    // After that error the tokenizer is expected to stay in a failed state.
    assert_eq!(lexer.get_next_token(), Err(Error::CannotRecoverFromError));
}
/// A backslash inside a preprocessing number that is not followed by `u` or
/// `U` (`0\c`) is expected to produce `UnexpectedCharacter { found: 'c', .. }`,
/// and the next read is expected to report `CannotRecoverFromError`.
#[test]
fn bad_escape_in_ppnumber() {
    let _ = env_logger::try_init();
    let mut lexer: StrTokenizer<'static> = Tokenizer::new("0\\c");

    let bad_escape = Error::UnexpectedCharacter {
        found: 'c',
        expected: vec!['u', 'U'],
    };
    assert_eq!(lexer.get_next_token(), Err(bad_escape));

    // After that error the tokenizer is expected to stay in a failed state.
    assert_eq!(lexer.get_next_token(), Err(Error::CannotRecoverFromError));
}
/// A preprocessing number followed by a lone trailing backslash (`0\`) is
/// expected to lex as the `PPNumber` token `0` (byte 0 only), without the
/// backslash.
#[test]
fn ppnumber_end_in_slash() {
    let _ = env_logger::try_init();
    let mut lexer: StrTokenizer<'static> = Tokenizer::new("0\\");

    let number = Token {
        token_type: TokenType::PPNumber,
        start_byte: 0,
        end_byte: 0,
        text: Cow::Owned("0".into()),
    };
    assert_eq!(lexer.get_next_token().unwrap(), number);
}
/// A `\u` escape containing a non-hex character (`F\u36y8`) is expected to
/// produce `UnexpectedCharacter { found: 'y', .. }` whose `expected` list is
/// the digits `0`-`9` followed by lowercase `a`-`f`; the next read is
/// expected to report `CannotRecoverFromError`.
#[test]
fn bad_ucs_character() {
    let _ = env_logger::try_init();
    let mut lexer: StrTokenizer<'static> = Tokenizer::new("F\\u36y8");

    // Same sixteen characters, in the same order, as spelling them out one by one.
    let hex_digits = "0123456789abcdef".chars().collect();
    let bad_digit = Error::UnexpectedCharacter {
        found: 'y',
        expected: hex_digits,
    };
    assert_eq!(lexer.get_next_token(), Err(bad_digit));

    // After that error the tokenizer is expected to stay in a failed state.
    assert_eq!(lexer.get_next_token(), Err(Error::CannotRecoverFromError));
}
/// With header names allowed, a quoted header name interrupted by a newline
/// (`"foo`, `\n`, `"`) is expected to produce
/// `UnexpectedCharacter { found: '\n', expected: ['"'] }`, and the next read
/// is expected to report `CannotRecoverFromError`.
#[test]
fn bad_quoted_header() {
    let _ = env_logger::try_init();
    let mut lexer: StrTokenizer<'static> = Tokenizer::new("\"foo\n\"");
    lexer.set_header_name_allowed(true);

    let newline_in_header = Error::UnexpectedCharacter {
        found: '\n',
        expected: vec!['"'],
    };
    assert_eq!(lexer.get_next_token(), Err(newline_in_header));

    // After that error the tokenizer is expected to stay in a failed state.
    assert_eq!(lexer.get_next_token(), Err(Error::CannotRecoverFromError));
}
/// With header names allowed, an angle-bracket header name interrupted by a
/// newline (`<foo`, `\n`, `>`) is expected to produce
/// `UnexpectedCharacter { found: '\n', expected: ['>'] }`, and the next read
/// is expected to report `CannotRecoverFromError`.
#[test]
fn bad_bracketed_header() {
    let _ = env_logger::try_init();
    let mut lexer: StrTokenizer<'static> = Tokenizer::new("<foo\n>");
    lexer.set_header_name_allowed(true);

    let newline_in_header = Error::UnexpectedCharacter {
        found: '\n',
        expected: vec!['>'],
    };
    assert_eq!(lexer.get_next_token(), Err(newline_in_header));

    // After that error the tokenizer is expected to stay in a failed state.
    assert_eq!(lexer.get_next_token(), Err(Error::CannotRecoverFromError));
}