use std::fmt::Write;
use dupe::Dupe;
use crate::codemap::CodeMap;
use crate::dialect::Dialect;
use crate::golden_test_template::golden_test_template;
use crate::lexer::Lexer;
use crate::lexer::Token;
use crate::slice_vec_ext::SliceExt;
use crate::slice_vec_ext::VecExt;
/// Lexes `program` with `Dialect::AllOptionsInternal` and returns the
/// `(start, token, end)` triples, panicking if lexing fails.
///
/// The program is lexed a second time with every line ending rewritten to
/// `\r\n`. Both runs must have well-ordered spans and yield the same tokens.
fn lex_tokens(program: &str) -> Vec<(usize, Token, usize)> {
    /// Runs the lexer over `source`; panics with the source and the error on failure.
    fn run_lexer(dialect: &Dialect, source: &str) -> Vec<(usize, Token, usize)> {
        let codemap = CodeMap::new("assert.bzl".to_owned(), source.to_owned());
        let lexed: Result<Vec<_>, _> = Lexer::new(source, dialect, codemap.dupe()).collect();
        match lexed {
            Ok(spanned) => spanned,
            Err(e) => panic!(
                "starlark::assert::lex_tokens, expected lex success but failed\nCode: {}\nError: {}",
                source, e
            ),
        }
    }

    /// Asserts that no span starts before the previous one ended, and that
    /// no span ends before it starts.
    fn assert_spans_ordered(spans: &[(usize, Token, usize)]) {
        let mut prev_end = 0;
        for (start, token, end) in spans {
            let label = format!("Span of {:?} incorrect", token);
            assert!(prev_end <= *start, "{}: {} > {}", label, prev_end, start);
            assert!(start <= end, "{}: {} > {}", label, start, end);
            prev_end = *end;
        }
    }

    let orig_tokens = run_lexer(&Dialect::AllOptionsInternal, program);
    assert_spans_ordered(&orig_tokens);

    // Normalise to `\n` first so existing `\r\n` pairs are not doubled.
    let crlf_program = program.replace("\r\n", "\n").replace('\n', "\r\n");
    let crlf_tokens = run_lexer(&Dialect::AllOptionsInternal, &crlf_program);
    assert_spans_ordered(&crlf_tokens);

    // Only the tokens are compared; the spans differ between the two runs.
    assert_eq!(
        orig_tokens.map(|(_, token, _)| token),
        crlf_tokens.map(|(_, token, _)| token),
        "starlark::assert::lex_tokens, difference using CRLF newlines\nCode: {}",
        program,
    );
    orig_tokens
}
/// Lexes `program` and returns the `unlex()` text of every token, separated by
/// single spaces.
fn lex(program: &str) -> String {
    let pieces = lex_tokens(program).map(|(_, token, _)| token.unlex());
    pieces.join(" ")
}
/// Lexes the trimmed `program` and checks a textual rendering of the result
/// against the golden file `src/lexer_tests/{name}.golden`.
///
/// The rendering is the program text followed by one line per token: the
/// token's `to_string()` left-aligned to the widest token, then ` # ` and the
/// source slice the token spans, with newlines shown as `\n`.
fn lexer_golden_test(name: &str, program: &str) {
    let program = program.trim();

    // Lex first: a lexing failure panics before any output is assembled.
    let tokens = lex_tokens(program).into_map(|(from, token, to)| (from, token.to_string(), to));
    let max_width = tokens
        .iter()
        .fold(0usize, |widest, (_, token, _)| widest.max(token.len()));

    let mut out = String::new();
    writeln!(out, "Program:").unwrap();
    writeln!(out, "{}", program).unwrap();
    writeln!(out).unwrap();
    writeln!(out, "Tokens:").unwrap();
    for (from, token, to) in tokens.iter() {
        // Escape newlines so every token stays on a single output line.
        let source = program[*from..*to].replace('\n', "\\n");
        writeln!(out, "{token:<max_width$} # {source}").unwrap();
    }

    let golden_path = format!("src/lexer_tests/{}.golden", name);
    golden_test_template(&golden_path, &out);
}
/// Lexes each of `programs` (trimmed), requires every one to fail, and checks
/// the programs with their error messages against the golden file
/// `src/lexer_tests/{name}.fail.golden`.
///
/// Panics if any program lexes successfully.
fn lexer_fail_golden_test(name: &str, programs: &[&str]) {
    /// Appends the `Program:` / `Error:` report for one failing program.
    fn report_failure(out: &mut String, program: &str) {
        let codemap = CodeMap::new("x".to_owned(), program.to_owned());
        let lexed: Result<Vec<_>, _> =
            Lexer::new(program, &Dialect::AllOptionsInternal, codemap).collect();
        let e = lexed.unwrap_err();
        writeln!(out, "Program:").unwrap();
        writeln!(out, "{}", program).unwrap();
        writeln!(out).unwrap();
        writeln!(out, "Error:").unwrap();
        writeln!(out, "{}", e).unwrap();
    }

    let mut out = String::new();
    for (index, raw) in programs.iter().enumerate() {
        // A blank line separates consecutive reports.
        if index > 0 {
            writeln!(out).unwrap();
        }
        report_failure(&mut out, raw.trim());
    }

    let golden_path = format!("src/lexer_tests/{}.fail.golden", name);
    golden_test_template(&golden_path, &out);
}
/// Golden test for integer literals: decimal, hexadecimal, and binary and
/// octal with both upper- and lower-case prefixes. Also records the lex error
/// for `x = 01`, which `lexer_fail_golden_test` requires to fail.
#[test]
fn test_int_lit() {
lexer_golden_test(
"int_lit",
r#"
0 123
0x7F 0x7d
0B1011 0b1010
0o755 0O753
"#,
);
lexer_fail_golden_test("int_lit", &["x = 01"]);
}
/// Golden test named `indentation`, over a program of operator tokens placed
/// one per line.
///
/// NOTE(review): every program line below starts at column 0; confirm this is
/// the layout `indentation.golden` was generated from.
#[test]
fn test_indentation() {
lexer_golden_test(
"indentation",
"
+
-
/
*
=
%
.
+=
",
);
}
/// Golden test for punctuation and operator tokens: first separated by
/// spaces, then a second line with several of them run together.
#[test]
fn test_symbols() {
    // The trailing `\` continues the literal; whitespace before the next
    // line's first character is not part of the string.
    let program = ", ; : += -= *= /= //= %= == != <= >= ** = < > - + * % / // . { } [ ] ( ) |\n\
                   ,;:{}[]()|...";
    lexer_golden_test("symbols", program);
}
/// Golden test for a space-separated list of keywords, including the
/// two-word sequence `not in`.
#[test]
fn test_keywords() {
    let program =
        "and else load break for not not in continue if or def in pass elif return lambda";
    lexer_golden_test("keywords", program);
}
/// Golden test for numbers written directly against a following keyword or
/// identifier, with no separating whitespace.
#[test]
fn test_number_collated_with_keywords_or_identifier() {
    let program = "0in 1and 2else 3load 4break 5for 6not 7not in 8continue 10identifier11";
    lexer_golden_test("number_collated_with_keywords_or_identifier", program);
}
/// Records the lex error for each reserved word, lexed as its own
/// one-word program.
#[test]
fn test_reserved() {
    let reserved: Vec<&str> =
        "as import is class nonlocal del raise except try finally while from with global yield"
            .split_whitespace()
            .collect();
    lexer_fail_golden_test("reserved", &reserved);
}
/// Golden test for `#` comments: on their own lines, trailing a token, and
/// inside a list literal.
#[test]
fn test_comment() {
lexer_golden_test(
"comment",
r#"
# first comment
# second comment
a # third comment
# But it should not eat everything
[
# comment inside list
]
"#,
);
}
/// Golden test for identifiers: lower-case, upper-case, and with leading
/// underscores and digits.
#[test]
fn test_identifier() {
    let program = "a identifier CAPS _CAPS _0123";
    lexer_golden_test("identifier", program);
}
/// Checks how string literals round-trip through `lex`: plain single- and
/// double-quoted strings, triple-quoted strings, and raw strings. Also records
/// the lex errors for unterminated strings and strings containing a newline.
#[test]
fn test_string_lit() {
    // Plain single- and double-quoted strings, including escapes.
    let plain_input = "'123' \"123\" '' \"\" '\\'' \"\\\"\" '\"' \"'\" '\\n' '\\w'";
    let plain_expected =
        "\"123\" \"123\" \"\" \"\" \"\'\" \"\\\"\" \"\\\"\" \"\'\" \"\\n\" \"\\\\w\" \n";
    assert_eq!(lex(plain_input), plain_expected);

    // Strings that must fail to lex.
    let failing = [
        "'\n'",
        "\"\n\"",
        "this = a + test + r\"",
        "test + \' of thing that",
        "test + \' of thing that\n'",
    ];
    lexer_fail_golden_test("string_lit", &failing);

    // Triple-quoted strings, with escaped and literal newlines.
    let triple_input =
        "'''''' '''\\n''' '''\n''' \"\"\"\"\"\" \"\"\"\\n\"\"\" \"\"\"\n\"\"\"";
    let triple_expected = "\"\" \"\\n\" \"\\n\" \"\" \"\\n\" \"\\n\" \n";
    assert_eq!(lex(triple_input), triple_expected);

    // Raw strings.
    let raw_input = "r'' r\"\" r'\\'' r\"\\\"\" r'\"' r\"'\" r'\\n'";
    let raw_expected = "\"\" \"\" \"\'\" \"\\\"\" \"\\\"\" \"\'\" \"\\\\n\" \n";
    assert_eq!(lex(raw_input), raw_expected);
}
/// Golden test for backslash-digit and `\x` escapes inside strings. Also
/// records the lex errors for malformed or truncated `\x` escapes and for the
/// `\UFFFFFFFF` escape.
#[test]
fn test_string_escape() {
lexer_golden_test(
"string_escape",
r#"
'\0\0\1n'
'\0\00\000\0000'
'\x000'
'\372x'
"#,
);
lexer_fail_golden_test(
"string_escape",
&[
"test 'more \\xTZ",
"test + 'more \\UFFFFFFFF overflows'",
"test 'more \\x0yabc'",
"test 'more \\x0",
],
);
}
/// Golden test for a small program: a triple-quoted docstring, a `def`
/// header, a comment, and a `print` call.
///
/// NOTE(review): the lines after `def _impl(ctx):` carry no leading
/// whitespace; confirm this matches `simple_example.golden`.
#[test]
fn test_simple_example() {
lexer_golden_test(
"simple_example",
"\"\"\"A docstring.\"\"\"
def _impl(ctx):
# Print Hello, World!
print('Hello, World!')
",
);
}
/// Golden test for a backslash at the end of a line, placed between the
/// tokens `a` and `b`.
#[test]
fn test_escape_newline() {
lexer_golden_test(
"escape_newline",
r#"
a \
b
"#,
);
}
/// Golden test for a triple-quoted string that spans several lines, each
/// ending in a backslash.
#[test]
fn test_lexer_multiline_triple() {
lexer_golden_test(
"multiline_triple",
r#"
cmd = """A \
B \
C \
""""#,
);
}
/// Checks the exact `(start, token, end)` byte spans produced for a small
/// program.
///
/// The expected spans pin the layout of the program text: `Indent` covering
/// bytes 14..16 requires a two-space indent before `fail(a)`, and the
/// consecutive `Newline` tokens at 23..24 and 24..25 require a blank line
/// before the top-level `test("abc")` call. The whole program is 37 bytes.
#[test]
fn test_span() {
    use crate::lexer::Token::*;
    let expected = vec![
        (0, Newline, 1),
        (1, Def, 4),
        (5, Identifier("test".to_owned()), 9),
        (9, OpeningRound, 10),
        (10, Identifier("a".to_owned()), 11),
        (11, ClosingRound, 12),
        (12, Colon, 13),
        (13, Newline, 14),
        (14, Indent, 16),
        (16, Identifier("fail".to_owned()), 20),
        (20, OpeningRound, 21),
        (21, Identifier("a".to_owned()), 22),
        (22, ClosingRound, 23),
        (23, Newline, 24),
        (24, Newline, 25),
        (25, Dedent, 25),
        (25, Identifier("test".to_owned()), 29),
        (29, OpeningRound, 30),
        (30, String("abc".to_owned()), 35),
        (35, ClosingRound, 36),
        (36, Newline, 37),
        (37, Newline, 37),
    ];
    // Whitespace inside this literal is significant: keep the two-space
    // indent and the blank line, or every offset above shifts.
    let actual = lex_tokens(
        r#"
def test(a):
  fail(a)

test("abc")
"#,
    );
    assert_eq!(expected, actual);
}
/// Golden test for a program whose last line is a comment with no trailing
/// newline.
#[test]
fn test_lexer_final_comment() {
lexer_golden_test(
"final_comment",
r#"
x
# test"#,
);
}
/// Golden test named `dedent`, over a `def` containing two `if` headers and
/// two `pass` statements.
///
/// NOTE(review): the program lines below carry no leading whitespace, so the
/// nesting the test name suggests is not visible here; confirm against
/// `dedent.golden`.
#[test]
fn test_lexer_dedent() {
lexer_golden_test(
"dedent",
r#"
def stuff():
if 1:
if 1:
pass
pass
"#,
);
}
/// Golden test for operators written back-to-back with no separating
/// whitespace.
#[test]
fn test_lexer_operators() {
lexer_golden_test(
"operators",
r#"
1+-2
1+------2
///==/+-
"#,
);
}
/// Records the error message produced for several programs that do not lex:
/// an unknown symbol, an unterminated string, a short `\x` escape, a number
/// with leading zeros, and a reserved word.
#[test]
fn test_lexer_error_messages() {
    let failing = [
        "unknown $&%+ operator",
        "an 'incomplete string\nends",
        "an + 'invalid escape \\x3 character'",
        "leading_zero = 003 + 8",
        "reserved_word = raise + 1",
    ];
    lexer_fail_golden_test("error_messages", &failing);
}
/// Golden test for float literals: with a leading or trailing dot, with
/// signed and unsigned exponents, and with an upper-case `E`.
#[test]
fn test_float_lit() {
lexer_golden_test(
"float_lit",
r#"
0.0 0. .0
1e10 1e+10 1e-10
1.1e10 1.1e+10 1.1e-10
0. .123 3.14 .2e3 1E+4
"#,
);
}
/// Golden test for `f`-prefixed strings in both quote styles, followed by
/// `fr`-prefixed strings.
#[test]
fn test_f_string() {
lexer_golden_test(
"f_string",
r#"
f"basic1 {stuff1}"
f'basic2 {stuff2}'
# Raw f-string
fr'' fr"" fr'\'' fr"\"" fr'"' fr"'" fr'\n'
"#,
);
}