use std::sync::LazyLock;
/// Declares one lazily-initialised `pub static` per `NAME = pattern;` entry.
///
/// Each generated static is a `LazyLock<(&'static str, &'static str)>` whose
/// first element is the static's identifier converted to lowercase and whose
/// second element is the value of the `pattern` expression. Entries may be
/// preceded by doc comments, which are forwarded to the generated static.
macro_rules! lazy {
    ($($(#[doc = $docs:literal])* $ident:ident = $regex:expr;)*) => {
        $(
            $(#[doc = $docs])*
            pub static $ident: LazyLock<(&'static str, &'static str)> = LazyLock::new(|| {
                // The lowercased identifier only exists at run time, so it is
                // leaked once to obtain a `'static` string slice.
                let label: &'static str =
                    Box::leak(stringify!($ident).to_lowercase().into_boxed_str());
                (label, $regex)
            });
        )*
    };
}
/// Formats `$template` with `format!` and leaks the result.
///
/// The formatted `String` is converted into a boxed `str` and handed to
/// `Box::leak`, so the expansion evaluates to a string reference that is never
/// freed. Every invocation allocates and leaks a new string.
macro_rules! format_str {
    ($template:expr) => {{
        let rendered: String = format!($template);
        Box::leak(rendered.into_boxed_str())
    }};
}
/// Regex fragment for the body of a quoted string (without the delimiters).
///
/// Lazily repeats either a backslash followed by one character matched by `.`
/// or a single character that is not a backslash.
const STRING_BASE: &str = r"(?:\\.|[^\\])*?";
/// Regex fragment for an unsigned integer.
///
/// One ASCII digit, optionally followed by any mix of ASCII digits and
/// underscores that ends in a digit; the match therefore always starts and
/// ends with a digit.
const INT_BASE: &str = r"[0-9](?:[0-9_]*[0-9])?";
/// Regex fragment for an unsigned floating-point number.
///
/// The fragment is a top-level alternation, so callers that prepend or append
/// anything must wrap it in a group first. The integer sub-pattern is spelled
/// out each time because `concat!` only accepts literals.
const FLOAT_BASE: &str = concat!(
    // Alternative 1: integer digits followed by a mandatory exponent (`1e3`).
    r"[0-9](?:[0-9_]*[0-9])?",
    r"(?:[eE][+\-]?[0-9](?:[0-9_]*[0-9])?)",
    r"|",
    // Alternative 2: a decimal (`1.5`, `1.` or `.5`) ...
    r"(?:",
    r"[0-9](?:[0-9_]*[0-9])?\.(?:[0-9](?:[0-9_]*[0-9])?)?",
    r"|\.[0-9](?:[0-9_]*[0-9])?",
    r")",
    // ... followed by an optional exponent.
    r"(?:[eE][+\-]?[0-9](?:[0-9_]*[0-9])?)?",
);
// Table of commonly used token patterns. Each entry expands to a public
// static holding `(lowercased entry name, regex pattern)`.
lazy! {
    /// A single-quoted character: exactly one character between `'` delimiters,
    /// where that character is either the escaped quote `\'` or anything
    /// other than `'`.
    CHAR = r"'(?:\\'|[^'])'";
    /// A string delimited by single quotes; a backslash escapes the character
    /// that follows it.
    SINGLE_QUOTED_STRING = format_str!("'{STRING_BASE}'");
    /// A string delimited by double quotes; a backslash escapes the character
    /// that follows it.
    DOUBLE_QUOTED_STRING = format_str!(r#""{STRING_BASE}""#);
    /// A single ASCII letter.
    LETTER = r"[A-Za-z]";
    // Non-capturing group, like every other pattern in this table; the
    // capture was never needed and the matched text is unchanged.
    /// One or more ASCII letters, optionally followed by further
    /// hyphen-separated runs of letters (e.g. `jack-o-lantern`).
    WORD = r"[A-Za-z]+(?:-[A-Za-z]+)*";
    // `0-9` rather than `\d`: assuming patterns are compiled by the `regex`
    // crate in its default Unicode mode, `\d` would also accept non-ASCII
    // decimal digits, which the other digit patterns here do not.
    /// A C-style identifier: an ASCII letter or `_`, followed by any number
    /// of ASCII letters, ASCII digits, or `_`.
    C_NAME = r"[_A-Za-z][_A-Za-z0-9]*";
    /// A line break: `\n`, optionally preceded by `\r`.
    NEWLINE = r"\r?\n";
    /// A single ASCII decimal digit.
    DIGIT = r"[0-9]";
    /// A single ASCII hexadecimal digit, in either case.
    HEXDIGIT = r"[0-9A-Fa-f]";
    /// An integer without a sign; underscores are allowed between digits.
    UNSIGNED_INT = INT_BASE;
    /// An integer with a mandatory leading `+` or `-`.
    SIGNED_INT = format_str!(r"[+\-]{INT_BASE}");
    /// A number containing a decimal point and no exponent: `1.5`, `1.` or `.5`.
    DECIMAL = format_str!(r"{INT_BASE}\.(?:{INT_BASE})?|\.{INT_BASE}");
    /// A float without a sign; it must contain a decimal point or an exponent.
    UNSIGNED_FLOAT = FLOAT_BASE;
    /// A float with a mandatory leading `+` or `-`.
    SIGNED_FLOAT = format_str!(r"[+\-](?:{FLOAT_BASE})");
    /// Either a double-quoted or a single-quoted string.
    STRING = format_str!(r#""{STRING_BASE}"|'{STRING_BASE}'"#);
    /// A float or an integer without a sign.
    UNSIGNED_NUMBER = format_str!("{FLOAT_BASE}|{INT_BASE}");
    /// A float or an integer with a mandatory leading `+` or `-`.
    SIGNED_NUMBER = format_str!(r"[+\-](?:(?:{FLOAT_BASE})|{INT_BASE})");
    /// An integer with an optional leading `+` or `-`.
    INT = format_str!(r"[+\-]?{INT_BASE}");
    /// A float with an optional leading `+` or `-`.
    FLOAT = format_str!(r"[+\-]?(?:{FLOAT_BASE})");
    /// A float or an integer with an optional leading `+` or `-`.
    NUMBER = format_str!(r"[+\-]?(?:(?:{FLOAT_BASE})|{INT_BASE})");
}
#[cfg(test)]
mod tests {
use crate::{Tokenizer, common, error::Error};
/// Builds a default `Tokenizer` configured with the single given
/// `(name, pattern)` pair.
///
/// # Panics
/// Panics if `with_patterns` rejects the pattern.
fn prepare_tokenizer<'a>(pattern: (&'static str, &'static str)) -> Tokenizer<'a> {
    let patterns = [pattern];
    let configured = Tokenizer::default().with_patterns(&patterns);
    configured.expect("the pattern should be valid")
}
/// Expected result of tokenizing one test input: `Ok` lists the expected
/// token values in order, `Err` holds the `(character, position)` pair that
/// `test_patterns` compares against the fields of `Error::BadToken`.
type TestOutcome<'a> = Result<Vec<&'a str>, (char, usize)>;
fn test_patterns(tokenizer: &Tokenizer<'_>, tests: Vec<(&str, TestOutcome)>) {
for (inp, out) in tests {
match (tokenizer.tokenize(inp).find(Result::is_err), out) {
(
Some(Err(Error::BadToken(err_value, err_position))),
Err((expected_err_value, expected_err_position)),
) => {
assert_eq!(err_value, expected_err_value);
assert_eq!(err_position, expected_err_position);
}
(None, Ok(expected_values)) => {
let values = tokenizer
.tokenize(inp)
.map(Result::unwrap)
.map(|token| token.value)
.collect::<Vec<_>>();
assert_eq!(values, expected_values);
}
(res, exp) => {
panic!("Mismatched result for input {inp:?}: got {res:?}, expected {exp:?}")
}
}
}
}
/// Checks `common::SINGLE_QUOTED_STRING` against terminated, unterminated and
/// escaped inputs.
#[test]
fn single_quoted_string() {
    let tokenizer = prepare_tokenizer(common::SINGLE_QUOTED_STRING.clone());
    let cases = vec![
        ("'test'", Ok(vec!["'test'"])),
        // The first two quotes form an empty string; the third is left over.
        ("'''", Err(('\'', 2))),
        ("test", Err(('t', 0))),
        ("'test", Err(('\'', 0))),
        ("\\'test'", Err(('\\', 0))),
        ("'\\'test'", Ok(vec!["'\\'test'"])),
        // The final quote is escaped, so the string is never closed.
        ("'test\\'", Err(('\'', 0))),
        ("'test\\ntest'", Ok(vec!["'test\\ntest'"])),
        ("''", Ok(vec!["''"])),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks `common::DOUBLE_QUOTED_STRING` against terminated, unterminated and
/// escaped inputs.
#[test]
fn double_quoted_string() {
    let tokenizer = prepare_tokenizer(common::DOUBLE_QUOTED_STRING.clone());
    let cases = vec![
        ("\"test\"", Ok(vec!["\"test\""])),
        // The first two quotes form an empty string; the third is left over.
        ("\"\"\"", Err(('"', 2))),
        ("test", Err(('t', 0))),
        ("\"test", Err(('"', 0))),
        ("\\\"test\"", Err(('\\', 0))),
        (r#""\"test""#, Ok(vec![r#""\"test""#])),
        // The final quote is escaped, so the string is never closed.
        ("\"test\\\"", Err(('"', 0))),
        ("\"test\\ntest\"", Ok(vec!["\"test\\ntest\""])),
        ("\"\"", Ok(vec!["\"\""])),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks that `common::STRING` accepts both quoting styles back to back.
#[test]
fn string() {
    let tokenizer = prepare_tokenizer(common::STRING.clone());
    let cases = vec![("'test'\"test\"", Ok(vec!["'test'", "\"test\""]))];
    test_patterns(&tokenizer, cases);
}
/// Checks `common::CHAR`: exactly one character (or an escaped quote) must
/// sit between the two single quotes.
#[test]
fn char() {
    let tokenizer = prepare_tokenizer(common::CHAR.clone());
    let cases = vec![
        ("'t'", Ok(vec!["'t'"])),
        ("'''", Err(('\'', 0))),
        ("'\\''", Ok(vec!["'\\''"])),
        ("t", Err(('t', 0))),
        ("t'", Err(('t', 0))),
        ("'t", Err(('\'', 0))),
        ("\\'t'", Err(('\\', 0))),
        ("'t\\'", Err(('\'', 0))),
        // Two characters between the quotes are rejected, and so is none.
        ("'tt'", Err(('\'', 0))),
        ("''", Err(('\'', 0))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks that `common::LETTER` matches single ASCII letters and rejects
/// punctuation.
#[test]
fn letter() {
    let tokenizer = prepare_tokenizer(common::LETTER.clone());
    let cases = vec![
        // Each letter becomes its own token.
        ("AZaz", Ok(vec!["A", "Z", "a", "z"])),
        ("Wow!", Err(('!', 3))),
        ("!", Err(('!', 0))),
        ("@", Err(('@', 0))),
        ("|", Err(('|', 0))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks `common::WORD`: hyphens are accepted only between two runs of
/// letters.
#[test]
fn word() {
    let tokenizer = prepare_tokenizer(common::WORD.clone());
    let cases = vec![
        ("A", Ok(vec!["A"])),
        ("word", Ok(vec!["word"])),
        (" word", Err((' ', 0))),
        ("-", Err(('-', 0))),
        // A trailing, leading or doubled hyphen is not part of a word.
        ("a-", Err(('-', 1))),
        ("-a", Err(('-', 0))),
        ("a-a", Ok(vec!["a-a"])),
        ("a--a", Err(('-', 1))),
        ("thread-safe", Ok(vec!["thread-safe"])),
        ("thread-", Err(('-', 6))),
        ("-jack-o", Err(('-', 0))),
        ("jack-o-lantern", Ok(vec!["jack-o-lantern"])),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks that `common::DIGIT` matches exactly the ASCII digits `0`-`9`.
#[test]
fn digit() {
    let tokenizer = prepare_tokenizer(common::DIGIT.clone());
    let cases = vec![
        (
            "0123456789",
            Ok(vec!["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]),
        ),
        // U+0665 ARABIC-INDIC DIGIT FIVE: a decimal digit outside ASCII must
        // be rejected. It is written as an escape because the literal had
        // been corrupted into two characters, which is not a valid `char`
        // literal.
        ("\u{0665}", Err(('\u{0665}', 0))),
        // The ASCII characters immediately before `0` and after `9`.
        ("/", Err(('/', 0))),
        (":", Err((':', 0))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks `common::UNSIGNED_INT`, including leading zeros and underscore
/// separators.
#[test]
fn unsigned_int() {
    let tokenizer = prepare_tokenizer(common::UNSIGNED_INT.clone());
    let cases = vec![
        ("21", Ok(vec!["21"])),
        ("037", Ok(vec!["037"])),
        ("1_000_000", Ok(vec!["1_000_000"])),
        // Consecutive underscores between digits are accepted.
        ("1__0", Ok(vec!["1__0"])),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks that `common::SIGNED_INT` requires a leading sign.
#[test]
fn signed_int() {
    let tokenizer = prepare_tokenizer(common::SIGNED_INT.clone());
    let cases = vec![
        ("+21", Ok(vec!["+21"])),
        ("-37", Ok(vec!["-37"])),
        // The sign of the second number starts a new token.
        ("-142+315", Ok(vec!["-142", "+315"])),
        ("13", Err(('1', 0))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks `common::DECIMAL`: a decimal point is mandatory, digits may be
/// missing on one side of it but not on both.
#[test]
fn decimal() {
    let tokenizer = prepare_tokenizer(common::DECIMAL.clone());
    let cases = vec![
        ("3.14", Ok(vec!["3.14"])),
        ("3.0", Ok(vec!["3.0"])),
        ("21.37", Ok(vec!["21.37"])),
        ("2_1.37", Ok(vec!["2_1.37"])),
        ("2_1.3_7", Ok(vec!["2_1.3_7"])),
        ("0.92", Ok(vec!["0.92"])),
        ("0000.92", Ok(vec!["0000.92"])),
        (".92", Ok(vec![".92"])),
        ("3.", Ok(vec!["3."])),
        // Splits into a trailing-point decimal and a leading-point decimal.
        ("3..3", Ok(vec!["3.", ".3"])),
        ("3..", Err(('.', 2))),
        ("3", Err(('3', 0))),
        (".", Err(('.', 0))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks that `common::HEXDIGIT` matches single hexadecimal digits in
/// either case and nothing beyond `f`.
#[test]
fn hexdigit() {
    let tokenizer = prepare_tokenizer(common::HEXDIGIT.clone());
    let cases = vec![
        ("3Da", Ok(vec!["3", "D", "a"])),
        // A `0x` prefix is not part of the pattern.
        ("0x", Err(('x', 1))),
        ("g", Err(('g', 0))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks `common::C_NAME`: identifiers may contain digits but must not start
/// with one, and non-ASCII letters are rejected.
#[test]
fn c_name() {
    let tokenizer = prepare_tokenizer(common::C_NAME.clone());
    let cases = vec![
        ("W", Ok(vec!["W"])),
        ("_", Ok(vec!["_"])),
        ("word", Ok(vec!["word"])),
        ("two_words", Ok(vec!["two_words"])),
        ("_word", Ok(vec!["_word"])),
        ("_two_words", Ok(vec!["_two_words"])),
        ("0word", Err(('0', 0))),
        ("word0", Ok(vec!["word0"])),
        ("_0word", Ok(vec!["_0word"])),
        ("_word0", Ok(vec!["_word0"])),
        ("0", Err(('0', 0))),
        ("2322", Err(('2', 0))),
        ("wórd", Err(('ó', 1))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks that `common::NEWLINE` matches `\n` and `\r\n` but not a lone `\r`.
#[test]
fn newline() {
    let tokenizer = prepare_tokenizer(common::NEWLINE.clone());
    let cases = vec![
        ("\n", Ok(vec!["\n"])),
        ("\r\n", Ok(vec!["\r\n"])),
        ("\r", Err(('\r', 0))),
        // A literal backslash followed by `n` is not a line break.
        ("\\n", Err(('\\', 0))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks `common::UNSIGNED_FLOAT`: a decimal point or an exponent is
/// required, and an exponent needs at least one digit.
#[test]
fn unsigned_float() {
    let tokenizer = prepare_tokenizer(common::UNSIGNED_FLOAT.clone());
    let cases = vec![
        // A plain integer is not a float.
        ("13", Err(('1', 0))),
        ("13.", Ok(vec!["13."])),
        (".13", Ok(vec![".13"])),
        ("1e3", Ok(vec!["1e3"])),
        ("1e+3", Ok(vec!["1e+3"])),
        // The exponent takes only integer digits; `.5` becomes a second token.
        ("1e+3.5", Ok(vec!["1e+3", ".5"])),
        ("1e-3", Ok(vec!["1e-3"])),
        ("1E3", Ok(vec!["1E3"])),
        (".0e3", Ok(vec![".0e3"])),
        ("1.e5", Ok(vec!["1.e5"])),
        ("1.0e3", Ok(vec!["1.0e3"])),
        ("1.0e+3", Ok(vec!["1.0e+3"])),
        ("1.0e-3", Ok(vec!["1.0e-3"])),
        ("1_0.5_0e-3_0", Ok(vec!["1_0.5_0e-3_0"])),
        ("1.0e", Err(('e', 3))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks `common::SIGNED_FLOAT`: like the unsigned float, but every token
/// must start with `+` or `-`.
#[test]
fn signed_float() {
    let tokenizer = prepare_tokenizer(common::SIGNED_FLOAT.clone());
    let cases = vec![
        // A signed integer is not a float.
        ("+1", Err(('+', 0))),
        ("+1e3", Ok(vec!["+1e3"])),
        ("-1e+3", Ok(vec!["-1e+3"])),
        // The leftover `.5` has no sign, so it cannot start a new token.
        ("+1e+3.5", Err(('.', 5))),
        ("+1e+3+.5", Ok(vec!["+1e+3", "+.5"])),
        ("-1e-3", Ok(vec!["-1e-3"])),
        ("+1E3", Ok(vec!["+1E3"])),
        ("1E3", Err(('1', 0))),
        ("-1.0e3", Ok(vec!["-1.0e3"])),
        ("+1.0e+3", Ok(vec!["+1.0e+3"])),
        ("-1.0e-3", Ok(vec!["-1.0e-3"])),
        ("-1_0.5_0e-3_0", Ok(vec!["-1_0.5_0e-3_0"])),
        ("+1.0e", Err(('e', 4))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks that `common::UNSIGNED_NUMBER` accepts both integers and floats.
#[test]
fn unsigned_number() {
    let tokenizer = prepare_tokenizer(common::UNSIGNED_NUMBER.clone());
    let cases = vec![
        ("1", Ok(vec!["1"])),
        ("1.0", Ok(vec!["1.0"])),
        ("1_0.0_0", Ok(vec!["1_0.0_0"])),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks that `common::SIGNED_NUMBER` accepts signed integers and floats and
/// rejects unsigned ones.
#[test]
fn signed_number() {
    let tokenizer = prepare_tokenizer(common::SIGNED_NUMBER.clone());
    let cases = vec![
        ("+1", Ok(vec!["+1"])),
        ("+1_0", Ok(vec!["+1_0"])),
        ("-1.0", Ok(vec!["-1.0"])),
        ("1", Err(('1', 0))),
        ("1.0", Err(('1', 0))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks that `common::INT` splits a run of optionally signed integers at
/// each sign.
#[test]
fn int() {
    let tokenizer = prepare_tokenizer(common::INT.clone());
    let cases = vec![(
        "10+200-3000-4_000",
        Ok(vec!["10", "+200", "-3000", "-4_000"]),
    )];
    test_patterns(&tokenizer, cases);
}
/// Checks `common::FLOAT` on runs of adjacent, optionally signed floats.
#[test]
fn float() {
    let tokenizer = prepare_tokenizer(common::FLOAT.clone());
    let cases = vec![
        ("8_192.8_3-77641702.4", Ok(vec!["8_192.8_3", "-77641702.4"])),
        ("8.83-77641702.4", Ok(vec!["8.83", "-77641702.4"])),
        // An exponent takes only integer digits, so `.0` is a separate token.
        ("-497e4815.0+19.", Ok(vec!["-497e4815", ".0", "+19."])),
        ("-25.-7.6320036.8", Ok(vec!["-25.", "-7.6320036", ".8"])),
        ("11.9+8e55009.239", Ok(vec!["11.9", "+8e55009", ".239"])),
        (".7e.68732406+ee", Err(('e', 2))),
        ("5e8336+8.+717.52", Ok(vec!["5e8336", "+8.", "+717.52"])),
        // Only one sign is allowed in front of a number.
        ("5e8336++8.+717.52", Err(('+', 6))),
    ];
    test_patterns(&tokenizer, cases);
}
/// Checks `common::NUMBER` on mixed runs of optionally signed integers and
/// floats, including the empty input.
#[test]
fn number() {
    let tokenizer = prepare_tokenizer(common::NUMBER.clone());
    let cases = vec![
        ("+8_192.8_3", Ok(vec!["+8_192.8_3"])),
        ("45692.+3795+74-e35.+", Err(('-', 14))),
        ("70-.8-", Err(('-', 5))),
        // A sign on its own is not a number.
        ("-", Err(('-', 0))),
        (
            "+491814+4.4677-3412.",
            Ok(vec!["+491814", "+4.4677", "-3412."]),
        ),
        (".e2..1", Err(('.', 0))),
        ("484-3+798.", Ok(vec!["484", "-3", "+798."])),
        ("2e6121+15+04", Ok(vec!["2e6121", "+15", "+04"])),
        (".537e0-5.56e097e16", Err(('e', 15))),
        ("-40e66.84712889820", Ok(vec!["-40e66", ".84712889820"])),
        ("+683011.+8557+e.76", Err(('+', 13))),
        ("662+2.60.305179", Ok(vec!["662", "+2.60", ".305179"])),
        // Empty input produces no tokens and no error.
        ("", Ok(vec![])),
        ("26286086801-8+.5", Ok(vec!["26286086801", "-8", "+.5"])),
        ("7179", Ok(vec!["7179"])),
    ];
    test_patterns(&tokenizer, cases);
}
}