use ::lazy_static::lazy_static;
use ::regex::Regex;
use crate::lexing::lexer::Lexer;
use crate::lexing::reader::reader::{Reader, ReaderResult};
use crate::token::{ParenthesisCloseToken, ParenthesisOpenToken, Tokens};
use crate::token::collect::{association, identifier, literal_bool, literal_int, literal_real, literal_text, operator, parenthesis_close, parenthesis_open, unlexable};
use crate::util::codeparts::operator::ASSOCIATION_RE;
use crate::util::codeparts::operator::SYMBOL_RE;
use crate::util::strtype::name::IDENTIFIER_RE;
use crate::util::parsetxt::int::parse_int;
use crate::util::parsetxt::int::INT_RE;
use crate::util::parsetxt::real::parse_real;
use crate::util::parsetxt::real::REAL_RE;
use crate::util::parsetxt::text::parse_single_quote;
use crate::util::parsetxt::text::SINGLE_QUOTE_RE;
lazy_static! {
// Matches one of the keyword constants `true`, `false`, `NaN` or `infinity`.
// `^` anchors the match to the start of the haystack, and the trailing `\b`
// requires a word boundary, so a longer word such as `trueq` does not match.
static ref CONSTANTS_RE: Regex = Regex::new(r"^(?:true|false|NaN|infinity)\b").unwrap();
}
/// Lexes literal tokens reported by `reader.strip_match` and appends them to `lexer`.
///
/// Four passes run in a fixed order, each one repeating for as long as its pattern
/// keeps matching: keyword constants, reals, integers, single-quoted text. A pass
/// that has finished is not revisited within the same call.
///
/// # Panics
///
/// * When `NaN` or `infinity` is matched; neither is supported yet.
/// * When the result of `parse_real` or `parse_int` cannot be unwrapped.
pub fn lex_literal(reader: &mut impl Reader, lexer: &mut impl Lexer) {
    // Pass 1: keyword constants.
    while let ReaderResult::Match(keyword) = reader.strip_match(&*CONSTANTS_RE) {
        let token = match keyword.as_str() {
            "true" => literal_bool(true),
            "false" => literal_bool(false),
            "NaN" => panic!("NaN is not currently supported"),
            "infinity" => panic!("infinity is not currently supported"),
            // CONSTANTS_RE only admits the four alternatives handled above.
            _ => unreachable!(),
        };
        lexer.add(token);
    }

    // Pass 2: reals. This runs before the integer pass
    // (presumably so `1.0` is not consumed as the integer `1` — confirm against INT_RE).
    while let ReaderResult::Match(digits) = reader.strip_match(&*REAL_RE) {
        let value = parse_real(digits.as_str()).unwrap();
        lexer.add(literal_real(value));
    }

    // Pass 3: integers.
    while let ReaderResult::Match(digits) = reader.strip_match(&*INT_RE) {
        let value = parse_int(digits.as_str()).unwrap();
        lexer.add(literal_int(value));
    }

    // Pass 4: single-quoted text.
    while let ReaderResult::Match(quoted) = reader.strip_match(&*SINGLE_QUOTE_RE) {
        let content = parse_single_quote(quoted.as_str());
        lexer.add(literal_text(content));
    }
}
/// Tests for lexing the keyword constants (`true`, `false`, `NaN`, `infinity`).
#[cfg(test)]
mod constants {
    use crate::lexing::lexer::Lexer;
    use crate::lexing::tests::create_lexer;
    use crate::token::{IdentifierToken, Tokens};
    use crate::token::collect::{identifier, literal_int, literal_bool};
    use crate::token::collect::token_list::TokenList;
    use crate::token::tokens::OperatorToken;
    use crate::util::codeparts::Symbol;
    use crate::util::strtype::Name;
    use crate::util::strtype::typ::StrType;
    use super::lex_literal;

    /// Runs `lex_literal` over `input` and asserts the lexer holds exactly `expected`.
    fn check(input: &str, expected: &[Tokens]) {
        // The first tuple element is not used directly but stays bound until the end.
        let (_source, mut reader, mut lexer) = create_lexer(input);
        lex_literal(&mut reader, &mut lexer);
        assert_eq!(lexer.tokens(), &expected.into());
    }

    #[test]
    fn empty() {
        check("", &[]);
    }

    /// A constant that follows non-literal input produces no tokens.
    #[test]
    fn after_mismatch() {
        check("a true", &[]);
        check("a NaN", &[]);
    }

    /// A keyword with trailing word characters is not a constant.
    #[test]
    fn too_long() {
        check("trueq", &[]);
        check("falseq", &[]);
        check("NaNq", &[]);
        check("infinityq", &[]);
    }

    #[test]
    fn bool() {
        check("true", &[literal_bool(true)]);
        check("false", &[literal_bool(false)]);
    }

    #[test]
    fn multiple() {
        check(
            "true false\ttrue false",
            &[
                literal_bool(true),
                literal_bool(false),
                literal_bool(true),
                literal_bool(false),
            ],
        );
    }
}
/// Tests for lexing integer literals.
#[cfg(test)]
mod int {
    use crate::lexing::lexer::Lexer;
    use crate::lexing::tests::create_lexer;
    use crate::token::{IdentifierToken, Tokens};
    use crate::token::collect::{identifier, literal_int, literal_bool};
    use crate::token::collect::token_list::TokenList;
    use crate::token::tokens::OperatorToken;
    use crate::util::codeparts::Symbol;
    use crate::util::strtype::Name;
    use crate::util::strtype::typ::StrType;
    use super::lex_literal;

    /// Runs `lex_literal` over `input` and asserts the lexer holds exactly `expected`.
    fn check(input: &str, expected: &[Tokens]) {
        // The first tuple element is not used directly but stays bound until the end.
        let (_source, mut reader, mut lexer) = create_lexer(input);
        lex_literal(&mut reader, &mut lexer);
        assert_eq!(lexer.tokens(), &expected.into());
    }

    #[test]
    fn empty() {
        check("", &[]);
    }

    #[test]
    fn mismatch() {
        check("!", &[]);
        check("a", &[]);
    }

    /// Digits that follow non-literal input produce no tokens.
    #[test]
    fn after_mismatch() {
        check("a 1", &[]);
        check("a1", &[]);
    }

    #[test]
    fn zero() {
        check("0", &[literal_int(0)]);
        check("0000000000000000000000000000000000", &[literal_int(0)]);
    }

    #[test]
    fn prefix() {
        check("+1", &[literal_int(1)]);
        check("-1", &[literal_int(-1)]);
    }

    /// Stacked sign prefixes.
    ///
    /// This function previously had no `#[test]` attribute, so it never ran. It is
    /// now registered but ignored, which makes the gap visible in test output.
    /// NOTE(review): `---1` is expected to lex as `1`, which looks suspicious —
    /// confirm the intended semantics before removing `#[ignore]`.
    #[test]
    #[ignore = "was never registered as a test; stacked-sign expectations are unconfirmed"]
    fn double_minus() {
        check("--1", &[literal_int(1)]);
        check("---1", &[literal_int(1)]);
        check("-+-1", &[literal_int(1)]);
    }

    #[test]
    fn valid_underscores() {
        check("1_2_3", &[literal_int(123)]);
    }

    /// Doubled, leading, or trailing underscores prevent the input from lexing as an integer.
    #[test]
    fn invalid_underscores() {
        check("1__2_3", &[]);
        check("_1_2_3", &[]);
        check("123_", &[]);
    }

    /// Both extremes of the `i64` range must round-trip.
    #[test]
    fn long() {
        let big = ::std::i64::MAX.to_string();
        check(&big, &[literal_int(::std::i64::MAX)]);
        let small = ::std::i64::MIN.to_string();
        check(&small, &[literal_int(::std::i64::MIN)]);
    }

    #[test]
    fn multiple() {
        check(
            "1 2 3 1234567890",
            &[
                literal_int(1),
                literal_int(2),
                literal_int(3),
                literal_int(1234567890),
            ],
        );
    }
}
/// Tests for lexing real (floating point) literals.
#[cfg(test)]
mod real {
    use crate::lexing::lexer::Lexer;
    use crate::lexing::tests::create_lexer;
    use crate::token::{IdentifierToken, Tokens};
    use crate::token::collect::{identifier, literal_int, literal_bool, literal_real};
    use crate::token::collect::token_list::TokenList;
    use crate::token::tokens::OperatorToken;
    use crate::util::codeparts::Symbol;
    use crate::util::strtype::Name;
    use crate::util::strtype::typ::StrType;
    use super::lex_literal;

    /// Runs `lex_literal` over `input` and asserts the lexer holds exactly `expected`.
    fn check(input: &str, expected: &[Tokens]) {
        // The first tuple element is not used directly but stays bound until the end.
        let (_source, mut reader, mut lexer) = create_lexer(input);
        lex_literal(&mut reader, &mut lexer);
        assert_eq!(lexer.tokens(), &expected.into());
    }

    #[test]
    fn empty() {
        check("", &[]);
    }

    #[test]
    fn mismatch() {
        check("!", &[]);
        check("a", &[]);
    }

    /// A real that follows non-literal input produces no tokens.
    #[test]
    fn after_mismatch() {
        check("a 1.0", &[]);
        check("a1.0", &[]);
    }

    #[test]
    fn zero() {
        check("0.0", &[literal_real(0.0)]);
        check("0.000000000000000000000000000000000", &[literal_real(0.0)]);
        check("000000000000000000000000000000000.0", &[literal_real(0.0)]);
    }

    #[test]
    fn prefix() {
        check("+1.0", &[literal_real(1.0)]);
        check("-1.0", &[literal_real(-1.0)]);
    }

    /// Signs are accepted on both the mantissa and the exponent.
    #[test]
    fn exponential() {
        check("1.0e1", &[literal_real(10.0)]);
        check("1.0e-1", &[literal_real(0.10)]);
        check("-1.0e1", &[literal_real(-10.0)]);
        check("-1.0e-1", &[literal_real(-0.10)]);
        check("+1.0e+1", &[literal_real(10.0)]);
    }

    #[test]
    fn multiple() {
        check(
            "1.1 2.2 3.3 0.1234567890",
            &[
                literal_real(1.1),
                literal_real(2.2),
                literal_real(3.3),
                literal_real(0.1234567890),
            ],
        );
    }
}
/// Tests for lexing single-quoted text literals.
#[cfg(test)]
mod text {
    use crate::lexing::lexer::Lexer;
    use crate::lexing::tests::create_lexer;
    use crate::token::{IdentifierToken, Tokens};
    use crate::token::collect::{identifier, literal_int, literal_bool, literal_real, literal_text};
    use crate::token::collect::token_list::TokenList;
    use crate::token::tokens::OperatorToken;
    use crate::util::codeparts::Symbol;
    use crate::util::strtype::Name;
    use crate::util::strtype::typ::StrType;
    use super::lex_literal;

    /// Runs `lex_literal` over `input` and asserts the lexer holds exactly `expected`.
    fn check(input: &str, expected: &[Tokens]) {
        // The first tuple element is not used directly but stays bound until the end.
        let (_source, mut reader, mut lexer) = create_lexer(input);
        lex_literal(&mut reader, &mut lexer);
        assert_eq!(lexer.tokens(), &expected.into());
    }

    #[test]
    fn empty() {
        check("", &[]);
    }

    #[test]
    fn mismatch() {
        check("!", &[]);
        check("a", &[]);
    }

    /// Quoted text that follows non-literal input produces no tokens.
    #[test]
    fn after_mismatch() {
        check("a 'a'", &[]);
        check("a'a'", &[]);
    }

    #[test]
    fn no_content() {
        check("''", &[literal_text("")]);
    }

    #[test]
    fn simple() {
        check("'x'", &[literal_text("x")]);
        check("'hello world!'", &[literal_text("hello world!")]);
    }

    /// Double quotes inside single quotes are ordinary content.
    #[test]
    fn double_quotes() {
        check("'\"\"'", &[literal_text("\"\"")]);
    }

    /// A dangling third quote after an empty literal.
    ///
    /// This function previously had no `#[test]` attribute, so it never ran. It is
    /// now registered but ignored, which makes the gap visible in test output.
    /// NOTE(review): confirm the expectation holds before removing `#[ignore]`.
    #[test]
    #[ignore = "was never registered as a test; expectation is unconfirmed"]
    fn unbalanced() {
        check("'''", &[literal_text("")]);
    }

    /// The expected token keeps the backslash, i.e. the escape is not resolved.
    #[test]
    fn escaped() {
        check("'\\''", &[literal_text("\\'")]);
    }

    /// An escaped backslash directly before the closing quote.
    ///
    /// This function previously had no `#[test]` attribute, so it never ran. It is
    /// now registered but ignored, which makes the gap visible in test output.
    /// NOTE(review): confirm the expectation holds before removing `#[ignore]`.
    #[test]
    #[ignore = "was never registered as a test; expectation is unconfirmed"]
    fn escape_escaped() {
        check("'\\\\'", &[literal_text("\\\\")]);
    }

    #[test]
    fn repeated() {
        check(
            "'' 'hello' 'world'",
            &[
                literal_text(""),
                literal_text("hello"),
                literal_text("world"),
            ],
        );
        check(
            "'''' ''",
            &[
                literal_text(""),
                literal_text(""),
                literal_text(""),
            ],
        );
    }
}