use super::{BoxedObject, Dict, Object, RefValue};
use crate::reader::Reader;
use crate::vm::*;
use charclass::{charclass, CharClass};
use tokay_macros::tokay_token;
extern crate self as tokay;
/// Atomic matchers that consume input directly from a `Reader`.
///
/// The variant order is significant because `PartialOrd` is derived.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum Token {
    /// Always accepts, pushing an empty capture.
    Void,
    /// Accepts only when the reader has no more characters.
    EOF,
    /// Matches exactly one character contained in the character class.
    Char(CharClass),
    /// Matches exactly one character accepted by the predicate function.
    BuiltinChar(fn(ch: char) -> bool),
    /// Matches one or more consecutive characters contained in the character class.
    Chars(CharClass),
    /// Matches one or more consecutive characters accepted by the predicate function.
    BuiltinChars(fn(ch: char) -> bool),
    /// Matches the given string exactly.
    Match(String),
    /// Matches the given string exactly, but captures it with value 0 instead of 5
    /// in the third `Capture::Range` field.
    Touch(String),
}
impl Token {
pub fn builtin(ident: &str) -> Option<Token> {
fn builtin_ccl(ident: &str) -> Option<Token> {
Some(match ident {
"Alphabetic" => Token::BuiltinChar(|c| c.is_alphabetic()),
"Alphanumeric" => Token::BuiltinChar(|c| c.is_alphanumeric()),
"Ascii" => Token::BuiltinChar(|c| c.is_ascii()),
"AsciiAlphabetic" => Token::Char(charclass!['A' => 'Z', 'a' => 'z']),
"AsciiAlphanumeric" => Token::Char(charclass!['A' => 'Z', 'a' => 'z', '0' => '9']),
"AsciiControl" => Token::BuiltinChar(|c| c.is_ascii_control()),
"AsciiDigit" => Token::Char(charclass!['0' => '9']),
"AsciiGraphic" => Token::Char(charclass!['!' => '~']),
"AsciiHexdigit" => Token::Char(charclass!['0' => '9', 'A' => 'F', 'a' => 'f']),
"AsciiLowercase" => Token::Char(charclass!['a' => 'z']),
"AsciiPunctuation" => Token::BuiltinChar(|c| c.is_ascii_punctuation()),
"AsciiUppercase" => Token::Char(charclass!['A' => 'Z']),
"AsciiWhitespace" => Token::Char(charclass!['A' => 'Z', 'a' => 'z']),
"Control" => Token::BuiltinChar(|c| c.is_control()),
"Digit" => Token::BuiltinChar(|c| c.is_digit(10)),
"Lowercase" => Token::BuiltinChar(|c| c.is_lowercase()),
"Numeric" => Token::BuiltinChar(|c| c.is_numeric()),
"Uppercase" => Token::BuiltinChar(|c| c.is_uppercase()),
"Whitespace" => Token::BuiltinChar(|c| c.is_whitespace()),
ident if ident.len() > 1 && ident.ends_with("s") => {
if let Some(Token::BuiltinChar(f)) = builtin_ccl(&ident[..ident.len() - 1]) {
Token::BuiltinChars(f)
} else {
return None;
}
}
_ => return None,
})
}
match ident {
"Any" => Some(Token::any()),
"EOF" => Some(Token::EOF),
"Void" => Some(Token::Void),
ident => builtin_ccl(ident),
}
}
pub fn any() -> Self {
Self::Char(CharClass::new().negate())
}
/// Tries to match this token at the reader's current position.
///
/// On success the matched input is returned as a capture (`Void` pushes an
/// empty capture, `EOF` accepts without pushing anything). The third
/// `Capture::Range` field is 5 for every token except `Touch`, which uses 0.
/// On failure `Err(Reject::Next)` is returned; the multi-character variants
/// reset the reader to where they started before rejecting.
pub fn read(&self, reader: &mut Reader) -> Result<Accept, Reject> {
    match self {
        Token::Void => Ok(Accept::Push(Capture::Empty)),

        // Accept only when no further character is available.
        Token::EOF => match reader.peek() {
            None => Ok(Accept::Next),
            Some(_) => Err(Reject::Next),
        },

        // Single character taken from a character class.
        Token::Char(ccl) => match reader.take(|ch| ccl.test(&(ch..=ch))) {
            Some(ch) => Ok(Accept::Push(Capture::Range(
                reader.capture_last(ch.len_utf8()),
                None,
                5,
            ))),
            None => Err(Reject::Next),
        },

        // Single character accepted by a predicate function.
        Token::BuiltinChar(f) => match reader.take(f) {
            Some(ch) => Ok(Accept::Push(Capture::Range(
                reader.capture_last(ch.len_utf8()),
                None,
                5,
            ))),
            None => Err(Reject::Next),
        },

        // One or more characters from a character class.
        Token::Chars(ccl) => {
            let begin = reader.tell();

            while matches!(reader.peek(), Some(ch) if ccl.test(&(ch..=ch))) {
                reader.next();
            }

            let consumed = reader.capture_from(&begin);
            if consumed.len() == 0 {
                reader.reset(begin);
                return Err(Reject::Next);
            }

            Ok(Accept::Push(Capture::Range(consumed, None, 5)))
        }

        // One or more characters accepted by a predicate function.
        Token::BuiltinChars(f) => {
            let begin = reader.tell();

            while matches!(reader.peek(), Some(ch) if f(ch)) {
                reader.next();
            }

            let consumed = reader.capture_from(&begin);
            if consumed.len() == 0 {
                reader.reset(begin);
                return Err(Reject::Next);
            }

            Ok(Accept::Push(Capture::Range(consumed, None, 5)))
        }

        // Exact string; `Touch` differs from `Match` only in the third
        // `Capture::Range` field of the resulting capture.
        Token::Match(string) | Token::Touch(string) => {
            let begin = reader.tell();

            // Advance as long as the input agrees with the expected string.
            for expected in string.chars() {
                if reader.peek() != Some(expected) {
                    break;
                }
                reader.next();
            }

            let consumed = reader.capture_from(&begin);

            // Anything shorter than the full string is not a match.
            if consumed.len() != string.len() {
                reader.reset(begin);
                return Err(Reject::Next);
            }

            let severity = if matches!(self, Token::Touch(_)) {
                0
            } else {
                5
            };

            Ok(Accept::Push(Capture::Range(consumed, None, severity)))
        }
    }
}
}
impl Object for Token {
    /// Type name reported for token objects.
    fn name(&self) -> &'static str {
        "token"
    }

    /// A token is callable only when it is called without arguments.
    fn is_callable(&self, without_arguments: bool) -> bool {
        without_arguments
    }

    /// Every token is reported as consuming.
    fn is_consuming(&self) -> bool {
        true
    }

    /// Reports whether this token is treated as nullable.
    ///
    /// NOTE(review): the builtin predicate variants report `true` although
    /// `Token::read` only accepts them after at least one character has been
    /// taken; confirm that this is intended.
    fn is_nullable(&self) -> bool {
        match self {
            Token::EOF => false,
            Token::Void | Token::BuiltinChar(_) | Token::BuiltinChars(_) => true,
            // An empty class or an empty string is the only way these can be nullable.
            Token::Char(ccl) | Token::Chars(ccl) => ccl.len() == 0,
            Token::Match(s) | Token::Touch(s) => s.is_empty(),
        }
    }

    /// Invokes the token by reading from the reader of the context's runtime.
    ///
    /// # Panics
    ///
    /// Panics when any positional or named arguments are passed.
    fn call(
        &self,
        context: &mut Context,
        args: usize,
        nargs: Option<Dict>,
    ) -> Result<Accept, Reject> {
        assert!(args == 0 && nargs.is_none());

        self.read(context.runtime.reader)
    }
}
impl From<Token> for RefValue {
    /// Boxes the token as a `BoxedObject` and wraps it into a `RefValue`.
    fn from(token: Token) -> Self {
        let boxed: BoxedObject = Box::new(token);
        RefValue::from(boxed)
    }
}
// `Ident` token: an alphabetic character or underscore, followed by any number
// of alphanumeric characters or underscores.
tokay_token!("Ident", {
    let reader = &mut context.runtime.reader;
    let begin = reader.tell();

    match reader.take(|ch| ch.is_alphabetic() || ch == '_') {
        // The first character decides whether this is an identifier at all.
        None => Err(Reject::Next),
        Some(_) => {
            // The remainder is optional, so the result of `span` is not needed.
            reader.span(|ch| ch.is_alphanumeric() || ch == '_');

            let matched = reader.capture_from(&begin);
            Ok(Accept::Push(Capture::Range(matched, None, 5)))
        }
    }
});
// `Int` token: reads an integer in the given `base` (2 to 36), optionally
// preceded by a `+` or `-` sign when `with_signs` is true, and pushes it as an
// `i64` value.
//
// Returns an error when `base` is out of range or when the digits do not fit
// into an `i64`; rejects (restoring the reader) when no digit is present.
tokay_token!("Int(base=10, with_signs=true)", {
    let reader = &mut context.runtime.reader;
    let base = base.to_i64();

    // `char::is_digit` / `char::to_digit` panic for a radix outside 2..=36,
    // so the radix has to be validated before it is used.
    if base < 2 || base > 36 {
        return Err(format!(
            "{} base value is {}, allowed is only between 2 and 36",
            __function, base
        )
        .into());
    }
    let radix = base as u32;

    let start = reader.tell();

    // Optional sign
    let mut neg = false;
    if with_signs.is_true() {
        if let Some(ch) = reader.take(|ch: char| ch == '-' || ch == '+') {
            neg = ch == '-';
        }
    }

    // Digits
    if let Some(input) = reader.span(|ch: char| ch.is_digit(radix)) {
        let mut value: i64 = 0;

        for dig in input.chars() {
            // Every character passed `is_digit(radix)` in the span predicate above.
            let digit = i64::from(dig.to_digit(radix).unwrap());

            // Accumulate towards the sign's direction so that `i64::MIN` is
            // representable, and detect overflow instead of panicking or wrapping.
            let next = value.checked_mul(base).and_then(|shifted| {
                if neg {
                    shifted.checked_sub(digit)
                } else {
                    shifted.checked_add(digit)
                }
            });

            value = match next {
                Some(next) => next,
                None => {
                    return Err(format!(
                        "{} value does not fit into a 64-bit signed integer",
                        __function
                    )
                    .into())
                }
            };
        }

        return Ok(Accept::Push(Capture::Value(
            crate::value!(value),
            None,
            5,
        )));
    }

    // No digits: give back a sign that may have been consumed.
    reader.reset(start);
    Err(Reject::Next)
});
// `Float` token: reads a floating point number of the form
// `[sign] digits? '.' digits? [('e'|'E') [sign] digits]` with at least one
// digit around the decimal point, and pushes it as an `f64` value.
//
// The sign is only read when `with_signs` is true. An exponent marker that is
// not followed by digits is not part of the number and is left in the input.
tokay_token!("Float(with_signs=true)", {
    let reader = &mut context.runtime.reader;
    let start = reader.tell();

    // Optional sign
    if with_signs.is_true() {
        reader.take(|ch: char| ch == '-' || ch == '+');
    }

    // Integer part. Only ASCII digits are accepted: `char::is_numeric` also
    // matches characters such as '²' or '٣' which `f64::from_str` rejects,
    // which would make the final `unwrap()` panic.
    let has_int = reader.span(|ch: char| ch.is_ascii_digit()).is_some();

    // Decimal point is mandatory
    if reader.take(|ch: char| ch == '.').is_none() {
        reader.reset(start);
        return Err(Reject::Next);
    }

    // Fractional part; a lone "." is not a number
    if reader.span(|ch: char| ch.is_ascii_digit()).is_none() && !has_int {
        reader.reset(start);
        return Err(Reject::Next);
    }

    let mut range = reader.capture_from(&start);

    // Exponential notation
    let mantissa_end = reader.tell();
    if reader.take(|ch: char| ch == 'e' || ch == 'E').is_some() {
        reader.take(|ch: char| ch == '-' || ch == '+');

        if reader.span(|ch: char| ch.is_ascii_digit()).is_some() {
            // Extend the range only when a complete exponent could be read.
            range = reader.capture_from(&start);
        } else {
            // Incomplete exponent: rewind so the reader position agrees with
            // the captured range and the marker stays available to others.
            reader.reset(mantissa_end);
        }
    }

    // The captured text consists of ASCII digits, an optional sign, a decimal
    // point and an optional complete exponent, which `f64::from_str` accepts.
    Ok(Accept::Push(Capture::Value(
        crate::value!(reader.get(&range).parse::<f64>().unwrap()),
        None,
        5,
    )))
});
// `Word` token: reads a run of alphabetic characters whose length (counted in
// characters) is at least `min` and, unless `max` is void, at most `max`.
//
// A run outside these bounds is consumed but yields `Reject::Skip`; when no
// alphabetic character is present the token rejects with `Reject::Next`.
tokay_token!("Word(min=1 max=void)", {
    let reader = &mut context.runtime.reader;
    let start = reader.tell();

    if let Some(input) = reader.span(|ch| ch.is_alphabetic()) {
        let length = input.chars().count();

        // Both bound violations are handled the same way: the input stays
        // consumed, but no result is produced. Previously a too-long word
        // returned `Accept::Next`, unlike a too-short one.
        if length < min.to_usize() || (!max.is_void() && length > max.to_usize()) {
            return Err(Reject::Skip);
        }

        Ok(Accept::Push(Capture::Range(
            reader.capture_from(&start),
            None,
            5,
        )))
    } else {
        Err(Reject::Next)
    }
});
#[test]
#[allow(non_snake_case)]
fn builtin_tokens_Integer_Word_Whitespaces() {
    // Every program below scans the same input.
    let gliders = "Libelle 201b\tG102 Astir \nVentus-2cT";
    let scan = |program: &str| crate::run(program, gliders);

    // Identifiers
    let idents = scan("Ident");
    assert_eq!(
        idents,
        Ok(Some(crate::value!([
            "Libelle", "b", "G102", "Astir", "Ventus", "cT"
        ])))
    );

    // Integers: default settings, without signs, and hexadecimal
    let ints = scan("Int");
    assert_eq!(ints, Ok(Some(crate::value!([201, 102, (-2)]))));

    let unsigned = scan("Int(with_signs=false)");
    assert_eq!(unsigned, Ok(Some(crate::value!([201, 102, 2]))));

    let hex = scan("Int(16)");
    assert_eq!(
        hex,
        Ok(Some(crate::value!([190, 14, 8219, 258, 10, 14, (-44)])))
    );

    // Plural form of a builtin character class
    let blanks = scan("Whitespaces");
    assert_eq!(blanks, Ok(Some(crate::value!([" ", "\t", " ", " \n"]))));

    // Words: default, minimum length, minimum and maximum length
    let words = scan("Word");
    assert_eq!(
        words,
        Ok(Some(crate::value!([
            "Libelle", "b", "G", "Astir", "Ventus", "cT"
        ])))
    );

    let long_words = scan("Word(3)");
    assert_eq!(
        long_words,
        Ok(Some(crate::value!(["Libelle", "Astir", "Ventus"])))
    );

    let bounded_words = scan("Word(3, 6)");
    assert_eq!(bounded_words, Ok(Some(crate::value!(["Astir", "Ventus"]))));
}
#[test]
#[allow(non_snake_case)]
fn builtin_tokens_Float() {
    let scan = |input: &str| crate::run("Float", input);

    // Unsigned input: a lone "." is no number, a dangling "e" is not part of it.
    let unsigned = scan(". 13. .37 13.37 13.37e 13.37e+2 13.37E+2");
    assert_eq!(
        unsigned,
        Ok(Some(crate::value!([13., 0.37, 13.37, 13.37, 1337., 1337.])))
    );

    // The same input with a leading minus sign on every item.
    let signed = scan("-. -13. -.37 -13.37 -13.37e -13.37e+2 -13.37E+2");
    assert_eq!(
        signed,
        Ok(Some(crate::value!([
            (-13.),
            (-0.37),
            (-13.37),
            (-13.37),
            (-1337.),
            (-1337.)
        ])))
    );
}