use super::{BoxedObject, Dict, Object, RefValue};
use crate::vm::*;
use charclass::{charclass, CharClass};
use num_bigint::BigInt;
use num_parse::*;
use tokay_macros::tokay_token;
extern crate self as tokay;
#[derive(Debug, Clone, Hash, PartialEq, PartialOrd)]
pub enum Token {
Void, EOF, Char(CharClass), BuiltinChar(fn(ch: char) -> bool), Chars(CharClass), BuiltinChars(fn(ch: char) -> bool), Match(String), Touch(String), }
impl Token {
pub fn builtin(ident: &str) -> Option<Token> {
fn builtin_ccl(ident: &str) -> Option<Token> {
Some(match ident {
"Alphabetic" => Token::BuiltinChar(|c| c.is_alphabetic()),
"Alphanumeric" => Token::BuiltinChar(|c| c.is_alphanumeric()),
"Ascii" => Token::BuiltinChar(|c| c.is_ascii()),
"AsciiAlphabetic" => Token::Char(charclass!['A' => 'Z', 'a' => 'z']),
"AsciiAlphanumeric" => Token::Char(charclass!['A' => 'Z', 'a' => 'z', '0' => '9']),
"AsciiControl" => Token::BuiltinChar(|c| c.is_ascii_control()),
"AsciiDigit" => Token::Char(charclass!['0' => '9']),
"AsciiGraphic" => Token::Char(charclass!['!' => '~']),
"AsciiHexdigit" => Token::Char(charclass!['0' => '9', 'A' => 'F', 'a' => 'f']),
"AsciiLowercase" => Token::Char(charclass!['a' => 'z']),
"AsciiPunctuation" => Token::BuiltinChar(|c| c.is_ascii_punctuation()),
"AsciiUppercase" => Token::Char(charclass!['A' => 'Z']),
"AsciiWhitespace" => Token::Char(charclass!['A' => 'Z', 'a' => 'z']),
"Control" => Token::BuiltinChar(|c| c.is_control()),
"Digit" => Token::BuiltinChar(|c| c.is_digit(10)),
"Lowercase" => Token::BuiltinChar(|c| c.is_lowercase()),
"Numeric" => Token::BuiltinChar(|c| c.is_numeric()),
"Uppercase" => Token::BuiltinChar(|c| c.is_uppercase()),
"Whitespace" => Token::BuiltinChar(|c| c.is_whitespace()),
ident if ident.len() > 1 && ident.ends_with("s") => {
match builtin_ccl(&ident[..ident.len() - 1]) {
Some(Token::BuiltinChar(f)) => Token::BuiltinChars(f),
Some(Token::Char(c)) => Token::Chars(c),
_ => return None,
}
}
_ => return None,
})
}
match ident {
"EOF" => Some(Token::EOF),
"Void" => Some(Token::Void),
ident => builtin_ccl(ident),
}
}
}
impl Object for Token {
fn name(&self) -> &'static str {
"token"
}
fn repr(&self) -> String {
match self {
Token::Void => "Void".to_string(),
Token::EOF => "EOF".to_string(),
Token::Char(ccl) => format!("{:?}", ccl),
Token::Chars(ccl) => format!("{:?}+", ccl),
Token::BuiltinChar(_) | Token::BuiltinChars(_) => "\"<token builtin fn>\n".to_string(),
Token::Touch(s) => format!("'{}'", s),
Token::Match(s) => format!("''{}''", s),
}
}
fn is_callable(&self, without_arguments: bool) -> bool {
without_arguments }
fn is_consuming(&self) -> bool {
true }
fn is_nullable(&self) -> bool {
match self {
Token::Void => true,
Token::EOF => false,
Token::Char(ccl) | Token::Chars(ccl) => ccl.len() == 0, Token::BuiltinChar(_) | Token::BuiltinChars(_) => true,
Token::Match(s) | Token::Touch(s) => s.len() == 0, }
}
fn call(
&self,
context: Option<&mut Context>,
args: Vec<RefValue>,
nargs: Option<Dict>,
) -> Result<Accept, Reject> {
assert!(context.is_some() && args.len() == 0 && nargs.is_none());
let context = context.unwrap();
let reader = &mut context.thread.reader;
match self {
Token::Void => Ok(Accept::Next),
Token::EOF => {
if let Some(_) = reader.peek() {
Err(Reject::Next)
} else {
Ok(Accept::Next)
}
}
Token::Char(ccl) => {
if let Some(ch) = reader.once(|ch| ccl.test(&(ch..=ch))) {
return Ok(Accept::Push(Capture::Range(
reader.capture_last(ch.len_utf8()),
None,
5,
)));
}
Err(Reject::Next)
}
Token::BuiltinChar(f) => {
if let Some(ch) = reader.once(f) {
return Ok(Accept::Push(Capture::Range(
reader.capture_last(ch.len_utf8()),
None,
5,
)));
}
Err(Reject::Next)
}
Token::Chars(ccl) => {
let start = reader.tell();
while let Some(ch) = reader.peek() {
if !ccl.test(&(*ch..=*ch)) {
break;
}
reader.next();
}
let range = reader.capture_from(&start);
if range.len() > 0 {
Ok(Accept::Push(Capture::Range(range, None, 5)))
} else {
reader.reset(start);
Err(Reject::Next)
}
}
Token::BuiltinChars(f) => {
let start = reader.tell();
while let Some(ch) = reader.peek() {
if !f(*ch) {
break;
}
reader.next();
}
let range = reader.capture_from(&start);
if range.len() > 0 {
Ok(Accept::Push(Capture::Range(range, None, 5)))
} else {
reader.reset(start);
Err(Reject::Next)
}
}
Token::Match(string) | Token::Touch(string) => {
let start = reader.tell();
for ch in string.chars() {
if let Some(c) = reader.peek() {
if *c != ch {
break;
}
} else {
break;
}
reader.next();
}
let range = reader.capture_from(&start);
if range.len() == string.len() {
Ok(Accept::Push(Capture::Range(
range,
None,
if matches!(self, Token::Touch(_)) {
0
} else {
5
},
)))
} else {
reader.reset(start);
Err(Reject::Next)
}
}
}
}
}
impl From<Token> for RefValue {
fn from(token: Token) -> Self {
RefValue::from(Box::new(token) as BoxedObject)
}
}
tokay_token!("Ident", {
let reader = &mut context.thread.reader;
let start = reader.tell();
if reader.once(|ch| ch.is_alphabetic() || ch == '_').is_none() {
return Err(Reject::Next);
}
reader.span(|ch| ch.is_alphanumeric() || ch == '_');
Ok(Accept::Push(Capture::Range(
reader.capture_from(&start),
None,
5,
)))
});
tokay_token!("Int : @base=void, with_signs=true", {
let base = if base.is_void() {
None
} else {
let base = base.to_i64()?;
if base < 1 || base > 36 {
return Err(format!(
"{} base value is {}, allowed is only between 1 and 36",
__function, base
)
.into());
}
Some(base as u32)
};
if let Some(int) = if with_signs.is_true() {
parse_int_from_iter_with_radix::<BigInt>(context.thread.reader, base, false)
} else {
parse_uint_from_iter_with_radix::<BigInt>(context.thread.reader, base, false)
} {
Ok(Accept::Push(Capture::Value(crate::value!(int), None, 5)))
} else {
Err(Reject::Next)
}
});
tokay_token!("Float : @with_signs=true", {
let reader = &mut context.thread.reader;
let start = reader.tell();
if with_signs.is_true() {
reader.once(|ch: char| ch == '-' || ch == '+');
}
let has_int = reader.span(|ch: char| ch.is_numeric()).is_some();
if reader.once(|ch: char| ch == '.').is_none() {
return Err(Reject::Next);
}
if reader.span(|ch: char| ch.is_numeric()).is_none() && !has_int {
return Err(Reject::Next);
}
let mut range = reader.capture_from(&start);
if reader.once(|ch: char| ch == 'e' || ch == 'E').is_some() {
reader.once(|ch: char| ch == '-' || ch == '+');
if reader.span(|ch: char| ch.is_numeric()).is_some() {
range = reader.capture_from(&start)
}
}
Ok(Accept::Push(Capture::Value(
crate::value!(reader.get(&range).parse::<f64>().unwrap()),
None,
5,
)))
});
tokay_token!("Word : @min=1 max=void", {
let reader = &mut context.thread.reader;
let start = reader.tell();
if let Some(input) = reader.span(|ch| ch.is_alphabetic()) {
if input.chars().count() < min.to_usize()? {
return Err(Reject::Skip); }
if !max.is_void() && input.chars().count() > max.to_usize()? {
return Ok(Accept::Next);
}
Ok(Accept::Push(Capture::Range(
reader.capture_from(&start),
None,
5,
)))
} else {
Err(Reject::Next)
}
});