use super::*;
use peekmore::PeekMoreIterator;
/// A [`TokenStream`] wrapped in peekmore's `PeekMoreIterator`, which is the type
/// returned by [`tokenize`].
// The `TS: TokenStream` bound on the alias trips rustc's `type_alias_bounds` lint,
// which is why the lint is silenced here.
#[allow(type_alias_bounds)]
pub type PeekableTokenStream<TS: TokenStream> = PeekMoreIterator<TS>;
/// Marker trait for any iterator that yields `Token`s.
pub trait TokenStream: Iterator<Item = Token> {}

/// Blanket implementation: every `Iterator<Item = Token>` is a `TokenStream`.
impl<I: Iterator<Item = Token>> TokenStream for I {}
pub fn tokenize(input: &str) -> PeekableTokenStream<impl TokenStream + '_> {
let mut input = PosIter::from(input.char_indices()).peekmore();
let mut encode = [0; std::mem::size_of::<char>() * 2];
std::iter::from_fn(move || {
let current = loop {
match input.next() {
Some(current) => if current.is_whitespace() {
} else {
break current;
},
None => return None
};
};
let index = current.idx;
let mut last_idx = current.idx;
let (currstr, peekstr) = {
let cl0 = current.encode_utf8(&mut encode).len();
let cl1 = input.peek()
.map(|c|c.char)
.unwrap_or(' ')
.encode_utf8(&mut encode[cl0..]).len();
(
std::str::from_utf8(&encode[..(cl0)]).unwrap(),
std::str::from_utf8(&encode[..(cl0+cl1)]).unwrap()
)
};
if let Ok(symbol) = peekstr.parse::<Symbol>() {
input.next();
if symbol == Symbol::DoubleDollar {
return Some((index, index+2, Literal::RefCtx).into());
}
return Some((index, index+2, symbol).into());
}
if let Ok(symbol) = currstr.parse::<Symbol>() {
if symbol == Symbol::At {
if let Some((start, end, uuid)) = try_lex_uuid(&mut input, current.idx) {
return Some((start, end, Literal::ObjUid(uuid)).into());
}
if let Some(PosChar { char, .. }) = input.peek().cloned() {
if is_bareword_start(char) {
input.next(); let (start, end, bareword)
= try_lex_bareword(&mut input, index, char);
return Some((start, end, Literal::ObjKey(bareword)).into());
}
if char == '"' {
input.next(); let (start, end, string) = try_lex_string(&mut input, index, char, '"');
return Some((start, end, Literal::ObjKey(string)).into());
}
if char == '\'' {
input.next(); let (start, end, string) = try_lex_string(&mut input, index, char, '\'');
return Some((start, end, Literal::ObjKey(string)).into());
}
}
panic!("Failed to parse object reference (@) at {}:{}", current.line, current.col);
}
if symbol == Symbol::DollarSign {
if let Some(PosChar { char, .. }) = input.peek().cloned() {
if is_bareword_start(char) {
input.next(); let (start, end, bareword)
= try_lex_bareword(&mut input, index, char);
return Some((start, end, Literal::RefVar(bareword)).into());
}
}
return Some((index, index+1, Literal::RefRes).into());
}
if ! (symbol == Symbol::Plus || symbol == Symbol::Dash) {
return Some((index, index+1, symbol).into());
}
}
if *current == 'U' {
if let Some((start, end, uuid)) = try_lex_uuid(&mut input, current.idx) {
return Some((start, end, Literal::Uid(uuid)).into());
}
}
if is_bareword_start(*current) {
let (start, end, bareword) = try_lex_bareword(&mut input, index, *current);
return Some((start, end,
try_into_constant(bareword.as_str()).unwrap_or(Literal::Str(bareword))
).into());
}
if *current == '"' {
let (start, end, string) = try_lex_string(&mut input, index, *current, '"');
return Some((start, end, Literal::Str(string)).into());
}
if *current == '\'' {
let (start, end, string) = try_lex_string(&mut input, index, *current, '\'');
return Some((start, end, Literal::Str(string)).into());
}
if current.is_ascii_digit() || *current == '+' || *current == '-' {
let peeked = input.peek().copied().map(|c|c.char).unwrap_or('0');
let (current, sign, bsign) = match *current {
'-' => {
if !peeked.is_ascii_digit() {
return Some((index, index+1, Symbol::Dash).into());
}
(input.next().map(|c|c.char).unwrap_or('0'), -1.0f64, true)
},
'+' => {
if !peeked.is_ascii_digit() {
return Some((index, index+1, Symbol::Plus).into());
}
(input.next().map(|c|c.char).unwrap_or('0'), 1.0f64, true)
},
_ => (*current, 1.0f64, false)
};
let mut buffer = CompactString::new();
buffer.push(current);
let mut radix = 10;
if current == '0' {
radix = match input.peek().copied().map(|c|c.char).unwrap_or(' ') {
'x' => {input.next(); 16},
'd' => {input.next(); 10},
'o' => {input.next(); 8},
'b' => {input.next(); 2},
_ => radix
};
}
if !bsign {
if let Some(PosChar { char: '[', .. }) = input.peek().copied() {
input.next(); let mut array: Vec<Token> = vec![];
loop {
if let Some(PosChar { char: ']', idx: index , .. }) = input.peek().copied() {
last_idx = index;
input.next(); break; }
buffer.clear();
let mut sign = false;
if let Some(PosChar { char: '-', .. }) = input.peek().copied() {
sign = true;
input.next();
} else if let Some(PosChar { char: '+', .. }) = input.peek().copied() {
sign = false;
input.next();
}
while let Some(PosChar { char: peeked, idx: index , .. }) = input.peek().copied() {
if peeked == ']' { break }
last_idx = index;
if ! is_digit_valid(peeked, radix) {
input.next(); break
}
buffer.push(peeked);
input.next(); }
if buffer.is_empty() {
continue;
}
let integer = match i64::from_str_radix(&buffer, radix) {
Ok(i) => i,
Err(err) => {
panic!("Failed to parse '{}' with radix {}: {}", buffer, radix, err)
},
};
let integer = sign.then(||-integer).unwrap_or(integer);
array.push((last_idx, last_idx, Literal::Int(integer)).into());
}
return Some((index, last_idx, TokenContent::Group(Symbol::BraketLeft, array)).into());
}
}
while let Some(PosChar { char: peeked, .. }) = input.peek().copied() {
if ! is_digit_valid(peeked, radix) {
break
}
buffer.push(peeked);
input.next(); }
let integer = match i64::from_str_radix(&buffer, radix) {
Ok(i) => i,
Err(err) => panic!("Failed to parse '{}' with radix {}: {}", buffer, radix, err),
};
let decimal = if radix == 10
&& '.' == input.peek_nth(0).copied().map(|c|c.char).unwrap_or(' ')
&& input.peek_nth(1).copied().map(|c|c.char).unwrap_or(' ').is_ascii_digit()
&& '.' != input.peek_nth(1).copied().map(|c|c.char).unwrap_or(' ')
{
buffer.clear(); buffer.push_str("0.");
input.next();
while let Some(PosChar { char: peeked, .. }) = input.peek().copied() {
if peeked.is_ascii_digit() {
buffer.push(peeked);
input.next(); } else {
break;
}
}
buffer.parse().unwrap()
} else {
0f64
};
let pow10: f64 = if radix == 10 && 'e' == input.peek().copied().map(|c|c.char).unwrap_or(' ') {
input.next();
let sign = match input.peek().copied().map(|c|c.char).unwrap_or(' ') {
'+' => {input.next(); false}, '-' => {input.next(); true}, _ => false, };
buffer.clear(); while let Some(PosChar { char: peeked , .. }) = input.peek().copied() {
if peeked.is_ascii_digit() {
buffer.push(peeked);
input.next(); } else {
break;
}
}
let mut pow10: i32 = buffer.parse().unwrap();
if sign {pow10 *= -1;}
10f64.powi(pow10)
} else {
1.0
};
if decimal == 0.0 {
if pow10 > 0.0 {
return Some((index, index, Literal::Int((sign as i64) * integer * (pow10 as i64))).into());
}
return Some((index, index, Literal::Dec((sign) * (integer as f64) * (pow10 as f64))).into());
}
let value = (sign) * (integer as f64 + decimal) * (pow10 as f64);
return Some((index, index, Literal::Dec(value)).into());
}
let remainder: String = input.clone().map(|p| p.char).collect();
Some((index, index+remainder.len(), TokenContent::Remainder(remainder)).into())
}).peekmore()
}
/// Lexes the rest of a bareword whose first character, `current`, was already consumed.
///
/// Characters are taken from `input` for as long as `is_bareword_part` accepts them.
/// Returns `(start, end, word)`, where `start` is passed through unchanged and `end` is the
/// index of the last character that was peeked (or `start` if the input was already exhausted).
fn try_lex_bareword(input: &mut PosInput, start: usize, current: char) -> (usize, usize, CompactString) {
    let mut word = CompactString::new();
    word.push(current);
    let mut end = start;
    while let Some(next) = input.peek().copied() {
        end = next.idx;
        if !is_bareword_part(next.char) {
            break;
        }
        word.push(next.char);
        input.next();
    }
    (start, end, word)
}
/// Lexes the body of a quoted string whose opening delimiter was already consumed.
///
/// Characters are copied verbatim (backslashes included) into the returned buffer until an
/// unescaped `delimiter` is reached; that delimiter is consumed but not stored. A backslash
/// escapes only the single character after it, so `\\` is a complete escape and a delimiter
/// directly after it closes the string. If the input ends first, the characters read so far
/// are returned.
///
/// `current` is the character that preceded the string body (the callers in this file pass
/// the opening delimiter). Returns `(start, end, contents)`, where `start` is passed through
/// unchanged and `end` is the index of the last character examined.
fn try_lex_string(input: &mut PosInput, start: usize, current: char, delimiter: char) -> (usize, usize, CompactString) {
    let mut buffer = CompactString::new();
    let mut end = start;
    // True while the previous character was a backslash that is not itself escaped.
    // Looking only at the previous character (as before) misreads `\\"` as an escaped
    // quote, so a string ending in an escaped backslash could never be closed.
    let mut escaped = current == '\\';
    while let Some(PosChar { char: peeked, idx: index, .. }) = input.peek().copied() {
        end = index;
        input.next();
        if peeked == delimiter && !escaped {
            break;
        }
        buffer.push(peeked);
        escaped = peeked == '\\' && !escaped;
    }
    (start, end, buffer)
}
/// Tries to read a UUID from the next (up to) 36 characters of `input`.
///
/// The characters are only peeked at first. Those that convert to a `u8` are copied into a
/// space-padded 36-byte buffer, which is handed to `uuid::Uuid::try_parse_ascii`. On success
/// the copied characters are consumed and `(start, start + len, uuid)` is returned, with `len`
/// the number of characters copied. On failure nothing is consumed and `None` is returned.
fn try_lex_uuid(input: &mut PosInput, start: usize) -> Option<(usize, usize, uuid::Uuid)> {
    let mut ascii: [u8; 36] = [b' '; 36];
    let mut len = 0;
    for peeked in input.peek_amount(36).iter().flatten() {
        // Characters that do not fit into a single byte are skipped.
        if let Ok(byte) = std::convert::TryInto::<u8>::try_into(peeked.char) {
            ascii[len] = byte;
            len += 1;
        }
    }
    match uuid::Uuid::try_parse_ascii(&ascii) {
        Ok(uuid) => {
            for _ in 0..len {
                input.next();
            }
            Some((start, start + len, uuid))
        }
        Err(_) => None,
    }
}
/// Maps a bareword to the constant literal it names, if any.
///
/// Recognised (case-sensitive) names are `null`, `true`, `false`, `NaN`, `inf` / `infinity`,
/// `PI`, `TAU`, `EULER` and `SQRT2`; every other word yields `None`.
fn try_into_constant(str: &str) -> Option<Literal> {
    use std::f64::consts;

    match str {
        "null" => Some(Literal::Nil),
        "true" => Some(Literal::Bool(true)),
        "false" => Some(Literal::Bool(false)),
        "NaN" => Some(Literal::Dec(f64::NAN)),
        "inf" | "infinity" => Some(Literal::Dec(f64::INFINITY)),
        "PI" => Some(Literal::Dec(consts::PI)),
        "TAU" => Some(Literal::Dec(consts::TAU)),
        "EULER" => Some(Literal::Dec(consts::E)),
        "SQRT2" => Some(Literal::Dec(consts::SQRT_2)),
        _ => None,
    }
}
/// Reports whether `peeked` is a valid digit in the given `radix`.
///
/// Only radixes 2, 8, 10 and 16 are supported; any other radix yields `false`.
/// Hexadecimal digits are accepted in either case.
fn is_digit_valid(peeked: char, radix: u32) -> bool {
    match radix {
        2 => matches!(peeked, '0' | '1'),
        8 => matches!(peeked, '0'..='7'),
        10 => peeked.is_ascii_digit(),
        16 => peeked.is_ascii_hexdigit(),
        _ => false,
    }
}