use smallvec::{SmallVec};
use std::cell::{Cell};
use std::convert::{TryFrom};
use std::str::{self, FromStr};
use super::collections::{Arr, DequeOps, Str, Tab};
use super::engine::{Filename, glsp, Span, SpanStorage, stock_syms::*, Sym};
use super::error::{GResult};
use super::gc::{Root};
use super::lex::{char_is_whitespace, Lexer, StrStatus, TokType};
use super::val::{Val};
#[doc(hidden)]
pub struct Parser {
lexer: Lexer,
stack: SmallVec<[Form; 32]>,
prev_tok_type: TokType,
file: Option<Filename>
}
impl Parser {
pub fn new(file: Option<Filename>) -> Parser {
Parser {
lexer: Lexer::new(),
stack: SmallVec::new(),
prev_tok_type: TokType::Whitespace,
file
}
}
pub fn parse_all(&mut self, mut text: &str, dst: &mut Vec<Val>) -> GResult<usize> {
let starting_len = dst.len();
while text.len() > 0 {
if let Some(form) = self.parse(&mut text)? {
dst.push(form);
}
}
self.ensure_finished()?;
Ok(dst.len() - starting_len)
}
pub fn parse(&mut self, text: &mut &str) -> GResult<Option<Val>> {
parse(self, text)
}
pub fn ensure_finished(&self) -> GResult<()> {
match self.lexer.str_status {
StrStatus::OutsideStr => (),
StrStatus::InsideStr => bail!("unterminated str"),
StrStatus::InsideRawStr(_) => bail!("unterminated raw str"),
StrStatus::InsideBlockComment(_) => bail!("unterminated block comment")
}
if let Some(&ref form) = self.stack.last() {
match form {
Form::Arr(..) => bail!("unterminated arr"),
Form::Str(..) | Form::PausedStr(..) | Form::ResumedStr(..) => {
bail!("unterminated str")
}
Form::RawStr(..) => bail!("unterminated raw str"),
Form::Tab(..) => bail!("unterminated tab"),
Form::TabClause(..) => bail!("unterminated tab clause"),
Form::Access(..) => bail!("unterminated []"),
Form::DiscardNext => bail!("#_ at end of input"),
Form::Abbrv(sym) => bail!("{} abbreviation at end of input", sym)
}
}
Ok(())
}
}
enum Form {
Arr(Root<Arr>),
Str(Root<Str>, StrEscape),
RawStr(Root<Str>),
PausedStr(Root<Arr>),
ResumedStr(Root<Arr>, Root<Str>, StrEscape),
Tab(Root<Tab>),
TabClause(Option<Val>, Option<Val>),
Access(Root<Arr>),
DiscardNext, Abbrv(Sym), }
#[derive(Copy, Clone, PartialEq)]
enum StrEscape {
NoEscape,
NewlineEscape
}
fn parse(parser: &mut Parser, text: &mut &str) -> GResult<Option<Val>> {
assert!(text.len() > 0, "empty string passed to parse()");
let span_storage: Cell<Option<Span>> = Cell::new(None);
let line_number = parser.lexer.line_number;
let file = parser.file;
let span = || -> Span {
match file {
Some(file) => {
if span_storage.get().is_none() {
span_storage.set(Some(glsp::span(SpanStorage::Loaded(file, line_number))));
}
span_storage.get().unwrap()
}
None => glsp::generated_span()
}
};
let tok = match parser.lexer.lex(text) {
Ok(tok) => tok,
Err(err) => return Err(error_at!(span(), "lexing error").with_source(err))
};
let delimited = match parser.prev_tok_type {
TokType::NumOrSym | TokType::True | TokType::False | TokType::Nil | TokType::Char |
TokType::SpaceChar | TokType::TabChar | TokType::NewlineChar | TokType::ReturnChar |
TokType::NulChar | TokType::AsciiChar | TokType::UnicodeChar | TokType::ArrClose |
TokType::AccessClose | TokType::StrClose | TokType::RawStrClose => true,
TokType::Whitespace | TokType::FormComment | TokType::ArrOpen | TokType::TabOpen |
TokType::AccessOpen | TokType::StrOpen | TokType::StrPause | TokType::StrResume |
TokType::StrChars | TokType::RawStrOpen | TokType::RawStrChars | TokType::Quote |
TokType::Backquote | TokType::Unquote | TokType::Splay | TokType::MetName |
TokType::Atsign => false
};
if delimited {
match tok.tok_type {
TokType::Whitespace | TokType::ArrClose |
TokType::AccessClose | TokType::StrResume => (),
_ => bail_at!(span(), "the token {:?} must be followed by ), ], }}, or whitespace, \
rather than {:?}", parser.prev_tok_type, tok.tok_type)
}
}
parser.prev_tok_type = tok.tok_type;
let mut parsed_val: Option<Val> = match tok.tok_type {
TokType::Whitespace => None,
TokType::FormComment => {
parser.stack.push(Form::DiscardNext);
None
}
TokType::NumOrSym => Some(parse_num_or_sym(tok.text)?),
TokType::True => Some(Val::Bool(true)),
TokType::False => Some(Val::Bool(false)),
TokType::Nil => Some(Val::Nil),
TokType::Char => Some(Val::Char(tok.text.chars().nth(1).unwrap())),
TokType::SpaceChar => Some(Val::Char(' ')),
TokType::TabChar => Some(Val::Char('\t')),
TokType::NewlineChar => Some(Val::Char('\n')),
TokType::ReturnChar => Some(Val::Char('\r')),
TokType::NulChar => Some(Val::Char('\0')),
TokType::AsciiChar => Some(Val::Char(parse_ascii_char(tok.text))),
TokType::UnicodeChar => Some(Val::Char(parse_unicode_char(tok.text, span())?)),
TokType::ArrOpen => {
match parser.stack.last() {
Some(&Form::Tab(_)) => parser.stack.push(Form::TabClause(None, None)),
_ => {
let arr = glsp::arr();
arr.set_span(span());
parser.stack.push(Form::Arr(arr));
}
}
None
}
TokType::TabOpen => {
parser.stack.push(Form::Tab(glsp::tab()));
None
}
TokType::ArrClose => {
match parser.stack.pop() {
Some(Form::Arr(arr)) => {
arr.freeze();
Some(Val::Arr(arr))
}
Some(Form::Tab(tab)) => {
tab.freeze();
Some(Val::Tab(tab))
}
Some(Form::TabClause(key, val)) => {
if let (Some(key), Some(val)) = (key, val) {
match parser.stack.last() {
Some(Form::Tab(ref tab)) => tab.set(key, val)?,
_ => panic!()
}
None
} else {
bail_at!(span(), "invalid clause in tab literal")
}
}
Some(_) => bail_at!(span(), "unexpected ) token"),
None => bail_at!(span(), "unexpected ) token")
}
}
TokType::AccessOpen => {
parser.stack.push(Form::Access(arr![ACCESS_SYM]));
None
}
TokType::AccessClose => {
match parser.stack.pop() {
Some(Form::Access(access_arr)) => {
access_arr.freeze();
Some(Val::Arr(access_arr))
}
_ => bail_at!(span(), "unexpected ] token")
}
}
TokType::StrOpen => {
parser.stack.push(Form::Str(glsp::str(), StrEscape::NoEscape));
None
}
TokType::StrClose => {
match parser.stack.pop() {
Some(Form::Str(st, _)) => {
st.freeze();
Some(Val::Str(st))
}
Some(Form::ResumedStr(arr, st, _)) => {
if st.len() > 0 {
st.freeze();
arr.push(st)?;
}
arr.freeze();
Some(Val::Arr(arr))
}
_ => panic!()
}
}
TokType::StrPause => {
match parser.stack.pop() {
Some(Form::Str(st, _)) => {
st.freeze();
let arr = glsp::arr();
arr.set_span(span());
arr.push(TEMPLATE_STR_SYM)?;
arr.push(st)?;
parser.stack.push(Form::PausedStr(arr));
}
Some(Form::ResumedStr(arr, st, _)) => {
st.freeze();
arr.push(st)?;
parser.stack.push(Form::PausedStr(arr));
}
_ => panic!()
}
None
}
TokType::StrResume => {
match parser.stack.pop() {
Some(Form::PausedStr(arr)) => {
parser.stack.push(Form::ResumedStr(arr, glsp::str(), StrEscape::NoEscape));
None
}
_ => bail_at!(span(), "unexpected }}")
}
}
TokType::StrChars => {
match parser.stack.last_mut() {
Some(&mut Form::Str(ref st, ref mut escape)) |
Some(&mut Form::ResumedStr(_, ref st, ref mut escape)) => {
parse_str_chars(st, tok.text, escape, span())?;
None
}
_ => panic!()
}
}
TokType::RawStrOpen => {
parser.stack.push(Form::RawStr(glsp::str()));
None
}
TokType::RawStrClose => {
match parser.stack.pop() {
Some(Form::RawStr(st)) => {
st.freeze();
Some(Val::Str(st))
}
_ => panic!()
}
}
TokType::RawStrChars => {
match parser.stack.last() {
Some(&Form::RawStr(ref st)) => st.extend(tok.text.chars())?,
_ => panic!()
}
None
}
TokType::Quote | TokType::Backquote | TokType::Unquote |
TokType::Splay | TokType::MetName | TokType::Atsign => {
let sym = match tok.tok_type {
TokType::Quote => QUOTE_SYM,
TokType::Backquote => BACKQUOTE_SYM,
TokType::Unquote => UNQUOTE_SYM,
TokType::Splay => SPLAY_SYM,
TokType::MetName => MET_NAME_SYM,
TokType::Atsign => ATSIGN_SYM,
_ => unreachable!()
};
parser.stack.push(Form::Abbrv(sym));
None
}
};
while let Some(ref val) = parsed_val {
let to_pop = match parser.stack.last_mut() {
None => {
return Ok(Some(val.clone()))
}
Some(&mut Form::Arr(ref arr)) |
Some(&mut Form::PausedStr(ref arr)) |
Some(&mut Form::Access(ref arr)) => {
arr.push(val)?;
parsed_val = None;
false
}
Some(&mut Form::Tab(_)) => bail_at!(span(), "invalid clause in tab literal"),
Some(&mut Form::TabClause(ref mut key, ref mut value)) => {
match (key.is_some(), value.is_some()) {
(false, false) => *key = Some(val.clone()),
(true, false) => *value = Some(val.clone()),
(true, true) => bail_at!(span(), "invalid clause in tab literal"),
(false, true) => panic!()
}
parsed_val = None;
false
}
Some(&mut Form::DiscardNext) => {
parsed_val = None;
true
}
Some(&mut Form::Abbrv(abbrv_sym)) => {
let arr = glsp::arr_with_capacity(2);
arr.set_span(span());
arr.push(abbrv_sym)?;
arr.push(val)?;
arr.freeze();
parsed_val = Some(Val::Arr(arr));
true
}
Some(&mut Form::Str(_, _)) => panic!(),
Some(&mut Form::RawStr(_)) => panic!(),
Some(&mut Form::ResumedStr(_, _, _)) => panic!(),
};
if to_pop {
parser.stack.pop().unwrap();
}
}
Ok(None)
}
fn parse_num_or_sym(text: &str) -> GResult<Val> {
let val = if let Some(i) = parse_int(text) {
Val::Int(i)
} else if let Some(f) = parse_flo(text) {
Val::Flo(f)
} else {
Val::Sym(glsp::sym(text)?)
};
Ok(val)
}
fn parse_int(mut text: &str) -> Option<i32> {
let sign = if text.starts_with('-') {
text = &text[1..];
-1
} else {
1
};
let radix: i32 = if text.starts_with("0x") {
text = &text[2..];
16
} else if text.starts_with("0o") {
text = &text[2..];
8
} else if text.starts_with("0b") {
text = &text[2..];
2
} else if let Some(ch) = text.chars().next() {
if ch.is_digit(10) {
10
} else {
return None
}
} else {
return None
};
let mut digit_count = 0;
let mut val = 0i32;
for ch in text.chars() {
if let Some(digit) = ch.to_digit(radix as u32) {
val = val.overflowing_mul(radix).0.overflowing_add(digit as i32).0;
digit_count += 1;
} else if ch == '_' {
()
} else {
return None
}
}
if digit_count == 0 {
None
} else {
if sign == -1 {
Some(val.overflowing_neg().0)
} else {
Some(val)
}
}
}
fn parse_flo(text: &str) -> Option<f32> {
let first = text.chars().next().unwrap();
if !matches!(first, '-' | '+' | 'n' | '0' ..= '9') {
return None
}
match text {
"+inf.0" => return Some(f32::INFINITY),
"-inf.0" => return Some(f32::NEG_INFINITY),
"nan.0" => return Some(f32::NAN),
_ => ()
}
if text.contains("inf") || text.contains("NaN") || text.starts_with("-.") {
return None
}
let mut bytes = SmallVec::<[u8; 128]>::with_capacity(text.len());
bytes.extend(text.bytes().filter(|byte| *byte != b'_'));
match f32::from_str(str::from_utf8(&bytes[..]).unwrap()) {
Ok(f) => Some(f),
Err(_) => None
}
}
fn parse_ascii_char(text: &str) -> char {
let mut ch = text.chars();
let (slash, x, hi, lo) = (ch.next().unwrap(), ch.next().unwrap(),
ch.next().unwrap(), ch.next().unwrap());
debug_assert!(slash == '\\' && x == 'x');
let hi_u32 = hi.to_digit(8).unwrap() << 4;
let lo_u32 = lo.to_digit(16).unwrap();
debug_assert!(hi_u32 | lo_u32 <= 0x7f);
char::from((hi_u32 | lo_u32) as u8)
}
fn parse_unicode_char(text: &str, span: Span) -> GResult<char> {
debug_assert!(text.starts_with("\\u{"));
if text.len() == 4 {
bail_at!(span, "\\u{{}} is invalid syntax")
}
let mut value = 0u32;
for (i, digit) in text[3..].chars().enumerate() {
if digit == '}' {
match char::try_from(value) {
Ok(ch) => return Ok(ch),
Err(_) => bail_at!(span, "invalid unicode escape {}", text)
}
}
if i >= 6 {
bail_at!(span, "too many digits in {}", text)
}
value <<= 4;
value |= digit.to_digit(16).unwrap();
}
bail_at!(span, "unterminated \\u{{ escape sequence")
}
fn parse_str_chars(
dst: &Root<Str>,
mut text: &str,
escape: &mut StrEscape,
span: Span
) -> GResult<()> {
if *escape == StrEscape::NewlineEscape {
text = text.trim_start_matches(char_is_whitespace);
if text.len() == 0 {
return Ok(())
}
*escape = StrEscape::NoEscape;
}
while text.len() > 0 {
if text == "\\" {
bail_at!(span, "unexpected \\ character at end of str")
}
if text.starts_with("\\\n") || text.starts_with("\\\r\n") {
text = (&text[1..]).trim_start_matches(char_is_whitespace);
if text.len() == 0 {
*escape = StrEscape::NewlineEscape;
return Ok(())
}
continue
}
if text.starts_with('\\') {
let mut escape_chars = text[1..].chars();
let escaped_ch = match escape_chars.next().unwrap() {
'\\' => '\\',
'n' => '\n',
'r' => '\r',
't' => '\t',
'0' => '\0',
'"' => '"',
'x' => {
match (escape_chars.next(), escape_chars.next()) {
(Some(hi), Some(lo)) if hi.is_digit(8) && lo.is_digit(16) => {
let offs = 2 + hi.len_utf8() + lo.len_utf8();
parse_ascii_char(&text[..offs])
}
_ => bail_at!(span, "invalid \\x escape sequence")
}
}
'u' => {
ensure_at!(span, escape_chars.next() == Some('{'), "invalid \\u escape");
let mut offs = 3;
loop {
match escape_chars.next() {
Some('}') => {
offs += 1;
break
}
Some(ch) if ch.is_digit(16) => offs += 1,
_ => bail_at!(span, "invalid \\u escape")
}
}
parse_unicode_char(&text[..offs], span)?
}
unknown_ch => bail_at!(span, "unrecognized escape character '{}'", unknown_ch)
};
dst.push(escaped_ch)?;
text = escape_chars.as_str();
} else {
let ch = text.chars().next().unwrap();
dst.push(ch)?;
if text.starts_with("{{") || text.starts_with("}}") {
text = &text[2..];
} else {
text = &text[ch.len_utf8()..];
}
}
}
Ok(())
}