use std::borrow::Cow::{self, Borrowed, Owned};
use std::collections::HashMap;
use std::fmt;
use std::path::PathBuf;
use crate::bytes::Bytes;
use crate::error::Error;
use crate::exec::Context;
use crate::integer::{Integer, Ratio};
use crate::lexer::{Lexer, Span, Token};
use crate::name::{get_standard_name_for, standard_names, Name, NameDisplay, NameStore};
use crate::restrict::RestrictError;
use crate::string;
use crate::value::Value;
/// Prefix that marks a doc comment as documenting the module itself (`;;;`).
const MODULE_DOC_COMMENT: &str = ";;;";
/// Parses a stream of tokens from a `Lexer` into `Value` expressions.
///
/// `'lex` is the lifetime of the source text being lexed; `'a` is the
/// lifetime of the execution context used to intern names.
pub struct Parser<'a, 'lex> {
    // Token source for the input text
    lexer: Lexer<'lex>,
    // Context whose scope's name store receives interned names
    ctx: &'a Context,
    // Caches source-string -> interned `Name` lookups to avoid re-interning
    name_cache: HashMap<&'lex str, Name>,
    // One-token lookahead buffer filled by `peek` and drained by `next`
    cur_token: Option<(Span, Token<'lex>)>,
}
/// An error generated while parsing a stream of tokens.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// Span of the source text that triggered the error
    pub span: Span,
    /// The kind of error encountered
    pub kind: ParseErrorKind,
}
impl ParseError {
    /// Constructs a `ParseError` of the given kind at the given source span.
    pub fn new(span: Span, kind: ParseErrorKind) -> ParseError {
        ParseError{ kind, span }
    }
}
impl fmt::Display for ParseError {
    /// Formats the error by delegating to its `kind`; the span is not shown.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self.kind)
    }
}
impl NameDisplay for ParseError {
    /// Name-aware formatting; a `ParseError` contains no names, so this
    /// simply reuses the `Display` output.
    fn fmt(&self, _names: &NameStore, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}", self)
    }
}
/// Describes the kind of error encountered while parsing.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum ParseErrorKind {
    /// A doc comment preceded an item that cannot be documented
    CannotDocumentItem,
    /// A doc comment appeared at end-of-file
    DocCommentEof,
    /// Invalid numeric literal
    InvalidLiteral,
    /// Invalid token
    InvalidToken,
    /// A byte literal contained a non-ASCII character
    InvalidByte(char),
    /// Invalid escape sequence in a byte literal
    InvalidByteEscape(char),
    /// Invalid character in input
    InvalidChar(char),
    /// Invalid character in a numeric escape sequence
    InvalidNumericEscape(char),
    /// Failed to parse a literal value
    LiteralParseError,
    /// A `(` was never closed before end-of-file
    MissingCloseParen,
    /// A `,` appeared without a matching `` ` ``
    UnbalancedComma,
    /// Unexpected end-of-file
    UnexpectedEof,
    /// A token other than the expected one was found
    UnexpectedToken{
        /// Name of the expected token or class of token
        expected: &'static str,
        /// Name of the token actually found
        found: &'static str,
    },
    /// Unknown character escape
    UnknownCharEscape(char),
    /// A `)` appeared with no matching `(`
    UnmatchedParen,
    /// Unterminated character constant
    UnterminatedChar,
    /// Unterminated block comment
    UnterminatedComment,
    /// Unterminated string constant
    UnterminatedString,
}
impl fmt::Display for ParseErrorKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ParseErrorKind::CannotDocumentItem =>
f.write_str("doc comment precedes item that cannot be documented"),
ParseErrorKind::DocCommentEof => f.write_str("doc comment at end-of-file"),
ParseErrorKind::InvalidLiteral => f.write_str("invalid numeric literal"),
ParseErrorKind::InvalidToken => f.write_str("invalid token"),
ParseErrorKind::InvalidByte(ch) =>
write!(f, "byte literal must be ASCII: {:?}", ch),
ParseErrorKind::InvalidByteEscape(ch) =>
write!(f, "invalid escape sequence in byte literal: {:?}", ch),
ParseErrorKind::InvalidChar(ch) =>
write!(f, "invalid character: {:?}", ch),
ParseErrorKind::InvalidNumericEscape(ch) =>
write!(f, "invalid character in escape sequence: {:?}", ch),
ParseErrorKind::LiteralParseError => f.write_str("literal parse error"),
ParseErrorKind::MissingCloseParen => f.write_str("missing close paren"),
ParseErrorKind::UnbalancedComma => f.write_str("unbalanced ` and ,"),
ParseErrorKind::UnexpectedEof => f.write_str("unexpected end-of-file"),
ParseErrorKind::UnexpectedToken{expected, found} =>
write!(f, "expected {}; found {}", expected, found),
ParseErrorKind::UnknownCharEscape(ch) =>
write!(f, "unknown char escape: {:?}", ch),
ParseErrorKind::UnmatchedParen => f.write_str("unmatched `)`"),
ParseErrorKind::UnterminatedChar => f.write_str("unterminated char constant"),
ParseErrorKind::UnterminatedComment => f.write_str("unterminated block comment"),
ParseErrorKind::UnterminatedString => f.write_str("unterminated string constant"),
}
}
}
/// A pending, partially-parsed construct on the `parse_expr` stack.
enum Group<'lex> {
    /// Net count of pending backticks at this stack position; positive
    /// values are quasiquotes (`` ` ``), negative values are commas (`,`)
    /// (pushed as `1` or `-1` and adjusted in place while topmost)
    Backticks(i32),
    /// A single pending `,@` (comma-at)
    CommaAt,
    /// Count of pending `'` quotes
    Quotes(u32),
    /// An open paren group: the values parsed so far, plus an optional
    /// preceding doc comment and its span
    Parens(Vec<Value>, Option<(Span, &'lex str)>),
}
impl<'a, 'lex> Parser<'a, 'lex> {
    /// Creates a new `Parser` reading tokens from `lexer` and interning
    /// names through `ctx`.
    pub fn new(ctx: &'a Context, lexer: Lexer<'lex>) -> Parser<'a, 'lex> {
        Parser{
            lexer,
            ctx,
            name_cache: HashMap::new(),
            cur_token: None,
        }
    }

    /// Skips a shebang line at the start of input; delegates to
    /// `Lexer::skip_shebang`.
    pub fn skip_shebang(&mut self) {
        self.lexer.skip_shebang();
    }

    /// Parses a single expression from the token stream.
    ///
    /// Prefix constructs (quotes, quasiquotes, commas) and open parens are
    /// pushed onto `stack` as `Group`s; each completed value is then folded
    /// back through the stack until it is appended to an open paren group
    /// or, with the stack empty, becomes the result.
    pub fn parse_expr(&mut self) -> Result<Value, Error> {
        let mut stack = Vec::new();
        // Net quasiquote depth: incremented by `` ` ``; decremented by `,`
        // and `,@`, which must not outnumber the backticks.
        let mut total_backticks = 0;

        loop {
            // Enforce the configured limit on pending nested groups.
            if stack.len() >= self.ctx.restrict().max_syntax_nesting {
                return Err(From::from(RestrictError::MaxSyntaxNestingExceeded));
            }

            let mut doc = self.read_doc_comment()?;
            let (sp, tok) = self.next()?;

            let r = match tok {
                // A second doc comment directly after `doc`: the first one
                // documents nothing.
                Token::DocComment(_) =>
                    return Err(From::from(ParseError::new(
                        doc.unwrap().0, ParseErrorKind::CannotDocumentItem))),
                Token::LeftParen => {
                    // A doc comment may only precede a documentable form:
                    // (const ...), (define ...), (lambda ...), (macro ...),
                    // or (struct ...).
                    if let Some((doc_sp, _)) = doc {
                        match self.peek()? {
                            (_, Token::Name("const")) |
                            (_, Token::Name("define")) |
                            (_, Token::Name("lambda")) |
                            (_, Token::Name("macro")) |
                            (_, Token::Name("struct"))
                                => (),
                            _ => return Err(From::from(ParseError::new(
                                doc_sp, ParseErrorKind::CannotDocumentItem)))
                        }
                    }
                    // Open a new paren group, transferring the doc comment
                    // into it so the error check after the match is skipped.
                    stack.push(Group::Parens(Vec::new(), doc.take()));
                    continue;
                }
                Token::RightParen => {
                    // Close the innermost group; an empty stack means there
                    // was no matching `(`.
                    let group = stack.pop().ok_or_else(
                        || ParseError::new(sp, ParseErrorKind::UnmatchedParen))?;
                    match group {
                        Group::Parens(values, doc) =>
                            insert_doc_comment(values, doc)
                                .map_err(From::from),
                        // `)` directly after a quote/quasiquote/comma prefix
                        _ => Err(From::from(ParseError::new(sp,
                            ParseErrorKind::UnexpectedToken{
                                expected: "expression",
                                found: ")",
                            })))
                    }
                }
                Token::Float(f) => parse_float(f)
                    .map(Value::Float)
                    .map_err(|kind| From::from(ParseError::new(sp, kind))),
                Token::Integer(i, base) => parse_integer(self.ctx, i, base, sp)
                    .map(Value::Integer),
                Token::Ratio(r) => parse_ratio(self.ctx, r, sp)
                    .map(Value::Ratio),
                Token::Char(ch) => parse_char(ch)
                    .map(Value::Char).map_err(From::from),
                Token::String(s) => parse_string(s)
                    .map(|s| s.into()).map_err(From::from),
                Token::Byte(b) => parse_byte(b)
                    .map(|v| v.into()).map_err(From::from),
                Token::Bytes(b) => parse_bytes(b)
                    .map(Value::Bytes).map_err(From::from),
                Token::Path(p) => parse_path(p)
                    .map(|p| p.into()).map_err(From::from),
                Token::Name(name) => Ok(self.name_value(name)),
                Token::Keyword(name) => Ok(Value::Keyword(self.add_name(name))),
                Token::BackQuote => {
                    total_backticks += 1;
                    // Merge consecutive backticks into the topmost group.
                    if let Some(&mut Group::Backticks(ref mut n)) = stack.last_mut() {
                        *n += 1;
                        continue;
                    }
                    stack.push(Group::Backticks(1));
                    continue;
                }
                Token::Comma => {
                    // A comma must be enclosed by at least one backtick.
                    if total_backticks <= 0 {
                        return Err(From::from(ParseError::new(sp, ParseErrorKind::UnbalancedComma)));
                    }
                    total_backticks -= 1;
                    // Merge into a topmost backtick group (possibly
                    // cancelling a quasiquote); negative counts are commas.
                    if let Some(&mut Group::Backticks(ref mut n)) = stack.last_mut() {
                        *n -= 1;
                        continue;
                    }
                    stack.push(Group::Backticks(-1));
                    continue;
                }
                Token::CommaAt => {
                    // `,@` likewise consumes one enclosing backtick.
                    if total_backticks <= 0 {
                        return Err(From::from(ParseError::new(sp, ParseErrorKind::UnbalancedComma)));
                    }
                    total_backticks -= 1;
                    stack.push(Group::CommaAt);
                    continue;
                }
                Token::Quote => {
                    // Merge consecutive quotes into the topmost group.
                    if let Some(&mut Group::Quotes(ref mut n)) = stack.last_mut() {
                        *n += 1;
                        continue;
                    }
                    stack.push(Group::Quotes(1));
                    continue;
                }
                Token::End => {
                    // A dangling doc comment at EOF is its own error.
                    if let Some((doc_sp, _)) = doc {
                        return Err(From::from(ParseError::new(doc_sp,
                            ParseErrorKind::DocCommentEof)));
                    }
                    // Report a missing `)` if any paren group is still open;
                    // otherwise a plain unexpected EOF.
                    let any_paren = stack.iter().any(|group| {
                        match *group {
                            Group::Parens(_, _) => true,
                            _ => false
                        }
                    });
                    if any_paren {
                        Err(From::from(ParseError::new(sp,
                            ParseErrorKind::MissingCloseParen)))
                    } else {
                        Err(From::from(ParseError::new(sp,
                            ParseErrorKind::UnexpectedEof)))
                    }
                }
            };

            // Any doc comment not consumed by the LeftParen arm above
            // (via `doc.take()`) preceded a non-documentable token.
            if let Some((doc_sp, _)) = doc {
                return Err(From::from(ParseError::new(doc_sp,
                    ParseErrorKind::CannotDocumentItem)));
            }

            let mut v = r?;

            // Fold the completed value back through pending prefix groups
            // until it lands in an open paren group or the stack is empty.
            loop {
                match stack.last_mut() {
                    // No enclosing group: `v` is the finished expression.
                    None => return Ok(v),
                    Some(&mut Group::Parens(ref mut values, _)) => {
                        values.push(v);
                        break;
                    }
                    _ => ()
                }
                // Top is a prefix group (checked non-empty above); apply it.
                let group = stack.pop().unwrap();
                match group {
                    Group::Backticks(n) if n > 0 => {
                        total_backticks -= n;
                        v = v.quasiquote(n as u32);
                    }
                    Group::Backticks(n) if n < 0 => {
                        // Negative counts are commas; subtracting a negative
                        // restores the consumed backtick depth.
                        total_backticks -= n;
                        v = v.comma((-n) as u32);
                    }
                    Group::CommaAt => {
                        total_backticks += 1;
                        v = v.comma_at(1);
                    }
                    Group::Quotes(n) => v = v.quote(n),
                    // Backticks(0): a quasiquote exactly cancelled by a
                    // comma; the value passes through unchanged.
                    _ => ()
                }
            }
        }
    }

    /// Parses a single expression and verifies that end-of-file follows it.
    pub fn parse_single_expr(&mut self) -> Result<Value, Error> {
        let expr = self.parse_expr()?;
        match self.next()? {
            (_, Token::End) => Ok(expr),
            (sp, tok) => Err(From::from(ParseError::new(sp, ParseErrorKind::UnexpectedToken{
                expected: "eof",
                found: tok.name(),
            })))
        }
    }

    /// Parses all expressions until end-of-file.
    ///
    /// If the input begins with a module doc comment (`;;;`), a
    /// `(set-module-doc <text>)` expression is prepended to the result.
    pub fn parse_exprs(&mut self) -> Result<Vec<Value>, Error> {
        let mut res = Vec::new();

        if let Some((_, doc)) = self.read_module_doc_comment()? {
            res.push(vec![
                Value::Name(standard_names::SET_MODULE_DOC),
                format_doc_comment(doc).into(),
            ].into());
        }

        loop {
            match self.peek()? {
                (_sp, Token::End) => break,
                _ => res.push(self.parse_expr()?)
            }
        }

        Ok(res)
    }

    /// Returns a borrow of the contained lexer.
    pub fn lexer(&self) -> &Lexer<'lex> {
        &self.lexer
    }

    /// Consumes and returns the next token if it is a doc comment;
    /// otherwise leaves the token stream untouched.
    fn read_doc_comment(&mut self)
            -> Result<Option<(Span, &'lex str)>, ParseError> {
        match self.peek()? {
            (sp, Token::DocComment(doc)) => {
                // Drop the buffered token; it is consumed here.
                self.cur_token.take();
                Ok(Some((sp, doc)))
            }
            _ => Ok(None)
        }
    }

    /// Like `read_doc_comment`, but only consumes a module-level doc
    /// comment (one beginning with `;;;`).
    fn read_module_doc_comment(&mut self)
            -> Result<Option<(Span, &'lex str)>, ParseError> {
        match self.peek()? {
            (sp, Token::DocComment(doc)) if doc.starts_with(MODULE_DOC_COMMENT) => {
                self.cur_token.take();
                Ok(Some((sp, doc)))
            }
            _ => Ok(None)
        }
    }

    /// Interns `name`, caching the result; standard names are resolved
    /// first so they are not re-added to the scope's name store.
    fn add_name(&mut self, name: &'lex str) -> Name {
        let mut names = self.ctx.scope().borrow_names_mut();
        *self.name_cache.entry(name).or_insert_with(
            || get_standard_name_for(name).unwrap_or_else(|| names.add(name)))
    }

    /// Converts a name token into a value: `true`/`false` become booleans;
    /// anything else becomes an interned `Value::Name`.
    fn name_value(&mut self, name: &'lex str) -> Value {
        match name {
            "true" => Value::Bool(true),
            "false" => Value::Bool(false),
            _ => {
                let name = self.add_name(name);
                Value::Name(name)
            }
        }
    }

    /// Consumes and returns the next token, clearing the lookahead buffer.
    fn next(&mut self) -> Result<(Span, Token<'lex>), ParseError> {
        let r = self.peek()?;
        self.cur_token = None;
        Ok(r)
    }

    /// Returns the next token without consuming it, buffering it in
    /// `cur_token` so repeated peeks do not advance the lexer.
    fn peek(&mut self) -> Result<(Span, Token<'lex>), ParseError> {
        if let Some(tok) = self.cur_token {
            Ok(tok)
        } else {
            let tok = self.lexer.next_token()?;
            self.cur_token = Some(tok);
            Ok(tok)
        }
    }
}
/// Strips comment markers from each line of a doc comment.
///
/// For every line: leading whitespace and the run of leading `;` characters
/// are removed, then a single whitespace character (the conventional space
/// after `;;`) is dropped if present. Each cleaned line is terminated with
/// `'\n'`, so the result for a non-empty comment always ends in a newline.
fn format_doc_comment(doc: &str) -> String {
    let mut buf = String::new();

    for line in doc.lines() {
        // `trim_start()` is the idiomatic spelling of
        // `trim_start_matches(char::is_whitespace)`.
        let rest = line.trim_start().trim_start_matches(';');
        // Drop exactly one whitespace char separating `;;` from the text,
        // preserving any deliberate deeper indentation.
        let mut chars = rest.chars();
        let text = match chars.next() {
            Some(ch) if ch.is_whitespace() => chars.as_str(),
            _ => rest,
        };
        buf.push_str(text);
        buf.push('\n');
    }

    buf
}
/// Returns `s` with its first character removed if that character satisfies
/// the predicate `f`; otherwise returns `s` unchanged.
fn trim_first<F: FnOnce(char) -> bool>(s: &str, f: F) -> &str {
    match s.chars().next() {
        // Slice past the matched char; `len_utf8` keeps the boundary valid.
        Some(first) if f(first) => &s[first.len_utf8()..],
        _ => s,
    }
}
/// Builds a list value from `items`, splicing in a doc comment if present.
///
/// A doc comment is only valid on a three-element form (e.g.
/// `(define name expr)`); the formatted text is inserted at index 2,
/// before the final element. Any other arity is an error.
fn insert_doc_comment(mut items: Vec<Value>, doc: Option<(Span, &str)>)
        -> Result<Value, ParseError> {
    match doc {
        Some((_, text)) if items.len() == 3 => {
            items.insert(2, format_doc_comment(text).into());
        }
        Some((sp, _)) => {
            return Err(ParseError::new(sp, ParseErrorKind::CannotDocumentItem));
        }
        None => {}
    }

    Ok(items.into())
}
/// Parses a byte literal, discarding the end position reported by the
/// underlying string parser.
fn parse_byte(s: &str) -> Result<u8, ParseError> {
    string::parse_byte(s, 0)
        .map(|(b, _)| b)
        .map_err(From::from)
}
/// Parses a character literal, discarding the end position reported by the
/// underlying string parser.
fn parse_char(s: &str) -> Result<char, ParseError> {
    string::parse_char(s, 0)
        .map(|(ch, _)| ch)
        .map_err(From::from)
}
/// Parses a byte-string literal (`#b"..."` or raw `#br"..."`).
fn parse_bytes(s: &str) -> Result<Bytes, ParseError> {
    // Skip the `#b` prefix; a raw literal keeps its leading `r` for the
    // raw-string parser.
    let body = &s[2..];
    let (parsed, _) = if s.starts_with("#br") {
        string::parse_raw_byte_string(body, 0)?
    } else {
        string::parse_byte_string(body, 0)?
    };
    Ok(Bytes::new(parsed))
}
/// Parses a path literal (`#p"..."` or raw `#pr"..."`).
fn parse_path(s: &str) -> Result<PathBuf, ParseError> {
    // Skip the `#p` prefix; a raw literal keeps its leading `r` for the
    // raw-string parser.
    let body = &s[2..];
    let (parsed, _) = if s.starts_with("#pr") {
        string::parse_raw_string(body, 0)?
    } else {
        string::parse_string(body, 0)?
    };
    Ok(PathBuf::from(parsed))
}
/// Parses a string literal, raw (`r"..."`) or ordinary (`"..."`).
fn parse_string(s: &str) -> Result<String, ParseError> {
    let (parsed, _) = if s.starts_with('r') {
        string::parse_raw_string(s, 0)?
    } else {
        string::parse_string(s, 0)?
    };
    Ok(parsed)
}
/// Parses a float literal, ignoring `_` digit separators.
fn parse_float(s: &str) -> Result<f64, ParseErrorKind> {
    match strip_underscores(s).parse() {
        Ok(value) => Ok(value),
        Err(_) => Err(ParseErrorKind::LiteralParseError),
    }
}
/// Parses an integer literal in the given radix, ignoring `_` digit
/// separators and enforcing the context's integer size restriction.
fn parse_integer(ctx: &Context, s: &str, base: u32, sp: Span)
        -> Result<Integer, Error> {
    // Non-decimal literals carry a two-character radix prefix
    // (e.g. `0x`, `0o`, `0b`) that must be skipped.
    let digits = if base == 10 { s } else { &s[2..] };
    let digits = strip_underscores(digits);

    check_integer(ctx, &digits, base)?;

    Integer::from_str_radix(&digits, base)
        .map_err(|_| From::from(ParseError::new(sp,
            ParseErrorKind::LiteralParseError)))
}
/// Parses a ratio literal (base 10), ignoring `_` digit separators and
/// enforcing the context's integer size restriction.
fn parse_ratio(ctx: &Context, s: &str, sp: Span) -> Result<Ratio, Error> {
    let digits = strip_underscores(s);

    check_integer(ctx, &digits, 10)?;

    digits.parse().map_err(|_| From::from(ParseError::new(sp,
        ParseErrorKind::LiteralParseError)))
}
/// Checks an integer literal against the context's `max_integer_size`
/// restriction, estimating the value's bit width from its digit count.
fn check_integer(ctx: &Context, s: &str, base: u32) -> Result<(), RestrictError> {
    let limit = ctx.restrict().max_integer_size;

    // A limit of usize::max_value() is treated as "no restriction".
    if limit == usize::max_value() {
        return Ok(());
    }

    // A sign and leading zeros contribute no bits; ignore them.
    let unsigned = if s.starts_with('-') { &s[1..] } else { s };
    let digits = unsigned.trim_start_matches('0');

    // Each digit contributes log2(base) bits; round up.
    let n_bits = (digits.len() as f32 * (base as f32).log2()).ceil() as usize;

    if n_bits > limit {
        Err(RestrictError::IntegerLimitExceeded)
    } else {
        Ok(())
    }
}
/// Removes all `_` digit separators from a numeric literal, borrowing the
/// input unchanged when it contains none.
fn strip_underscores(s: &str) -> Cow<str> {
    if !s.contains('_') {
        return Borrowed(s);
    }
    let cleaned: String = s.chars().filter(|&ch| ch != '_').collect();
    Owned(cleaned)
}
#[cfg(test)]
mod test {
    use super::{ParseError, ParseErrorKind, Parser};
    use crate::error::Error;
    use crate::interpreter::Interpreter;
    use crate::lexer::{Span, Lexer};
    use crate::value::Value;

    // Parses `s` as a single expression; unwraps the result to a
    // `ParseError`, panicking if any other error kind is returned.
    fn parse(s: &str) -> Result<Value, ParseError> {
        let interp = Interpreter::new();
        let mut p = Parser::new(interp.context(), Lexer::new(s, 0));
        p.parse_single_expr().map_err(|e| {
            match e {
                Error::ParseError(e) => e,
                _ => panic!("parse returned error: {:?}", e)
            }
        })
    }

    #[test]
    fn test_doc_errors() {
        // Doc comments are only accepted on forms of exactly three items;
        // the reported span always covers the doc comment itself.
        assert_eq!(parse(";; foo\n(const)").unwrap_err(), ParseError{
            span: Span{lo: 0, hi: 7}, kind: ParseErrorKind::CannotDocumentItem});
        assert_eq!(parse(";; foo\n(const foo)").unwrap_err(), ParseError{
            span: Span{lo: 0, hi: 7}, kind: ParseErrorKind::CannotDocumentItem});
        assert_eq!(parse(";; foo\n(const foo bar baz)").unwrap_err(), ParseError{
            span: Span{lo: 0, hi: 7}, kind: ParseErrorKind::CannotDocumentItem});
    }

    #[test]
    fn test_errors() {
        // Unclosed paren reports at end-of-file; unbalanced commas report
        // at the offending comma.
        assert_eq!(parse("(foo").unwrap_err(), ParseError{
            span: Span{lo: 4, hi: 4}, kind: ParseErrorKind::MissingCloseParen});
        assert_eq!(parse("(foo ,bar)").unwrap_err(), ParseError{
            span: Span{lo: 5, hi: 6}, kind: ParseErrorKind::UnbalancedComma});
        assert_eq!(parse("`(foo ,,bar)").unwrap_err(), ParseError{
            span: Span{lo: 7, hi: 8}, kind: ParseErrorKind::UnbalancedComma});
    }

    #[test]
    fn test_lexer_position() {
        // After parsing one expression, the lexer stops just past it,
        // leaving the remaining input (" bar") unconsumed.
        let interp = Interpreter::new();
        let mut p = Parser::new(interp.context(),
            Lexer::new("(foo 1 2 3) bar", 0));
        p.parse_expr().unwrap();
        assert_eq!(p.lexer().current_position(), 11);
    }
}