gluon_parser 0.6.1

The parser for the gluon programming language
Documentation
//! The parser is a bit more complex than it needs to be as it needs to be fully specialized to
//! avoid a recompilation every time a later part of the compiler is changed. Due to this, the
//! string interner, and therefore also the garbage collector, need to be compiled before the
//! parser.
#![doc(html_root_url="https://docs.rs/gluon_parser/0.5.0")] // # GLUON

#[macro_use]
extern crate log;
extern crate itertools;
#[macro_use]
extern crate quick_error;
extern crate gluon_base as base;
extern crate lalrpop_util;

use std::cell::RefCell;
use std::fmt;

use base::ast::{Comment, Expr, IdentEnv, ValueBinding, SpannedExpr, SpannedPattern, TypedIdent};
use base::error::Errors;
use base::pos::{self, BytePos, Span, Spanned};
use base::symbol::Symbol;
use base::types::{ArcType, TypeCache};

use infix::{OpTable, Reparser};
use layout::Layout;
use token::{Token, Tokenizer};

pub use infix::Error as InfixError;
pub use layout::Error as LayoutError;
pub use token::Error as TokenizeError;

#[cfg_attr(rustfmt, rustfmt_skip)]
mod grammar;
mod infix;
mod layout;
mod token;

/// Wraps `name` in a `TypedIdent` whose type is whatever `type_cache.hole()` returns.
fn new_ident<Id>(type_cache: &TypeCache<Id>, name: Id) -> TypedIdent<Id> {
    let typ = type_cache.hole();
    TypedIdent {
        name: name,
        typ: typ,
    }
}

/// `lalrpop_util::ParseError` specialized for this crate: locations are `BytePos`, tokens are
/// `Token<'input>` and the user error payload is an `Error` annotated with its span.
type LalrpopError<'input> = lalrpop_util::ParseError<
    BytePos,
    Token<'input>,
    Spanned<Error, BytePos>,
>;

/// Shrink hidden spans to fit the visible expressions and flatten singleton blocks.
fn shrink_hidden_spans<Id>(mut expr: SpannedExpr<Id>) -> SpannedExpr<Id> {
    match expr.value {
        Expr::Infix(_, _, ref last) |
        Expr::IfElse(_, _, ref last) |
        Expr::LetBindings(_, ref last) |
        Expr::TypeBindings(_, ref last) => expr.span.end = last.span.end,
        Expr::Lambda(ref lambda) => expr.span.end = lambda.body.span.end,
        Expr::Block(ref mut exprs) => {
            match exprs.len() {
                0 => (),
                1 => return exprs.pop().unwrap(),
                _ => expr.span.end = exprs.last().unwrap().span.end,
            }
        }
        Expr::Match(_, ref alts) => {
            if let Some(last_alt) = alts.last() {
                let end = last_alt.expr.span.end;
                expr.span.end = end;
            }
        }
        Expr::App(_, _) |
        Expr::Ident(_) |
        Expr::Literal(_) |
        Expr::Projection(_, _, _) |
        Expr::Array(_) |
        Expr::Record { .. } |
        Expr::Tuple { .. } |
        Expr::Error => (),
    }
    expr
}

fn transform_errors<'a, Iter>(errors: Iter) -> Errors<Spanned<Error, BytePos>>
where
    Iter: IntoIterator<Item = LalrpopError<'a>>,
{
    errors.into_iter().map(Error::from_lalrpop).collect()
}

/// Display helper which renders the list of tokens the parser would have accepted.
///
/// Nothing is written for an empty list. A single token is rendered as `"\nExpected a"` and
/// several tokens as `"\nExpected one of a, b or c"`.
struct Expected<'a>(&'a [String]);

impl<'a> fmt::Display for Expected<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Every token below is written with a leading space, so the headers must not end in
        // one, otherwise the message contains a double space ("Expected  a").
        match self.0.len() {
            0 => (),
            1 => write!(f, "\nExpected")?,
            _ => write!(f, "\nExpected one of")?,
        }
        for (i, token) in self.0.iter().enumerate() {
            // "," between the leading tokens, " or" in front of the final one
            let sep = match i {
                0 => "",
                i if i + 1 < self.0.len() => ",",
                _ => " or",
            };
            write!(f, "{} {}", sep, token)?;
        }
        Ok(())
    }
}

// The error type reported by the parser. Each variant supplies its `description` and `display`
// text through `quick_error!`; the variants marked `from()` additionally get a `From`
// conversion from the wrapped error type.
quick_error! {
    #[derive(Debug, PartialEq)]
    pub enum Error {
        // An error produced by the tokenizer
        Token(err: TokenizeError) {
            description(err.description())
            display("{}", err)
            from()
        }
        // An error produced by the layout pass
        Layout(err: LayoutError) {
            description(err.description())
            display("{}", err)
            from()
        }
        // LALRPOP reported an invalid token
        InvalidToken {
            description("invalid token")
            display("Invalid token")
        }
        // A token was found where one of `expected` was required
        UnexpectedToken(token: String, expected: Vec<String>) {
            description("unexpected token")
            display("Unexpected token: {}{}", token, Expected(&expected))
        }
        // The input ended where one of `expected` was required
        UnexpectedEof(expected: Vec<String>) {
            description("unexpected end of file")
            display("Unexpected end of file{}", Expected(&expected))
        }
        // LALRPOP reported an extra token
        ExtraToken(token: String) {
            description("extra token")
            display("Extra token: {}", token)
        }
        // An error produced while reparsing infix expressions
        Infix(err: InfixError) {
            description(err.description())
            display("{}", err)
            from()
        }
    }
}

/// LALRPOP currently has an unnecessary set of `"` around each expected token.
///
/// Strips one leading and one trailing `"` from every token which is wrapped in a pair of
/// them. Tokens that are not wrapped in a pair are left untouched, including a token which
/// consists of a single `"` character.
fn remove_extra_quotes(tokens: &mut [String]) {
    for token in tokens {
        // The length check keeps a lone `"` from counting as both the opening and the closing
        // quote, which would erase the token completely
        if token.len() >= 2 && token.starts_with('"') && token.ends_with('"') {
            token.pop();
            token.remove(0);
        }
    }
}

impl Error {
    fn from_lalrpop(err: LalrpopError) -> Spanned<Error, BytePos> {
        use lalrpop_util::ParseError::*;

        match err {
            InvalidToken { location } => pos::spanned2(location, location, Error::InvalidToken),
            UnrecognizedToken {
                token: Some((lpos, token, rpos)),
                mut expected,
            } => {
                remove_extra_quotes(&mut expected);
                pos::spanned2(
                    lpos,
                    rpos,
                    Error::UnexpectedToken(token.to_string(), expected),
                )
            }
            UnrecognizedToken {
                token: None,
                mut expected,
            } => {
                remove_extra_quotes(&mut expected);
                pos::spanned2(0.into(), 0.into(), Error::UnexpectedEof(expected))
            }
            ExtraToken { token: (lpos, token, rpos) } => {
                pos::spanned2(lpos, rpos, Error::ExtraToken(token.to_string()))
            }
            User { error } => error,
        }
    }
}

/// Adapter over an iterator of `Result`s which yields only the `Ok` payloads.
///
/// When the wrapped iterator produces an `Err`, `next` returns `None` for that call and the
/// error is stashed; it can then be fetched (once) through the `result` method.
struct ResultOkIter<I, E> {
    iter: I,
    error: Option<E>,
}

impl<I, E> ResultOkIter<I, E> {
    /// Wraps `iter` with no error stashed.
    fn new(iter: I) -> Self {
        ResultOkIter {
            error: None,
            iter: iter,
        }
    }

    /// Takes the stashed error, if any, and returns it as `Err`; otherwise returns `Ok(value)`.
    fn result<T>(&mut self, value: T) -> Result<T, E> {
        self.error.take().map_or(Ok(value), Err)
    }
}

impl<I, T, E> Iterator for ResultOkIter<I, E>
where
    I: Iterator<Item = Result<T, E>>,
    E: ::std::fmt::Debug,
{
    type Item = T;

    fn next(&mut self) -> Option<T> {
        self.iter.next().and_then(|item| match item {
            Ok(value) => Some(value),
            Err(err) => {
                // Remember the failure so that `result` can report it later
                self.error = Some(err);
                None
            }
        })
    }
}

/// Clonable handle to an iterator stored in a `RefCell`.
///
/// Every clone refers to the same cell, so advancing one handle advances all of them.
struct SharedIter<'a, I: 'a> {
    iter: &'a RefCell<I>,
}

impl<'a, I> SharedIter<'a, I> {
    /// Creates a handle to the iterator stored in `iter`.
    fn new(iter: &'a RefCell<I>) -> Self {
        SharedIter { iter: iter }
    }
}

impl<'a, I> Clone for SharedIter<'a, I> {
    fn clone(&self) -> Self {
        // Only the reference is copied; the underlying iterator is not cloned
        SharedIter::new(self.iter)
    }
}

impl<'a, I: Iterator> Iterator for SharedIter<'a, I> {
    type Item = I::Item;

    fn next(&mut self) -> Option<Self::Item> {
        let mut shared = self.iter.borrow_mut();
        shared.next()
    }
}

/// A single field of a pattern, either a type field or a value field.
///
/// NOTE(review): presumably built by the generated `grammar` module while parsing record
/// patterns — confirm against the grammar.
pub enum FieldPattern<Id> {
    /// A type field: the spanned field name plus an optional second identifier.
    Type(Spanned<Id, BytePos>, Option<Id>),
    /// A value field: the spanned field name plus an optional nested pattern.
    Value(Spanned<Id, BytePos>, Option<SpannedPattern<Id>>),
}

/// A single field of an expression, either a type field or a value field.
///
/// NOTE(review): presumably built by the generated `grammar` module while parsing record
/// expressions — confirm against the grammar.
pub enum FieldExpr<Id> {
    /// A type field: optional comment, spanned field name and an optional type.
    Type(Option<Comment>, Spanned<Id, BytePos>, Option<ArcType<Id>>),
    /// A value field: optional comment, spanned field name and an optional expression.
    Value(Option<Comment>, Spanned<Id, BytePos>, Option<SpannedExpr<Id>>),
}

// Hack around LALRPOP's limited type syntax: these aliases give the reference types a plain name.
// NOTE(review): presumably referenced from the generated `grammar` module — confirm.
/// Mutable borrow of an identifier environment producing identifiers of type `Id`.
type MutIdentEnv<'env, Id> = &'env mut IdentEnv<Ident = Id>;
/// Mutable borrow of a collection of raw LALRPOP errors.
type ErrorEnv<'err, 'input> = &'err mut Errors<LalrpopError<'input>>;

/// Collection of parse errors, each annotated with the span it applies to. This is the error
/// type returned by the public parse functions of this module.
pub type ParseErrors = Errors<Spanned<Error, BytePos>>;

// Builds the token stream handed to the generated parser: `Tokenizer` -> `ResultOkIter` ->
// `SharedIter` -> `Layout`, mapped into `(start, token, end)` triples.
//
// `$result_ok_iter` must name a binding the caller has declared but not yet initialized. The
// macro assigns the `RefCell` to it and the returned iterator borrows from it, so the binding
// has to live in the caller's scope. The caller can also use it afterwards to check for a
// tokenizer error that has not been reported yet.
macro_rules! layout {
    ($result_ok_iter: ident, $input: expr) => { {
        let tokenizer = Tokenizer::new($input);
        $result_ok_iter = RefCell::new(ResultOkIter::new(tokenizer));

        Layout::new(SharedIter::new(&$result_ok_iter)).map(|token| {
            /// Return the tokenizer error if one exists
            $result_ok_iter.borrow_mut()
                .result(())
                .map_err(|err| {
                    pos::spanned2(err.span.start.absolute,
                                err.span.end.absolute,
                                err.value.into())
                })?;
            // Otherwise forward the layout result, converting a layout error into `Error`
            let token = token.map_err(|err| pos::spanned(err.span, err.value.into()))?;
            debug!("Lex {:?}", token.value);
            // Only the absolute start and end positions are passed on with the token
            let Span { start, end, .. } = token.span;
            Ok((start.absolute, token.value, end.absolute))
        })
    } }
}

pub fn parse_partial_expr<Id>(symbols: &mut IdentEnv<Ident = Id>,
                              type_cache: &TypeCache<Id>,
                              input: &str)
                              -> Result<SpannedExpr<Id>, (Option<SpannedExpr<Id>>, ParseErrors)>
    where Id: Clone
{
    let result_ok_iter;
    let layout = layout!(result_ok_iter, input);

    let mut parse_errors = Errors::new();

    let result = grammar::parse_TopExpr(input, type_cache, symbols, &mut parse_errors, layout);

    // If there is a tokenizer error it may still exist in the result iterator wrapper.
    // If that is the case we return that error instead of the unexpected EOF error that lalrpop
    // emitted
    if let Err(err) = result_ok_iter.borrow_mut().result(()) {
        parse_errors.pop(); // Remove the EOF error
        parse_errors.push(lalrpop_util::ParseError::User {
            error: pos::spanned2(
                err.span.start.absolute,
                err.span.end.absolute,
                err.value.into(),
            ),
        });
    }

    match result {
        Ok(mut expr) => {
            let mut errors = transform_errors(parse_errors);
            let mut reparser = Reparser::new(OpTable::default(), symbols);
            if let Err(reparse_errors) = reparser.reparse(&mut expr) {
                errors.extend(reparse_errors.into_iter().map(|err| err.map(Error::Infix)));
            }

            if errors.has_errors() {
                Err((Some(expr), errors))
            } else {
                Ok(expr)
            }
        }
        Err(err) => {
            parse_errors.push(err);
            Err((None, transform_errors(parse_errors)))
        }
    }
}

/// Parses `input` as a top level expression.
///
/// Same as `parse_partial_expr` except that a partially parsed expression is discarded and
/// only the errors are returned on failure.
pub fn parse_expr(
    symbols: &mut IdentEnv<Ident = Symbol>,
    type_cache: &TypeCache<Symbol>,
    input: &str,
) -> Result<SpannedExpr<Symbol>, ParseErrors> {
    match parse_partial_expr(symbols, type_cache, input) {
        Ok(expr) => Ok(expr),
        Err((_partial, errors)) => Err(errors),
    }
}

/// Result of parsing input which may be either an expression or a let binding: `Ok` holds an
/// expression and `Err` holds a let binding. `Err` does not signal a failure here.
pub type LetOrExpr<Id> = Result<SpannedExpr<Id>, ValueBinding<Id>>;

pub fn parse_partial_let_or_expr<Id>(
    symbols: &mut IdentEnv<Ident = Id>,
    input: &str,
) -> Result<LetOrExpr<Id>, (Option<LetOrExpr<Id>>, ParseErrors)>
where
    Id: Clone,
{
    let result_ok_iter;
    let layout = layout!(result_ok_iter, input);

    let mut parse_errors = Errors::new();

    let type_cache = TypeCache::new();

    let result = grammar::parse_LetOrExpr(input, &type_cache, symbols, &mut parse_errors, layout);

    // If there is a tokenizer error it may still exist in the result iterator wrapper.
    // If that is the case we return that error instead of the unexpected EOF error that lalrpop
    // emitted
    if let Err(err) = result_ok_iter.borrow_mut().result(()) {
        parse_errors.pop(); // Remove the EOF error
        parse_errors.push(lalrpop_util::ParseError::User {
            error: pos::spanned2(
                err.span.start.absolute,
                err.span.end.absolute,
                err.value.into(),
            ),
        });
    }

    match result {
        Ok(mut let_or_expr) => {
            let mut errors = transform_errors(parse_errors);
            let mut reparser = Reparser::new(OpTable::default(), symbols);
            let result = match let_or_expr {
                Ok(ref mut expr) => reparser.reparse(expr),
                Err(ref mut let_binding) => reparser.reparse(&mut let_binding.expr),
            };
            if let Err(reparse_errors) = result {
                errors.extend(reparse_errors.into_iter().map(|err| err.map(Error::Infix)));
            }

            if errors.has_errors() {
                Err((Some(let_or_expr), errors))
            } else {
                Ok(let_or_expr)
            }
        }
        Err(err) => {
            parse_errors.push(err);
            Err((None, transform_errors(parse_errors)))
        }
    }
}

/// Parses `input` with `String` identifiers and a freshly created `TypeCache`, forwarding to
/// `parse_partial_expr`. Only compiled when the `test` feature is enabled.
#[cfg(feature = "test")]
pub fn parse_string<'env, 'input>(
    symbols: &'env mut IdentEnv<Ident = String>,
    input: &'input str,
) -> Result<SpannedExpr<String>, (Option<SpannedExpr<String>>, ParseErrors)> {
    let type_cache = TypeCache::new();
    parse_partial_expr(symbols, &type_cache, input)
}

/// Parses `input` as an expression and returns it pretty-printed.
///
/// The first line ending found in `input` (`"\r\n"`, `"\r"` or `"\n"`) is passed to the printer
/// as the newline sequence; `"\n"` is used when the input has no line ending. The printer is
/// invoked with a width of 100.
///
/// # Errors
///
/// Returns the parse errors if `input` cannot be parsed.
pub fn format_expr(input: &str) -> Result<String, ParseErrors> {
    use base::pretty_print::ExprPrinter;
    use base::source::Source;
    use base::symbol::Symbols;
    use base::types::TypeCache;

    let newline = match input.find(|c: char| c == '\n' || c == '\r') {
        Some(i) => {
            if input[i..].starts_with("\r\n") {
                "\r\n"
            } else if input[i..].starts_with("\r") {
                "\r"
            } else {
                "\n"
            }
        }
        None => "\n",
    };

    // `parse_expr` takes the type cache as its second argument; it was missing from this call
    let type_cache = TypeCache::new();
    let expr = parse_expr(&mut Symbols::new(), &type_cache, input)?;

    let source = Source::new(input);
    let printer = ExprPrinter::new(&source);
    Ok(printer.format(100, newline, &expr))
}