parser-haskell 0.2.0

Parses Haskell into an AST.
Documentation
use std::str::FromStr;
use ast::*;
use lalrpop_util::ErrorRecovery;
use trans::{decode_literal, expr_to_pat, rearrange_infix_pat};

grammar<'err>(errors: &'err mut Vec<ErrorRecovery<usize, (usize, &'input str), ()>>);

// A bunch of macros.

Semi<T>: Vec<T> = {
    <v:(<T> ";")*> <e:T?> => match e {
        None=> v,
        Some(e) => {
            let mut v = v;
            v.push(e);
            v
        }
    }
};

// Comma-delimited with minimum zero elements.
Comma<T>: Vec<T> = {
    <v:(<T> ",")*> <e:T?> => match e {
        None=> v,
        Some(e) => {
            let mut v = v;
            v.push(e);
            v
        }
    }
};

// Comma-delimited with minimum one element.
CommaOnce<T>: Vec<T> = {
    <v:(<T> ",")*> <e:T> => {
        let mut v = v;
        v.push(e);
        v
    },
};

Pipe<T>: Vec<T> = {
    <v:(<T> "|")*> <e:T?> => match e {
        None=> v,
        Some(e) => {
            let mut v = v;
            v.push(e);
            v
        }
    }
};

// Common patterns.

Quote: String = {
    r#""[^"]*""# => {
      let a = <>;
      a[1..a.len()-1].to_string()
    }
};

SingleQuote: String = {
    r"'[^']*'" => {
      let a = <>;
      a[1..a.len()-1].to_string()
    }
};

// Note: we only have signed integers for now.
Num: isize = {
    r"0x[0-9A-Fa-f]+" => isize::from_str_radix(&<>[2..], 16).unwrap(),
    r"-?[0-9]+" => isize::from_str(<>).unwrap(),
};

ImportIdent: Ident = {
    r"[a-zA-Z_.][.a-zA-Z_0-9']*(\.:[.+/*=-]+)?" => Ident((<>).to_string()),
};

Ident: Ident = {
    <ImportIdent> => <>,
    "as" => Ident((<>).to_string()),
    "qualified" => Ident((<>).to_string()),
    "hiding" => Ident((<>).to_string()),
};

Ctor: Ident = {
    r":[!#$%&*+./<=>?@^|~:\\-]+" => Ident(<>.to_string()),
};

Operator: String = {
    r"[!#$%&*+/=>?@^~][!#$%&*+./<=>?@^|~\\-]*" => <>.to_string(),
    ":" => <>.to_string(),
    "." => <>.to_string(),
    ".." => <>.to_string(),
    ".|." => <>.to_string(),
    ".&." => <>.to_string(),
    "-" => <>.to_string(),
    "||" => <>.to_string(),
    "<" => <>.to_string(),
    "<=" => <>.to_string(),
    "<>" => <>.to_string(),
    "<+>" => <>.to_string(),
    "<*>" => <>.to_string(),
    "<$>" => <>.to_string(),
};

// Code structures

Assignment: Assignment = {
    <pats:PatList>
      <sets:("|" <CommaOnce<(<ExprSpan> <("<-" <ExprSpan>)?>)>> "=" <ExprSpan>)+>
      => Assignment::Case { pats, sets }, // TODO this
    <pats:PatList> "=" <expr:ExprSpan>
      => Assignment::Assign { pats, expr },
};

Case: CaseCond = {
    <d:PatList>
      <v:("|" <CommaOnce<(<ExprSpan> ("<-" ExprSpan)?)>>  "->" <ExprSpan>)*>
      => CaseCond::Matching(d, v),
    <d:CommaOnce<PatSpan>> "->" <v:CommaOnce<ExprSpan>> <w:Where>
      => CaseCond::Direct(d, v),
};

// TODO name this better
ExprSpanSimple: Vec<Expr> = {
    <Expr+> => {
        <>
          .into_iter().fold(vec![], |mut acc, x| {
            // Process out Records
            if let Expr::RecordArgs(args) = x {
                if let Some(base) = acc.pop() {
                    acc.push(Expr::Record(Box::new(base), args));
                } else {
                    acc.push(Expr::Error);
                }
            } else {
                acc.push(x);
            }
            acc
          })
          .into_iter().fold(vec![], |mut acc, x| {
            // Fix lambas; this should be a parser concern, see Expr::Lambda parsing note below
            let mut lambda_fill = false;
            if let Some(&mut Expr::Lambda(_, ref mut body)) = acc.last_mut() {
                lambda_fill = true;
                if let Expr::Span(ref mut span) = **body {
                    span.push(x.clone());
                }
            }
            if !lambda_fill {
                acc.push(x);
            }
            acc
          })
    },
};

ExprSpan: Expr = {
    <span:Expr*> "let" "{" <a:Semi<Assignment>> "}" "in" "{" <e:ExprSpan> "}" <span_tail:Expr*> => {
        let mut span = span;
        span.push(Expr::Let(a, Box::new(e)));
        span.extend(span_tail);
        Expr::Span(span)
    },
    <ExprSpanSimple> => Expr::Span(<>),
};

Expr: Expr = {
    Quote => Expr::Str(decode_literal(&<>)),
    SingleQuote => Expr::Char(decode_literal(&<>)),
    Num => Expr::Number(<>),

    <Ident> => Expr::Ref(<>),
    <Ctor> => Expr::Operator(<>.0),
    <Operator> => Expr::Operator(<>),

    "(" "," ")" => Expr::Operator(",".into()), // Tuple operator
    "`" <Ident> "`" => Expr::Operator(<>.0),
    "!" => Expr::Operator(<>.to_string()),
    r"\\\\" => Expr::Operator(<>.to_string()),

    // TODO this is supposed to take an ExprSpan as a body, but LALRPOP complains
    // for now, don't take ANY expr so an ExprSpan can be allowed
    r"\\" <i:Ident> <e:PatList?> "->" => Expr::Lambda({
      let mut a = vec![Pat::Ref(i)];
      a.extend(e.unwrap_or(vec![]));
      a
    }, Box::new(Expr::Span(vec![]))),
    r"\\" "(" <e:CommaOnce<PatSpan>> ")" "->" <body:Expr>
      => Expr::Lambda(vec![Pat::Tuple(e)], Box::new(body)),

    "[" <cond:CommaOnce<ExprSpan>> "|" <body:CommaOnce<ListGenerator>> "]"
      => Expr::Generator(cond, body),
    "[" <Comma<ExprSpan>> ("::" TypeGroup)? "]"
      => Expr::Vector(<>),
    "(" <Comma<(<ExprSpan> ("::" TypeGroup)?)>>  ")"
      => Expr::Parens(<>),

    "case" <e:ExprSpan> "of" "{" <s:Semi<Case>> "}"
      => Expr::Case(Box::new(e), s),
    "do" "{" <body:Semi<DoItem>> <w:Where> "}"
      => Expr::Do(body, w),
    "if" "{" <cond:ExprSpan> "}"
      "then" "{" <then_:ExprSpan> "}"
      <else_:("else" "{" <ExprSpan> "}")?> => {
        Expr::If(Box::new(cond), Box::new(then_), else_.map(|x| Box::new(x)))
    },
    "{" <r:Comma<(<Ident> "=" <ExprSpan>)>> ";"? "}"
      => Expr::RecordArgs(r),
};

ListGenerator: () = {
    "let" "{" <Assignment> "}"
      => (),
    <pat:Expr+> "<-" <expr:ExprSpan>
      => (), // expr_to_pat
    <Expr+>
      => (),
};

SContext: () = {
    Ident+ "=>" => (),
    "(" CommaOnce<Ident+> ")" "=>" => (),
};

Item: Item = {
    "import" "qualified"? <ImportList+> ("as" ImportList+)? ("hiding" ImportList+)? => Item::Import(<>),

    "type" <id:Ident> <args:TypeSub*> "=" <p:TypeGroup>
      => Item::Type(id, p, args),
    "data" <id:Ident>
      => Item::Data(id, vec![], vec![], vec![]),
    "data" <id:Ident> <args:TypeSub*> "=" <p:Pipe<TypeGroup>>
      <derives:("deriving" <ImportList>)?>
      => Item::Data(id, p, derives.unwrap_or(vec![]), args),
    "newtype" <id:Ident> <args:TypeSub*> "=" <p:Type>
      <derives:("deriving" <ImportList>)?>
      => Item::Newtype(id, p, derives.unwrap_or(vec![]), args),
    "class" SContext? Ident+
      Where
      => Item::Class,
    "instance" TypeGroup
      Where
      => Item::Instance,
    "infixr" <n:Num> "`" <i:Ident> "`"
      => Item::Infixr(n, i),
    "infixl" <n:Num> "`" <i:Ident> "`"
      => Item::Infixl(n, i),

    <ids:CommaOnce<Ident>> <t:("::" <TypeGroup>)>
      => Item::Prototype(ids, t),
    <a:Assignment> <wh:Where>
      => Item::Assign(Box::new(a), wh),
};

DoItem: DoItem = {
    "let" "{" <Semi<Assignment>> "}" => DoItem::Let(<>),
    <expats:ExprSpanSimple> "<-" <expr:ExprSpan> => {
        let pats = expats.iter().map(expr_to_pat).collect();
        DoItem::Bind(pats, Box::new(expr))
    },
    <ExprSpanSimple>
      => DoItem::Expression(Box::new(Expr::Span(<>))),
};

Where: Where = {
    "where" "{" <Semi<Item>> "}" => <>,
    () => vec![],
};

TypeRecord: (Ident, Ty) = {
    <a:Ident> "::" <b:TypeGroupInner> => (a, b),
};

TypeSub: Ty = {
    "!" <TypeSub>
      => Ty::Not(Box::new(<>)),
    "[" <Type> "]"
      => Ty::Brackets(Box::new(<>)),
    "(" <CommaOnce<TypeGroupInner>> ")"
      => Ty::Tuple(<>),
    "(" ")"
      => Ty::EmptyParen,
    "{" <Comma<TypeRecord>> "}"
      => Ty::Record(<>),

    <Ident>
      => Ty::Ref(<>),
    <Ctor>
      => Ty::Ref(<>),
    ".."
      => Ty::RangeOp, // TODO ??
};

TypeSpan: Ty = {
    <TypeSub+> => Ty::Span(<>),
};

Type: Ty = {
    <a:TypeSpan> "->" <b:Type> => Ty::Pair(Box::new(a), Box::new(b)),
    <TypeSpan> => <>,
};

TypeGroupInner: Ty = {
    ("forall" Ident+ ";")? <a:TypeSpan> "=>" <b:TypeGroupInner> => b,
    ("forall" Ident+ ";")? <a:Type> => a,
};

TypeGroup: Vec<Ty> = {
    <TypeGroupInner> => vec![<>],
};

PatSub: Pat = {
    <Quote> => Pat::Str(decode_literal(&<>)),
    <SingleQuote> => Pat::Char(decode_literal(&<>)),
    <Num> => Pat::Num(<>),

    <Ident> => Pat::Ref(<>),
    <Ctor> => Pat::Ref(<>),
    <Operator> => Pat::Operator(<>),
    "`" <Ident> "`" => Pat::Infix(<>),

    "!" <PatSub>
      => Pat::Not(Box::new(<>)),
    "[" <Comma<PatSpan>> "]"
      => Pat::Brackets(<>),
    "(" <PatSpan> ")"
      => <>,
    "(" <h:PatSpan> "," <t:CommaOnce<PatSpan>> ")" => {
        let mut vec = vec![h];
        vec.extend_from_slice(&t);
        Pat::Tuple(vec)
    },
    "(" <i:Ident> "->" <s: PatSpan> ")"
      => Pat::ViewPattern(i, Box::new(s)),
    "(" ")"
      => Pat::EmptyParen,
    <i:Ident> "{" <body:Comma<PatRecord>> "}"
      => Pat::Record(i, vec![]), //TODO attach this
};

PatRecord: () = {
    <Ident> "=" <PatSpan> => (),
    ".." => (), // RecordWildCards
};

// for matching (Ctor arg arg ...)
PatSpan: Pat = {
    <PatList> => Pat::Span(<>),
};

// for matching e.g. multiple function args
PatList: Vec<Pat> = {
    <PatSub+> => rearrange_infix_pat(<>),
};

ImportName: Ident = {
    ImportIdent => <>,
    ".." => Ident("..".to_string()),
};

ImportList: Vec<Ident> = {
    <ImportIdent> => vec![<>],

    "(" ")" => vec![],
    "(" <a:CommaOnce<(<Ident> ("(" ImportName ")")?)>> ")" => a,
    "(" "(<|>)" ")" => vec![], // ??? see DefTable.hs
};

Section: () = {
    Ident ("(" ".." ")")? => (),
    Ident "(" Ident* ")" => (),
    "module" Ident ("(" CommaOnce<Section> ")")? => (),
};

pub Module: Module = {
    "module" <n:Ident> ("(" Comma<Section> ")")? <w:Where>
      => Module {
        name: n,
        items: w,
      },
};