// arrow-parser 0.0.2
//
// Parser for the Arrow programming language.
// Documentation
use super::ast::Expression;
use super::ast::LiteralTemplatePart;
use super::ast::Node;
use super::error::SyntaxErrorType;
use super::error::SyntaxResult;
use super::lex::lex_template_string_continue;
use super::operator::Associativity;
use super::operator::ExprOperatorName;
use super::parse::Parser;
use super::parse_literal::normalise_literal_float;
use super::parse_literal::normalise_literal_int;
use super::parse_literal::normalise_literal_template_string_part;
use super::parse_operator::BINARY_OPERATOR_MAPPING;
use super::parse_operator::UNARY_OPERATOR_MAPPING;
use super::token::TokenType;
use crate::ast::Statement;
use crate::symbol::ScopeType;
use crate::token::TokenTypeSet;

impl<'a> Parser<'a> {
  /// Parses one expression that ends at a single `terminator` token type.
  ///
  /// This is a convenience wrapper around [`Self::parse_expr_until`] for the
  /// common case of exactly one terminator. Callers in this file consume the
  /// terminator themselves after this returns.
  pub fn parse_expr(&mut self, terminator: TokenType) -> SyntaxResult<Node<Expression>> {
    let terminators = TokenTypeSet::new(&[terminator]);
    self.parse_expr_until(terminators)
  }

  /// Parses one expression that ends at any token type in `terminators`.
  ///
  /// Delegates to the precedence parser with a starting minimum precedence
  /// of 1.
  pub fn parse_expr_until(&mut self, terminators: TokenTypeSet) -> SyntaxResult<Node<Expression>> {
    // NOTE(review): 1 is presumably the lowest precedence in the operator
    // tables, so that no binary operator is excluded — confirm.
    let starting_min_prec = 1;
    self.parse_expr_with_min_prec(starting_min_prec, terminators)
  }

  /// Parses a brace-delimited block expression without entering a new scope.
  ///
  /// Useful for callables, where scope starts at parameters, not block.
  ///
  /// Grammar handled here:
  /// - `{` opens the block and `}` closes it.
  /// - Each statement is followed by a `;`, except that a final *expression*
  ///   statement directly before `}` may omit it; that expression becomes the
  ///   block's `result`.
  /// - An empty block (`{}`) is accepted and produces no statements and no
  ///   result.
  ///
  /// # Errors
  ///
  /// Propagates any error from statement parsing, and fails if a required
  /// `{`, `;` or `}` is missing.
  pub fn parse_expr_block_without_new_scope(&mut self) -> SyntaxResult<Node<Expression>> {
    let loc_start = self.require(TokenType::BraceOpen)?.loc;
    let mut statements = Vec::new();
    let mut result = None;
    // Testing for the closing brace *before* parsing a statement handles both
    // the empty block and the position right after a trailing semicolon.
    while self.peek()?.typ != TokenType::BraceClose {
      let Node { loc, scope, stx } = self.parse_stmt(false)?;
      match (self.peek()?.typ == TokenType::BraceClose, *stx) {
        // A trailing expression without a semicolon is the block's value.
        (true, Statement::Expression { expression }) => {
          result = Some(expression);
          break;
        }
        // Anything else is an ordinary statement and must end with `;`.
        (_, stx) => {
          statements.push(Node {
            loc,
            scope,
            stx: Box::new(stx),
          });
        }
      };
      self.require(TokenType::Semicolon)?;
    }
    let loc_end = self.require(TokenType::BraceClose)?.loc;
    Ok(self.new_expression(loc_start + loc_end, Expression::Block {
      statements,
      result,
    }))
  }

  /// Parses a brace-delimited block expression inside a fresh block scope.
  ///
  /// A new `ScopeType::Block` scope is entered before the block is parsed and
  /// the previous scope is restored afterwards. The scope is restored on the
  /// error path as well, so a failed parse does not leave the parser inside
  /// the child scope.
  ///
  /// # Errors
  ///
  /// Propagates any error from parsing the block body.
  pub fn parse_expr_block(&mut self) -> SyntaxResult<Node<Expression>> {
    let parent_scope = self.enter_new_scope(ScopeType::Block);
    // Deliberately no `?` here: the scope must be restored before returning,
    // whether or not parsing succeeded.
    let res = self.parse_expr_block_without_new_scope();
    self.return_to_scope(parent_scope);
    res
  }

  /// Parses a closure expression: `|a, b| { ... }` or `|| { ... }`.
  ///
  /// A `ScopeType::Closure` scope is entered before the parameters are read,
  /// so each parameter is registered as a symbol of that scope and the body
  /// block is parsed without opening a further scope. The previous scope is
  /// restored before returning, on both the success and the error path.
  ///
  /// A trailing comma after the last parameter is accepted.
  ///
  /// # Errors
  ///
  /// Returns `SyntaxErrorType::RedeclaredVar` (located at the parameter name)
  /// if a parameter name cannot be added to the scope, and propagates any
  /// error from token requirements or from parsing the body.
  pub fn parse_expr_closure(&mut self) -> SyntaxResult<Node<Expression>> {
    let parent_scope = self.enter_new_scope(ScopeType::Closure);
    // All fallible work runs inside this immediately-invoked closure so that
    // an early `?` return cannot skip restoring the parent scope below.
    let closure = (|| -> SyntaxResult<Node<Expression>> {
      // `||` is lexed as a single token and means "no parameters".
      let (no_params, loc_start) = match self.consume_if(TokenType::BarBar)?.match_loc() {
        Some(loc) => (true, loc),
        None => (false, self.require(TokenType::Bar)?.loc),
      };
      let mut parameters = vec![];
      if !no_params {
        loop {
          if self.peek()?.typ == TokenType::Bar {
            break;
          };
          let (name_loc, name) = self.require_identifier_as_string()?;
          if !self.scope.add_symbol(name.clone()) {
            return Err(name_loc.error(SyntaxErrorType::RedeclaredVar, None));
          };
          parameters.push(name);
          if self.peek()?.typ == TokenType::Bar {
            break;
          };
          self.require(TokenType::Comma)?;
        }
        self.require(TokenType::Bar)?;
      };
      let body = self.parse_expr_block_without_new_scope()?;
      let loc_end = body.loc;
      Ok(self.new_expression(loc_start + loc_end, Expression::Closure {
        parameters,
        body,
      }))
    })();
    self.return_to_scope(parent_scope);
    closure
  }

  /// Parses an `if` expression with an optional `else` / `else if` chain.
  ///
  /// The condition is parsed up to the `{` that opens the consequent block.
  /// An `else` is followed either by another `if` expression (parsed
  /// recursively) or by a block. The resulting node spans from the `if`
  /// keyword to the end of the last branch.
  pub fn parse_expr_if(&mut self) -> SyntaxResult<Node<Expression>> {
    let if_loc = self.require(TokenType::KeywordIf)?.loc;
    let condition = self.parse_expr(TokenType::BraceOpen)?;
    let consequent = self.parse_expr_block()?;

    let mut alternate = None;
    if self.consume_if(TokenType::KeywordElse)?.is_match() {
      let is_else_if = self.peek()?.typ == TokenType::KeywordIf;
      let branch = if is_else_if {
        self.parse_expr_if()?
      } else {
        self.parse_expr_block()?
      };
      alternate = Some(branch);
    }

    // The expression ends where its last branch ends.
    let last_loc = match &alternate {
      Some(branch) => branch.loc,
      None => consequent.loc,
    };
    Ok(self.new_expression(if_loc + last_loc, Expression::If {
      condition,
      consequent,
      alternate,
    }))
  }

  /// Parses a parenthesised expression: `( expr )`.
  ///
  /// The inner expression node is returned as-is; no wrapper node is created
  /// and its location is not widened to include the parentheses.
  pub fn parse_expr_grouping(&mut self) -> SyntaxResult<Node<Expression>> {
    self.require(TokenType::ParenthesisOpen)?;
    let inner = self.parse_expr(TokenType::ParenthesisClose)?;
    self.require(TokenType::ParenthesisClose)?;
    Ok(inner)
  }

  /// Parses an array literal: `[a, b, c]`.
  ///
  /// Entries are comma-separated expressions; a trailing comma and the empty
  /// array `[]` are both accepted. The node spans from `[` to `]`.
  pub fn parse_expr_literal_array(&mut self) -> SyntaxResult<Node<Expression>> {
    let open_loc = self.require(TokenType::BracketOpen)?.loc;
    // Each entry ends at the next comma or at the closing bracket.
    let entry_end = TokenTypeSet::new(&[TokenType::Comma, TokenType::BracketClose]);
    let mut entries = Vec::new();
    while self.peek()?.typ != TokenType::BracketClose {
      let entry = self.parse_expr_until(entry_end)?;
      entries.push(entry);
      if self.peek()?.typ == TokenType::BracketClose {
        break;
      }
      self.require(TokenType::Comma)?;
    }
    let close_loc = self.require(TokenType::BracketClose)?.loc;
    Ok(self.new_expression(open_loc + close_loc, Expression::LiteralArray { entries }))
  }

  /// Parses an object literal: `{ key: value, other: value }`.
  ///
  /// Each field is an identifier key, a colon, and an expression. Fields are
  /// comma-separated; a trailing comma and the empty object `{}` are both
  /// accepted. The node spans from `{` to `}`.
  pub fn parse_expr_literal_object(&mut self) -> SyntaxResult<Node<Expression>> {
    let open_loc = self.require(TokenType::BraceOpen)?.loc;
    // Each field value ends at the next comma or at the closing brace.
    let value_end = TokenTypeSet::new(&[TokenType::Comma, TokenType::BraceClose]);
    let mut fields = Vec::new();
    while self.peek()?.typ != TokenType::BraceClose {
      let (_, key) = self.require_identifier_as_string()?;
      self.require(TokenType::Colon)?;
      let value = self.parse_expr_until(value_end)?;
      fields.push((key, value));
      if self.peek()?.typ == TokenType::BraceClose {
        break;
      }
      self.require(TokenType::Comma)?;
    }
    let close_loc = self.require(TokenType::BraceClose)?.loc;
    Ok(self.new_expression(open_loc + close_loc, Expression::LiteralObject { fields }))
  }

  fn parse_expr_operand(&mut self, terminators: TokenTypeSet) -> SyntaxResult<Node<Expression>> {
    let cp = self.checkpoint();
    let t = self.consume()?;
    let operand = if let Some(&operator) = UNARY_OPERATOR_MAPPING.get(&t.typ) {
      let next_min_prec =
        operator.precedence + (operator.associativity == Associativity::Left) as u8;
      let operand = self.parse_expr_with_min_prec(next_min_prec, terminators)?;
      self.new_expression(t.loc + operand.loc, Expression::Unary {
        operand,
        operator: operator.name,
      })
    } else {
      match t.typ {
        TokenType::BraceOpen => {
          self.restore_checkpoint(cp);
          self.parse_expr_literal_object()?
        }
        TokenType::BracketOpen => {
          self.restore_checkpoint(cp);
          self.parse_expr_literal_array()?
        }
        TokenType::Bar | TokenType::BarBar => {
          self.restore_checkpoint(cp);
          self.parse_expr_closure()?
        }
        TokenType::Identifier => self.new_expression(t.loc, Expression::Var {
          name: self.source_as_string(t.loc),
        }),
        TokenType::KeywordIf => {
          self.restore_checkpoint(cp);
          self.parse_expr_if()?
        }
        TokenType::LiteralFloat => self.new_expression(t.loc, Expression::LiteralFloat {
          value: normalise_literal_float(self.source(t.loc))
            .ok_or_else(|| t.loc.error(SyntaxErrorType::MalformedLiteral, None))?,
        }),
        TokenType::LiteralInt => self.new_expression(t.loc, Expression::LiteralInt {
          value: normalise_literal_int(self.source(t.loc))
            .ok_or_else(|| t.loc.error(SyntaxErrorType::MalformedLiteral, None))?,
        }),
        TokenType::LiteralNone => self.new_expression(t.loc, Expression::LiteralNone {}),
        TokenType::LiteralTemplatePartString => {
          let mut loc = t.loc;
          let mut parts = vec![];
          parts.push(LiteralTemplatePart::String(
            normalise_literal_template_string_part(self.source(t.loc))
              .ok_or_else(|| t.loc.error(SyntaxErrorType::MalformedLiteral, None))?,
          ));
          loop {
            let substitution = self.parse_expr(TokenType::BraceClose)?;
            self.require(TokenType::BraceClose)?;
            parts.push(LiteralTemplatePart::Substitution(substitution));
            let string = lex_template_string_continue(self.lexer_mut())?;
            loc.extend(string.loc);
            parts.push(LiteralTemplatePart::String(
              normalise_literal_template_string_part(self.source(string.loc))
                .ok_or_else(|| string.loc.error(SyntaxErrorType::MalformedLiteral, None))?,
            ));
            if let TokenType::LiteralTemplatePartStringEnd = string.typ {
              break;
            };
          }
          self.new_expression(loc, Expression::LiteralTemplateExpr { parts })
        }
        TokenType::LiteralTemplatePartStringEnd => {
          self.new_expression(t.loc, Expression::LiteralTemplateExpr {
            parts: vec![LiteralTemplatePart::String(
              normalise_literal_template_string_part(self.source(t.loc))
                .ok_or_else(|| t.loc.error(SyntaxErrorType::MalformedLiteral, None))?,
            )],
          })
        }
        TokenType::LiteralTrue | TokenType::LiteralFalse => {
          self.new_expression(t.loc, Expression::LiteralBoolean {
            value: t.typ == TokenType::LiteralTrue,
          })
        }
        TokenType::ParenthesisOpen => {
          self.restore_checkpoint(cp);
          self.parse_expr_grouping()?
        }
        _ => return Err(t.error(SyntaxErrorType::ExpectedSyntax("expression operand"))),
      }
    };
    Ok(operand)
  }

  /// Parses an expression by precedence climbing.
  ///
  /// An operand is parsed first; then, repeatedly, the next token is examined:
  /// - a token in `terminators` ends the expression and is left unconsumed;
  /// - a binary/postfix operator whose precedence is below `min_prec` also
  ///   ends the expression and is left unconsumed, so an enclosing call with a
  ///   lower `min_prec` can pick it up;
  /// - any other operator is applied to the expression built so far, which
  ///   becomes the new left-hand side.
  ///
  /// # Errors
  ///
  /// Returns `ExpectedSyntax("expression operator")` if the token following
  /// an operand is neither a terminator nor a known operator, and
  /// `BindingToNonImpl` if the left side of a bind operator is not a plain
  /// variable. Errors from nested parsing are propagated.
  fn parse_expr_with_min_prec(
    &mut self,
    min_prec: u8,
    terminators: TokenTypeSet,
  ) -> SyntaxResult<Node<Expression>> {
    let mut left = self.parse_expr_operand(terminators)?;

    loop {
      // Remember the position before the token so it can be "un-consumed".
      let cp = self.checkpoint();
      let t = self.consume()?;

      if terminators.contains(t.typ) {
        self.restore_checkpoint(cp);
        break;
      };

      let Some(operator) = BINARY_OPERATOR_MAPPING.get(&t.typ) else {
        return Err(t.error(SyntaxErrorType::ExpectedSyntax("expression operator")));
      };

      // This operator binds too loosely for the current level; leave it for
      // the caller.
      if operator.precedence < min_prec {
        self.restore_checkpoint(cp);
        break;
      };

      // For a left-associative operator the right-hand side must bind strictly
      // tighter, so an operator of equal precedence ends the right operand.
      // Only the generic binary case below uses this value.
      let next_min_prec =
        operator.precedence + (operator.associativity == Associativity::Left) as u8;

      left = match operator.name {
        // `value as Type`: the right side is a bare identifier, not an
        // expression.
        ExprOperatorName::As => {
          let (impl_name_loc, impl_name) = self.require_identifier_as_string()?;
          self.new_expression(left.loc + impl_name_loc, Expression::Cast {
            value: left,
            typ: impl_name,
          })
        }
        // Method binding: the left side must be a plain variable (taken as the
        // impl name), followed by a method name and an optional parenthesised
        // argument list.
        ExprOperatorName::Bind => {
          let loc_start = left.loc;
          let Expression::Var { name: impl_name } = left.stx.as_ref() else {
            return Err(loc_start.error(SyntaxErrorType::BindingToNonImpl, None));
          };
          let (method_name_loc, method_name) = self.require_identifier_as_string()?;
          let mut arguments = vec![];
          // Without an argument list the node ends at the method name.
          let mut loc_end = method_name_loc;
          if self.consume_if(TokenType::ParenthesisOpen)?.is_match() {
            loop {
              if self.peek()?.typ == TokenType::ParenthesisClose {
                break;
              };
              let value = self.parse_expr_until(TokenTypeSet::new(&[
                TokenType::Comma,
                TokenType::ParenthesisClose,
              ]))?;
              arguments.push(value);
              if !self.consume_if(TokenType::Comma)?.is_match() {
                break;
              };
            }
            loc_end = self.require(TokenType::ParenthesisClose)?.loc;
          };
          self.new_expression(loc_start + loc_end, Expression::BindMethod {
            impl_: impl_name.clone(),
            method: method_name,
            arguments,
          })
        }
        // Call: the operator token has already been consumed, so parse
        // comma-separated arguments (trailing comma allowed) up to `)`.
        ExprOperatorName::Call | ExprOperatorName::OptionalCall => {
          let mut arguments = vec![];
          loop {
            if self.peek()?.typ == TokenType::ParenthesisClose {
              break;
            };
            let value = self.parse_expr_until(TokenTypeSet::new(&[
              TokenType::Comma,
              TokenType::ParenthesisClose,
            ]))?;
            arguments.push(value);
            if !self.consume_if(TokenType::Comma)?.is_match() {
              break;
            };
          }
          let end = self.require(TokenType::ParenthesisClose)?;
          let optional = match operator.name {
            ExprOperatorName::OptionalCall => true,
            _ => false,
          };
          // A call whose callee is a field access becomes a method call on the
          // field's object; any other callee is called as a value.
          // NOTE(review): the `optional` bound by the `Expression::Field`
          // pattern shadows the call's own `optional` computed above, so
          // `CallMethod` carries the field access's flag and the call
          // operator's flag is dropped in that branch — confirm this is
          // intended.
          self.new_expression(
            left.loc + end.loc,
            if let Expression::Field {
              object,
              field,
              optional,
            } = *left.stx
            {
              Expression::CallMethod {
                object,
                method: field,
                arguments,
                optional,
              }
            } else {
              Expression::CallValue {
                callee: left,
                arguments,
                optional,
              }
            },
          )
        }
        // Indexing: parse the index expression up to the closing `]`.
        ExprOperatorName::Index | ExprOperatorName::OptionalIndex => {
          let index = self.parse_expr(TokenType::BracketClose)?;
          let end = self.require(TokenType::BracketClose)?;
          self.new_expression(left.loc + end.loc, Expression::Index {
            object: left,
            index,
            optional: match operator.name {
              ExprOperatorName::OptionalIndex => true,
              _ => false,
            },
          })
        }
        // Member access: the right side must be a single identifier token.
        ExprOperatorName::MemberAccess | ExprOperatorName::OptionalMemberAccess => {
          let right_tok = self.consume()?;
          let TokenType::Identifier = right_tok.typ else {
            return Err(
              right_tok.error(SyntaxErrorType::ExpectedSyntax("member access property")),
            );
          };
          let right = right_tok.loc;
          self.new_expression(left.loc + right, Expression::Field {
            object: left,
            field: self.source_as_string(right),
            optional: match operator.name {
              ExprOperatorName::OptionalMemberAccess => true,
              _ => false,
            },
          })
        }
        // Every other operator is an ordinary infix operator: parse the right
        // operand at the tighter precedence level and combine.
        _ => {
          let right = self.parse_expr_with_min_prec(next_min_prec, terminators)?;
          self.new_expression(left.loc + right.loc, Expression::Binary {
            operator: operator.name,
            left,
            right,
          })
        }
      };
    }

    Ok(left)
  }
}