arrow-parser 0.0.2

Parser for the Arrow programming language
Documentation
use super::error::SyntaxError;
use super::error::SyntaxErrorType;
use super::error::SyntaxResult;
use super::lex::lex_next;
use super::lex::Lexer;
use super::lex::LexerCheckpoint;
use super::source::SourceRange;
use super::token::Token;
use super::token::TokenType;
use crate::ast::Expression;
use crate::ast::ModuleItem;
use crate::ast::Node;
use crate::ast::Statement;
use crate::symbol::Scope;
use crate::symbol::ScopeType;

/// Result of a conditional consume: describes the token that was inspected and whether it was
/// accepted.
///
/// The type is `#[must_use]`, so the compiler warns when a caller discards it without looking at
/// whether the token matched.
#[must_use]
pub struct MaybeToken {
  /// Type of the token that was lexed; recorded whether or not it matched.
  typ: TokenType,
  /// Source location of the token that was lexed; recorded whether or not it matched.
  range: SourceRange,
  /// `true` when the token satisfied the caller's condition.
  matched: bool,
}

impl MaybeToken {
  /// Reports whether the inspected token was accepted.
  pub fn is_match(&self) -> bool {
    self.matched
  }

  /// Yields the location of the token when it was accepted, and `None` when it was not.
  pub fn match_loc(&self) -> Option<SourceRange> {
    if !self.matched {
      return None;
    }
    Some(self.range)
  }

  /// Builds a `SyntaxError` of kind `err` located at the inspected token and carrying that
  /// token's type.
  ///
  /// Meant for the "did not match" case: debug builds assert that the token was not accepted.
  pub fn error(&self, err: SyntaxErrorType) -> SyntaxError {
    debug_assert!(!self.matched);
    let found = Some(self.typ);
    SyntaxError::from_loc(self.range, err, found)
  }

  /// Runs `f` only when the token was accepted.
  ///
  /// Returns `Ok(Some(value))` with the value produced by `f` on a match, `Ok(None)` without
  /// calling `f` otherwise, and forwards any error returned by `f`.
  pub fn and_then<R, F: FnOnce() -> SyntaxResult<R>>(self, f: F) -> SyntaxResult<Option<R>> {
    if !self.matched {
      return Ok(None);
    }
    let value = f()?;
    Ok(Some(value))
  }
}

/// Opaque snapshot of the parser position, produced by `Parser::checkpoint` and accepted by
/// `Parser::since_checkpoint` and `Parser::restore_checkpoint`.
pub struct ParserCheckpoint {
  /// The underlying lexer checkpoint this snapshot wraps.
  checkpoint: LexerCheckpoint,
}

/// Parser state: the lexer that supplies tokens, plus the scope that newly created nodes are
/// tagged with.
pub struct Parser<'a> {
  /// Lexer that tokens, source bytes and checkpoints are obtained from.
  lexer: Lexer<'a>,
  /// The current scope, which will be changed while parsing. It is a field instead of being
  /// passed via method arguments to avoid having to propagate it to every method.
  pub(crate) scope: Scope,
}

impl<'a> Parser<'a> {
  /// Creates a parser that reads tokens from `lexer` and starts out in `root_scope`.
  pub fn new(lexer: Lexer<'a>, root_scope: Scope) -> Parser<'a> {
    Parser {
      lexer,
      scope: root_scope,
    }
  }

  /// Creates a new scope of type `typ` as a child of the current one and makes it the current
  /// scope.
  ///
  /// Returns the previous scope so the caller can pass it to `return_to_scope` once the nested
  /// construct has been parsed.
  pub fn enter_new_scope(&mut self, typ: ScopeType) -> Scope {
    let old = self.scope.clone();
    let new = old.new_child(typ);
    self.scope = new;
    old
  }

  /// Makes `old` the current scope again, typically the value returned by `enter_new_scope`.
  pub fn return_to_scope(&mut self, old: Scope) {
    self.scope = old;
  }

  /// Wraps `expr` in a `Node` located at `loc` and tagged with a clone of the current scope.
  pub fn new_expression(&self, loc: SourceRange, expr: Expression) -> Node<Expression> {
    Node {
      loc,
      stx: Box::new(expr),
      scope: self.scope.clone(),
    }
  }

  /// Wraps `stmt` in a `Node` located at `loc` and tagged with a clone of the current scope.
  pub fn new_statement(&self, loc: SourceRange, stmt: Statement) -> Node<Statement> {
    Node {
      loc,
      stx: Box::new(stmt),
      scope: self.scope.clone(),
    }
  }

  /// Wraps `item` in a `Node` located at `loc` and tagged with a clone of the current scope.
  pub fn new_module_item(&self, loc: SourceRange, item: ModuleItem) -> Node<ModuleItem> {
    Node {
      loc,
      stx: Box::new(item),
      scope: self.scope.clone(),
    }
  }

  /// Gives mutable access to the underlying lexer.
  pub fn lexer_mut(&mut self) -> &mut Lexer<'a> {
    &mut self.lexer
  }

  /// Returns the raw source bytes covered by `range`, obtained by indexing the lexer.
  pub fn source(&self, range: SourceRange) -> &[u8] {
    &self.lexer[range]
  }

  /// Returns an owned `String` holding the source text covered by `range`.
  ///
  /// The bytes are decoded with `String::from_utf8_lossy`: valid UTF-8 is copied unchanged,
  /// while any invalid sequence (for example when `range` ends inside a multi-byte character)
  /// is replaced with U+FFFD. This keeps the function sound for every input, which an unchecked
  /// conversion inside a safe function cannot guarantee.
  pub fn source_as_string(&self, range: SourceRange) -> String {
    String::from_utf8_lossy(self.source(range)).into_owned()
  }

  /// Returns the lexer's `source_range()`.
  pub fn source_range(&self) -> SourceRange {
    self.lexer.source_range()
  }

  /// Captures the current lexer position so that it can be measured from or restored later.
  pub fn checkpoint(&self) -> ParserCheckpoint {
    ParserCheckpoint {
      checkpoint: self.lexer.checkpoint(),
    }
  }

  /// Returns the range reported by the lexer's `since_checkpoint` for `checkpoint`.
  pub fn since_checkpoint(&self, checkpoint: ParserCheckpoint) -> SourceRange {
    self.lexer.since_checkpoint(checkpoint.checkpoint)
  }

  /// Rewinds the lexer to the position captured in `checkpoint`.
  pub fn restore_checkpoint(&mut self, checkpoint: ParserCheckpoint) {
    self.lexer.apply_checkpoint(checkpoint.checkpoint);
  }

  // Lexes the next token and shows it to `keep`. When `keep` returns `false` the lexer is rewound
  // to where it was before the call, so the token stays unconsumed. The token is returned in both
  // cases, together with the decision. A lexing error is propagated immediately, before any
  // rewind takes place.
  fn forward<K: FnOnce(&Token) -> bool>(&mut self, keep: K) -> SyntaxResult<(bool, Token)> {
    let cp = self.checkpoint();
    let t = lex_next(&mut self.lexer)?;
    let k = keep(&t);
    if !k {
      self.restore_checkpoint(cp);
    }
    Ok((k, t))
  }

  /// Returns the next token without consuming it.
  pub fn peek(&mut self) -> SyntaxResult<Token> {
    self.forward(|_| false).map(|(_, token)| token)
  }

  /// Consumes and returns the next token.
  pub fn consume(&mut self) -> SyntaxResult<Token> {
    self.forward(|_| true).map(|(_, token)| token)
  }

  /// Consumes the next token only if `pred` accepts it.
  ///
  /// The returned `MaybeToken` records the token's type and location whether or not it was
  /// accepted, which lets callers build an error for the rejected token.
  pub fn consume_if_pred<F: FnOnce(&Token) -> bool>(
    &mut self,
    pred: F,
  ) -> SyntaxResult<MaybeToken> {
    let (matched, t) = self.forward(pred)?;
    Ok(MaybeToken {
      typ: t.typ,
      matched,
      range: t.loc,
    })
  }

  /// Consumes the next token only if its type equals `typ`.
  pub fn consume_if(&mut self, typ: TokenType) -> SyntaxResult<MaybeToken> {
    self.consume_if_pred(|t| t.typ == typ)
  }

  /// Consumes the next token and returns it if `pred` accepts its type.
  ///
  /// On rejection the result is an `ExpectedSyntax(expected)` error built from that token. The
  /// token has already been consumed at that point; the parser is not rewound.
  pub fn require_predicate<P: FnOnce(TokenType) -> bool>(
    &mut self,
    pred: P,
    expected: &'static str,
  ) -> SyntaxResult<Token> {
    let t = self.consume()?;
    if pred(t.typ) {
      Ok(t)
    } else {
      Err(t.error(SyntaxErrorType::ExpectedSyntax(expected)))
    }
  }

  /// Consumes the next token and returns it if its type equals `typ`.
  ///
  /// Otherwise the result is a `RequiredTokenNotFound(typ)` error built from that token. The
  /// token has already been consumed at that point; the parser is not rewound.
  pub fn require(&mut self, typ: TokenType) -> SyntaxResult<Token> {
    let t = self.consume()?;
    if t.typ == typ {
      Ok(t)
    } else {
      Err(t.error(SyntaxErrorType::RequiredTokenNotFound(typ)))
    }
  }

  /// Requires the next token to be an identifier and returns its location together with its
  /// source text as an owned `String`.
  pub fn require_identifier_as_string(&mut self) -> SyntaxResult<(SourceRange, String)> {
    let loc = self.require(TokenType::Identifier)?.loc;
    Ok((loc, self.source_as_string(loc)))
  }
}