// orql 0.1.0 - Docs.rs

//! Parsing of value expressions.

use super::{
    Error, Location, MetaTracker, ParserInner, Prec, Result,
    condition::ParseConditionContext,
    parse_parens,
    precedence::{self, MIN_BINDING_POWER},
};
use crate::{
    ast::{BinaryExpr, BinaryExprOp, Expr, Ident, Identifier, Node, UnaryExpr, UnaryExprOp, Value},
    parser::parse_opened_parens,
    scanner::{Keyword, Reserved, Token, TokenType},
};

mod case;
mod function;

impl<'s, M> ParserInner<'s, M>
where
    M: MetaTracker<'s>,
{
    /// Retrieves an expression parser for the "default" context
    pub(super) fn expr_parser<'p>(&'p mut self) -> ExprParser<'p, 's, M> {
        ExprParser::default(self)
    }

    /// Parses a value expression in the "default" context
    ///
    /// See [ExprParserContext]
    pub(super) fn parse_expr(&mut self) -> Result<Expr<'s, M::NodeId>> {
        self.expr_parser().parse()
    }
}

// ----------------------------------------------------------------------------

/// A context driving the parsing of an expression.
///
/// It bundles the condition context to use for conditions nested in the
/// expression, two flags controlling which function call clauses are
/// accepted, and the "left over ident" slot.
#[derive(Debug)]
pub(super) struct ParseExprContext<'s, ID> {
    /// The corresponding context for parsing conditions (as part of
    /// expressions parsed by this expression context setting.)
    // 1. "[…] Aggregate functions can appear in select lists and in ORDER BY and HAVING clauses […]"
    // 2. "[…] Analytic functions can appear only in the select list or ORDER BY clause. […]"
    condition_context: ParseConditionContext,

    /// Allows aggregate function clauses after function calls, e.g. `F(..) WITHIN GROUP (..)`
    allow_aggregate_functions: bool,

    /// Allows analytical function clauses after function calls, e.g. `F(..) OVER ()`
    allow_analytic_functions: bool,

    /// Allows a consumed identifier after an expression to be returned to the
    /// caller for further processing (and not resulting in an error.)  This
    /// chiefly applies to analytical function clauses, in which the `OVER`
    /// reserved word must be consumed (to investigate the token following
    /// it) in order to determine whether the word introduces a clause or
    /// not. (In projection item position that word can also be used as an
    /// alias.)
    allow_left_over_ident: LeftOverIdent<'s, ID>,
}

/// Tells whether an identifier consumed past the end of an expression may be
/// handed back to the caller and, if so, holds that identifier.
#[derive(Debug)]
enum LeftOverIdent<'s, ID> {
    /// Left over identifiers are not tolerated.
    NotAllowed,
    /// Left over identifiers are tolerated; the payload is the identifier
    /// currently being held, if any.
    Allowed(Option<Node<Ident<'s>, ID>>),
}

impl<'s, ID> ParseExprContext<'s, ID> {
    /// Creates the default context: conditions are parsed with
    /// [ParseConditionContext::Default], neither aggregate nor analytic
    /// function clauses are allowed, and left over identifiers (after an
    /// expression) are not tolerated.
    pub(super) fn default() -> Self {
        let allow_left_over_ident = LeftOverIdent::NotAllowed;
        Self {
            allow_left_over_ident,
            allow_analytic_functions: false,
            allow_aggregate_functions: false,
            condition_context: ParseConditionContext::Default,
        }
    }

    /// Creates a context for parsing a top-level query projection item
    /// expression.  Aggregate and analytical function calls are allowed and
    /// a [left over identifier](Self::take_left_over_ident) is tolerated
    /// (which can then be used as an alias.)
    pub(super) fn for_projection_item() -> Self {
        // ~ start out with an empty "left over" slot
        let allow_left_over_ident = LeftOverIdent::Allowed(None);
        Self {
            allow_left_over_ident,
            allow_analytic_functions: true,
            allow_aggregate_functions: true,
            condition_context: ParseConditionContext::ForProjectionItem,
        }
    }

    /// Creates a context for parsing `order_by` expression items.
    /// Aggregate and analytical function calls are allowed, but left over
    /// identifiers are _not_ tolerated.
    pub(super) fn for_order_by() -> Self {
        let allow_left_over_ident = LeftOverIdent::NotAllowed;
        Self {
            allow_left_over_ident,
            allow_analytic_functions: true,
            allow_aggregate_functions: true,
            condition_context: ParseConditionContext::ForOrderBy,
        }
    }

    /// Creates a context for parsing `having` expression items.  Aggregate
    /// functions are allowed, analytical functions are not, and left over
    /// identifiers are _not_ tolerated.
    pub(super) fn for_having() -> Self {
        let allow_left_over_ident = LeftOverIdent::NotAllowed;
        Self {
            allow_left_over_ident,
            allow_analytic_functions: false,
            allow_aggregate_functions: true,
            condition_context: ParseConditionContext::ForHaving,
        }
    }

    /// Tells whether this context currently holds a left over identifier.
    /// Always `false` for contexts not tolerating left over identifiers.
    pub(super) fn has_left_over_ident(&self) -> bool {
        match &self.allow_left_over_ident {
            LeftOverIdent::Allowed(slot) => slot.is_some(),
            LeftOverIdent::NotAllowed => false,
        }
    }

    /// Removes and returns the left over identifier held by this context,
    /// if any.  Left over identifiers remain tolerated afterwards; only the
    /// slot is emptied.
    pub(super) fn take_left_over_ident(&mut self) -> Option<Node<Ident<'s>, ID>> {
        let LeftOverIdent::Allowed(slot) = &mut self.allow_left_over_ident else {
            return None;
        };
        slot.take()
    }

    /// Derives a new context sharing all settings with `self` except for the
    /// left over ident handling, which is set to `value`.
    fn with_left_over_ident(&self, value: LeftOverIdent<'s, ID>) -> Self {
        let condition_context = self.condition_context;
        let allow_aggregate_functions = self.allow_aggregate_functions;
        let allow_analytic_functions = self.allow_analytic_functions;
        Self {
            allow_left_over_ident: value,
            allow_analytic_functions,
            allow_aggregate_functions,
            condition_context,
        }
    }

    /// Takes the left over ident out of this context provided it matches
    /// `reserved`.
    ///
    /// Returns `None` if there is no left over ident, `Some(Ok(..))` with
    /// the (removed) ident if it matches `reserved`, and `Some(Err(()))`
    /// otherwise; in the latter case the ident stays in the context.
    fn take_left_over_ident_if_reserved(
        &mut self,
        reserved: Reserved,
    ) -> Option<std::result::Result<Node<Ident<'s>, ID>, ()>> {
        let LeftOverIdent::Allowed(slot) = &mut self.allow_left_over_ident else {
            return None;
        };
        // ~ bail out with `None` if the slot is empty
        let ident = slot.as_mut()?;
        if reserved.matches(ident) {
            slot.take().map(Ok)
        } else {
            Some(Err(()))
        }
    }
}

/// A lightweight struct to aid the parsing of expressions.
///
/// It pairs a mutable borrow of the underlying [ParserInner] with the
/// [ParseExprContext] to apply while parsing.
pub(super) struct ExprParser<'p, 's, M>
where
    M: MetaTracker<'s>,
{
    // ~ the underlying parser which tokens are pulled from
    inner: &'p mut ParserInner<'s, M>,
    // ~ the settings driving this parser; also holds a "left over ident"
    // if one is encountered
    context: ParseExprContext<'s, M::NodeId>,
}

/// Grants access to the underlying parser the expression parser operates
/// upon.
impl<'p, 's, M> AsMut<ParserInner<'s, M>> for ExprParser<'p, 's, M>
where
    M: MetaTracker<'s>,
{
    fn as_mut(&mut self) -> &mut ParserInner<'s, M> {
        &mut *self.inner
    }
}

impl<'p, 's, M> ExprParser<'p, 's, M>
where
    M: MetaTracker<'s>,
{
    /// Tells whether an expression may begin with the token `t`.
    pub(super) fn is_start_token(t: &Token<'s>) -> bool {
        // ~ see also [Self::parse_left_]
        let ttype = &t.ttype;
        matches!(
            ttype,
            // ~ literals and placeholders
            TokenType::Keyword(Keyword::NULL)
                | TokenType::Integer(_)
                | TokenType::Float(_)
                | TokenType::Text(_, _)
                | TokenType::QuestionMark
                | TokenType::Placeholder(_)
                // ~ identifiers (incl. function calls)
                | TokenType::Identifier(_, _)
                // ~ unary operators and nested expressions
                | TokenType::Plus
                | TokenType::Minus
                | TokenType::LeftParen
        )
    }

    /// Sets up a new parser for expressions with the given context
    /// configuration.
    fn new(inner: &'p mut ParserInner<'s, M>, context: ParseExprContext<'s, M::NodeId>) -> Self {
        Self { inner, context }
    }

    /// Sets up a new parser for expressions with the default context
    /// configuration.
    fn default(inner: &'p mut ParserInner<'s, M>) -> Self {
        Self::new(inner, ParseExprContext::default())
    }

    /// Borrows the current context
    pub(super) fn context(&self) -> &ParseExprContext<'s, M::NodeId> {
        &self.context
    }

    /// Resets the context
    pub(super) fn with_context(self, context: ParseExprContext<'s, M::NodeId>) -> Self {
        Self {
            inner: self.inner,
            context,
        }
    }

    /// Extracts the context (consuming `self`)
    pub(super) fn into_context(self) -> ParseExprContext<'s, M::NodeId> {
        self.context
    }

    /// The primary method of the expression parser.
    pub(super) fn parse(&mut self) -> Result<Expr<'s, M::NodeId>> {
        self.parse_(MIN_BINDING_POWER)
    }

    /// Parses a full expression, consuming operators as long as their
    /// binding power is equal to or higher than `min_bp`.
    fn parse_(&mut self, min_bp: Prec) -> Result<Expr<'s, M::NodeId>> {
        self.parse_left_(min_bp)
            .and_then(|lhs| self.parse_right_(lhs, min_bp))
    }

    /// Continues parsing an expression whose first element, the already
    /// fully determined identifier `ident` located at `loc`, has been
    /// consumed by the caller.
    pub(super) fn parse_with_identifier(
        &mut self,
        ident: Identifier<'s, M::NodeId>,
        loc: Location,
    ) -> Result<Expr<'s, M::NodeId>> {
        self.parse_left_with_identifier_(ident, loc)
            .and_then(|lhs| self.parse_right_(lhs, MIN_BINDING_POWER))
    }

    /// Like [Self::parse_with_identifier], but given only the first word of
    /// a possibly compound identifier.  That is, the full identifier is
    /// determined first, starting from `ident`, and the rest is then parsed
    /// as an expression.
    pub(super) fn parse_with_ident(
        &mut self,
        ident: Node<Ident<'s>, M::NodeId>,
        loc: Location,
    ) -> Result<Expr<'s, M::NodeId>> {
        self.parse_left_with_ident(ident, loc)
            .and_then(|lhs| self.parse_right_(lhs, MIN_BINDING_POWER))
    }

    /// Parses the left hand side of a binary expression.
    ///
    /// Takes the next token from the underlying parser and turns it into a
    /// literal / placeholder value, an identifier based expression (incl.
    /// `CASE` expressions), a unary expression, a sub-query, or a nested
    /// (parenthesized) expression.  `min_bp` is merely forwarded when
    /// probing a reserved `CASE` word.  See also [Self::is_start_token].
    // XXX PRIOR and COLLATE; See <https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Compound-Expressions.html>
    fn parse_left_(&mut self, min_bp: Prec) -> Result<Expr<'s, M::NodeId>> {
        let inner = &mut *self.inner;
        // NOTE(review): `expect_token!` presumably fails with an error
        // mentioning "a value or an expression" if the token matches none of
        // the arms below -- confirm against the macro definition
        let expr = expect_token!(|t = inner.next_token()| "a value or an expression" match {
            // ~ literals and placeholders map directly onto a value node
            TokenType::Keyword(Keyword::NULL) => {
                Expr::Value(Node(Value::Null, inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::QuestionMark => {
                Expr::Value(Node(Value::Placeholder(None), inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::Integer(lit) => {
                Expr::Value(Node(Value::Integer(lit), inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::Float(lit) => {
                Expr::Value(Node(Value::Float(lit), inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::Text(text, national) => {
                Expr::Value(Node(Value::Text(text, national), inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::Placeholder(ident) => {
                Expr::Value(Node(Value::Placeholder(Some(ident)), inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::Identifier(ident, reserved) => {
                let ident_node = Node(ident, inner.meta_tracker.on_node_start(t.loc));
                // ~ the reserved word `CASE` either introduces a case
                // expression or is to be treated as a regular identifier
                if let Some(Reserved::CASE) = reserved {
                    match self.parse_case_ident_(ident_node, min_bp)? {
                        ParseCaseIdent::Ident(ident) => self.parse_left_with_ident(ident, t.loc)?,
                        ParseCaseIdent::Expr(expr) => expr,
                    }
                } else {
                    self.parse_left_with_ident(ident_node, t.loc)?
                }
            }
            // ~ unary prefix operators
            TokenType::Plus => self.parse_unary_(UnaryExprOp::Add, t.loc)?,
            TokenType::Minus => self.parse_unary_(UnaryExprOp::Sub, t.loc)?,
            TokenType::LeftParen => {
                // NOTE(review): `parse_opened_parens` presumably takes care of
                // the matching closing paren -- confirm against its definition
                parse_opened_parens(self, t.into(), |self_, node_id| {
                    // ~ `SELECT` or `WITH` right after the opening paren
                    // denote a sub-query; anything else a nested expression
                    Ok(if let Some(Token { ttype: TokenType::Keyword(kw) , ..} ) = self_.inner.peek_token()?
                        && matches!(kw, Keyword::SELECT | Keyword::WITH)
                        {
                            Expr::SubQuery(Node(self_.inner.parse_query()?.into(), node_id))
                        } else {
                            // ~ no "left over idents" allowed within the parens
                            let expr = ExprParser::new(
                                &mut *self_.inner,
                                self_.context.with_left_over_ident(LeftOverIdent::NotAllowed)
                            ).parse()?;
                            Expr::Nested(Node(expr.into(), node_id))
                        })
                })?
            }
        });
        Ok(expr)
    }

    /// Continues parsing a left hand side expression starting with the
    /// given, initial ident (i.e. the possibly first word of a compound
    /// identifier) ...
    ///
    /// ... unless there is a left over ident remembered by the context, in
    /// which case `ident` alone makes up the expression.  This method is
    /// typically invoked after [Self::parse_case_ident].
    fn parse_left_with_ident(
        &mut self,
        ident: Node<Ident<'s>, M::NodeId>,
        loc: Location,
    ) -> Result<Expr<'s, M::NodeId>> {
        if self.context.has_left_over_ident() {
            return Ok(Expr::Identifier(Identifier::Simple(ident)));
        }
        // ~ complete the (possibly compound) identifier first
        let identifier = self.inner.parse_identifier_(ident)?;
        self.parse_left_with_identifier_(identifier, loc)
    }

    /// Continues parsing a left hand side starting with the given, full
    /// identifier.  If the identifier is directly followed by an opening
    /// paren it is parsed as a function call, otherwise it is turned into
    /// an [Expr::Identifier].  This method does _not_ attempt to parse a
    /// right hand side.
    fn parse_left_with_identifier_(
        &mut self,
        ident: Identifier<'s, M::NodeId>,
        loc: Location,
    ) -> Result<Expr<'s, M::NodeId>> {
        let followed_by_paren = matches!(
            self.inner.peek_token()?,
            Some(Token {
                ttype: TokenType::LeftParen,
                ..
            })
        );
        if !followed_by_paren {
            return Ok(Expr::Identifier(ident));
        }
        self.parse_function_call_(ident, loc)
    }

    /// Parses an unary expression given the unary operator and its location.
    pub(super) fn parse_unary(
        &mut self,
        op: UnaryExprOp,
        loc: Location,
    ) -> Result<Expr<'s, M::NodeId>> {
        self.parse_unary_(op, loc)
    }

    /// Builds a unary expression for `op` (located at `loc`), parsing the
    /// operand with the binding power the unary operator dictates.
    fn parse_unary_(&mut self, op: UnaryExprOp, loc: Location) -> Result<Expr<'s, M::NodeId>> {
        // ~ register the operator node before descending into the operand
        let op_node = Node(op, self.inner.meta_tracker.on_node_start(loc));
        let operand_bp = precedence::unary(precedence::UnaryOp::Expr(op)).1;
        let expr = self.parse_(operand_bp)?;
        Ok(Expr::Unary(Box::new(UnaryExpr { op: op_node, expr })))
    }

    /// Given a left hand side, parses a right hand side to complete the expression.
    pub(super) fn parse_right(
        &mut self,
        left: Expr<'s, M::NodeId>,
        min_bp: Prec,
    ) -> Result<Expr<'s, M::NodeId>> {
        self.parse_right_(left, min_bp)
    }

    /// Given the left hand side `left`, repeatedly consumes binary operators
    /// (`+`, `-`, `*`, `/`, `||`) and their right hand operands, folding
    /// them into `left`.  Stops at the first token which is not such an
    /// operator or at an operator whose left binding power is lower than
    /// `min_bp`; that token is left unconsumed.
    ///
    /// Fails with [Error::Unbalanced] upon a closing paren while the
    /// parser's nest level is zero.
    fn parse_right_(
        &mut self,
        mut left: Expr<'s, M::NodeId>,
        min_bp: Prec,
    ) -> Result<Expr<'s, M::NodeId>> {
        // ~ if the context allowed for a "left over ident" to the left hand
        // side (it is very likely an alias not to be confused with an
        // operator) and there is such a "left over" ... that's the end of
        // the expression.
        if self.context.has_left_over_ident() {
            return Ok(left);
        }

        // ~ tokens are only peeked at here; they get consumed further below
        // once it is clear they belong to this expression
        while let Some(t) = self.inner.peek_token()? {
            let op = match t.ttype {
                TokenType::Plus => BinaryExprOp::Add,
                TokenType::Minus => BinaryExprOp::Sub,
                TokenType::Star => BinaryExprOp::Mul,
                TokenType::Slash => BinaryExprOp::Div,
                TokenType::PipePipe => BinaryExprOp::Concat,
                TokenType::RightParen => {
                    // ~ a closing paren ends the expression; it is an error
                    // if there is no paren open at all
                    let loc = t.loc;
                    if self.inner.nest_level == 0 {
                        return Err(Error::Unbalanced { loc });
                    }
                    break;
                }
                _ => break,
            };
            let (l_bp, r_bp) = precedence::binary(precedence::BinaryOp::Expr(op));
            // ~ the operator binds weaker than what we were asked to parse;
            // leave it to the caller
            if l_bp < min_bp {
                break;
            }
            let node_id = {
                let loc = t.loc;
                self.inner.consume_token()?;
                self.inner.meta_tracker.on_node_start(loc)
            };
            // ~ the right operand takes all operators binding at least as
            // strong as `r_bp`
            let right = self.parse_(r_bp)?;
            left = Expr::Binary(Box::new(BinaryExpr {
                left,
                op: Node(op, node_id),
                right,
            }));
        }
        Ok(left)
    }

    /// Tries to see if a `CASE` ident can be parsed into a `case_expr`.
    pub(super) fn parse_case_ident(
        &mut self,
        reserved_case_token: Node<Ident<'s>, M::NodeId>,
    ) -> Result<ParseCaseIdent<'s, M::NodeId>> {
        self.parse_case_ident_(reserved_case_token, MIN_BINDING_POWER)
    }

    /// Parses an integer literal.
    ///
    /// Takes the next token from the underlying parser and, if it is an
    /// integer, wraps it into an [Expr::Value] node.
    // NOTE(review): for any other token `expect_token!` presumably yields an
    // error mentioning "an integer" -- confirm against the macro definition
    fn parse_integer(&mut self) -> Result<Expr<'s, M::NodeId>> {
        expect_token!(|t = (self.inner).next_token()| "an integer" match {
            TokenType::Integer(lexem) => Ok(Expr::Value(Node(Value::Integer(lexem), self.inner.meta_tracker.on_node_start(t.loc)))),
        })
    }
}

// ----------------------------------------------------------------------------

/// Outcome of probing a reserved `CASE` word, as returned by
/// [ExprParser::parse_case_ident].
pub(super) enum ParseCaseIdent<'s, ID> {
    /// Denotes that the reserved case token is to be parsed as an identifier
    Ident(Node<Ident<'s>, ID>),
    /// The reserved case token was parsed into an expression
    Expr(Expr<'s, ID>),
}

impl<'s, ID> From<Expr<'s, ID>> for ParseCaseIdent<'s, ID> {
    fn from(value: Expr<'s, ID>) -> Self {
        Self::Expr(value)
    }
}