orql 0.1.0

A toy SQL parser for a subset of the Oracle dialect.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
//! Parsing of value expressions.

use super::{
    Error, Location, MetaTracker, ParserInner, Prec, Result,
    condition::ParseConditionContext,
    parse_parens,
    precedence::{self, MIN_BINDING_POWER},
};
use crate::{
    ast::{BinaryExpr, BinaryExprOp, Expr, Ident, Identifier, Node, UnaryExpr, UnaryExprOp, Value},
    parser::parse_opened_parens,
    scanner::{Keyword, Reserved, Token, TokenType},
};

mod case;
mod function;

impl<'s, M> ParserInner<'s, M>
where
    M: MetaTracker<'s>,
{
    /// Retrieves an expression parser for the "default" context
    pub(super) fn expr_parser<'p>(&'p mut self) -> ExprParser<'p, 's, M> {
        ExprParser::default(self)
    }

    /// Parses a single value expression using the "default" context.
    ///
    /// See [ParseExprContext::default]
    pub(super) fn parse_expr(&mut self) -> Result<Expr<'s, M::NodeId>> {
        let mut parser = self.expr_parser();
        parser.parse()
    }
}

// ----------------------------------------------------------------------------

/// A context driving the parsing of an expression.
///
/// Instances are created through the constructors [Self::default],
/// [Self::for_projection_item], [Self::for_order_by], and [Self::for_having].
#[derive(Debug)]
pub(super) struct ParseExprContext<'s, ID> {
    /// The corresponding context for parsing conditions (as part of
    /// expressions parsed with this expression context setting.)
    // 1. "[…] Aggregate functions can appear in select lists and in ORDER BY and HAVING clauses […]"
    // 2. "[…] Analytic functions can appear only in the select list or ORDER BY clause. […]"
    condition_context: ParseConditionContext,

    /// Allows aggregate function clauses after function calls, e.g. `F(..) WITHIN GROUP (..)`
    allow_aggregate_functions: bool,

    /// Allows analytic function clauses after function calls, e.g. `F(..) OVER ()`
    allow_analytic_functions: bool,

    /// Allows a consumed identifier after an expression to be returned to the
    /// caller for further processing (and not resulting in an error.)  This
    /// chiefly applies to analytic function clauses in which the `OVER`
    /// reserved word must be consumed (to investigate the token following it)
    /// in order to determine whether the word introduces a clause or not (in
    /// projection item position that word can also be used as an alias.)
    allow_left_over_ident: LeftOverIdent<'s, ID>,
}

/// Tells whether an identifier "left over" after an expression is tolerated
/// and, if so, holds that identifier (if any.)
#[derive(Debug)]
enum LeftOverIdent<'s, ID> {
    /// Left over identifiers are not tolerated.
    NotAllowed,
    /// Left over identifiers are tolerated; the payload is the currently held
    /// identifier node or `None` if there is none.
    Allowed(Option<Node<Ident<'s>, ID>>),
}

impl<'s, ID> ParseExprContext<'s, ID> {
    /// Builds the "default" context: neither aggregate nor analytic function
    /// clauses are allowed and left over identifiers (after an expression)
    /// are not tolerated.
    pub(super) fn default() -> Self {
        let condition_context = ParseConditionContext::Default;
        let allow_left_over_ident = LeftOverIdent::NotAllowed;
        Self {
            condition_context,
            allow_aggregate_functions: false,
            allow_analytic_functions: false,
            allow_left_over_ident,
        }
    }

    /// Builds the context for parsing a top-level query projection item
    /// expression.  Aggregate and analytic function clauses are allowed and
    /// a [left over identifier](Self::take_left_over_ident) is tolerated
    /// (which can then be used as an alias.)
    pub(super) fn for_projection_item() -> Self {
        let condition_context = ParseConditionContext::ForProjectionItem;
        // ~ tolerated, but nothing held yet
        let allow_left_over_ident = LeftOverIdent::Allowed(None);
        Self {
            condition_context,
            allow_aggregate_functions: true,
            allow_analytic_functions: true,
            allow_left_over_ident,
        }
    }

    /// Builds the context for parsing `order_by` expression items.
    /// Aggregate and analytic function clauses are allowed, but left over
    /// identifiers are _not_ tolerated.
    pub(super) fn for_order_by() -> Self {
        let condition_context = ParseConditionContext::ForOrderBy;
        let allow_left_over_ident = LeftOverIdent::NotAllowed;
        Self {
            condition_context,
            allow_aggregate_functions: true,
            allow_analytic_functions: true,
            allow_left_over_ident,
        }
    }

    /// Builds the context for parsing `having` expression items.  Aggregate
    /// function clauses are allowed, analytic ones are not, and left over
    /// identifiers are _not_ tolerated.
    pub(super) fn for_having() -> Self {
        let condition_context = ParseConditionContext::ForHaving;
        let allow_left_over_ident = LeftOverIdent::NotAllowed;
        Self {
            condition_context,
            allow_aggregate_functions: true,
            allow_analytic_functions: false,
            allow_left_over_ident,
        }
    }

    /// Tells whether this context currently holds a left over identifier.
    pub(super) fn has_left_over_ident(&self) -> bool {
        match &self.allow_left_over_ident {
            LeftOverIdent::Allowed(held) => held.is_some(),
            LeftOverIdent::NotAllowed => false,
        }
    }

    /// Removes and returns the held left over identifier, if any.  After this
    /// call the context no longer holds a left over identifier.
    pub(super) fn take_left_over_ident(&mut self) -> Option<Node<Ident<'s>, ID>> {
        if let LeftOverIdent::Allowed(held) = &mut self.allow_left_over_ident {
            held.take()
        } else {
            None
        }
    }

    /// Derives a new context carrying over the settings of `self` but using
    /// `value` as its left-over-ident state.
    fn with_left_over_ident(&self, value: LeftOverIdent<'s, ID>) -> Self {
        let Self {
            condition_context,
            allow_aggregate_functions,
            allow_analytic_functions,
            ..
        } = self;
        Self {
            condition_context: *condition_context,
            allow_aggregate_functions: *allow_aggregate_functions,
            allow_analytic_functions: *allow_analytic_functions,
            allow_left_over_ident: value,
        }
    }

    /// Takes the left over ident if it matches `reserved`. Returns `None` if
    /// there is no left over ident, `Some(Ok(..))` if there is one and it
    /// matches `reserved` (the ident is then removed from the context),
    /// otherwise `Some(Err(()))` (the ident stays in the context.)
    fn take_left_over_ident_if_reserved(
        &mut self,
        reserved: Reserved,
    ) -> Option<std::result::Result<Node<Ident<'s>, ID>, ()>> {
        let LeftOverIdent::Allowed(slot) = &mut self.allow_left_over_ident else {
            return None;
        };
        // ~ nothing held => nothing to take
        let ident = slot.as_mut()?;
        if reserved.matches(ident) {
            slot.take().map(Ok)
        } else {
            Some(Err(()))
        }
    }
}

/// A lightweight struct to aid the parsing of expressions.
///
/// Pairs a mutable borrow of the underlying [ParserInner] with the
/// [ParseExprContext] steering the parsing.
pub(super) struct ExprParser<'p, 's, M>
where
    M: MetaTracker<'s>,
{
    /// The underlying parser providing the tokens and the meta tracker.
    inner: &'p mut ParserInner<'s, M>,
    /// The settings and the left-over-ident state of this expression parser.
    context: ParseExprContext<'s, M::NodeId>,
}

/// Gives access to the underlying [ParserInner] of an expression parser.
impl<'p, 's, M> AsMut<ParserInner<'s, M>> for ExprParser<'p, 's, M>
where
    M: MetaTracker<'s>,
{
    fn as_mut(&mut self) -> &mut ParserInner<'s, M> {
        &mut *self.inner
    }
}

impl<'p, 's, M> ExprParser<'p, 's, M>
where
    M: MetaTracker<'s>,
{
    /// Tells whether the token `t` can start an expression.
    pub(super) fn is_start_token(t: &Token<'s>) -> bool {
        // ~ see also [Self::parse_left_]
        // ~ tokens forming a value on their own
        let starts_value = matches!(
            t.ttype,
            TokenType::Keyword(Keyword::NULL)
                | TokenType::QuestionMark
                | TokenType::Integer(_)
                | TokenType::Float(_)
                | TokenType::Text(_, _)
                | TokenType::Placeholder(_)
        );
        // ~ unary operators, parens, and identifiers
        let starts_compound = matches!(
            t.ttype,
            TokenType::Plus
                | TokenType::Minus
                | TokenType::LeftParen
                | TokenType::Identifier(_, _)
        );
        starts_value || starts_compound
    }

    /// Sets up a new parser for expressions with the given context
    /// configuration.
    fn new(inner: &'p mut ParserInner<'s, M>, context: ParseExprContext<'s, M::NodeId>) -> Self {
        Self { inner, context }
    }

    /// Creates an expression parser on top of `inner` using the default
    /// context configuration (see [ParseExprContext::default].)
    fn default(inner: &'p mut ParserInner<'s, M>) -> Self {
        let context = ParseExprContext::default();
        Self::new(inner, context)
    }

    /// Borrows the current context
    pub(super) fn context(&self) -> &ParseExprContext<'s, M::NodeId> {
        &self.context
    }

    /// Resets the context
    pub(super) fn with_context(self, context: ParseExprContext<'s, M::NodeId>) -> Self {
        Self {
            inner: self.inner,
            context,
        }
    }

    /// Extracts the context (consuming `self`)
    pub(super) fn into_context(self) -> ParseExprContext<'s, M::NodeId> {
        self.context
    }

    /// The primary method of the expression parser: parses a full expression
    /// starting out with the minimum binding power.
    pub(super) fn parse(&mut self) -> Result<Expr<'s, M::NodeId>> {
        let lhs = self.parse_left_(MIN_BINDING_POWER)?;
        self.parse_right_(lhs, MIN_BINDING_POWER)
    }

    /// Parses a full expression, consuming operators as long as their binding
    /// power is equal to or higher than `min_bp`.
    fn parse_(&mut self, min_bp: Prec) -> Result<Expr<'s, M::NodeId>> {
        self.parse_left_(min_bp)
            .and_then(|lhs| self.parse_right_(lhs, min_bp))
    }

    /// Parses an expression whose first element, the already fully
    /// determined identifier `ident` located at `loc`, has been consumed by
    /// the caller.
    pub(super) fn parse_with_identifier(
        &mut self,
        ident: Identifier<'s, M::NodeId>,
        loc: Location,
    ) -> Result<Expr<'s, M::NodeId>> {
        self.parse_left_with_identifier_(ident, loc)
            .and_then(|lhs| self.parse_right_(lhs, MIN_BINDING_POWER))
    }

    /// Like [Self::parse_with_identifier], but `ident` is merely the first
    /// word of a possibly compound identifier.  That is, the full identifier
    /// is determined first (starting with `ident`) and the rest is then
    /// parsed as an expression.
    pub(super) fn parse_with_ident(
        &mut self,
        ident: Node<Ident<'s>, M::NodeId>,
        loc: Location,
    ) -> Result<Expr<'s, M::NodeId>> {
        self.parse_left_with_ident(ident, loc)
            .and_then(|lhs| self.parse_right_(lhs, MIN_BINDING_POWER))
    }

    /// Parses the left hand side of a binary expression.
    ///
    /// Consumes the next token and, depending on its type, produces:
    /// - a literal/placeholder value ([Expr::Value]),
    /// - a `CASE` expression, an identifier or a function call (for
    ///   identifier tokens),
    /// - a unary expression (for a leading `+` or `-`),
    /// - a sub-query or a nested expression (for an opening paren.)
    ///
    /// `min_bp` is passed on when attempting to parse a `CASE` expression.
    // NOTE(review): any other token (or the lack of one) is presumably turned
    // into an error by `expect_token!` - the macro is defined elsewhere.
    // XXX PRIOR and COLLATE; See <https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Compound-Expressions.html>
    fn parse_left_(&mut self, min_bp: Prec) -> Result<Expr<'s, M::NodeId>> {
        let inner = &mut *self.inner;
        let expr = expect_token!(|t = inner.next_token()| "a value or an expression" match {
            // ~ literals and placeholders map directly to value nodes
            TokenType::Keyword(Keyword::NULL) => {
                Expr::Value(Node(Value::Null, inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::QuestionMark => {
                Expr::Value(Node(Value::Placeholder(None), inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::Integer(lit) => {
                Expr::Value(Node(Value::Integer(lit), inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::Float(lit) => {
                Expr::Value(Node(Value::Float(lit), inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::Text(text, national) => {
                Expr::Value(Node(Value::Text(text, national), inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::Placeholder(ident) => {
                Expr::Value(Node(Value::Placeholder(Some(ident)), inner.meta_tracker.on_node_start(t.loc)))
            }
            TokenType::Identifier(ident, reserved) => {
                let ident_node = Node(ident, inner.meta_tracker.on_node_start(t.loc));
                // ~ the reserved word `CASE` may start a case expression or
                // turn out to be a plain identifier
                if let Some(Reserved::CASE) = reserved {
                    match self.parse_case_ident_(ident_node, min_bp)? {
                        ParseCaseIdent::Ident(ident) => self.parse_left_with_ident(ident, t.loc)?,
                        ParseCaseIdent::Expr(expr) => expr,
                    }
                } else {
                    self.parse_left_with_ident(ident_node, t.loc)?
                }
            }
            TokenType::Plus => self.parse_unary_(UnaryExprOp::Add, t.loc)?,
            TokenType::Minus => self.parse_unary_(UnaryExprOp::Sub, t.loc)?,
            TokenType::LeftParen => {
                parse_opened_parens(self, t.into(), |self_, node_id| {
                    // ~ a `SELECT` or `WITH` keyword right after the opening
                    // paren denotes a sub-query
                    Ok(if let Some(Token { ttype: TokenType::Keyword(kw) , ..} ) = self_.inner.peek_token()?
                        && matches!(kw, Keyword::SELECT | Keyword::WITH)
                        {
                            Expr::SubQuery(Node(self_.inner.parse_query()?.into(), node_id))
                        } else {
                            // ~ no "left over idents" allowed within the parens
                            let expr = ExprParser::new(
                                &mut *self_.inner,
                                self_.context.with_left_over_ident(LeftOverIdent::NotAllowed)
                            ).parse()?;
                            Expr::Nested(Node(expr.into(), node_id))
                        })
                })?
            }
        });
        Ok(expr)
    }

    /// Continues parsing a left hand side expression starting with the given,
    /// initial ident (i.e. the possibly first word of a compound identifier) ...
    ///
    /// ... unless the context holds a left over ident, in which case `ident`
    /// is returned as a simple identifier right away. This method is
    /// typically invoked after [Self::parse_case_ident].
    fn parse_left_with_ident(
        &mut self,
        ident: Node<Ident<'s>, M::NodeId>,
        loc: Location,
    ) -> Result<Expr<'s, M::NodeId>> {
        if !self.context.has_left_over_ident() {
            let identifier = self.inner.parse_identifier_(ident)?;
            return self.parse_left_with_identifier_(identifier, loc);
        }
        Ok(Expr::Identifier(Identifier::Simple(ident)))
    }

    /// Continues parsing a left hand side starting with the given
    /// identifier. If the next token is an opening paren, the identifier is
    /// parsed as (the name of) a function call; otherwise it is turned into
    /// an [Expr::Identifier]. This method does _not_ attempt to parse a right
    /// hand side.
    fn parse_left_with_identifier_(
        &mut self,
        ident: Identifier<'s, M::NodeId>,
        loc: Location,
    ) -> Result<Expr<'s, M::NodeId>> {
        let followed_by_paren = matches!(
            self.inner.peek_token()?,
            Some(Token {
                ttype: TokenType::LeftParen,
                ..
            })
        );
        if !followed_by_paren {
            return Ok(Expr::Identifier(ident));
        }
        self.parse_function_call_(ident, loc)
    }

    /// Parses a unary expression given the (already consumed) unary operator
    /// and its location.
    pub(super) fn parse_unary(
        &mut self,
        op: UnaryExprOp,
        loc: Location,
    ) -> Result<Expr<'s, M::NodeId>> {
        self.parse_unary_(op, loc)
    }

    /// Parses the operand of the unary operator `op` (located at `loc`) and
    /// wraps both into an [Expr::Unary].
    fn parse_unary_(&mut self, op: UnaryExprOp, loc: Location) -> Result<Expr<'s, M::NodeId>> {
        // ~ register the operator node before descending into the operand
        let op_node = Node(op, self.inner.meta_tracker.on_node_start(loc));
        let operand_bp = precedence::unary(precedence::UnaryOp::Expr(op)).1;
        let expr = self.parse_(operand_bp)?;
        Ok(Expr::Unary(Box::new(UnaryExpr { op: op_node, expr })))
    }

    /// Given a left hand side, parses a right hand side to complete the expression.
    pub(super) fn parse_right(
        &mut self,
        left: Expr<'s, M::NodeId>,
        min_bp: Prec,
    ) -> Result<Expr<'s, M::NodeId>> {
        self.parse_right_(left, min_bp)
    }

    fn parse_right_(
        &mut self,
        mut left: Expr<'s, M::NodeId>,
        min_bp: Prec,
    ) -> Result<Expr<'s, M::NodeId>> {
        // ~ if the context allowed for a "left over ident" to the left hand
        // side (it very likely an alias not to be confused with an operator)
        // and there is such a "left over" ... that's the end of the expression.
        if self.context.has_left_over_ident() {
            return Ok(left);
        }

        while let Some(t) = self.inner.peek_token()? {
            let op = match t.ttype {
                TokenType::Plus => BinaryExprOp::Add,
                TokenType::Minus => BinaryExprOp::Sub,
                TokenType::Star => BinaryExprOp::Mul,
                TokenType::Slash => BinaryExprOp::Div,
                TokenType::PipePipe => BinaryExprOp::Concat,
                TokenType::RightParen => {
                    let loc = t.loc;
                    if self.inner.nest_level == 0 {
                        return Err(Error::Unbalanced { loc });
                    }
                    break;
                }
                _ => break,
            };
            let (l_bp, r_bp) = precedence::binary(precedence::BinaryOp::Expr(op));
            if l_bp < min_bp {
                break;
            }
            let node_id = {
                let loc = t.loc;
                self.inner.consume_token()?;
                self.inner.meta_tracker.on_node_start(loc)
            };
            let right = self.parse_(r_bp)?;
            left = Expr::Binary(Box::new(BinaryExpr {
                left,
                op: Node(op, node_id),
                right,
            }));
        }
        Ok(left)
    }

    /// Tries to parse a `case_expr` given the already consumed, reserved
    /// `CASE` ident; see [ParseCaseIdent] for the possible outcomes.
    pub(super) fn parse_case_ident(
        &mut self,
        reserved_case_token: Node<Ident<'s>, M::NodeId>,
    ) -> Result<ParseCaseIdent<'s, M::NodeId>> {
        let min_bp = MIN_BINDING_POWER;
        self.parse_case_ident_(reserved_case_token, min_bp)
    }

    /// Parses an integer literal.
    ///
    /// Consumes the next token and, if it is an integer, wraps it into an
    /// [Expr::Value] holding a [Value::Integer].
    // NOTE(review): any other token is presumably reported as an error
    // expecting "an integer" by `expect_token!` - the macro is defined
    // elsewhere.
    fn parse_integer(&mut self) -> Result<Expr<'s, M::NodeId>> {
        expect_token!(|t = (self.inner).next_token()| "an integer" match {
            TokenType::Integer(lexem) => Ok(Expr::Value(Node(Value::Integer(lexem), self.inner.meta_tracker.on_node_start(t.loc)))),
        })
    }
}

// ----------------------------------------------------------------------------

/// Result of [ExprParser::parse_case_ident].
pub(super) enum ParseCaseIdent<'s, ID> {
    /// Denotes that the reserved case token is to be parsed as an identifier
    Ident(Node<Ident<'s>, ID>),
    /// The reserved case token was parsed into an expression
    Expr(Expr<'s, ID>),
}

impl<'s, ID> From<Expr<'s, ID>> for ParseCaseIdent<'s, ID> {
    fn from(value: Expr<'s, ID>) -> Self {
        Self::Expr(value)
    }
}