Skip to main content

reddb_server/storage/query/parser/
mod.rs

1//! RQL Parser
2//!
3//! Parses RQL (RedDB Query Language) tokens into a unified AST.
4//! Supports SQL-like table queries, Cypher-like graph patterns, and joins.
5//!
6//! # Supported Syntax
7//!
8//! ## Table Queries
9//! ```text
10//! SELECT [columns] FROM table [WHERE condition] [ORDER BY ...] [LIMIT n]
11//! ```
12//!
13//! ## Graph Queries
14//! ```text
15//! MATCH pattern [WHERE condition] RETURN projection
16//! ```
17//!
18//! ## Join Queries
19//! ```text
20//! FROM table [alias] JOIN GRAPH pattern ON condition [RETURN projection]
21//! ```
22//!
23//! ## Path Queries
24//! ```text
25//! PATH FROM selector TO selector [VIA edge_types] [RETURN projection]
26//! ```
27//!
28//! ## Vector Queries
29//! ```text
30//! VECTOR SEARCH collection SIMILAR TO [...] [WHERE ...] [METRIC ...] LIMIT k
31//! ```
32//!
33//! ## Hybrid Queries
34//! ```text
35//! HYBRID (structured query) VECTOR SEARCH ... FUSION strategy LIMIT n
36//! ```
37
38mod auth_ddl;
39mod config;
40mod cte;
41mod ddl;
42mod dml;
43mod error;
44mod expr;
45mod filter;
46mod graph;
47mod graph_commands;
48mod hybrid;
49mod index_ddl;
50mod join;
51mod kv;
52pub mod limits;
53mod migration;
54mod path;
55mod probabilistic_commands;
56mod queue;
57mod search_commands;
58mod table;
59mod timeseries;
60mod tree;
61mod vector;
62
63#[cfg(test)]
64mod json_literal_table;
65#[cfg(test)]
66mod property_tests;
67#[cfg(test)]
68mod tests;
69
70pub use error::{ParseError, ParseErrorKind, SafeTokenDisplay};
71pub use limits::ParserLimits;
72
73use super::ast::{QueryExpr, QueryWithCte};
74use super::lexer::{Lexer, Position, Spanned, Token};
75use crate::storage::schema::Value;
76use limits::DepthCounter;
77
78/// RQL Parser
79pub struct Parser<'a> {
80    lexer: Lexer<'a>,
81    /// Current token
82    pub(crate) current: Spanned,
83    /// Recursion-depth tracker. Each enter/exit of a recursive
84    /// descent point should bracket itself with [`enter_depth`] /
85    /// [`exit_depth`] (see `parse_expr_prec`).
86    pub(crate) depth: DepthCounter,
87    /// Tracks placeholder style used so far in this statement.
88    /// Mixing `$N` and `?` in one statement is a parse error
89    /// (PRD #351 / issue #354).
90    pub(crate) placeholder_mode: PlaceholderMode,
91    /// Counter for `?` positional placeholders, numbered 1-based
92    /// in source. Unused when mode is `Dollar` or `None`.
93    pub(crate) question_count: usize,
94}
95
96/// Placeholder style locked in by the first placeholder seen.
97#[derive(Debug, Clone, Copy, PartialEq, Eq)]
98pub(crate) enum PlaceholderMode {
99    None,
100    Dollar,
101    Question,
102}
103
104impl<'a> Parser<'a> {
105    /// Create a new parser with default DoS limits.
106    pub fn new(input: &'a str) -> Result<Self, ParseError> {
107        Self::with_limits(input, ParserLimits::default())
108    }
109
110    /// Create a new parser with custom DoS limits.
111    pub fn with_limits(input: &'a str, limits: ParserLimits) -> Result<Self, ParseError> {
112        // Input-byte cap is enforced before the lexer is even
113        // constructed: the lexer holds a `Chars` iterator over the
114        // input slice, so refusing oversized input here keeps the
115        // pathological case (10 GiB blob) cheap.
116        if input.len() > limits.max_input_bytes {
117            return Err(ParseError::input_too_large(
118                "max_input_bytes",
119                limits.max_input_bytes,
120                Position::new(1, 1, 0),
121            ));
122        }
123        let mut lexer = Lexer::with_limits(input, limits);
124        let current = lexer.next_token()?;
125        Ok(Self {
126            lexer,
127            current,
128            depth: DepthCounter::new(limits.max_depth),
129            placeholder_mode: PlaceholderMode::None,
130            question_count: 0,
131        })
132    }
133
134    /// Increment the recursion-depth counter or bail with
135    /// `ParseError::DepthLimit`. Pair with [`exit_depth`] (use the
136    /// `with_depth` helper for RAII-style bracketing).
137    pub(crate) fn enter_depth(&mut self) -> Result<(), ParseError> {
138        self.depth.depth += 1;
139        if self.depth.depth > self.depth.max_depth {
140            return Err(ParseError::depth_limit(
141                "max_depth",
142                self.depth.max_depth,
143                self.position(),
144            ));
145        }
146        Ok(())
147    }
148
149    /// Decrement the recursion-depth counter. Always called on the
150    /// success path; on the error path, the counter is rebalanced
151    /// when `Parser` itself is dropped (it's only consulted while
152    /// parsing is in progress).
153    pub(crate) fn exit_depth(&mut self) {
154        if self.depth.depth > 0 {
155            self.depth.depth -= 1;
156        }
157    }
158
159    /// Get current position
160    pub fn position(&self) -> Position {
161        self.current.start
162    }
163
164    /// Advance to next token
165    pub fn advance(&mut self) -> Result<Token, ParseError> {
166        let old = std::mem::replace(&mut self.current, self.lexer.next_token()?);
167        Ok(old.token)
168    }
169
170    /// Peek at current token
171    pub fn peek(&self) -> &Token {
172        &self.current.token
173    }
174
175    /// Peek one token past the current parser position without consuming it.
176    pub fn peek_next(&mut self) -> Result<&Token, ParseError> {
177        Ok(&self.lexer.peek_token()?.token)
178    }
179
180    /// Check if current token matches
181    pub fn check(&self, expected: &Token) -> bool {
182        std::mem::discriminant(&self.current.token) == std::mem::discriminant(expected)
183    }
184
185    /// Check if current token is a specific keyword
186    pub fn check_keyword(&self, keyword: &Token) -> bool {
187        self.check(keyword)
188    }
189
190    /// Consume a specific token or error
191    pub fn expect(&mut self, expected: Token) -> Result<Token, ParseError> {
192        if self.check(&expected) {
193            self.advance()
194        } else {
195            Err(ParseError::expected(
196                vec![&expected.to_string()],
197                &self.current.token,
198                self.position(),
199            ))
200        }
201    }
202
203    /// Consume an identifier and return its value
204    pub fn expect_ident(&mut self) -> Result<String, ParseError> {
205        match &self.current.token {
206            Token::Ident(name) => {
207                let name = name.clone();
208                self.advance()?;
209                Ok(name)
210            }
211            other => Err(ParseError::expected(
212                vec!["identifier"],
213                other,
214                self.position(),
215            )),
216        }
217    }
218
219    /// Consume an identifier or keyword (for type names where keywords are valid)
220    pub fn expect_ident_or_keyword(&mut self) -> Result<String, ParseError> {
221        // Get the string representation of the current token
222        let name = match &self.current.token {
223            Token::Ident(name) => name.clone(),
224            // Keywords that can be type names (convert to uppercase for type matching)
225            Token::Contains => "CONTAINS".to_string(),
226            Token::Left => "LEFT".to_string(),
227            Token::Right => "RIGHT".to_string(),
228            Token::First => "FIRST".to_string(),
229            Token::Last => "LAST".to_string(),
230            Token::In => "IN".to_string(),
231            Token::By => "BY".to_string(),
232            // Any other keyword - use its display string
233            other => other.to_string(),
234        };
235
236        // Only advance for valid type-name-like tokens
237        match &self.current.token {
238            // Identifiers are always valid
239            Token::Ident(_) => {
240                self.advance()?;
241                Ok(name)
242            }
243            // These keywords can be type names
244            Token::Contains
245            | Token::Left
246            | Token::Right
247            | Token::First
248            | Token::Last
249            | Token::In
250            | Token::By => {
251                self.advance()?;
252                Ok(name)
253            }
254            // Reject structural tokens that can't be type names
255            Token::Eof
256            | Token::LParen
257            | Token::RParen
258            | Token::LBracket
259            | Token::RBracket
260            | Token::Comma
261            | Token::Dot
262            | Token::Eq
263            | Token::Lt
264            | Token::Gt
265            | Token::Le
266            | Token::Ge
267            | Token::Arrow
268            | Token::ArrowLeft
269            | Token::Dash
270            | Token::Colon
271            | Token::Semi
272            | Token::Star
273            | Token::Plus
274            | Token::Slash => Err(ParseError::expected(
275                vec!["identifier or type name"],
276                &self.current.token,
277                self.position(),
278            )),
279            // All other keywords can potentially be type names
280            _ => {
281                self.advance()?;
282                Ok(name)
283            }
284        }
285    }
286
287    /// Try to consume a token, returning true if consumed
288    pub fn consume(&mut self, expected: &Token) -> Result<bool, ParseError> {
289        if self.check(expected) {
290            self.advance()?;
291            Ok(true)
292        } else {
293            Ok(false)
294        }
295    }
296
297    /// Try to consume an identifier case-insensitively (for keywords not in Token enum)
298    pub fn consume_ident_ci(&mut self, expected: &str) -> Result<bool, ParseError> {
299        match self.peek().clone() {
300            Token::Ident(name) if name.eq_ignore_ascii_case(expected) => {
301                self.advance()?;
302                Ok(true)
303            }
304            _ => Ok(false),
305        }
306    }
307
308    /// Parse a complete query
309    pub fn parse(&mut self) -> Result<QueryExpr, ParseError> {
310        let query = self.parse_frontend_statement()?.into_query_expr();
311
312        // Expect end of input
313        if !self.check(&Token::Eof) {
314            return Err(ParseError::new(
315                // F-05: route the offending token through `SafeTokenDisplay`
316                // so the user-controlled `Ident` / `String` / `JsonLiteral`
317                // payloads are escaped before the message lands in the
318                // downstream JSON / audit / log / gRPC sinks.
319                format!(
320                    "Unexpected token after query: {}",
321                    error::SafeTokenDisplay(&self.current.token)
322                ),
323                self.position(),
324            ));
325        }
326
327        Ok(query)
328    }
329
330    /// Parse the main query expression (without CTEs)
331    pub fn parse_query_expr(&mut self) -> Result<QueryExpr, ParseError> {
332        self.parse_frontend_statement()
333            .map(|statement| statement.into_query_expr())
334    }
335
336    /// Parse an integer literal
337    pub fn parse_integer(&mut self) -> Result<i64, ParseError> {
338        match &self.current.token {
339            Token::Integer(n) => {
340                let n = *n;
341                self.advance()?;
342                Ok(n)
343            }
344            other => Err(ParseError::expected(
345                vec!["integer"],
346                other,
347                self.position(),
348            )),
349        }
350    }
351
352    /// Parse a `$N` or `?` placeholder in a non-Expr slot (e.g. the
353    /// `LIMIT` / `MIN_SCORE` slots of SEARCH SIMILAR; issue #361). The
354    /// `field` name is used only to enrich placeholder-mixing errors.
355    /// Returns the 0-based parameter index.
356    pub fn parse_param_slot(&mut self, field: &'static str) -> Result<usize, ParseError> {
357        match self.peek().clone() {
358            Token::Dollar => {
359                self.advance()?;
360                let n = match *self.peek() {
361                    Token::Integer(n) if n >= 1 => {
362                        self.advance()?;
363                        n as usize
364                    }
365                    _ => {
366                        return Err(ParseError::new(
367                            format!("expected `$N` (N >= 1) for {field} parameter"),
368                            self.position(),
369                        ));
370                    }
371                };
372                if self.placeholder_mode == PlaceholderMode::Question {
373                    return Err(ParseError::new(
374                        "cannot mix `?` and `$N` placeholders in one statement".to_string(),
375                        self.position(),
376                    ));
377                }
378                self.placeholder_mode = PlaceholderMode::Dollar;
379                Ok(n - 1)
380            }
381            Token::Question => {
382                self.advance()?;
383                if self.placeholder_mode == PlaceholderMode::Dollar {
384                    return Err(ParseError::new(
385                        "cannot mix `?` and `$N` placeholders in one statement".to_string(),
386                        self.position(),
387                    ));
388                }
389                self.placeholder_mode = PlaceholderMode::Question;
390                self.question_count += 1;
391                Ok(self.question_count - 1)
392            }
393            other => Err(ParseError::expected(
394                vec!["$N", "?"],
395                &other,
396                self.position(),
397            )),
398        }
399    }
400
401    /// Parse a strictly-positive integer literal (`> 0`).
402    ///
403    /// Surfaces a `ValueOutOfRange` error for `field` when the literal
404    /// is `0`, negative, or starts with a unary `-` (which the bare
405    /// `parse_integer` rejects with a confusing "expected: integer"
406    /// message). Used by modifier slots like `MAX_SIZE`, `CAPACITY`,
407    /// `WIDTH`, `DEPTH`, `K` where zero or negative values are
408    /// semantically meaningless.
409    pub fn parse_positive_integer(&mut self, field: &'static str) -> Result<i64, ParseError> {
410        let pos = self.position();
411        // Detect the unary-minus path up-front so we surface a
412        // clearer "must be positive" message instead of the generic
413        // "expected: integer".
414        if matches!(self.current.token, Token::Minus | Token::Dash) {
415            return Err(ParseError::value_out_of_range(
416                field,
417                "must be a positive integer",
418                pos,
419            ));
420        }
421        let raw = self.parse_integer()?;
422        if raw <= 0 {
423            return Err(ParseError::value_out_of_range(
424                field,
425                "must be a positive integer",
426                pos,
427            ));
428        }
429        Ok(raw)
430    }
431
432    /// Parse float literal. Accepts an optional leading unary `-`
433    /// followed by a `Float` or `Integer` token so positions like
434    /// vector literals (`[-0.1, …]`), `THRESHOLD -0.5`, `MIN_SCORE
435    /// -0.1`, `RERANK(-0.3)`, `UNION(0.7, -0.3)`, and geo coordinates
436    /// (`LATITUDE -33.86`) parse correctly. See bug #107.
437    pub fn parse_float(&mut self) -> Result<f64, ParseError> {
438        let negate = if matches!(self.current.token, Token::Minus | Token::Dash) {
439            self.advance()?;
440            true
441        } else {
442            false
443        };
444        let value = match &self.current.token {
445            Token::Float(n) => {
446                let n = *n;
447                self.advance()?;
448                n
449            }
450            Token::Integer(n) => {
451                let n = *n as f64;
452                self.advance()?;
453                n
454            }
455            other => {
456                return Err(ParseError::expected(vec!["number"], other, self.position()));
457            }
458        };
459        Ok(if negate { -value } else { value })
460    }
461
462    /// Parse a string literal
463    pub fn parse_string(&mut self) -> Result<String, ParseError> {
464        match &self.current.token {
465            Token::String(s) => {
466                let s = s.clone();
467                self.advance()?;
468                Ok(s)
469            }
470            other => Err(ParseError::expected(vec!["string"], other, self.position())),
471        }
472    }
473
474    /// Parse a value (delegates to parse_literal_value for full JSON support)
475    pub fn parse_value(&mut self) -> Result<Value, ParseError> {
476        self.parse_literal_value()
477    }
478
479    /// Parse value list for IN clause
480    pub fn parse_value_list(&mut self) -> Result<Vec<Value>, ParseError> {
481        let mut values = Vec::new();
482        loop {
483            values.push(self.parse_value()?);
484            if !self.consume(&Token::Comma)? {
485                break;
486            }
487        }
488        Ok(values)
489    }
490
491    /// Phase 1 cutover bridge: parse an expression via the new Pratt
492    /// parser (`parser/expr.rs`) and try to fold it back into a
493    /// literal `Value`. Used by INSERT VALUES / UPDATE SET / DEFAULT
494    /// slots that still store `Value` in their AST nodes — the bridge
495    /// lets them benefit from the full Expr grammar (parenthesised
496    /// literals, unary minus, CAST literals) without an AST cascade.
497    ///
498    /// Folds these Expr shapes:
499    /// - `Expr::Literal { value, .. }` → `value`
500    /// - `Expr::UnaryOp { Neg, operand: Literal(Integer/Float), .. }`
501    ///   → negated value
502    /// - `Expr::Cast { inner: Literal(text), target, .. }` →
503    ///   coerce(text, target) via schema::coerce
504    ///
505    /// Anything else returns an error so callers can decide whether
506    /// to fall back to the legacy `parse_literal_value` path or
507    /// surface a "non-literal not supported in this position" error.
508    pub fn parse_expr_value(&mut self) -> Result<Value, ParseError> {
509        let expr = self.parse_expr()?;
510        super::sql_lowering::fold_expr_to_value(expr)
511            .map_err(|msg| ParseError::new(msg, self.position()))
512    }
513}
514
515/// Parse an RQL query string into a `QueryWithCte`. A leading `WITH`
516/// is consumed as a CTE prelude; any other statement is returned in
517/// the `QueryWithCte::simple` shape (`with_clause: None`). Callers
518/// that don't care about CTEs read `.query` to recover the legacy
519/// `QueryExpr` shape.
520pub fn parse(input: &str) -> Result<QueryWithCte, ParseError> {
521    let mut parser = Parser::new(input)?;
522    parser.parse_with_cte()
523}