reddb_server/storage/query/parser/
mod.rs

1//! RQL Parser
2//!
3//! Parses RQL (RedDB Query Language) tokens into a unified AST.
4//! Supports SQL-like table queries, Cypher-like graph patterns, and joins.
5//!
6//! # Supported Syntax
7//!
8//! ## Table Queries
9//! ```text
10//! SELECT [columns] FROM table [WHERE condition] [ORDER BY ...] [LIMIT n]
11//! ```
12//!
13//! ## Graph Queries
14//! ```text
15//! MATCH pattern [WHERE condition] RETURN projection
16//! ```
17//!
18//! ## Join Queries
19//! ```text
20//! FROM table [alias] JOIN GRAPH pattern ON condition [RETURN projection]
21//! ```
22//!
23//! ## Path Queries
24//! ```text
25//! PATH FROM selector TO selector [VIA edge_types] [RETURN projection]
26//! ```
27//!
28//! ## Vector Queries
29//! ```text
30//! VECTOR SEARCH collection SIMILAR TO [...] [WHERE ...] [METRIC ...] LIMIT k
31//! ```
32//!
33//! ## Hybrid Queries
34//! ```text
35//! HYBRID (structured query) VECTOR SEARCH ... FUSION strategy LIMIT n
36//! ```
37
38mod auth_ddl;
39mod config;
40mod cte;
41mod ddl;
42mod dml;
43mod error;
44mod expr;
45mod filter;
46mod graph;
47mod graph_commands;
48mod hybrid;
49mod index_ddl;
50mod join;
51mod kv;
52pub mod limits;
53mod migration;
54mod path;
55mod probabilistic_commands;
56mod queue;
57mod search_commands;
58mod table;
59mod timeseries;
60mod tree;
61mod vector;
62
63#[cfg(test)]
64mod json_literal_table;
65#[cfg(test)]
66mod property_tests;
67#[cfg(test)]
68mod tests;
69
70pub use error::{ParseError, ParseErrorKind, SafeTokenDisplay};
71pub use limits::ParserLimits;
72
73use super::ast::{QueryExpr, QueryWithCte, Span};
74use super::lexer::{Lexer, Position, Spanned, Token};
75use crate::storage::schema::Value;
76use limits::DepthCounter;
77
78/// RQL Parser
79pub struct Parser<'a> {
80    lexer: Lexer<'a>,
81    /// Current token
82    pub(crate) current: Spanned,
83    /// Recursion-depth tracker. Each enter/exit of a recursive
84    /// descent point should bracket itself with [`enter_depth`] /
85    /// [`exit_depth`] (see `parse_expr_prec`).
86    pub(crate) depth: DepthCounter,
87    /// Tracks placeholder style used so far in this statement.
88    /// Mixing `$N` and `?` in one statement is a parse error
89    /// (PRD #351 / issue #354).
90    pub(crate) placeholder_mode: PlaceholderMode,
91    /// Counter for `?` positional placeholders, numbered 1-based
92    /// in source. Unused when mode is `Dollar` or `None`.
93    pub(crate) question_count: usize,
94}
95
96/// Placeholder style locked in by the first placeholder seen.
97#[derive(Debug, Clone, Copy, PartialEq, Eq)]
98pub(crate) enum PlaceholderMode {
99    None,
100    Dollar,
101    Question,
102}
103
104impl<'a> Parser<'a> {
105    /// Create a new parser with default DoS limits.
106    pub fn new(input: &'a str) -> Result<Self, ParseError> {
107        Self::with_limits(input, ParserLimits::default())
108    }
109
110    /// Create a new parser with custom DoS limits.
111    pub fn with_limits(input: &'a str, limits: ParserLimits) -> Result<Self, ParseError> {
112        // Input-byte cap is enforced before the lexer is even
113        // constructed: the lexer holds a `Chars` iterator over the
114        // input slice, so refusing oversized input here keeps the
115        // pathological case (10 GiB blob) cheap.
116        if input.len() > limits.max_input_bytes {
117            return Err(ParseError::input_too_large(
118                "max_input_bytes",
119                limits.max_input_bytes,
120                Position::new(1, 1, 0),
121            ));
122        }
123        let mut lexer = Lexer::with_limits(input, limits);
124        let current = lexer.next_token()?;
125        Ok(Self {
126            lexer,
127            current,
128            depth: DepthCounter::new(limits.max_depth),
129            placeholder_mode: PlaceholderMode::None,
130            question_count: 0,
131        })
132    }
133
134    /// Increment the recursion-depth counter or bail with
135    /// `ParseError::DepthLimit`. Pair with [`exit_depth`] (use the
136    /// `with_depth` helper for RAII-style bracketing).
137    pub(crate) fn enter_depth(&mut self) -> Result<(), ParseError> {
138        self.depth.depth += 1;
139        if self.depth.depth > self.depth.max_depth {
140            return Err(ParseError::depth_limit(
141                "max_depth",
142                self.depth.max_depth,
143                self.position(),
144            ));
145        }
146        Ok(())
147    }
148
149    /// Decrement the recursion-depth counter. Always called on the
150    /// success path; on the error path, the counter is rebalanced
151    /// when `Parser` itself is dropped (it's only consulted while
152    /// parsing is in progress).
153    pub(crate) fn exit_depth(&mut self) {
154        if self.depth.depth > 0 {
155            self.depth.depth -= 1;
156        }
157    }
158
159    /// Get current position
160    pub fn position(&self) -> Position {
161        self.current.start
162    }
163
164    /// Advance to next token
165    pub fn advance(&mut self) -> Result<Token, ParseError> {
166        let old = std::mem::replace(&mut self.current, self.lexer.next_token()?);
167        Ok(old.token)
168    }
169
170    /// Peek at current token
171    pub fn peek(&self) -> &Token {
172        &self.current.token
173    }
174
175    /// Peek one token past the current parser position without consuming it.
176    pub fn peek_next(&mut self) -> Result<&Token, ParseError> {
177        Ok(&self.lexer.peek_token()?.token)
178    }
179
180    /// Check if current token matches
181    pub fn check(&self, expected: &Token) -> bool {
182        std::mem::discriminant(&self.current.token) == std::mem::discriminant(expected)
183    }
184
185    /// Check if current token is a specific keyword
186    pub fn check_keyword(&self, keyword: &Token) -> bool {
187        self.check(keyword)
188    }
189
190    /// Consume a specific token or error
191    pub fn expect(&mut self, expected: Token) -> Result<Token, ParseError> {
192        if self.check(&expected) {
193            self.advance()
194        } else {
195            Err(ParseError::expected(
196                vec![&expected.to_string()],
197                &self.current.token,
198                self.position(),
199            ))
200        }
201    }
202
203    /// Consume an identifier and return its value
204    pub fn expect_ident(&mut self) -> Result<String, ParseError> {
205        match &self.current.token {
206            Token::Ident(name) => {
207                let name = name.clone();
208                self.advance()?;
209                Ok(name)
210            }
211            other => Err(ParseError::expected(
212                vec!["identifier"],
213                other,
214                self.position(),
215            )),
216        }
217    }
218
219    /// Consume an identifier or aggregate keyword when the grammar expects
220    /// a user-defined column name.
221    pub fn expect_column_ident(&mut self) -> Result<String, ParseError> {
222        let name = match &self.current.token {
223            Token::Ident(name) => name.clone(),
224            Token::Count => "count".to_string(),
225            Token::Sum => "sum".to_string(),
226            Token::Avg => "avg".to_string(),
227            Token::Min => "min".to_string(),
228            Token::Max => "max".to_string(),
229            other => {
230                return Err(ParseError::expected(
231                    vec!["identifier"],
232                    other,
233                    self.position(),
234                ));
235            }
236        };
237        self.advance()?;
238        Ok(name)
239    }
240
241    /// Consume an identifier or keyword (for type names where keywords are valid)
242    pub fn expect_ident_or_keyword(&mut self) -> Result<String, ParseError> {
243        // Get the string representation of the current token
244        let name = match &self.current.token {
245            Token::Ident(name) => name.clone(),
246            Token::Count => "count".to_string(),
247            Token::Sum => "sum".to_string(),
248            Token::Avg => "avg".to_string(),
249            Token::Min => "min".to_string(),
250            Token::Max => "max".to_string(),
251            // Keywords that can be type names (convert to uppercase for type matching)
252            Token::Contains => "CONTAINS".to_string(),
253            Token::Left => "LEFT".to_string(),
254            Token::Right => "RIGHT".to_string(),
255            Token::First => "FIRST".to_string(),
256            Token::Last => "LAST".to_string(),
257            Token::In => "IN".to_string(),
258            Token::By => "BY".to_string(),
259            // Any other keyword - use its display string
260            other => other.to_string(),
261        };
262
263        // Only advance for valid type-name-like tokens
264        match &self.current.token {
265            // Identifiers are always valid
266            Token::Ident(_) => {
267                self.advance()?;
268                Ok(name)
269            }
270            // These keywords can be type names
271            Token::Contains
272            | Token::Left
273            | Token::Right
274            | Token::First
275            | Token::Last
276            | Token::In
277            | Token::By => {
278                self.advance()?;
279                Ok(name)
280            }
281            // Reject structural tokens that can't be type names
282            Token::Eof
283            | Token::LParen
284            | Token::RParen
285            | Token::LBracket
286            | Token::RBracket
287            | Token::Comma
288            | Token::Dot
289            | Token::Eq
290            | Token::Lt
291            | Token::Gt
292            | Token::Le
293            | Token::Ge
294            | Token::Arrow
295            | Token::ArrowLeft
296            | Token::Dash
297            | Token::Colon
298            | Token::Semi
299            | Token::Star
300            | Token::Plus
301            | Token::Slash => Err(ParseError::expected(
302                vec!["identifier or type name"],
303                &self.current.token,
304                self.position(),
305            )),
306            // All other keywords can potentially be type names
307            _ => {
308                self.advance()?;
309                Ok(name)
310            }
311        }
312    }
313
314    /// Try to consume a token, returning true if consumed
315    pub fn consume(&mut self, expected: &Token) -> Result<bool, ParseError> {
316        if self.check(expected) {
317            self.advance()?;
318            Ok(true)
319        } else {
320            Ok(false)
321        }
322    }
323
324    /// Try to consume an identifier case-insensitively (for keywords not in Token enum)
325    pub fn consume_ident_ci(&mut self, expected: &str) -> Result<bool, ParseError> {
326        match self.peek().clone() {
327            Token::Ident(name) if name.eq_ignore_ascii_case(expected) => {
328                self.advance()?;
329                Ok(true)
330            }
331            _ => Ok(false),
332        }
333    }
334
335    /// Parse a complete query
336    pub fn parse(&mut self) -> Result<QueryExpr, ParseError> {
337        let query = self.parse_frontend_statement()?.into_query_expr();
338
339        // Expect end of input
340        if !self.check(&Token::Eof) {
341            return Err(ParseError::new(
342                // F-05: route the offending token through `SafeTokenDisplay`
343                // so the user-controlled `Ident` / `String` / `JsonLiteral`
344                // payloads are escaped before the message lands in the
345                // downstream JSON / audit / log / gRPC sinks.
346                format!(
347                    "Unexpected token after query: {}",
348                    error::SafeTokenDisplay(&self.current.token)
349                ),
350                self.position(),
351            ));
352        }
353
354        Ok(query)
355    }
356
357    /// Parse the main query expression (without CTEs)
358    pub fn parse_query_expr(&mut self) -> Result<QueryExpr, ParseError> {
359        self.parse_frontend_statement()
360            .map(|statement| statement.into_query_expr())
361    }
362
363    /// Parse an integer literal
364    pub fn parse_integer(&mut self) -> Result<i64, ParseError> {
365        match &self.current.token {
366            Token::Integer(n) => {
367                let n = *n;
368                self.advance()?;
369                Ok(n)
370            }
371            other => Err(ParseError::expected(
372                vec!["integer"],
373                other,
374                self.position(),
375            )),
376        }
377    }
378
379    /// Parse a `$N` or `?` placeholder in a non-Expr slot (e.g. the
380    /// `LIMIT` / `MIN_SCORE` slots of SEARCH SIMILAR; issue #361). The
381    /// `field` name is used only to enrich placeholder-mixing errors.
382    /// Returns the 0-based parameter index.
383    pub fn parse_param_slot(&mut self, field: &'static str) -> Result<usize, ParseError> {
384        match self.peek().clone() {
385            Token::Dollar => {
386                self.advance()?;
387                let n = match *self.peek() {
388                    Token::Integer(n) if n >= 1 => {
389                        self.advance()?;
390                        n as usize
391                    }
392                    _ => {
393                        return Err(ParseError::new(
394                            format!("expected `$N` (N >= 1) for {field} parameter"),
395                            self.position(),
396                        ));
397                    }
398                };
399                if self.placeholder_mode == PlaceholderMode::Question {
400                    return Err(ParseError::new(
401                        "cannot mix `?` and `$N` placeholders in one statement".to_string(),
402                        self.position(),
403                    ));
404                }
405                self.placeholder_mode = PlaceholderMode::Dollar;
406                Ok(n - 1)
407            }
408            Token::Question => {
409                let (index, _) = self.parse_question_param_index()?;
410                Ok(index)
411            }
412            other => Err(ParseError::expected(
413                vec!["$N", "?"],
414                &other,
415                self.position(),
416            )),
417        }
418    }
419
420    /// Parse a question-style positional placeholder. Bare `?` slots are
421    /// assigned left-to-right. Immediate `?N` slots use the explicit
422    /// 1-based index, matching `$N` without changing the placeholder
423    /// family.
424    pub(crate) fn parse_question_param_index(&mut self) -> Result<(usize, Span), ParseError> {
425        let start = self.position();
426        let question_end = self.current.end;
427        self.expect(Token::Question)?;
428        if self.placeholder_mode == PlaceholderMode::Dollar {
429            return Err(ParseError::new(
430                "cannot mix `?` and `$N` placeholders in one statement".to_string(),
431                self.position(),
432            ));
433        }
434        self.placeholder_mode = PlaceholderMode::Question;
435
436        if let Token::Integer(n) = *self.peek() {
437            if self.current.start.offset == question_end.offset {
438                if n < 1 {
439                    return Err(ParseError::new(
440                        "placeholder index must be >= 1".to_string(),
441                        self.position(),
442                    ));
443                }
444                let end = self.current.end;
445                self.advance()?;
446                let index = n as usize - 1;
447                self.question_count = self.question_count.max(index + 1);
448                return Ok((index, Span::new(start, end)));
449            }
450        }
451
452        self.question_count += 1;
453        Ok((self.question_count - 1, Span::new(start, question_end)))
454    }
455
456    /// Parse a strictly-positive integer literal (`> 0`).
457    ///
458    /// Surfaces a `ValueOutOfRange` error for `field` when the literal
459    /// is `0`, negative, or starts with a unary `-` (which the bare
460    /// `parse_integer` rejects with a confusing "expected: integer"
461    /// message). Used by modifier slots like `MAX_SIZE`, `CAPACITY`,
462    /// `WIDTH`, `DEPTH`, `K` where zero or negative values are
463    /// semantically meaningless.
464    pub fn parse_positive_integer(&mut self, field: &'static str) -> Result<i64, ParseError> {
465        let pos = self.position();
466        // Detect the unary-minus path up-front so we surface a
467        // clearer "must be positive" message instead of the generic
468        // "expected: integer".
469        if matches!(self.current.token, Token::Minus | Token::Dash) {
470            return Err(ParseError::value_out_of_range(
471                field,
472                "must be a positive integer",
473                pos,
474            ));
475        }
476        let raw = self.parse_integer()?;
477        if raw <= 0 {
478            return Err(ParseError::value_out_of_range(
479                field,
480                "must be a positive integer",
481                pos,
482            ));
483        }
484        Ok(raw)
485    }
486
487    /// Parse float literal. Accepts an optional leading unary `-`
488    /// followed by a `Float` or `Integer` token so positions like
489    /// vector literals (`[-0.1, …]`), `THRESHOLD -0.5`, `MIN_SCORE
490    /// -0.1`, `RERANK(-0.3)`, `UNION(0.7, -0.3)`, and geo coordinates
491    /// (`LATITUDE -33.86`) parse correctly. See bug #107.
492    pub fn parse_float(&mut self) -> Result<f64, ParseError> {
493        let negate = if matches!(self.current.token, Token::Minus | Token::Dash) {
494            self.advance()?;
495            true
496        } else {
497            false
498        };
499        let value = match &self.current.token {
500            Token::Float(n) => {
501                let n = *n;
502                self.advance()?;
503                n
504            }
505            Token::Integer(n) => {
506                let n = *n as f64;
507                self.advance()?;
508                n
509            }
510            other => {
511                return Err(ParseError::expected(vec!["number"], other, self.position()));
512            }
513        };
514        Ok(if negate { -value } else { value })
515    }
516
517    /// Parse a string literal
518    pub fn parse_string(&mut self) -> Result<String, ParseError> {
519        match &self.current.token {
520            Token::String(s) => {
521                let s = s.clone();
522                self.advance()?;
523                Ok(s)
524            }
525            other => Err(ParseError::expected(vec!["string"], other, self.position())),
526        }
527    }
528
529    /// Parse a value (delegates to parse_literal_value for full JSON support)
530    pub fn parse_value(&mut self) -> Result<Value, ParseError> {
531        self.parse_literal_value()
532    }
533
534    /// Parse value list for IN clause
535    pub fn parse_value_list(&mut self) -> Result<Vec<Value>, ParseError> {
536        let mut values = Vec::new();
537        loop {
538            values.push(self.parse_value()?);
539            if !self.consume(&Token::Comma)? {
540                break;
541            }
542        }
543        Ok(values)
544    }
545
546    /// Phase 1 cutover bridge: parse an expression via the new Pratt
547    /// parser (`parser/expr.rs`) and try to fold it back into a
548    /// literal `Value`. Used by INSERT VALUES / UPDATE SET / DEFAULT
549    /// slots that still store `Value` in their AST nodes — the bridge
550    /// lets them benefit from the full Expr grammar (parenthesised
551    /// literals, unary minus, CAST literals) without an AST cascade.
552    ///
553    /// Folds these Expr shapes:
554    /// - `Expr::Literal { value, .. }` → `value`
555    /// - `Expr::UnaryOp { Neg, operand: Literal(Integer/Float), .. }`
556    ///   → negated value
557    /// - `Expr::Cast { inner: Literal(text), target, .. }` →
558    ///   coerce(text, target) via schema::coerce
559    ///
560    /// Anything else returns an error so callers can decide whether
561    /// to fall back to the legacy `parse_literal_value` path or
562    /// surface a "non-literal not supported in this position" error.
563    pub fn parse_expr_value(&mut self) -> Result<Value, ParseError> {
564        let expr = self.parse_expr()?;
565        super::sql_lowering::fold_expr_to_value(expr)
566            .map_err(|msg| ParseError::new(msg, self.position()))
567    }
568}
569
570/// Parse an RQL query string into a `QueryWithCte`. A leading `WITH`
571/// is consumed as a CTE prelude; any other statement is returned in
572/// the `QueryWithCte::simple` shape (`with_clause: None`). Callers
573/// that don't care about CTEs read `.query` to recover the legacy
574/// `QueryExpr` shape.
575pub fn parse(input: &str) -> Result<QueryWithCte, ParseError> {
576    let mut parser = Parser::new(input)?;
577    parser.parse_with_cte()
578}
reddb_server/storage/query/parser/mod.rs

reddb_server/storage/query/parser/
mod.rs