// Source: reddb_server/storage/query/parser/mod.rs

1//! RQL Parser
2//!
3//! Parses RQL (RedDB Query Language) tokens into a unified AST.
4//! Supports SQL-like table queries, Cypher-like graph patterns, and joins.
5//!
6//! # Supported Syntax
7//!
8//! ## Table Queries
9//! ```text
10//! SELECT [columns] FROM table [WHERE condition] [ORDER BY ...] [LIMIT n]
11//! ```
12//!
13//! ## Graph Queries
14//! ```text
15//! MATCH pattern [WHERE condition] RETURN projection
16//! ```
17//!
18//! ## Join Queries
19//! ```text
20//! FROM table [alias] JOIN GRAPH pattern ON condition [RETURN projection]
21//! ```
22//!
23//! ## Path Queries
24//! ```text
25//! PATH FROM selector TO selector [VIA edge_types] [RETURN projection]
26//! ```
27//!
28//! ## Vector Queries
29//! ```text
30//! VECTOR SEARCH collection SIMILAR TO [...] [WHERE ...] [METRIC ...] LIMIT k
31//! ```
32//!
33//! ## Hybrid Queries
34//! ```text
35//! HYBRID (structured query) VECTOR SEARCH ... FUSION strategy LIMIT n
36//! ```
37
38mod auth_ddl;
39mod config;
40mod cte;
41mod ddl;
42mod dml;
43mod error;
44mod expr;
45mod filter;
46mod graph;
47mod graph_commands;
48mod hybrid;
49mod index_ddl;
50mod join;
51mod kv;
52pub mod limits;
53mod migration;
54mod path;
55mod probabilistic_commands;
56mod queue;
57mod search_commands;
58mod table;
59mod timeseries;
60mod tree;
61mod vector;
62
63#[cfg(test)]
64mod json_literal_table;
65#[cfg(test)]
66mod property_tests;
67#[cfg(test)]
68mod tests;
69
70pub use error::{ParseError, ParseErrorKind, SafeTokenDisplay};
71pub use limits::ParserLimits;
72
73use super::ast::{QueryExpr, QueryWithCte};
74use super::lexer::{Lexer, Position, Spanned, Token};
75use crate::storage::schema::Value;
76use limits::DepthCounter;
77
/// RQL Parser
///
/// Hand-written recursive-descent parser over the token stream
/// produced by [`Lexer`]. Keeps exactly one token of lookahead in
/// `current`; a second token can be inspected via `peek_next`.
pub struct Parser<'a> {
    /// Token source; borrows the query string for `'a`.
    lexer: Lexer<'a>,
    /// Current token
    pub(crate) current: Spanned,
    /// Recursion-depth tracker. Each enter/exit of a recursive
    /// descent point should bracket itself with [`enter_depth`] /
    /// [`exit_depth`] (see `parse_expr_prec`).
    pub(crate) depth: DepthCounter,
}
88
89impl<'a> Parser<'a> {
90    /// Create a new parser with default DoS limits.
91    pub fn new(input: &'a str) -> Result<Self, ParseError> {
92        Self::with_limits(input, ParserLimits::default())
93    }
94
95    /// Create a new parser with custom DoS limits.
96    pub fn with_limits(input: &'a str, limits: ParserLimits) -> Result<Self, ParseError> {
97        // Input-byte cap is enforced before the lexer is even
98        // constructed: the lexer holds a `Chars` iterator over the
99        // input slice, so refusing oversized input here keeps the
100        // pathological case (10 GiB blob) cheap.
101        if input.len() > limits.max_input_bytes {
102            return Err(ParseError::input_too_large(
103                "max_input_bytes",
104                limits.max_input_bytes,
105                Position::new(1, 1, 0),
106            ));
107        }
108        let mut lexer = Lexer::with_limits(input, limits);
109        let current = lexer.next_token()?;
110        Ok(Self {
111            lexer,
112            current,
113            depth: DepthCounter::new(limits.max_depth),
114        })
115    }
116
117    /// Increment the recursion-depth counter or bail with
118    /// `ParseError::DepthLimit`. Pair with [`exit_depth`] (use the
119    /// `with_depth` helper for RAII-style bracketing).
120    pub(crate) fn enter_depth(&mut self) -> Result<(), ParseError> {
121        self.depth.depth += 1;
122        if self.depth.depth > self.depth.max_depth {
123            return Err(ParseError::depth_limit(
124                "max_depth",
125                self.depth.max_depth,
126                self.position(),
127            ));
128        }
129        Ok(())
130    }
131
132    /// Decrement the recursion-depth counter. Always called on the
133    /// success path; on the error path, the counter is rebalanced
134    /// when `Parser` itself is dropped (it's only consulted while
135    /// parsing is in progress).
136    pub(crate) fn exit_depth(&mut self) {
137        if self.depth.depth > 0 {
138            self.depth.depth -= 1;
139        }
140    }
141
142    /// Get current position
143    pub fn position(&self) -> Position {
144        self.current.start
145    }
146
147    /// Advance to next token
148    pub fn advance(&mut self) -> Result<Token, ParseError> {
149        let old = std::mem::replace(&mut self.current, self.lexer.next_token()?);
150        Ok(old.token)
151    }
152
153    /// Peek at current token
154    pub fn peek(&self) -> &Token {
155        &self.current.token
156    }
157
158    /// Peek one token past the current parser position without consuming it.
159    pub fn peek_next(&mut self) -> Result<&Token, ParseError> {
160        Ok(&self.lexer.peek_token()?.token)
161    }
162
163    /// Check if current token matches
164    pub fn check(&self, expected: &Token) -> bool {
165        std::mem::discriminant(&self.current.token) == std::mem::discriminant(expected)
166    }
167
168    /// Check if current token is a specific keyword
169    pub fn check_keyword(&self, keyword: &Token) -> bool {
170        self.check(keyword)
171    }
172
173    /// Consume a specific token or error
174    pub fn expect(&mut self, expected: Token) -> Result<Token, ParseError> {
175        if self.check(&expected) {
176            self.advance()
177        } else {
178            Err(ParseError::expected(
179                vec![&expected.to_string()],
180                &self.current.token,
181                self.position(),
182            ))
183        }
184    }
185
186    /// Consume an identifier and return its value
187    pub fn expect_ident(&mut self) -> Result<String, ParseError> {
188        match &self.current.token {
189            Token::Ident(name) => {
190                let name = name.clone();
191                self.advance()?;
192                Ok(name)
193            }
194            other => Err(ParseError::expected(
195                vec!["identifier"],
196                other,
197                self.position(),
198            )),
199        }
200    }
201
202    /// Consume an identifier or keyword (for type names where keywords are valid)
203    pub fn expect_ident_or_keyword(&mut self) -> Result<String, ParseError> {
204        // Get the string representation of the current token
205        let name = match &self.current.token {
206            Token::Ident(name) => name.clone(),
207            // Keywords that can be type names (convert to uppercase for type matching)
208            Token::Contains => "CONTAINS".to_string(),
209            Token::Left => "LEFT".to_string(),
210            Token::Right => "RIGHT".to_string(),
211            Token::First => "FIRST".to_string(),
212            Token::Last => "LAST".to_string(),
213            Token::In => "IN".to_string(),
214            Token::By => "BY".to_string(),
215            // Any other keyword - use its display string
216            other => other.to_string(),
217        };
218
219        // Only advance for valid type-name-like tokens
220        match &self.current.token {
221            // Identifiers are always valid
222            Token::Ident(_) => {
223                self.advance()?;
224                Ok(name)
225            }
226            // These keywords can be type names
227            Token::Contains
228            | Token::Left
229            | Token::Right
230            | Token::First
231            | Token::Last
232            | Token::In
233            | Token::By => {
234                self.advance()?;
235                Ok(name)
236            }
237            // Reject structural tokens that can't be type names
238            Token::Eof
239            | Token::LParen
240            | Token::RParen
241            | Token::LBracket
242            | Token::RBracket
243            | Token::Comma
244            | Token::Dot
245            | Token::Eq
246            | Token::Lt
247            | Token::Gt
248            | Token::Le
249            | Token::Ge
250            | Token::Arrow
251            | Token::ArrowLeft
252            | Token::Dash
253            | Token::Colon
254            | Token::Semi
255            | Token::Star
256            | Token::Plus
257            | Token::Slash => Err(ParseError::expected(
258                vec!["identifier or type name"],
259                &self.current.token,
260                self.position(),
261            )),
262            // All other keywords can potentially be type names
263            _ => {
264                self.advance()?;
265                Ok(name)
266            }
267        }
268    }
269
270    /// Try to consume a token, returning true if consumed
271    pub fn consume(&mut self, expected: &Token) -> Result<bool, ParseError> {
272        if self.check(expected) {
273            self.advance()?;
274            Ok(true)
275        } else {
276            Ok(false)
277        }
278    }
279
280    /// Try to consume an identifier case-insensitively (for keywords not in Token enum)
281    pub fn consume_ident_ci(&mut self, expected: &str) -> Result<bool, ParseError> {
282        match self.peek().clone() {
283            Token::Ident(name) if name.eq_ignore_ascii_case(expected) => {
284                self.advance()?;
285                Ok(true)
286            }
287            _ => Ok(false),
288        }
289    }
290
291    /// Parse a complete query
292    pub fn parse(&mut self) -> Result<QueryExpr, ParseError> {
293        let query = self.parse_frontend_statement()?.into_query_expr();
294
295        // Expect end of input
296        if !self.check(&Token::Eof) {
297            return Err(ParseError::new(
298                // F-05: route the offending token through `SafeTokenDisplay`
299                // so the user-controlled `Ident` / `String` / `JsonLiteral`
300                // payloads are escaped before the message lands in the
301                // downstream JSON / audit / log / gRPC sinks.
302                format!(
303                    "Unexpected token after query: {}",
304                    error::SafeTokenDisplay(&self.current.token)
305                ),
306                self.position(),
307            ));
308        }
309
310        Ok(query)
311    }
312
313    /// Parse the main query expression (without CTEs)
314    pub fn parse_query_expr(&mut self) -> Result<QueryExpr, ParseError> {
315        self.parse_frontend_statement()
316            .map(|statement| statement.into_query_expr())
317    }
318
319    /// Parse an integer literal
320    pub fn parse_integer(&mut self) -> Result<i64, ParseError> {
321        match &self.current.token {
322            Token::Integer(n) => {
323                let n = *n;
324                self.advance()?;
325                Ok(n)
326            }
327            other => Err(ParseError::expected(
328                vec!["integer"],
329                other,
330                self.position(),
331            )),
332        }
333    }
334
335    /// Parse a strictly-positive integer literal (`> 0`).
336    ///
337    /// Surfaces a `ValueOutOfRange` error for `field` when the literal
338    /// is `0`, negative, or starts with a unary `-` (which the bare
339    /// `parse_integer` rejects with a confusing "expected: integer"
340    /// message). Used by modifier slots like `MAX_SIZE`, `CAPACITY`,
341    /// `WIDTH`, `DEPTH`, `K` where zero or negative values are
342    /// semantically meaningless.
343    pub fn parse_positive_integer(&mut self, field: &'static str) -> Result<i64, ParseError> {
344        let pos = self.position();
345        // Detect the unary-minus path up-front so we surface a
346        // clearer "must be positive" message instead of the generic
347        // "expected: integer".
348        if matches!(self.current.token, Token::Minus | Token::Dash) {
349            return Err(ParseError::value_out_of_range(
350                field,
351                "must be a positive integer",
352                pos,
353            ));
354        }
355        let raw = self.parse_integer()?;
356        if raw <= 0 {
357            return Err(ParseError::value_out_of_range(
358                field,
359                "must be a positive integer",
360                pos,
361            ));
362        }
363        Ok(raw)
364    }
365
366    /// Parse float literal. Accepts an optional leading unary `-`
367    /// followed by a `Float` or `Integer` token so positions like
368    /// vector literals (`[-0.1, …]`), `THRESHOLD -0.5`, `MIN_SCORE
369    /// -0.1`, `RERANK(-0.3)`, `UNION(0.7, -0.3)`, and geo coordinates
370    /// (`LATITUDE -33.86`) parse correctly. See bug #107.
371    pub fn parse_float(&mut self) -> Result<f64, ParseError> {
372        let negate = if matches!(self.current.token, Token::Minus | Token::Dash) {
373            self.advance()?;
374            true
375        } else {
376            false
377        };
378        let value = match &self.current.token {
379            Token::Float(n) => {
380                let n = *n;
381                self.advance()?;
382                n
383            }
384            Token::Integer(n) => {
385                let n = *n as f64;
386                self.advance()?;
387                n
388            }
389            other => {
390                return Err(ParseError::expected(vec!["number"], other, self.position()));
391            }
392        };
393        Ok(if negate { -value } else { value })
394    }
395
396    /// Parse a string literal
397    pub fn parse_string(&mut self) -> Result<String, ParseError> {
398        match &self.current.token {
399            Token::String(s) => {
400                let s = s.clone();
401                self.advance()?;
402                Ok(s)
403            }
404            other => Err(ParseError::expected(vec!["string"], other, self.position())),
405        }
406    }
407
408    /// Parse a value (delegates to parse_literal_value for full JSON support)
409    pub fn parse_value(&mut self) -> Result<Value, ParseError> {
410        self.parse_literal_value()
411    }
412
413    /// Parse value list for IN clause
414    pub fn parse_value_list(&mut self) -> Result<Vec<Value>, ParseError> {
415        let mut values = Vec::new();
416        loop {
417            values.push(self.parse_value()?);
418            if !self.consume(&Token::Comma)? {
419                break;
420            }
421        }
422        Ok(values)
423    }
424
425    /// Phase 1 cutover bridge: parse an expression via the new Pratt
426    /// parser (`parser/expr.rs`) and try to fold it back into a
427    /// literal `Value`. Used by INSERT VALUES / UPDATE SET / DEFAULT
428    /// slots that still store `Value` in their AST nodes — the bridge
429    /// lets them benefit from the full Expr grammar (parenthesised
430    /// literals, unary minus, CAST literals) without an AST cascade.
431    ///
432    /// Folds these Expr shapes:
433    /// - `Expr::Literal { value, .. }` → `value`
434    /// - `Expr::UnaryOp { Neg, operand: Literal(Integer/Float), .. }`
435    ///   → negated value
436    /// - `Expr::Cast { inner: Literal(text), target, .. }` →
437    ///   coerce(text, target) via schema::coerce
438    ///
439    /// Anything else returns an error so callers can decide whether
440    /// to fall back to the legacy `parse_literal_value` path or
441    /// surface a "non-literal not supported in this position" error.
442    pub fn parse_expr_value(&mut self) -> Result<Value, ParseError> {
443        let expr = self.parse_expr()?;
444        super::sql_lowering::fold_expr_to_value(expr)
445            .map_err(|msg| ParseError::new(msg, self.position()))
446    }
447}
448
449/// Parse an RQL query string into a `QueryWithCte`. A leading `WITH`
450/// is consumed as a CTE prelude; any other statement is returned in
451/// the `QueryWithCte::simple` shape (`with_clause: None`). Callers
452/// that don't care about CTEs read `.query` to recover the legacy
453/// `QueryExpr` shape.
454pub fn parse(input: &str) -> Result<QueryWithCte, ParseError> {
455    let mut parser = Parser::new(input)?;
456    parser.parse_with_cte()
457}