reddb_server/storage/query/parser/mod.rs
1//! RQL Parser
2//!
3//! Parses RQL (RedDB Query Language) tokens into a unified AST.
4//! Supports SQL-like table queries, Cypher-like graph patterns, and joins.
5//!
6//! # Supported Syntax
7//!
8//! ## Table Queries
9//! ```text
10//! SELECT [columns] FROM table [WHERE condition] [ORDER BY ...] [LIMIT n]
11//! ```
12//!
13//! ## Graph Queries
14//! ```text
15//! MATCH pattern [WHERE condition] RETURN projection
16//! ```
17//!
18//! ## Join Queries
19//! ```text
20//! FROM table [alias] JOIN GRAPH pattern ON condition [RETURN projection]
21//! ```
22//!
23//! ## Path Queries
24//! ```text
25//! PATH FROM selector TO selector [VIA edge_types] [RETURN projection]
26//! ```
27//!
28//! ## Vector Queries
29//! ```text
30//! VECTOR SEARCH collection SIMILAR TO [...] [WHERE ...] [METRIC ...] LIMIT k
31//! ```
32//!
33//! ## Hybrid Queries
34//! ```text
35//! HYBRID (structured query) VECTOR SEARCH ... FUSION strategy LIMIT n
36//! ```
37
38mod auth_ddl;
39mod config;
40mod cte;
41mod ddl;
42mod dml;
43mod error;
44mod expr;
45mod filter;
46mod graph;
47mod graph_commands;
48mod hybrid;
49mod index_ddl;
50mod join;
51mod kv;
52pub mod limits;
53mod migration;
54mod path;
55mod probabilistic_commands;
56mod queue;
57mod search_commands;
58mod table;
59mod timeseries;
60mod tree;
61mod vector;
62
63#[cfg(test)]
64mod json_literal_table;
65#[cfg(test)]
66mod property_tests;
67#[cfg(test)]
68mod tests;
69
70pub use error::{ParseError, ParseErrorKind, SafeTokenDisplay};
71pub use limits::ParserLimits;
72
73use super::ast::{QueryExpr, QueryWithCte};
74use super::lexer::{Lexer, Position, Spanned, Token};
75use crate::storage::schema::Value;
76use limits::DepthCounter;
77
78/// RQL Parser
79pub struct Parser<'a> {
80 lexer: Lexer<'a>,
81 /// Current token
82 pub(crate) current: Spanned,
83 /// Recursion-depth tracker. Each enter/exit of a recursive
84 /// descent point should bracket itself with [`enter_depth`] /
85 /// [`exit_depth`] (see `parse_expr_prec`).
86 pub(crate) depth: DepthCounter,
87 /// Tracks placeholder style used so far in this statement.
88 /// Mixing `$N` and `?` in one statement is a parse error
89 /// (PRD #351 / issue #354).
90 pub(crate) placeholder_mode: PlaceholderMode,
91 /// Counter for `?` positional placeholders, numbered 1-based
92 /// in source. Unused when mode is `Dollar` or `None`.
93 pub(crate) question_count: usize,
94}
95
96/// Placeholder style locked in by the first placeholder seen.
97#[derive(Debug, Clone, Copy, PartialEq, Eq)]
98pub(crate) enum PlaceholderMode {
99 None,
100 Dollar,
101 Question,
102}
103
104impl<'a> Parser<'a> {
105 /// Create a new parser with default DoS limits.
106 pub fn new(input: &'a str) -> Result<Self, ParseError> {
107 Self::with_limits(input, ParserLimits::default())
108 }
109
110 /// Create a new parser with custom DoS limits.
111 pub fn with_limits(input: &'a str, limits: ParserLimits) -> Result<Self, ParseError> {
112 // Input-byte cap is enforced before the lexer is even
113 // constructed: the lexer holds a `Chars` iterator over the
114 // input slice, so refusing oversized input here keeps the
115 // pathological case (10 GiB blob) cheap.
116 if input.len() > limits.max_input_bytes {
117 return Err(ParseError::input_too_large(
118 "max_input_bytes",
119 limits.max_input_bytes,
120 Position::new(1, 1, 0),
121 ));
122 }
123 let mut lexer = Lexer::with_limits(input, limits);
124 let current = lexer.next_token()?;
125 Ok(Self {
126 lexer,
127 current,
128 depth: DepthCounter::new(limits.max_depth),
129 placeholder_mode: PlaceholderMode::None,
130 question_count: 0,
131 })
132 }
133
134 /// Increment the recursion-depth counter or bail with
135 /// `ParseError::DepthLimit`. Pair with [`exit_depth`] (use the
136 /// `with_depth` helper for RAII-style bracketing).
137 pub(crate) fn enter_depth(&mut self) -> Result<(), ParseError> {
138 self.depth.depth += 1;
139 if self.depth.depth > self.depth.max_depth {
140 return Err(ParseError::depth_limit(
141 "max_depth",
142 self.depth.max_depth,
143 self.position(),
144 ));
145 }
146 Ok(())
147 }
148
149 /// Decrement the recursion-depth counter. Always called on the
150 /// success path; on the error path, the counter is rebalanced
151 /// when `Parser` itself is dropped (it's only consulted while
152 /// parsing is in progress).
153 pub(crate) fn exit_depth(&mut self) {
154 if self.depth.depth > 0 {
155 self.depth.depth -= 1;
156 }
157 }
158
159 /// Get current position
160 pub fn position(&self) -> Position {
161 self.current.start
162 }
163
164 /// Advance to next token
165 pub fn advance(&mut self) -> Result<Token, ParseError> {
166 let old = std::mem::replace(&mut self.current, self.lexer.next_token()?);
167 Ok(old.token)
168 }
169
170 /// Peek at current token
171 pub fn peek(&self) -> &Token {
172 &self.current.token
173 }
174
175 /// Peek one token past the current parser position without consuming it.
176 pub fn peek_next(&mut self) -> Result<&Token, ParseError> {
177 Ok(&self.lexer.peek_token()?.token)
178 }
179
180 /// Check if current token matches
181 pub fn check(&self, expected: &Token) -> bool {
182 std::mem::discriminant(&self.current.token) == std::mem::discriminant(expected)
183 }
184
185 /// Check if current token is a specific keyword
186 pub fn check_keyword(&self, keyword: &Token) -> bool {
187 self.check(keyword)
188 }
189
190 /// Consume a specific token or error
191 pub fn expect(&mut self, expected: Token) -> Result<Token, ParseError> {
192 if self.check(&expected) {
193 self.advance()
194 } else {
195 Err(ParseError::expected(
196 vec![&expected.to_string()],
197 &self.current.token,
198 self.position(),
199 ))
200 }
201 }
202
203 /// Consume an identifier and return its value
204 pub fn expect_ident(&mut self) -> Result<String, ParseError> {
205 match &self.current.token {
206 Token::Ident(name) => {
207 let name = name.clone();
208 self.advance()?;
209 Ok(name)
210 }
211 other => Err(ParseError::expected(
212 vec!["identifier"],
213 other,
214 self.position(),
215 )),
216 }
217 }
218
219 /// Consume an identifier or keyword (for type names where keywords are valid)
220 pub fn expect_ident_or_keyword(&mut self) -> Result<String, ParseError> {
221 // Get the string representation of the current token
222 let name = match &self.current.token {
223 Token::Ident(name) => name.clone(),
224 // Keywords that can be type names (convert to uppercase for type matching)
225 Token::Contains => "CONTAINS".to_string(),
226 Token::Left => "LEFT".to_string(),
227 Token::Right => "RIGHT".to_string(),
228 Token::First => "FIRST".to_string(),
229 Token::Last => "LAST".to_string(),
230 Token::In => "IN".to_string(),
231 Token::By => "BY".to_string(),
232 // Any other keyword - use its display string
233 other => other.to_string(),
234 };
235
236 // Only advance for valid type-name-like tokens
237 match &self.current.token {
238 // Identifiers are always valid
239 Token::Ident(_) => {
240 self.advance()?;
241 Ok(name)
242 }
243 // These keywords can be type names
244 Token::Contains
245 | Token::Left
246 | Token::Right
247 | Token::First
248 | Token::Last
249 | Token::In
250 | Token::By => {
251 self.advance()?;
252 Ok(name)
253 }
254 // Reject structural tokens that can't be type names
255 Token::Eof
256 | Token::LParen
257 | Token::RParen
258 | Token::LBracket
259 | Token::RBracket
260 | Token::Comma
261 | Token::Dot
262 | Token::Eq
263 | Token::Lt
264 | Token::Gt
265 | Token::Le
266 | Token::Ge
267 | Token::Arrow
268 | Token::ArrowLeft
269 | Token::Dash
270 | Token::Colon
271 | Token::Semi
272 | Token::Star
273 | Token::Plus
274 | Token::Slash => Err(ParseError::expected(
275 vec!["identifier or type name"],
276 &self.current.token,
277 self.position(),
278 )),
279 // All other keywords can potentially be type names
280 _ => {
281 self.advance()?;
282 Ok(name)
283 }
284 }
285 }
286
287 /// Try to consume a token, returning true if consumed
288 pub fn consume(&mut self, expected: &Token) -> Result<bool, ParseError> {
289 if self.check(expected) {
290 self.advance()?;
291 Ok(true)
292 } else {
293 Ok(false)
294 }
295 }
296
297 /// Try to consume an identifier case-insensitively (for keywords not in Token enum)
298 pub fn consume_ident_ci(&mut self, expected: &str) -> Result<bool, ParseError> {
299 match self.peek().clone() {
300 Token::Ident(name) if name.eq_ignore_ascii_case(expected) => {
301 self.advance()?;
302 Ok(true)
303 }
304 _ => Ok(false),
305 }
306 }
307
308 /// Parse a complete query
309 pub fn parse(&mut self) -> Result<QueryExpr, ParseError> {
310 let query = self.parse_frontend_statement()?.into_query_expr();
311
312 // Expect end of input
313 if !self.check(&Token::Eof) {
314 return Err(ParseError::new(
315 // F-05: route the offending token through `SafeTokenDisplay`
316 // so the user-controlled `Ident` / `String` / `JsonLiteral`
317 // payloads are escaped before the message lands in the
318 // downstream JSON / audit / log / gRPC sinks.
319 format!(
320 "Unexpected token after query: {}",
321 error::SafeTokenDisplay(&self.current.token)
322 ),
323 self.position(),
324 ));
325 }
326
327 Ok(query)
328 }
329
330 /// Parse the main query expression (without CTEs)
331 pub fn parse_query_expr(&mut self) -> Result<QueryExpr, ParseError> {
332 self.parse_frontend_statement()
333 .map(|statement| statement.into_query_expr())
334 }
335
336 /// Parse an integer literal
337 pub fn parse_integer(&mut self) -> Result<i64, ParseError> {
338 match &self.current.token {
339 Token::Integer(n) => {
340 let n = *n;
341 self.advance()?;
342 Ok(n)
343 }
344 other => Err(ParseError::expected(
345 vec!["integer"],
346 other,
347 self.position(),
348 )),
349 }
350 }
351
352 /// Parse a `$N` or `?` placeholder in a non-Expr slot (e.g. the
353 /// `LIMIT` / `MIN_SCORE` slots of SEARCH SIMILAR; issue #361). The
354 /// `field` name is used only to enrich placeholder-mixing errors.
355 /// Returns the 0-based parameter index.
356 pub fn parse_param_slot(&mut self, field: &'static str) -> Result<usize, ParseError> {
357 match self.peek().clone() {
358 Token::Dollar => {
359 self.advance()?;
360 let n = match *self.peek() {
361 Token::Integer(n) if n >= 1 => {
362 self.advance()?;
363 n as usize
364 }
365 _ => {
366 return Err(ParseError::new(
367 format!("expected `$N` (N >= 1) for {field} parameter"),
368 self.position(),
369 ));
370 }
371 };
372 if self.placeholder_mode == PlaceholderMode::Question {
373 return Err(ParseError::new(
374 "cannot mix `?` and `$N` placeholders in one statement".to_string(),
375 self.position(),
376 ));
377 }
378 self.placeholder_mode = PlaceholderMode::Dollar;
379 Ok(n - 1)
380 }
381 Token::Question => {
382 self.advance()?;
383 if self.placeholder_mode == PlaceholderMode::Dollar {
384 return Err(ParseError::new(
385 "cannot mix `?` and `$N` placeholders in one statement".to_string(),
386 self.position(),
387 ));
388 }
389 self.placeholder_mode = PlaceholderMode::Question;
390 self.question_count += 1;
391 Ok(self.question_count - 1)
392 }
393 other => Err(ParseError::expected(
394 vec!["$N", "?"],
395 &other,
396 self.position(),
397 )),
398 }
399 }
400
401 /// Parse a strictly-positive integer literal (`> 0`).
402 ///
403 /// Surfaces a `ValueOutOfRange` error for `field` when the literal
404 /// is `0`, negative, or starts with a unary `-` (which the bare
405 /// `parse_integer` rejects with a confusing "expected: integer"
406 /// message). Used by modifier slots like `MAX_SIZE`, `CAPACITY`,
407 /// `WIDTH`, `DEPTH`, `K` where zero or negative values are
408 /// semantically meaningless.
409 pub fn parse_positive_integer(&mut self, field: &'static str) -> Result<i64, ParseError> {
410 let pos = self.position();
411 // Detect the unary-minus path up-front so we surface a
412 // clearer "must be positive" message instead of the generic
413 // "expected: integer".
414 if matches!(self.current.token, Token::Minus | Token::Dash) {
415 return Err(ParseError::value_out_of_range(
416 field,
417 "must be a positive integer",
418 pos,
419 ));
420 }
421 let raw = self.parse_integer()?;
422 if raw <= 0 {
423 return Err(ParseError::value_out_of_range(
424 field,
425 "must be a positive integer",
426 pos,
427 ));
428 }
429 Ok(raw)
430 }
431
432 /// Parse float literal. Accepts an optional leading unary `-`
433 /// followed by a `Float` or `Integer` token so positions like
434 /// vector literals (`[-0.1, …]`), `THRESHOLD -0.5`, `MIN_SCORE
435 /// -0.1`, `RERANK(-0.3)`, `UNION(0.7, -0.3)`, and geo coordinates
436 /// (`LATITUDE -33.86`) parse correctly. See bug #107.
437 pub fn parse_float(&mut self) -> Result<f64, ParseError> {
438 let negate = if matches!(self.current.token, Token::Minus | Token::Dash) {
439 self.advance()?;
440 true
441 } else {
442 false
443 };
444 let value = match &self.current.token {
445 Token::Float(n) => {
446 let n = *n;
447 self.advance()?;
448 n
449 }
450 Token::Integer(n) => {
451 let n = *n as f64;
452 self.advance()?;
453 n
454 }
455 other => {
456 return Err(ParseError::expected(vec!["number"], other, self.position()));
457 }
458 };
459 Ok(if negate { -value } else { value })
460 }
461
462 /// Parse a string literal
463 pub fn parse_string(&mut self) -> Result<String, ParseError> {
464 match &self.current.token {
465 Token::String(s) => {
466 let s = s.clone();
467 self.advance()?;
468 Ok(s)
469 }
470 other => Err(ParseError::expected(vec!["string"], other, self.position())),
471 }
472 }
473
474 /// Parse a value (delegates to parse_literal_value for full JSON support)
475 pub fn parse_value(&mut self) -> Result<Value, ParseError> {
476 self.parse_literal_value()
477 }
478
479 /// Parse value list for IN clause
480 pub fn parse_value_list(&mut self) -> Result<Vec<Value>, ParseError> {
481 let mut values = Vec::new();
482 loop {
483 values.push(self.parse_value()?);
484 if !self.consume(&Token::Comma)? {
485 break;
486 }
487 }
488 Ok(values)
489 }
490
491 /// Phase 1 cutover bridge: parse an expression via the new Pratt
492 /// parser (`parser/expr.rs`) and try to fold it back into a
493 /// literal `Value`. Used by INSERT VALUES / UPDATE SET / DEFAULT
494 /// slots that still store `Value` in their AST nodes — the bridge
495 /// lets them benefit from the full Expr grammar (parenthesised
496 /// literals, unary minus, CAST literals) without an AST cascade.
497 ///
498 /// Folds these Expr shapes:
499 /// - `Expr::Literal { value, .. }` → `value`
500 /// - `Expr::UnaryOp { Neg, operand: Literal(Integer/Float), .. }`
501 /// → negated value
502 /// - `Expr::Cast { inner: Literal(text), target, .. }` →
503 /// coerce(text, target) via schema::coerce
504 ///
505 /// Anything else returns an error so callers can decide whether
506 /// to fall back to the legacy `parse_literal_value` path or
507 /// surface a "non-literal not supported in this position" error.
508 pub fn parse_expr_value(&mut self) -> Result<Value, ParseError> {
509 let expr = self.parse_expr()?;
510 super::sql_lowering::fold_expr_to_value(expr)
511 .map_err(|msg| ParseError::new(msg, self.position()))
512 }
513}
514
515/// Parse an RQL query string into a `QueryWithCte`. A leading `WITH`
516/// is consumed as a CTE prelude; any other statement is returned in
517/// the `QueryWithCte::simple` shape (`with_clause: None`). Callers
518/// that don't care about CTEs read `.query` to recover the legacy
519/// `QueryExpr` shape.
520pub fn parse(input: &str) -> Result<QueryWithCte, ParseError> {
521 let mut parser = Parser::new(input)?;
522 parser.parse_with_cte()
523}