reddb_server/storage/query/parser/mod.rs
1//! RQL Parser
2//!
3//! Parses RQL (RedDB Query Language) tokens into a unified AST.
4//! Supports SQL-like table queries, Cypher-like graph patterns, and joins.
5//!
6//! # Supported Syntax
7//!
8//! ## Table Queries
9//! ```text
10//! SELECT [columns] FROM table [WHERE condition] [ORDER BY ...] [LIMIT n]
11//! ```
12//!
13//! ## Graph Queries
14//! ```text
15//! MATCH pattern [WHERE condition] RETURN projection
16//! ```
17//!
18//! ## Join Queries
19//! ```text
20//! FROM table [alias] JOIN GRAPH pattern ON condition [RETURN projection]
21//! ```
22//!
23//! ## Path Queries
24//! ```text
25//! PATH FROM selector TO selector [VIA edge_types] [RETURN projection]
26//! ```
27//!
28//! ## Vector Queries
29//! ```text
30//! VECTOR SEARCH collection SIMILAR TO [...] [WHERE ...] [METRIC ...] LIMIT k
31//! ```
32//!
33//! ## Hybrid Queries
34//! ```text
35//! HYBRID (structured query) VECTOR SEARCH ... FUSION strategy LIMIT n
36//! ```
37
38mod auth_ddl;
39mod config;
40mod cte;
41mod ddl;
42mod dml;
43mod error;
44mod expr;
45mod filter;
46mod graph;
47mod graph_commands;
48mod hybrid;
49mod index_ddl;
50mod join;
51mod kv;
52pub mod limits;
53mod migration;
54mod path;
55mod probabilistic_commands;
56mod queue;
57mod search_commands;
58mod table;
59mod timeseries;
60mod tree;
61mod vector;
62
63#[cfg(test)]
64mod json_literal_table;
65#[cfg(test)]
66mod property_tests;
67#[cfg(test)]
68mod tests;
69
70pub use error::{ParseError, ParseErrorKind, SafeTokenDisplay};
71pub use limits::ParserLimits;
72
73use super::ast::{QueryExpr, QueryWithCte};
74use super::lexer::{Lexer, Position, Spanned, Token};
75use crate::storage::schema::Value;
76use limits::DepthCounter;
77
/// RQL Parser
///
/// Streaming recursive-descent parser over a [`Lexer`]. Holds exactly
/// one token of committed lookahead (`current`); a second token can be
/// inspected via the lexer's own peek (see `peek_next`). The depth
/// counter guards recursive grammar productions against
/// stack-overflow-style DoS inputs.
pub struct Parser<'a> {
    // Token source; borrows the input string for the parser's lifetime.
    lexer: Lexer<'a>,
    /// Current token
    pub(crate) current: Spanned,
    /// Recursion-depth tracker. Each enter/exit of a recursive
    /// descent point should bracket itself with [`enter_depth`] /
    /// [`exit_depth`] (see `parse_expr_prec`).
    pub(crate) depth: DepthCounter,
}
88
89impl<'a> Parser<'a> {
90 /// Create a new parser with default DoS limits.
91 pub fn new(input: &'a str) -> Result<Self, ParseError> {
92 Self::with_limits(input, ParserLimits::default())
93 }
94
95 /// Create a new parser with custom DoS limits.
96 pub fn with_limits(input: &'a str, limits: ParserLimits) -> Result<Self, ParseError> {
97 // Input-byte cap is enforced before the lexer is even
98 // constructed: the lexer holds a `Chars` iterator over the
99 // input slice, so refusing oversized input here keeps the
100 // pathological case (10 GiB blob) cheap.
101 if input.len() > limits.max_input_bytes {
102 return Err(ParseError::input_too_large(
103 "max_input_bytes",
104 limits.max_input_bytes,
105 Position::new(1, 1, 0),
106 ));
107 }
108 let mut lexer = Lexer::with_limits(input, limits);
109 let current = lexer.next_token()?;
110 Ok(Self {
111 lexer,
112 current,
113 depth: DepthCounter::new(limits.max_depth),
114 })
115 }
116
117 /// Increment the recursion-depth counter or bail with
118 /// `ParseError::DepthLimit`. Pair with [`exit_depth`] (use the
119 /// `with_depth` helper for RAII-style bracketing).
120 pub(crate) fn enter_depth(&mut self) -> Result<(), ParseError> {
121 self.depth.depth += 1;
122 if self.depth.depth > self.depth.max_depth {
123 return Err(ParseError::depth_limit(
124 "max_depth",
125 self.depth.max_depth,
126 self.position(),
127 ));
128 }
129 Ok(())
130 }
131
132 /// Decrement the recursion-depth counter. Always called on the
133 /// success path; on the error path, the counter is rebalanced
134 /// when `Parser` itself is dropped (it's only consulted while
135 /// parsing is in progress).
136 pub(crate) fn exit_depth(&mut self) {
137 if self.depth.depth > 0 {
138 self.depth.depth -= 1;
139 }
140 }
141
142 /// Get current position
143 pub fn position(&self) -> Position {
144 self.current.start
145 }
146
147 /// Advance to next token
148 pub fn advance(&mut self) -> Result<Token, ParseError> {
149 let old = std::mem::replace(&mut self.current, self.lexer.next_token()?);
150 Ok(old.token)
151 }
152
153 /// Peek at current token
154 pub fn peek(&self) -> &Token {
155 &self.current.token
156 }
157
158 /// Peek one token past the current parser position without consuming it.
159 pub fn peek_next(&mut self) -> Result<&Token, ParseError> {
160 Ok(&self.lexer.peek_token()?.token)
161 }
162
163 /// Check if current token matches
164 pub fn check(&self, expected: &Token) -> bool {
165 std::mem::discriminant(&self.current.token) == std::mem::discriminant(expected)
166 }
167
168 /// Check if current token is a specific keyword
169 pub fn check_keyword(&self, keyword: &Token) -> bool {
170 self.check(keyword)
171 }
172
173 /// Consume a specific token or error
174 pub fn expect(&mut self, expected: Token) -> Result<Token, ParseError> {
175 if self.check(&expected) {
176 self.advance()
177 } else {
178 Err(ParseError::expected(
179 vec![&expected.to_string()],
180 &self.current.token,
181 self.position(),
182 ))
183 }
184 }
185
186 /// Consume an identifier and return its value
187 pub fn expect_ident(&mut self) -> Result<String, ParseError> {
188 match &self.current.token {
189 Token::Ident(name) => {
190 let name = name.clone();
191 self.advance()?;
192 Ok(name)
193 }
194 other => Err(ParseError::expected(
195 vec!["identifier"],
196 other,
197 self.position(),
198 )),
199 }
200 }
201
202 /// Consume an identifier or keyword (for type names where keywords are valid)
203 pub fn expect_ident_or_keyword(&mut self) -> Result<String, ParseError> {
204 // Get the string representation of the current token
205 let name = match &self.current.token {
206 Token::Ident(name) => name.clone(),
207 // Keywords that can be type names (convert to uppercase for type matching)
208 Token::Contains => "CONTAINS".to_string(),
209 Token::Left => "LEFT".to_string(),
210 Token::Right => "RIGHT".to_string(),
211 Token::First => "FIRST".to_string(),
212 Token::Last => "LAST".to_string(),
213 Token::In => "IN".to_string(),
214 Token::By => "BY".to_string(),
215 // Any other keyword - use its display string
216 other => other.to_string(),
217 };
218
219 // Only advance for valid type-name-like tokens
220 match &self.current.token {
221 // Identifiers are always valid
222 Token::Ident(_) => {
223 self.advance()?;
224 Ok(name)
225 }
226 // These keywords can be type names
227 Token::Contains
228 | Token::Left
229 | Token::Right
230 | Token::First
231 | Token::Last
232 | Token::In
233 | Token::By => {
234 self.advance()?;
235 Ok(name)
236 }
237 // Reject structural tokens that can't be type names
238 Token::Eof
239 | Token::LParen
240 | Token::RParen
241 | Token::LBracket
242 | Token::RBracket
243 | Token::Comma
244 | Token::Dot
245 | Token::Eq
246 | Token::Lt
247 | Token::Gt
248 | Token::Le
249 | Token::Ge
250 | Token::Arrow
251 | Token::ArrowLeft
252 | Token::Dash
253 | Token::Colon
254 | Token::Semi
255 | Token::Star
256 | Token::Plus
257 | Token::Slash => Err(ParseError::expected(
258 vec!["identifier or type name"],
259 &self.current.token,
260 self.position(),
261 )),
262 // All other keywords can potentially be type names
263 _ => {
264 self.advance()?;
265 Ok(name)
266 }
267 }
268 }
269
270 /// Try to consume a token, returning true if consumed
271 pub fn consume(&mut self, expected: &Token) -> Result<bool, ParseError> {
272 if self.check(expected) {
273 self.advance()?;
274 Ok(true)
275 } else {
276 Ok(false)
277 }
278 }
279
280 /// Try to consume an identifier case-insensitively (for keywords not in Token enum)
281 pub fn consume_ident_ci(&mut self, expected: &str) -> Result<bool, ParseError> {
282 match self.peek().clone() {
283 Token::Ident(name) if name.eq_ignore_ascii_case(expected) => {
284 self.advance()?;
285 Ok(true)
286 }
287 _ => Ok(false),
288 }
289 }
290
291 /// Parse a complete query
292 pub fn parse(&mut self) -> Result<QueryExpr, ParseError> {
293 let query = self.parse_frontend_statement()?.into_query_expr();
294
295 // Expect end of input
296 if !self.check(&Token::Eof) {
297 return Err(ParseError::new(
298 // F-05: route the offending token through `SafeTokenDisplay`
299 // so the user-controlled `Ident` / `String` / `JsonLiteral`
300 // payloads are escaped before the message lands in the
301 // downstream JSON / audit / log / gRPC sinks.
302 format!(
303 "Unexpected token after query: {}",
304 error::SafeTokenDisplay(&self.current.token)
305 ),
306 self.position(),
307 ));
308 }
309
310 Ok(query)
311 }
312
313 /// Parse the main query expression (without CTEs)
314 pub fn parse_query_expr(&mut self) -> Result<QueryExpr, ParseError> {
315 self.parse_frontend_statement()
316 .map(|statement| statement.into_query_expr())
317 }
318
319 /// Parse an integer literal
320 pub fn parse_integer(&mut self) -> Result<i64, ParseError> {
321 match &self.current.token {
322 Token::Integer(n) => {
323 let n = *n;
324 self.advance()?;
325 Ok(n)
326 }
327 other => Err(ParseError::expected(
328 vec!["integer"],
329 other,
330 self.position(),
331 )),
332 }
333 }
334
335 /// Parse a strictly-positive integer literal (`> 0`).
336 ///
337 /// Surfaces a `ValueOutOfRange` error for `field` when the literal
338 /// is `0`, negative, or starts with a unary `-` (which the bare
339 /// `parse_integer` rejects with a confusing "expected: integer"
340 /// message). Used by modifier slots like `MAX_SIZE`, `CAPACITY`,
341 /// `WIDTH`, `DEPTH`, `K` where zero or negative values are
342 /// semantically meaningless.
343 pub fn parse_positive_integer(&mut self, field: &'static str) -> Result<i64, ParseError> {
344 let pos = self.position();
345 // Detect the unary-minus path up-front so we surface a
346 // clearer "must be positive" message instead of the generic
347 // "expected: integer".
348 if matches!(self.current.token, Token::Minus | Token::Dash) {
349 return Err(ParseError::value_out_of_range(
350 field,
351 "must be a positive integer",
352 pos,
353 ));
354 }
355 let raw = self.parse_integer()?;
356 if raw <= 0 {
357 return Err(ParseError::value_out_of_range(
358 field,
359 "must be a positive integer",
360 pos,
361 ));
362 }
363 Ok(raw)
364 }
365
366 /// Parse float literal. Accepts an optional leading unary `-`
367 /// followed by a `Float` or `Integer` token so positions like
368 /// vector literals (`[-0.1, …]`), `THRESHOLD -0.5`, `MIN_SCORE
369 /// -0.1`, `RERANK(-0.3)`, `UNION(0.7, -0.3)`, and geo coordinates
370 /// (`LATITUDE -33.86`) parse correctly. See bug #107.
371 pub fn parse_float(&mut self) -> Result<f64, ParseError> {
372 let negate = if matches!(self.current.token, Token::Minus | Token::Dash) {
373 self.advance()?;
374 true
375 } else {
376 false
377 };
378 let value = match &self.current.token {
379 Token::Float(n) => {
380 let n = *n;
381 self.advance()?;
382 n
383 }
384 Token::Integer(n) => {
385 let n = *n as f64;
386 self.advance()?;
387 n
388 }
389 other => {
390 return Err(ParseError::expected(vec!["number"], other, self.position()));
391 }
392 };
393 Ok(if negate { -value } else { value })
394 }
395
396 /// Parse a string literal
397 pub fn parse_string(&mut self) -> Result<String, ParseError> {
398 match &self.current.token {
399 Token::String(s) => {
400 let s = s.clone();
401 self.advance()?;
402 Ok(s)
403 }
404 other => Err(ParseError::expected(vec!["string"], other, self.position())),
405 }
406 }
407
408 /// Parse a value (delegates to parse_literal_value for full JSON support)
409 pub fn parse_value(&mut self) -> Result<Value, ParseError> {
410 self.parse_literal_value()
411 }
412
413 /// Parse value list for IN clause
414 pub fn parse_value_list(&mut self) -> Result<Vec<Value>, ParseError> {
415 let mut values = Vec::new();
416 loop {
417 values.push(self.parse_value()?);
418 if !self.consume(&Token::Comma)? {
419 break;
420 }
421 }
422 Ok(values)
423 }
424
425 /// Phase 1 cutover bridge: parse an expression via the new Pratt
426 /// parser (`parser/expr.rs`) and try to fold it back into a
427 /// literal `Value`. Used by INSERT VALUES / UPDATE SET / DEFAULT
428 /// slots that still store `Value` in their AST nodes — the bridge
429 /// lets them benefit from the full Expr grammar (parenthesised
430 /// literals, unary minus, CAST literals) without an AST cascade.
431 ///
432 /// Folds these Expr shapes:
433 /// - `Expr::Literal { value, .. }` → `value`
434 /// - `Expr::UnaryOp { Neg, operand: Literal(Integer/Float), .. }`
435 /// → negated value
436 /// - `Expr::Cast { inner: Literal(text), target, .. }` →
437 /// coerce(text, target) via schema::coerce
438 ///
439 /// Anything else returns an error so callers can decide whether
440 /// to fall back to the legacy `parse_literal_value` path or
441 /// surface a "non-literal not supported in this position" error.
442 pub fn parse_expr_value(&mut self) -> Result<Value, ParseError> {
443 let expr = self.parse_expr()?;
444 super::sql_lowering::fold_expr_to_value(expr)
445 .map_err(|msg| ParseError::new(msg, self.position()))
446 }
447}
448
449/// Parse an RQL query string into a `QueryWithCte`. A leading `WITH`
450/// is consumed as a CTE prelude; any other statement is returned in
451/// the `QueryWithCte::simple` shape (`with_clause: None`). Callers
452/// that don't care about CTEs read `.query` to recover the legacy
453/// `QueryExpr` shape.
454pub fn parse(input: &str) -> Result<QueryWithCte, ParseError> {
455 let mut parser = Parser::new(input)?;
456 parser.parse_with_cte()
457}