reddb_server/storage/query/ast.rs
1//! Unified Query AST
2//!
3//! Defines the abstract syntax tree for unified table+graph queries.
4//! Supports:
5//! - Pure table queries (SELECT ... FROM ...)
6//! - Pure graph queries (MATCH (a)-[r]->(b) ...)
7//! - Table-graph joins (FROM t JOIN GRAPH ...)
8//! - Path queries (PATH FROM ... TO ... VIA ...)
9//!
10//! # Examples
11//!
12//! ```text
13//! -- Table query
14//! SELECT ip, ports FROM hosts WHERE os = 'Linux'
15//!
16//! -- Graph query
17//! MATCH (h:Host)-[:HAS_SERVICE]->(s:Service)
18//! WHERE h.ip STARTS WITH '192.168'
19//! RETURN h, s
20//!
21//! -- Join query
22//! FROM hosts h
23//! JOIN GRAPH (h)-[:HAS_VULN]->(v:Vulnerability) AS g
24//! WHERE h.criticality > 7
25//! RETURN h.ip, h.hostname, v.cve
26//!
27//! -- Path query
28//! PATH FROM host('192.168.1.1') TO host('10.0.0.1')
29//! VIA [:AUTH_ACCESS, :CONNECTS_TO]
30//! RETURN path
31//! ```
32
33#[path = "builders.rs"]
34mod builders;
35#[path = "core.rs"]
36mod core;
37
38pub use builders::*;
39pub use core::*;
40
41#[cfg(test)]
42#[path = "tests.rs"]
43mod tests;
44
45// ============================================================================
46// Fase 2 — Expression AST (Week 1 foundation)
47//
48// Types below are the foundation for the parser v2 rewrite described in
49// `/home/cyber/.claude/plans/squishy-mixing-honey.md` (Fase 2). They are
50// additive — existing `Filter`, `Projection`, `OrderByClause`, and
51// `TableQuery` keep working unchanged. Future weeks migrate those AST
52// slots to carry an `Expr` instead of ad-hoc `FieldRef` / `Value` /
53// `String` fields so deferred Fase 1 items (1.6 ORDER BY expression,
54// 1.7 FROM (SELECT …)) can land without further AST churn.
55//
56// Design notes:
57// - `Expr` is an *untyped* syntactic tree. Semantic resolution — type
58// inference, name resolution, coercion pathway — happens in the
59// `analyze/` pass once it exists (Week 2-3 of Fase 2).
60// - `Span` uses the existing `lexer::Position` so errors can point at
61// the original source range without re-tokenising.
62// - `BinOp` is a flat enum; precedence lives in the parser, not here.
63// ============================================================================
64
65use crate::storage::query::lexer::Position;
66use crate::storage::schema::{DataType, Value};
67
68/// Half-open byte / line / column range into the original input string.
69/// Both endpoints come from the lexer so downstream passes can re-open
70/// the source and print a caret-pointed diagnostic without re-lexing.
71#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
72pub struct Span {
73 pub start: Position,
74 pub end: Position,
75}
76
77impl Span {
78 pub fn new(start: Position, end: Position) -> Self {
79 Self { start, end }
80 }
81
82 /// A synthetic span marker used when a node is constructed
83 /// programmatically rather than parsed from source. Debug diagnostics
84 /// should check for this via `is_synthetic()` and suppress location
85 /// pointers rather than printing `0:0`.
86 pub fn synthetic() -> Self {
87 Self::default()
88 }
89
90 pub fn is_synthetic(&self) -> bool {
91 self.start == Position::default() && self.end == Position::default()
92 }
93}
94
95/// Syntactic binary operators. Parsed precedence determines grouping;
96/// this enum only identifies the operator itself. Comparison and logical
97/// operators live alongside arithmetic so a single `Expr::BinaryOp`
98/// walker can cover every infix form the parser emits.
99#[derive(Debug, Clone, Copy, PartialEq, Eq)]
100pub enum BinOp {
101 // Arithmetic
102 Add,
103 Sub,
104 Mul,
105 Div,
106 Mod,
107 // String
108 Concat,
109 // Comparison
110 Eq,
111 Ne,
112 Lt,
113 Le,
114 Gt,
115 Ge,
116 // Logical
117 And,
118 Or,
119}
120
121impl BinOp {
122 /// Left-binding precedence for Pratt parsing. Higher = binds tighter.
123 /// Mirrors PG gram.y's precedence table for the operators we have.
124 pub fn precedence(self) -> u8 {
125 match self {
126 BinOp::Or => 10,
127 BinOp::And => 20,
128 BinOp::Eq | BinOp::Ne | BinOp::Lt | BinOp::Le | BinOp::Gt | BinOp::Ge => 30,
129 BinOp::Concat => 40,
130 BinOp::Add | BinOp::Sub => 50,
131 BinOp::Mul | BinOp::Div | BinOp::Mod => 60,
132 }
133 }
134}
135
136/// Unary operators — only the two real unaries SQL actually has.
137#[derive(Debug, Clone, Copy, PartialEq, Eq)]
138pub enum UnaryOp {
139 /// Arithmetic negation: `-expr`
140 Neg,
141 /// Logical negation: `NOT expr`
142 Not,
143}
144
145/// The syntactic expression tree. Every node carries a `Span` so
146/// semantic errors from the analyze pass can point back at the exact
147/// token range. Created by the Fase 2 parser, consumed by the analyzer
148/// and (eventually) the planner.
149#[derive(Debug, Clone, PartialEq)]
150pub enum Expr {
151 /// A literal value (number, string, boolean, null).
152 Literal { value: Value, span: Span },
153 /// Reference to a column (possibly qualified by table / alias).
154 Column { field: FieldRef, span: Span },
155 /// Query parameter placeholder (`?` or `$n`). Used by prepared
156 /// statements in Fase 4 — the plan cache strips these so repeated
157 /// bindings reuse the same plan.
158 Parameter { index: usize, span: Span },
159 /// Binary infix operator: `lhs <op> rhs`.
160 BinaryOp {
161 op: BinOp,
162 lhs: Box<Expr>,
163 rhs: Box<Expr>,
164 span: Span,
165 },
166 /// Prefix unary operator.
167 UnaryOp {
168 op: UnaryOp,
169 operand: Box<Expr>,
170 span: Span,
171 },
172 /// `CAST(expr AS type)` / `expr::type`.
173 Cast {
174 inner: Box<Expr>,
175 target: DataType,
176 span: Span,
177 },
178 /// Function / aggregate call.
179 FunctionCall {
180 name: String,
181 args: Vec<Expr>,
182 span: Span,
183 },
184 /// `CASE WHEN cond THEN val [...] [ELSE val] END`.
185 Case {
186 branches: Vec<(Expr, Expr)>,
187 else_: Option<Box<Expr>>,
188 span: Span,
189 },
190 /// `IS NULL` / `IS NOT NULL`. Kept as a distinct variant because
191 /// SQL treats them as unary postfix operators with special
192 /// three-valued semantics.
193 IsNull {
194 operand: Box<Expr>,
195 negated: bool,
196 span: Span,
197 },
198 /// `expr IN (v1, v2, …)`. The rhs list is `Vec<Expr>` — at Week 1
199 /// only literal lists survive analyze; correlated subquery lists
200 /// land in Week 3 alongside the `Subquery` variant below.
201 InList {
202 target: Box<Expr>,
203 values: Vec<Expr>,
204 negated: bool,
205 span: Span,
206 },
207 /// `expr BETWEEN low AND high` — first-class so pushdown can
208 /// recognise range predicates without decomposing to `>=` and `<=`.
209 Between {
210 target: Box<Expr>,
211 low: Box<Expr>,
212 high: Box<Expr>,
213 negated: bool,
214 span: Span,
215 },
216 /// Parenthesized SELECT used in an expression context.
217 Subquery { query: ExprSubquery, span: Span },
218 /// Window function call: `fn(args) OVER (PARTITION BY ... ORDER BY
219 /// ... [frame])`. Carries the same `name`/`args` payload as a plain
220 /// `FunctionCall` plus a `WindowSpec` describing the OVER clause.
221 /// Issue #589 slice 7a — parser + AST only; no runtime execution.
222 WindowFunctionCall {
223 name: String,
224 args: Vec<Expr>,
225 window: WindowSpec,
226 span: Span,
227 },
228}
229
230#[derive(Debug, Clone)]
231pub struct ExprSubquery {
232 pub query: Box<QueryExpr>,
233}
234
235impl PartialEq for ExprSubquery {
236 fn eq(&self, other: &Self) -> bool {
237 format!("{:?}", self.query) == format!("{:?}", other.query)
238 }
239}
240
241impl Expr {
242 /// Extract the span of this expression. Synthetic nodes return
243 /// `Span::synthetic()` — callers that need a real location should
244 /// check `span.is_synthetic()` before rendering diagnostics.
245 pub fn span(&self) -> Span {
246 match self {
247 Expr::Literal { span, .. }
248 | Expr::Column { span, .. }
249 | Expr::Parameter { span, .. }
250 | Expr::BinaryOp { span, .. }
251 | Expr::UnaryOp { span, .. }
252 | Expr::Cast { span, .. }
253 | Expr::FunctionCall { span, .. }
254 | Expr::Case { span, .. }
255 | Expr::IsNull { span, .. }
256 | Expr::InList { span, .. }
257 | Expr::Between { span, .. }
258 | Expr::Subquery { span, .. }
259 | Expr::WindowFunctionCall { span, .. } => *span,
260 }
261 }
262
263 /// Constructor shortcut for the common `Literal` case.
264 pub fn lit(value: Value) -> Self {
265 Expr::Literal {
266 value,
267 span: Span::synthetic(),
268 }
269 }
270
271 /// Constructor shortcut for the common `Column` case.
272 pub fn col(field: FieldRef) -> Self {
273 Expr::Column {
274 field,
275 span: Span::synthetic(),
276 }
277 }
278
279 /// Convenience: build a binary operation with a synthetic span.
280 /// Used by unit tests and by the Projection → Expr shim while the
281 /// migration is in flight.
282 pub fn binop(op: BinOp, lhs: Expr, rhs: Expr) -> Self {
283 Expr::BinaryOp {
284 op,
285 lhs: Box::new(lhs),
286 rhs: Box::new(rhs),
287 span: Span::synthetic(),
288 }
289 }
290}
291
292#[cfg(test)]
293mod expr_tests {
294 use super::*;
295
296 #[test]
297 fn precedence_orders_mul_over_add_and_and_over_or() {
298 // Higher precedence binds tighter — classic `a OR b AND c` trap.
299 assert!(BinOp::Mul.precedence() > BinOp::Add.precedence());
300 assert!(BinOp::Add.precedence() > BinOp::Eq.precedence());
301 assert!(BinOp::Eq.precedence() > BinOp::And.precedence());
302 assert!(BinOp::And.precedence() > BinOp::Or.precedence());
303 }
304
305 #[test]
306 fn span_synthetic_round_trip() {
307 let s = Span::synthetic();
308 assert!(s.is_synthetic());
309 let real = Span::new(Position::new(1, 1, 0), Position::new(1, 5, 4));
310 assert!(!real.is_synthetic());
311 }
312
313 #[test]
314 fn expr_constructors_carry_synthetic_span() {
315 let lit = Expr::lit(Value::Integer(42));
316 assert!(lit.span().is_synthetic());
317 assert_eq!(
318 lit,
319 Expr::Literal {
320 value: Value::Integer(42),
321 span: Span::synthetic(),
322 }
323 );
324 }
325
326 #[test]
327 fn binop_shortcut_nests() {
328 // a + b * c parses to Add(a, Mul(b, c)) under normal precedence
329 let expr = Expr::binop(
330 BinOp::Add,
331 Expr::col(FieldRef::column("", "a")),
332 Expr::binop(
333 BinOp::Mul,
334 Expr::col(FieldRef::column("", "b")),
335 Expr::col(FieldRef::column("", "c")),
336 ),
337 );
338 match expr {
339 Expr::BinaryOp {
340 op: BinOp::Add,
341 rhs,
342 ..
343 } => match *rhs {
344 Expr::BinaryOp { op: BinOp::Mul, .. } => {}
345 other => panic!("expected Mul on rhs, got {:?}", other),
346 },
347 other => panic!("expected Add at root, got {:?}", other),
348 }
349 }
350}