Skip to main content

reddb_rql/
ast.rs

1//! Unified Query AST
2//!
3//! Defines the abstract syntax tree for unified table+graph queries.
4//! Supports:
5//! - Pure table queries (SELECT ... FROM ...)
6//! - Pure graph queries (MATCH (a)-[r]->(b) ...)
7//! - Table-graph joins (FROM t JOIN GRAPH ...)
8//! - Path queries (PATH FROM ... TO ... VIA ...)
9//!
10//! # Examples
11//!
12//! ```text
13//! -- Table query
14//! SELECT ip, ports FROM hosts WHERE os = 'Linux'
15//!
16//! -- Graph query
17//! MATCH (h:Host)-[:HAS_SERVICE]->(s:Service)
18//! WHERE h.ip STARTS WITH '192.168'
19//! RETURN h, s
20//!
21//! -- Join query
22//! FROM hosts h
23//! JOIN GRAPH (h)-[:HAS_VULN]->(v:Vulnerability) AS g
24//! WHERE h.criticality > 7
25//! RETURN h.ip, h.hostname, v.cve
26//!
27//! -- Path query
28//! PATH FROM host('192.168.1.1') TO host('10.0.0.1')
29//! VIA [:AUTH_ACCESS, :CONNECTS_TO]
30//! RETURN path
31//! ```
32
33#[path = "builders.rs"]
34mod builders;
35#[path = "core.rs"]
36mod core;
37
38pub use builders::*;
39pub use core::*;
40
41#[cfg(test)]
42#[path = "ast_tests.rs"]
43mod tests;
44
45// ============================================================================
46// Fase 2 — Expression AST (Week 1 foundation)
47//
48// Types below are the foundation for the parser v2 rewrite described in
49// `/home/cyber/.claude/plans/squishy-mixing-honey.md` (Fase 2). They are
50// additive — existing `Filter`, `Projection`, `OrderByClause`, and
51// `TableQuery` keep working unchanged. Future weeks migrate those AST
52// slots to carry an `Expr` instead of ad-hoc `FieldRef` / `Value` /
53// `String` fields so deferred Fase 1 items (1.6 ORDER BY expression,
54// 1.7 FROM (SELECT …)) can land without further AST churn.
55//
56// Design notes:
57// - `Expr` is an *untyped* syntactic tree. Semantic resolution — type
58//   inference, name resolution, coercion pathway — happens in the
59//   `analyze/` pass once it exists (Week 2-3 of Fase 2).
60// - `Span` uses the existing `lexer::Position` so errors can point at
61//   the original source range without re-tokenising.
62// - `BinOp` is a flat enum; precedence lives in the parser, not here.
63// ============================================================================
64
65use crate::lexer::Position;
66use reddb_types::types::{DataType, Value};
67
68/// Half-open byte / line / column range into the original input string.
69/// Both endpoints come from the lexer so downstream passes can re-open
70/// the source and print a caret-pointed diagnostic without re-lexing.
71#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
72pub struct Span {
73    pub start: Position,
74    pub end: Position,
75}
76
77impl Span {
78    pub fn new(start: Position, end: Position) -> Self {
79        Self { start, end }
80    }
81
82    /// A synthetic span marker used when a node is constructed
83    /// programmatically rather than parsed from source. Debug diagnostics
84    /// should check for this via `is_synthetic()` and suppress location
85    /// pointers rather than printing `0:0`.
86    pub fn synthetic() -> Self {
87        Self::default()
88    }
89
90    pub fn is_synthetic(&self) -> bool {
91        self.start == Position::default() && self.end == Position::default()
92    }
93}
94
95/// Syntactic binary operators — the operator vocabulary.
96///
97/// `BinOp` was re-homed into the neutral keystone crate
98/// [`reddb_types::operator`] (ADR 0052): the coercion spine that keys
99/// overload resolution on it now lives there, and leaving the enum behind
100/// would force that crate to depend back on `reddb-server`. This re-export
101/// keeps the historical `ast::BinOp` path resolving so every parser /
102/// analyzer / evaluator call-site stays untouched.
103pub use reddb_types::operator::BinOp;
104
105/// Unary operators — only the two real unaries SQL actually has.
106#[derive(Debug, Clone, Copy, PartialEq, Eq)]
107pub enum UnaryOp {
108    /// Arithmetic negation: `-expr`
109    Neg,
110    /// Logical negation: `NOT expr`
111    Not,
112}
113
114/// The syntactic expression tree. Every node carries a `Span` so
115/// semantic errors from the analyze pass can point back at the exact
116/// token range. Created by the Fase 2 parser, consumed by the analyzer
117/// and (eventually) the planner.
118#[derive(Debug, Clone, PartialEq)]
119pub enum Expr {
120    /// A literal value (number, string, boolean, null).
121    Literal { value: Value, span: Span },
122    /// Reference to a column (possibly qualified by table / alias).
123    Column { field: FieldRef, span: Span },
124    /// Query parameter placeholder (`?` or `$n`). Used by prepared
125    /// statements in Fase 4 — the plan cache strips these so repeated
126    /// bindings reuse the same plan.
127    Parameter { index: usize, span: Span },
128    /// Binary infix operator: `lhs <op> rhs`.
129    BinaryOp {
130        op: BinOp,
131        lhs: Box<Expr>,
132        rhs: Box<Expr>,
133        span: Span,
134    },
135    /// Prefix unary operator.
136    UnaryOp {
137        op: UnaryOp,
138        operand: Box<Expr>,
139        span: Span,
140    },
141    /// `CAST(expr AS type)` / `expr::type`.
142    Cast {
143        inner: Box<Expr>,
144        target: DataType,
145        span: Span,
146    },
147    /// Function / aggregate call.
148    FunctionCall {
149        name: String,
150        args: Vec<Expr>,
151        span: Span,
152    },
153    /// `CASE WHEN cond THEN val [...] [ELSE val] END`.
154    Case {
155        branches: Vec<(Expr, Expr)>,
156        else_: Option<Box<Expr>>,
157        span: Span,
158    },
159    /// `IS NULL` / `IS NOT NULL`. Kept as a distinct variant because
160    /// SQL treats them as unary postfix operators with special
161    /// three-valued semantics.
162    IsNull {
163        operand: Box<Expr>,
164        negated: bool,
165        span: Span,
166    },
167    /// `expr IN (v1, v2, …)`. The rhs list is `Vec<Expr>` — at Week 1
168    /// only literal lists survive analyze; correlated subquery lists
169    /// land in Week 3 alongside the `Subquery` variant below.
170    InList {
171        target: Box<Expr>,
172        values: Vec<Expr>,
173        negated: bool,
174        span: Span,
175    },
176    /// `expr BETWEEN low AND high` — first-class so pushdown can
177    /// recognise range predicates without decomposing to `>=` and `<=`.
178    Between {
179        target: Box<Expr>,
180        low: Box<Expr>,
181        high: Box<Expr>,
182        negated: bool,
183        span: Span,
184    },
185    /// Parenthesized SELECT used in an expression context.
186    Subquery { query: ExprSubquery, span: Span },
187    /// Window function call: `fn(args) OVER (PARTITION BY ... ORDER BY
188    /// ... [frame])`. Carries the same `name`/`args` payload as a plain
189    /// `FunctionCall` plus a `WindowSpec` describing the OVER clause.
190    /// Issue #589 slice 7a — parser + AST only; no runtime execution.
191    WindowFunctionCall {
192        name: String,
193        args: Vec<Expr>,
194        window: WindowSpec,
195        span: Span,
196    },
197}
198
199#[derive(Debug, Clone)]
200pub struct ExprSubquery {
201    pub query: Box<QueryExpr>,
202}
203
204impl PartialEq for ExprSubquery {
205    fn eq(&self, other: &Self) -> bool {
206        format!("{:?}", self.query) == format!("{:?}", other.query)
207    }
208}
209
210impl Expr {
211    /// Extract the span of this expression. Synthetic nodes return
212    /// `Span::synthetic()` — callers that need a real location should
213    /// check `span.is_synthetic()` before rendering diagnostics.
214    pub fn span(&self) -> Span {
215        match self {
216            Expr::Literal { span, .. }
217            | Expr::Column { span, .. }
218            | Expr::Parameter { span, .. }
219            | Expr::BinaryOp { span, .. }
220            | Expr::UnaryOp { span, .. }
221            | Expr::Cast { span, .. }
222            | Expr::FunctionCall { span, .. }
223            | Expr::Case { span, .. }
224            | Expr::IsNull { span, .. }
225            | Expr::InList { span, .. }
226            | Expr::Between { span, .. }
227            | Expr::Subquery { span, .. }
228            | Expr::WindowFunctionCall { span, .. } => *span,
229        }
230    }
231
232    /// Constructor shortcut for the common `Literal` case.
233    pub fn lit(value: Value) -> Self {
234        Expr::Literal {
235            value,
236            span: Span::synthetic(),
237        }
238    }
239
240    /// Constructor shortcut for the common `Column` case.
241    pub fn col(field: FieldRef) -> Self {
242        Expr::Column {
243            field,
244            span: Span::synthetic(),
245        }
246    }
247
248    /// Convenience: build a binary operation with a synthetic span.
249    /// Used by unit tests and by the Projection → Expr shim while the
250    /// migration is in flight.
251    pub fn binop(op: BinOp, lhs: Expr, rhs: Expr) -> Self {
252        Expr::BinaryOp {
253            op,
254            lhs: Box::new(lhs),
255            rhs: Box::new(rhs),
256            span: Span::synthetic(),
257        }
258    }
259}
260
261#[cfg(test)]
262mod expr_tests {
263    use super::*;
264
265    #[test]
266    fn precedence_orders_mul_over_add_and_and_over_or() {
267        // Higher precedence binds tighter — classic `a OR b AND c` trap.
268        assert!(BinOp::Mul.precedence() > BinOp::Add.precedence());
269        assert!(BinOp::Add.precedence() > BinOp::Eq.precedence());
270        assert!(BinOp::Eq.precedence() > BinOp::And.precedence());
271        assert!(BinOp::And.precedence() > BinOp::Or.precedence());
272    }
273
274    #[test]
275    fn span_synthetic_round_trip() {
276        let s = Span::synthetic();
277        assert!(s.is_synthetic());
278        let real = Span::new(Position::new(1, 1, 0), Position::new(1, 5, 4));
279        assert!(!real.is_synthetic());
280    }
281
282    #[test]
283    fn expr_constructors_carry_synthetic_span() {
284        let lit = Expr::lit(Value::Integer(42));
285        assert!(lit.span().is_synthetic());
286        assert_eq!(
287            lit,
288            Expr::Literal {
289                value: Value::Integer(42),
290                span: Span::synthetic(),
291            }
292        );
293    }
294
295    #[test]
296    fn binop_shortcut_nests() {
297        // a + b * c parses to Add(a, Mul(b, c)) under normal precedence
298        let expr = Expr::binop(
299            BinOp::Add,
300            Expr::col(FieldRef::column("", "a")),
301            Expr::binop(
302                BinOp::Mul,
303                Expr::col(FieldRef::column("", "b")),
304                Expr::col(FieldRef::column("", "c")),
305            ),
306        );
307        match expr {
308            Expr::BinaryOp {
309                op: BinOp::Add,
310                rhs,
311                ..
312            } => match *rhs {
313                Expr::BinaryOp { op: BinOp::Mul, .. } => {}
314                other => panic!("expected Mul on rhs, got {:?}", other),
315            },
316            other => panic!("expected Add at root, got {:?}", other),
317        }
318    }
319}