ruvector_graph/cypher/
ast.rs

//! Abstract Syntax Tree definitions for the Cypher query language.
//!
//! Represents the parsed structure of Cypher queries, including:
//! - Pattern matching (MATCH, OPTIONAL MATCH)
//! - Filtering (WHERE)
//! - Projections (RETURN, WITH)
//! - Mutations (CREATE, MERGE, DELETE, SET, REMOVE)
//! - Aggregations and ordering
//! - Hyperedge support for N-ary relationships
10
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13
14/// Top-level query representation
15#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
16pub struct Query {
17    pub statements: Vec<Statement>,
18}
19
20/// Individual query statement
21#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
22pub enum Statement {
23    Match(MatchClause),
24    Create(CreateClause),
25    Merge(MergeClause),
26    Delete(DeleteClause),
27    Set(SetClause),
28    Remove(RemoveClause),
29    Return(ReturnClause),
30    With(WithClause),
31}
32
33/// MATCH clause for pattern matching
34#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
35pub struct MatchClause {
36    pub optional: bool,
37    pub patterns: Vec<Pattern>,
38    pub where_clause: Option<WhereClause>,
39}
40
41/// Pattern matching expressions
42#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
43pub enum Pattern {
44    /// Simple node pattern: (n:Label {props})
45    Node(NodePattern),
46    /// Relationship pattern: (a)-[r:TYPE]->(b)
47    Relationship(RelationshipPattern),
48    /// Path pattern: p = (a)-[*1..5]->(b)
49    Path(PathPattern),
50    /// Hyperedge pattern for N-ary relationships: (a)-[r:TYPE]->(b,c,d)
51    Hyperedge(HyperedgePattern),
52}
53
54/// Node pattern: (variable:Label {property: value})
55#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
56pub struct NodePattern {
57    pub variable: Option<String>,
58    pub labels: Vec<String>,
59    pub properties: Option<PropertyMap>,
60}
61
62/// Relationship pattern: [variable:Type {properties}]
63#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
64pub struct RelationshipPattern {
65    pub variable: Option<String>,
66    pub rel_type: Option<String>,
67    pub properties: Option<PropertyMap>,
68    pub direction: Direction,
69    pub range: Option<RelationshipRange>,
70    /// Source node pattern
71    pub from: Box<NodePattern>,
72    /// Target - can be a NodePattern or another Pattern for chained relationships
73    /// For simple relationships like (a)-[r]->(b), this is just the node
74    /// For chained patterns like (a)-[r]->(b)<-[s]-(c), the target is nested
75    pub to: Box<Pattern>,
76}
77
78/// Hyperedge pattern for N-ary relationships
79/// Example: (person)-[r:TRANSACTION]->(account1, account2, merchant)
80#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
81pub struct HyperedgePattern {
82    pub variable: Option<String>,
83    pub rel_type: String,
84    pub properties: Option<PropertyMap>,
85    pub from: Box<NodePattern>,
86    pub to: Vec<NodePattern>, // Multiple target nodes for N-ary relationships
87    pub arity: usize,         // Number of participating nodes (including source)
88}
89
90/// Relationship direction
91#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
92pub enum Direction {
93    Outgoing,   // ->
94    Incoming,   // <-
95    Undirected, // -
96}
97
98/// Relationship range for path queries: [*min..max]
99#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
100pub struct RelationshipRange {
101    pub min: Option<usize>,
102    pub max: Option<usize>,
103}
104
105/// Path pattern: p = (a)-[*]->(b)
106#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
107pub struct PathPattern {
108    pub variable: String,
109    pub pattern: Box<Pattern>,
110}
111
112/// Property map: {key: value, ...}
113pub type PropertyMap = HashMap<String, Expression>;
114
115/// WHERE clause for filtering
116#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
117pub struct WhereClause {
118    pub condition: Expression,
119}
120
121/// CREATE clause for creating nodes and relationships
122#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
123pub struct CreateClause {
124    pub patterns: Vec<Pattern>,
125}
126
127/// MERGE clause for create-or-match
128#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
129pub struct MergeClause {
130    pub pattern: Pattern,
131    pub on_create: Option<SetClause>,
132    pub on_match: Option<SetClause>,
133}
134
135/// DELETE clause
136#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
137pub struct DeleteClause {
138    pub detach: bool,
139    pub expressions: Vec<Expression>,
140}
141
142/// SET clause for updating properties
143#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
144pub struct SetClause {
145    pub items: Vec<SetItem>,
146}
147
148#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
149pub enum SetItem {
150    Property {
151        variable: String,
152        property: String,
153        value: Expression,
154    },
155    Variable {
156        variable: String,
157        value: Expression,
158    },
159    Labels {
160        variable: String,
161        labels: Vec<String>,
162    },
163}
164
165/// REMOVE clause for removing properties or labels
166#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
167pub struct RemoveClause {
168    pub items: Vec<RemoveItem>,
169}
170
171#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
172pub enum RemoveItem {
173    /// Remove a property: REMOVE n.property
174    Property { variable: String, property: String },
175    /// Remove labels: REMOVE n:Label1:Label2
176    Labels {
177        variable: String,
178        labels: Vec<String>,
179    },
180}
181
182/// RETURN clause for projection
183#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
184pub struct ReturnClause {
185    pub distinct: bool,
186    pub items: Vec<ReturnItem>,
187    pub order_by: Option<OrderBy>,
188    pub skip: Option<Expression>,
189    pub limit: Option<Expression>,
190}
191
192/// WITH clause for chaining queries
193#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
194pub struct WithClause {
195    pub distinct: bool,
196    pub items: Vec<ReturnItem>,
197    pub where_clause: Option<WhereClause>,
198    pub order_by: Option<OrderBy>,
199    pub skip: Option<Expression>,
200    pub limit: Option<Expression>,
201}
202
203/// Return item: expression AS alias
204#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
205pub struct ReturnItem {
206    pub expression: Expression,
207    pub alias: Option<String>,
208}
209
210/// ORDER BY clause
211#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
212pub struct OrderBy {
213    pub items: Vec<OrderByItem>,
214}
215
216#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
217pub struct OrderByItem {
218    pub expression: Expression,
219    pub ascending: bool,
220}
221
222/// Expression tree
223#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
224pub enum Expression {
225    // Literals
226    Integer(i64),
227    Float(f64),
228    String(String),
229    Boolean(bool),
230    Null,
231
232    // Variables and properties
233    Variable(String),
234    Property {
235        object: Box<Expression>,
236        property: String,
237    },
238
239    // Collections
240    List(Vec<Expression>),
241    Map(HashMap<String, Expression>),
242
243    // Operators
244    BinaryOp {
245        left: Box<Expression>,
246        op: BinaryOperator,
247        right: Box<Expression>,
248    },
249    UnaryOp {
250        op: UnaryOperator,
251        operand: Box<Expression>,
252    },
253
254    // Functions and aggregations
255    FunctionCall {
256        name: String,
257        args: Vec<Expression>,
258    },
259    Aggregation {
260        function: AggregationFunction,
261        expression: Box<Expression>,
262        distinct: bool,
263    },
264
265    // Pattern predicates
266    PatternPredicate(Box<Pattern>),
267
268    // Case expressions
269    Case {
270        expression: Option<Box<Expression>>,
271        alternatives: Vec<(Expression, Expression)>,
272        default: Option<Box<Expression>>,
273    },
274}
275
276/// Binary operators
277#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
278pub enum BinaryOperator {
279    // Arithmetic
280    Add,
281    Subtract,
282    Multiply,
283    Divide,
284    Modulo,
285    Power,
286
287    // Comparison
288    Equal,
289    NotEqual,
290    LessThan,
291    LessThanOrEqual,
292    GreaterThan,
293    GreaterThanOrEqual,
294
295    // Logical
296    And,
297    Or,
298    Xor,
299
300    // String
301    Contains,
302    StartsWith,
303    EndsWith,
304    Matches, // Regex
305
306    // Collection
307    In,
308
309    // Null checking
310    Is,
311    IsNot,
312}
313
314/// Unary operators
315#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
316pub enum UnaryOperator {
317    Not,
318    Minus,
319    Plus,
320    IsNull,
321    IsNotNull,
322}
323
324/// Aggregation functions
325#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
326pub enum AggregationFunction {
327    Count,
328    Sum,
329    Avg,
330    Min,
331    Max,
332    Collect,
333    StdDev,
334    StdDevP,
335    Percentile,
336}
337
338impl Query {
339    pub fn new(statements: Vec<Statement>) -> Self {
340        Self { statements }
341    }
342
343    /// Check if query contains only read operations
344    pub fn is_read_only(&self) -> bool {
345        self.statements.iter().all(|stmt| {
346            matches!(
347                stmt,
348                Statement::Match(_) | Statement::Return(_) | Statement::With(_)
349            )
350        })
351    }
352
353    /// Check if query contains hyperedges
354    pub fn has_hyperedges(&self) -> bool {
355        self.statements.iter().any(|stmt| match stmt {
356            Statement::Match(m) => m
357                .patterns
358                .iter()
359                .any(|p| matches!(p, Pattern::Hyperedge(_))),
360            Statement::Create(c) => c
361                .patterns
362                .iter()
363                .any(|p| matches!(p, Pattern::Hyperedge(_))),
364            Statement::Merge(m) => matches!(&m.pattern, Pattern::Hyperedge(_)),
365            _ => false,
366        })
367    }
368}
369
370impl Pattern {
371    /// Get the arity of the pattern (number of nodes involved)
372    pub fn arity(&self) -> usize {
373        match self {
374            Pattern::Node(_) => 1,
375            Pattern::Relationship(_) => 2,
376            Pattern::Path(_) => 2, // Simplified, could be variable
377            Pattern::Hyperedge(h) => h.arity,
378        }
379    }
380}
381
382impl Expression {
383    /// Check if expression is constant (no variables)
384    pub fn is_constant(&self) -> bool {
385        match self {
386            Expression::Integer(_)
387            | Expression::Float(_)
388            | Expression::String(_)
389            | Expression::Boolean(_)
390            | Expression::Null => true,
391            Expression::List(items) => items.iter().all(|e| e.is_constant()),
392            Expression::Map(map) => map.values().all(|e| e.is_constant()),
393            Expression::BinaryOp { left, right, .. } => left.is_constant() && right.is_constant(),
394            Expression::UnaryOp { operand, .. } => operand.is_constant(),
395            _ => false,
396        }
397    }
398
399    /// Check if expression contains aggregation
400    pub fn has_aggregation(&self) -> bool {
401        match self {
402            Expression::Aggregation { .. } => true,
403            Expression::BinaryOp { left, right, .. } => {
404                left.has_aggregation() || right.has_aggregation()
405            }
406            Expression::UnaryOp { operand, .. } => operand.has_aggregation(),
407            Expression::FunctionCall { args, .. } => args.iter().any(|e| e.has_aggregation()),
408            Expression::List(items) => items.iter().any(|e| e.has_aggregation()),
409            Expression::Property { object, .. } => object.has_aggregation(),
410            _ => false,
411        }
412    }
413}
414
#[cfg(test)]
mod tests {
    use super::*;

    /// Node pattern bound to `name`, with no labels or properties.
    fn bare_node(name: &str) -> NodePattern {
        NodePattern {
            variable: Some(name.to_string()),
            labels: vec![],
            properties: None,
        }
    }

    #[test]
    fn test_query_is_read_only() {
        let query = Query::new(vec![
            Statement::Match(MatchClause {
                optional: false,
                patterns: vec![],
                where_clause: None,
            }),
            Statement::Return(ReturnClause {
                distinct: false,
                items: vec![],
                order_by: None,
                skip: None,
                limit: None,
            }),
        ]);
        assert!(query.is_read_only());

        // A single mutating statement makes the whole query non-read-only.
        let mutating = Query::new(vec![Statement::Create(CreateClause { patterns: vec![] })]);
        assert!(!mutating.is_read_only());
    }

    #[test]
    fn test_expression_is_constant() {
        assert!(Expression::Integer(42).is_constant());
        assert!(Expression::String("test".to_string()).is_constant());
        assert!(!Expression::Variable("x".to_string()).is_constant());

        // Collections are constant only when every element is.
        assert!(Expression::List(vec![Expression::Integer(1), Expression::Null]).is_constant());
        assert!(!Expression::List(vec![Expression::Variable("x".to_string())]).is_constant());
    }

    #[test]
    fn test_hyperedge_arity() {
        let hyperedge = Pattern::Hyperedge(HyperedgePattern {
            variable: Some("r".to_string()),
            rel_type: "TRANSACTION".to_string(),
            properties: None,
            from: Box::new(bare_node("a")),
            to: vec![bare_node("b"), bare_node("c")],
            arity: 3,
        });
        assert_eq!(hyperedge.arity(), 3);

        // A lone node always has arity 1.
        assert_eq!(Pattern::Node(bare_node("n")).arity(), 1);
    }
}