ruvector_graph/cypher/
parser.rs

1//! Recursive descent parser for Cypher query language
2//!
3//! Converts token stream into Abstract Syntax Tree (AST).
4
5use super::ast::*;
6use super::lexer::{tokenize, Token, TokenKind};
7use thiserror::Error;
8
9#[derive(Debug, Error)]
10pub enum ParseError {
11    #[error(
12        "Unexpected token: expected {expected}, found {found} at line {line}, column {column}"
13    )]
14    UnexpectedToken {
15        expected: String,
16        found: String,
17        line: usize,
18        column: usize,
19    },
20    #[error("Unexpected end of input")]
21    UnexpectedEof,
22    #[error("Lexer error: {0}")]
23    LexerError(#[from] super::lexer::LexerError),
24    #[error("Invalid syntax: {0}")]
25    InvalidSyntax(String),
26}
27
28type ParseResult<T> = Result<T, ParseError>;
29
30pub struct Parser {
31    tokens: Vec<Token>,
32    current: usize,
33}
34
35impl Parser {
36    fn new(tokens: Vec<Token>) -> Self {
37        Self { tokens, current: 0 }
38    }
39
40    fn is_at_end(&self) -> bool {
41        matches!(self.peek().kind, TokenKind::Eof)
42    }
43
44    fn peek(&self) -> &Token {
45        &self.tokens[self.current]
46    }
47
48    fn previous(&self) -> &Token {
49        &self.tokens[self.current - 1]
50    }
51
52    fn advance(&mut self) -> &Token {
53        if !self.is_at_end() {
54            self.current += 1;
55        }
56        self.previous()
57    }
58
59    fn check(&self, kind: &TokenKind) -> bool {
60        if self.is_at_end() {
61            return false;
62        }
63        std::mem::discriminant(&self.peek().kind) == std::mem::discriminant(kind)
64    }
65
66    fn match_token(&mut self, kinds: &[TokenKind]) -> bool {
67        for kind in kinds {
68            if self.check(kind) {
69                self.advance();
70                return true;
71            }
72        }
73        false
74    }
75
76    fn consume(&mut self, kind: TokenKind, message: &str) -> ParseResult<&Token> {
77        if self.check(&kind) {
78            Ok(self.advance())
79        } else {
80            let token = self.peek();
81            Err(ParseError::UnexpectedToken {
82                expected: message.to_string(),
83                found: token.kind.to_string(),
84                line: token.position.line,
85                column: token.position.column,
86            })
87        }
88    }
89
90    fn parse_query(&mut self) -> ParseResult<Query> {
91        let mut statements = Vec::new();
92
93        while !self.is_at_end() {
94            statements.push(self.parse_statement()?);
95            self.match_token(&[TokenKind::Semicolon]);
96        }
97
98        // Reject empty queries
99        if statements.is_empty() {
100            return Err(ParseError::InvalidSyntax(
101                "Empty query - expected at least one statement".to_string(),
102            ));
103        }
104
105        Ok(Query { statements })
106    }
107
108    fn parse_statement(&mut self) -> ParseResult<Statement> {
109        match &self.peek().kind {
110            TokenKind::Match | TokenKind::OptionalMatch => {
111                Ok(Statement::Match(self.parse_match()?))
112            }
113            TokenKind::Create => Ok(Statement::Create(self.parse_create()?)),
114            TokenKind::Merge => Ok(Statement::Merge(self.parse_merge()?)),
115            TokenKind::Delete | TokenKind::DetachDelete => {
116                Ok(Statement::Delete(self.parse_delete()?))
117            }
118            TokenKind::Set => Ok(Statement::Set(self.parse_set()?)),
119            TokenKind::Remove => Ok(Statement::Remove(self.parse_remove()?)),
120            TokenKind::Return => Ok(Statement::Return(self.parse_return()?)),
121            TokenKind::With => Ok(Statement::With(self.parse_with()?)),
122            _ => {
123                let token = self.peek();
124                Err(ParseError::UnexpectedToken {
125                    expected: "statement keyword".to_string(),
126                    found: token.kind.to_string(),
127                    line: token.position.line,
128                    column: token.position.column,
129                })
130            }
131        }
132    }
133
134    fn parse_match(&mut self) -> ParseResult<MatchClause> {
135        let optional = self.match_token(&[TokenKind::OptionalMatch]);
136        if !optional {
137            self.consume(TokenKind::Match, "MATCH")?;
138        }
139
140        let patterns = self.parse_patterns()?;
141
142        let where_clause = if self.match_token(&[TokenKind::Where]) {
143            Some(WhereClause {
144                condition: self.parse_expression()?,
145            })
146        } else {
147            None
148        };
149
150        Ok(MatchClause {
151            optional,
152            patterns,
153            where_clause,
154        })
155    }
156
157    fn parse_patterns(&mut self) -> ParseResult<Vec<Pattern>> {
158        let mut patterns = vec![self.parse_pattern()?];
159
160        while self.match_token(&[TokenKind::Comma]) {
161            patterns.push(self.parse_pattern()?);
162        }
163
164        Ok(patterns)
165    }
166
167    fn parse_pattern(&mut self) -> ParseResult<Pattern> {
168        // Check for path pattern: p = (...)
169        if let TokenKind::Identifier(var) = &self.peek().kind {
170            let var = var.clone();
171            if self.tokens.get(self.current + 1).map(|t| &t.kind) == Some(&TokenKind::Equal) {
172                self.advance(); // consume identifier
173                self.advance(); // consume =
174                return Ok(Pattern::Path(PathPattern {
175                    variable: var,
176                    pattern: Box::new(self.parse_pattern()?),
177                }));
178            }
179        }
180
181        self.parse_relationship_pattern()
182    }
183
184    fn parse_relationship_pattern(&mut self) -> ParseResult<Pattern> {
185        let from = self.parse_node_pattern()?;
186
187        // Check for relationship - can start with `-` or `<-`
188        if self.check(&TokenKind::Dash) || self.check(&TokenKind::LeftArrow) {
189            // Determine if this is an incoming relationship (<-)
190            let starts_with_incoming = self.match_token(&[TokenKind::LeftArrow]);
191            if !starts_with_incoming {
192                self.consume(TokenKind::Dash, "-")?;
193            }
194
195            // Parse relationship details [r:TYPE {props} *min..max]
196            let (variable, rel_type, properties, range) =
197                if self.match_token(&[TokenKind::LeftBracket]) {
198                    let variable = if let TokenKind::Identifier(v) = &self.peek().kind {
199                        let v = v.clone();
200                        self.advance();
201                        Some(v)
202                    } else {
203                        None
204                    };
205
206                    let rel_type = if self.match_token(&[TokenKind::Colon]) {
207                        if let TokenKind::Identifier(t) = &self.peek().kind {
208                            let t = t.clone();
209                            self.advance();
210                            Some(t)
211                        } else {
212                            None
213                        }
214                    } else {
215                        None
216                    };
217
218                    let properties = if self.check(&TokenKind::LeftBrace) {
219                        Some(self.parse_property_map()?)
220                    } else {
221                        None
222                    };
223
224                    let range = if self.match_token(&[TokenKind::Star]) {
225                        Some(self.parse_relationship_range()?)
226                    } else {
227                        None
228                    };
229
230                    self.consume(TokenKind::RightBracket, "]")?;
231                    (variable, rel_type, properties, range)
232                } else {
233                    (None, None, None, None)
234                };
235
236            // Determine final direction based on ending pattern:
237            // -[r]->  = Outgoing
238            // <-[r]-  = Incoming
239            // -[r]-   = Undirected
240            // (also handle cases where we chain with another node)
241            let direction = if self.match_token(&[TokenKind::Arrow]) {
242                // Ends with -> means Outgoing
243                Direction::Outgoing
244            } else if self.match_token(&[TokenKind::Dash]) {
245                // Ends with just -
246                if starts_with_incoming {
247                    // <-[r]- means Incoming
248                    Direction::Incoming
249                } else {
250                    // -[r]- means Undirected
251                    Direction::Undirected
252                }
253            } else {
254                return Err(ParseError::InvalidSyntax(
255                    "Expected '->' or '-' after relationship".to_string(),
256                ));
257            };
258
259            // Parse target node(s) - check for hyperedge
260            self.consume(TokenKind::LeftParen, "(")?;
261
262            let mut target_nodes = vec![self.parse_node_pattern_content()?];
263
264            // Check for multiple target nodes (hyperedge)
265            while self.match_token(&[TokenKind::Comma]) {
266                target_nodes.push(self.parse_node_pattern_content()?);
267            }
268
269            self.consume(TokenKind::RightParen, ")")?;
270
271            // If multiple targets, create hyperedge
272            if target_nodes.len() > 1 {
273                return Ok(Pattern::Hyperedge(HyperedgePattern {
274                    variable,
275                    rel_type: rel_type.ok_or_else(|| {
276                        ParseError::InvalidSyntax(
277                            "Hyperedge requires relationship type".to_string(),
278                        )
279                    })?,
280                    properties,
281                    from: Box::new(from),
282                    arity: target_nodes.len() + 1, // +1 for source node
283                    to: target_nodes,
284                }));
285            }
286
287            // Get the single target node pattern
288            let target_node = target_nodes.into_iter().next().unwrap();
289
290            // Check if there's a chained pattern (another relationship starting from target)
291            if self.check(&TokenKind::Dash) || self.check(&TokenKind::LeftArrow) {
292                // There's a chained pattern - recursively parse from the target node
293                let chained = self.parse_chained_pattern(target_node)?;
294
295                // Create the first relationship pattern with the chained pattern as target
296                Ok(Pattern::Relationship(RelationshipPattern {
297                    variable,
298                    rel_type,
299                    properties,
300                    direction,
301                    range,
302                    from: Box::new(from),
303                    to: Box::new(chained),
304                }))
305            } else {
306                Ok(Pattern::Relationship(RelationshipPattern {
307                    variable,
308                    rel_type,
309                    properties,
310                    direction,
311                    range,
312                    from: Box::new(from),
313                    to: Box::new(Pattern::Node(target_node)),
314                }))
315            }
316        } else {
317            Ok(Pattern::Node(from))
318        }
319    }
320
321    /// Parse a chained pattern where we already have the starting node pattern
322    fn parse_chained_pattern(&mut self, from: NodePattern) -> ParseResult<Pattern> {
323        // Check for relationship - can start with `-` or `<-`
324        if self.check(&TokenKind::Dash) || self.check(&TokenKind::LeftArrow) {
325            let starts_with_incoming = self.match_token(&[TokenKind::LeftArrow]);
326            if !starts_with_incoming {
327                self.consume(TokenKind::Dash, "-")?;
328            }
329
330            // Parse relationship details [r:TYPE {props} *min..max]
331            let (variable, rel_type, properties, range) =
332                if self.match_token(&[TokenKind::LeftBracket]) {
333                    let variable = if let TokenKind::Identifier(v) = &self.peek().kind {
334                        let v = v.clone();
335                        self.advance();
336                        Some(v)
337                    } else {
338                        None
339                    };
340
341                    let rel_type = if self.match_token(&[TokenKind::Colon]) {
342                        if let TokenKind::Identifier(t) = &self.peek().kind {
343                            let t = t.clone();
344                            self.advance();
345                            Some(t)
346                        } else {
347                            None
348                        }
349                    } else {
350                        None
351                    };
352
353                    let properties = if self.check(&TokenKind::LeftBrace) {
354                        Some(self.parse_property_map()?)
355                    } else {
356                        None
357                    };
358
359                    let range = if self.match_token(&[TokenKind::Star]) {
360                        Some(self.parse_relationship_range()?)
361                    } else {
362                        None
363                    };
364
365                    self.consume(TokenKind::RightBracket, "]")?;
366                    (variable, rel_type, properties, range)
367                } else {
368                    (None, None, None, None)
369                };
370
371            // Determine final direction
372            let direction = if self.match_token(&[TokenKind::Arrow]) {
373                Direction::Outgoing
374            } else if self.match_token(&[TokenKind::Dash]) {
375                if starts_with_incoming {
376                    Direction::Incoming
377                } else {
378                    Direction::Undirected
379                }
380            } else {
381                return Err(ParseError::InvalidSyntax(
382                    "Expected '->' or '-' after relationship".to_string(),
383                ));
384            };
385
386            // Parse target node
387            self.consume(TokenKind::LeftParen, "(")?;
388            let target_node = self.parse_node_pattern_content()?;
389            self.consume(TokenKind::RightParen, ")")?;
390
391            // Check for another chained pattern
392            if self.check(&TokenKind::Dash) || self.check(&TokenKind::LeftArrow) {
393                let chained = self.parse_chained_pattern(target_node)?;
394
395                Ok(Pattern::Relationship(RelationshipPattern {
396                    variable,
397                    rel_type,
398                    properties,
399                    direction,
400                    range,
401                    from: Box::new(from),
402                    to: Box::new(chained),
403                }))
404            } else {
405                Ok(Pattern::Relationship(RelationshipPattern {
406                    variable,
407                    rel_type,
408                    properties,
409                    direction,
410                    range,
411                    from: Box::new(from),
412                    to: Box::new(Pattern::Node(target_node)),
413                }))
414            }
415        } else {
416            Ok(Pattern::Node(from))
417        }
418    }
419
420    fn parse_node_pattern(&mut self) -> ParseResult<NodePattern> {
421        self.consume(TokenKind::LeftParen, "(")?;
422        let node = self.parse_node_pattern_content()?;
423        self.consume(TokenKind::RightParen, ")")?;
424        Ok(node)
425    }
426
427    fn parse_node_pattern_content(&mut self) -> ParseResult<NodePattern> {
428        let variable = if let TokenKind::Identifier(v) = &self.peek().kind {
429            let v = v.clone();
430            // Check if next token is : (label) or { (properties)
431            if !self
432                .tokens
433                .get(self.current + 1)
434                .map(|t| matches!(t.kind, TokenKind::Colon | TokenKind::LeftBrace))
435                .unwrap_or(false)
436            {
437                // Variable only, no labels or properties - advance and return
438                self.advance();
439                return Ok(NodePattern {
440                    variable: Some(v),
441                    labels: vec![],
442                    properties: None,
443                });
444            }
445            self.advance();
446            Some(v)
447        } else {
448            None
449        };
450
451        let mut labels = Vec::new();
452        while self.match_token(&[TokenKind::Colon]) {
453            if let TokenKind::Identifier(label) = &self.peek().kind {
454                labels.push(label.clone());
455                self.advance();
456            }
457        }
458
459        let properties = if self.check(&TokenKind::LeftBrace) {
460            Some(self.parse_property_map()?)
461        } else {
462            None
463        };
464
465        Ok(NodePattern {
466            variable,
467            labels,
468            properties,
469        })
470    }
471
472    fn parse_property_map(&mut self) -> ParseResult<PropertyMap> {
473        self.consume(TokenKind::LeftBrace, "{")?;
474        let mut map = PropertyMap::new();
475
476        if !self.check(&TokenKind::RightBrace) {
477            loop {
478                let key = if let TokenKind::Identifier(k) = &self.peek().kind {
479                    k.clone()
480                } else {
481                    return Err(ParseError::InvalidSyntax(
482                        "Expected property name".to_string(),
483                    ));
484                };
485                self.advance();
486
487                self.consume(TokenKind::Colon, ":")?;
488                let value = self.parse_expression()?;
489                map.insert(key, value);
490
491                if !self.match_token(&[TokenKind::Comma]) {
492                    break;
493                }
494            }
495        }
496
497        self.consume(TokenKind::RightBrace, "}")?;
498        Ok(map)
499    }
500
501    fn parse_relationship_range(&mut self) -> ParseResult<RelationshipRange> {
502        let min = if let TokenKind::Integer(n) = self.peek().kind {
503            self.advance();
504            Some(n as usize)
505        } else {
506            None
507        };
508
509        let max = if self.match_token(&[TokenKind::DotDot]) {
510            if let TokenKind::Integer(n) = self.peek().kind {
511                self.advance();
512                Some(n as usize)
513            } else {
514                None
515            }
516        } else {
517            min
518        };
519
520        Ok(RelationshipRange { min, max })
521    }
522
523    fn parse_create(&mut self) -> ParseResult<CreateClause> {
524        self.consume(TokenKind::Create, "CREATE")?;
525        let patterns = self.parse_patterns()?;
526        Ok(CreateClause { patterns })
527    }
528
529    fn parse_merge(&mut self) -> ParseResult<MergeClause> {
530        self.consume(TokenKind::Merge, "MERGE")?;
531        let pattern = self.parse_pattern()?;
532
533        let mut on_create = None;
534        let mut on_match = None;
535
536        while self.peek().kind == TokenKind::OnCreate || self.peek().kind == TokenKind::OnMatch {
537            if self.match_token(&[TokenKind::OnCreate]) {
538                on_create = Some(self.parse_set()?);
539            } else if self.match_token(&[TokenKind::OnMatch]) {
540                on_match = Some(self.parse_set()?);
541            }
542        }
543
544        Ok(MergeClause {
545            pattern,
546            on_create,
547            on_match,
548        })
549    }
550
551    fn parse_delete(&mut self) -> ParseResult<DeleteClause> {
552        let detach = self.match_token(&[TokenKind::DetachDelete]);
553        if !detach {
554            self.consume(TokenKind::Delete, "DELETE")?;
555        }
556
557        let mut expressions = vec![self.parse_expression()?];
558        while self.match_token(&[TokenKind::Comma]) {
559            expressions.push(self.parse_expression()?);
560        }
561
562        Ok(DeleteClause {
563            detach,
564            expressions,
565        })
566    }
567
568    fn parse_set(&mut self) -> ParseResult<SetClause> {
569        self.consume(TokenKind::Set, "SET")?;
570        let mut items = vec![];
571
572        loop {
573            if let TokenKind::Identifier(var) = &self.peek().kind {
574                let var = var.clone();
575                self.advance();
576
577                if self.match_token(&[TokenKind::Dot]) {
578                    if let TokenKind::Identifier(prop) = &self.peek().kind {
579                        let prop = prop.clone();
580                        self.advance();
581                        self.consume(TokenKind::Equal, "=")?;
582                        let value = self.parse_expression()?;
583                        items.push(SetItem::Property {
584                            variable: var,
585                            property: prop,
586                            value,
587                        });
588                    }
589                } else if self.match_token(&[TokenKind::Equal]) {
590                    let value = self.parse_expression()?;
591                    items.push(SetItem::Variable {
592                        variable: var,
593                        value,
594                    });
595                }
596            }
597
598            if !self.match_token(&[TokenKind::Comma]) {
599                break;
600            }
601        }
602
603        Ok(SetClause { items })
604    }
605
606    fn parse_remove(&mut self) -> ParseResult<RemoveClause> {
607        self.consume(TokenKind::Remove, "REMOVE")?;
608        let mut items = vec![];
609
610        loop {
611            if let TokenKind::Identifier(var) = &self.peek().kind {
612                let var = var.clone();
613                self.advance();
614
615                if self.match_token(&[TokenKind::Dot]) {
616                    // Remove property: REMOVE n.property
617                    if let TokenKind::Identifier(prop) = &self.peek().kind {
618                        let prop = prop.clone();
619                        self.advance();
620                        items.push(RemoveItem::Property {
621                            variable: var,
622                            property: prop,
623                        });
624                    } else {
625                        return Err(ParseError::InvalidSyntax(
626                            "Expected property name after '.'".to_string(),
627                        ));
628                    }
629                } else if self.match_token(&[TokenKind::Colon]) {
630                    // Remove labels: REMOVE n:Label1:Label2
631                    let mut labels = vec![];
632                    if let TokenKind::Identifier(label) = &self.peek().kind {
633                        labels.push(label.clone());
634                        self.advance();
635                    }
636                    // Handle multiple labels
637                    while self.match_token(&[TokenKind::Colon]) {
638                        if let TokenKind::Identifier(label) = &self.peek().kind {
639                            labels.push(label.clone());
640                            self.advance();
641                        }
642                    }
643                    items.push(RemoveItem::Labels {
644                        variable: var,
645                        labels,
646                    });
647                } else {
648                    return Err(ParseError::InvalidSyntax(
649                        "Expected '.' or ':' after variable in REMOVE".to_string(),
650                    ));
651                }
652            } else {
653                return Err(ParseError::InvalidSyntax(
654                    "Expected variable in REMOVE".to_string(),
655                ));
656            }
657
658            if !self.match_token(&[TokenKind::Comma]) {
659                break;
660            }
661        }
662
663        Ok(RemoveClause { items })
664    }
665
666    fn parse_return(&mut self) -> ParseResult<ReturnClause> {
667        self.consume(TokenKind::Return, "RETURN")?;
668        let distinct = self.match_token(&[TokenKind::Distinct]);
669
670        let items = self.parse_return_items()?;
671        let order_by = self.parse_order_by()?;
672
673        let skip = if self.match_token(&[TokenKind::Skip]) {
674            Some(self.parse_expression()?)
675        } else {
676            None
677        };
678
679        let limit = if self.match_token(&[TokenKind::Limit]) {
680            Some(self.parse_expression()?)
681        } else {
682            None
683        };
684
685        Ok(ReturnClause {
686            distinct,
687            items,
688            order_by,
689            skip,
690            limit,
691        })
692    }
693
694    fn parse_with(&mut self) -> ParseResult<WithClause> {
695        self.consume(TokenKind::With, "WITH")?;
696        let distinct = self.match_token(&[TokenKind::Distinct]);
697
698        let items = self.parse_return_items()?;
699
700        let where_clause = if self.match_token(&[TokenKind::Where]) {
701            Some(WhereClause {
702                condition: self.parse_expression()?,
703            })
704        } else {
705            None
706        };
707
708        let order_by = self.parse_order_by()?;
709
710        let skip = if self.match_token(&[TokenKind::Skip]) {
711            Some(self.parse_expression()?)
712        } else {
713            None
714        };
715
716        let limit = if self.match_token(&[TokenKind::Limit]) {
717            Some(self.parse_expression()?)
718        } else {
719            None
720        };
721
722        Ok(WithClause {
723            distinct,
724            items,
725            where_clause,
726            order_by,
727            skip,
728            limit,
729        })
730    }
731
732    fn parse_return_items(&mut self) -> ParseResult<Vec<ReturnItem>> {
733        let mut items = vec![];
734
735        loop {
736            let expression = self.parse_expression()?;
737            let alias = if self.match_token(&[TokenKind::As]) {
738                if let TokenKind::Identifier(name) = &self.peek().kind {
739                    let name = name.clone();
740                    self.advance();
741                    Some(name)
742                } else {
743                    None
744                }
745            } else {
746                None
747            };
748
749            items.push(ReturnItem { expression, alias });
750
751            if !self.match_token(&[TokenKind::Comma]) {
752                break;
753            }
754        }
755
756        Ok(items)
757    }
758
759    fn parse_order_by(&mut self) -> ParseResult<Option<OrderBy>> {
760        if !self.match_token(&[TokenKind::OrderBy]) {
761            return Ok(None);
762        }
763
764        let mut items = vec![];
765
766        loop {
767            let expression = self.parse_expression()?;
768            let ascending = if self.match_token(&[TokenKind::Desc]) {
769                false
770            } else {
771                self.match_token(&[TokenKind::Asc]);
772                true
773            };
774
775            items.push(OrderByItem {
776                expression,
777                ascending,
778            });
779
780            if !self.match_token(&[TokenKind::Comma]) {
781                break;
782            }
783        }
784
785        Ok(Some(OrderBy { items }))
786    }
787
788    fn parse_expression(&mut self) -> ParseResult<Expression> {
789        self.parse_or()
790    }
791
792    fn parse_or(&mut self) -> ParseResult<Expression> {
793        let mut expr = self.parse_xor()?;
794
795        while self.match_token(&[TokenKind::Or]) {
796            let right = self.parse_xor()?;
797            expr = Expression::BinaryOp {
798                left: Box::new(expr),
799                op: BinaryOperator::Or,
800                right: Box::new(right),
801            };
802        }
803
804        Ok(expr)
805    }
806
807    fn parse_xor(&mut self) -> ParseResult<Expression> {
808        let mut expr = self.parse_and()?;
809
810        while self.match_token(&[TokenKind::Xor]) {
811            let right = self.parse_and()?;
812            expr = Expression::BinaryOp {
813                left: Box::new(expr),
814                op: BinaryOperator::Xor,
815                right: Box::new(right),
816            };
817        }
818
819        Ok(expr)
820    }
821
822    fn parse_and(&mut self) -> ParseResult<Expression> {
823        let mut expr = self.parse_comparison()?;
824
825        while self.match_token(&[TokenKind::And]) {
826            let right = self.parse_comparison()?;
827            expr = Expression::BinaryOp {
828                left: Box::new(expr),
829                op: BinaryOperator::And,
830                right: Box::new(right),
831            };
832        }
833
834        Ok(expr)
835    }
836
837    fn parse_comparison(&mut self) -> ParseResult<Expression> {
838        let mut expr = self.parse_additive()?;
839
840        if let Some(op) = self.parse_comparison_op() {
841            let right = self.parse_additive()?;
842            expr = Expression::BinaryOp {
843                left: Box::new(expr),
844                op,
845                right: Box::new(right),
846            };
847        }
848
849        Ok(expr)
850    }
851
852    fn parse_comparison_op(&mut self) -> Option<BinaryOperator> {
853        if self.match_token(&[TokenKind::Equal]) {
854            Some(BinaryOperator::Equal)
855        } else if self.match_token(&[TokenKind::NotEqual]) {
856            Some(BinaryOperator::NotEqual)
857        } else if self.match_token(&[TokenKind::LessThanOrEqual]) {
858            Some(BinaryOperator::LessThanOrEqual)
859        } else if self.match_token(&[TokenKind::GreaterThanOrEqual]) {
860            Some(BinaryOperator::GreaterThanOrEqual)
861        } else if self.match_token(&[TokenKind::LessThan]) {
862            Some(BinaryOperator::LessThan)
863        } else if self.match_token(&[TokenKind::GreaterThan]) {
864            Some(BinaryOperator::GreaterThan)
865        } else {
866            None
867        }
868    }
869
870    fn parse_additive(&mut self) -> ParseResult<Expression> {
871        let mut expr = self.parse_multiplicative()?;
872
873        while let Some(op) = self.parse_additive_op() {
874            let right = self.parse_multiplicative()?;
875            expr = Expression::BinaryOp {
876                left: Box::new(expr),
877                op,
878                right: Box::new(right),
879            };
880        }
881
882        Ok(expr)
883    }
884
885    fn parse_additive_op(&mut self) -> Option<BinaryOperator> {
886        if self.match_token(&[TokenKind::Plus]) {
887            Some(BinaryOperator::Add)
888        } else if self.match_token(&[TokenKind::Minus]) {
889            Some(BinaryOperator::Subtract)
890        } else {
891            None
892        }
893    }
894
895    fn parse_multiplicative(&mut self) -> ParseResult<Expression> {
896        let mut expr = self.parse_unary()?;
897
898        while let Some(op) = self.parse_multiplicative_op() {
899            let right = self.parse_unary()?;
900            expr = Expression::BinaryOp {
901                left: Box::new(expr),
902                op,
903                right: Box::new(right),
904            };
905        }
906
907        Ok(expr)
908    }
909
910    fn parse_multiplicative_op(&mut self) -> Option<BinaryOperator> {
911        if self.match_token(&[TokenKind::Star]) {
912            Some(BinaryOperator::Multiply)
913        } else if self.match_token(&[TokenKind::Slash]) {
914            Some(BinaryOperator::Divide)
915        } else if self.match_token(&[TokenKind::Percent]) {
916            Some(BinaryOperator::Modulo)
917        } else if self.match_token(&[TokenKind::Caret]) {
918            Some(BinaryOperator::Power)
919        } else {
920            None
921        }
922    }
923
924    fn parse_unary(&mut self) -> ParseResult<Expression> {
925        if self.match_token(&[TokenKind::Not]) {
926            let operand = self.parse_unary()?;
927            return Ok(Expression::UnaryOp {
928                op: UnaryOperator::Not,
929                operand: Box::new(operand),
930            });
931        }
932
933        if self.match_token(&[TokenKind::Minus]) {
934            let operand = self.parse_unary()?;
935            return Ok(Expression::UnaryOp {
936                op: UnaryOperator::Minus,
937                operand: Box::new(operand),
938            });
939        }
940
941        self.parse_postfix()
942    }
943
944    fn parse_postfix(&mut self) -> ParseResult<Expression> {
945        let mut expr = self.parse_primary()?;
946
947        loop {
948            if self.match_token(&[TokenKind::Dot]) {
949                if let TokenKind::Identifier(prop) = &self.peek().kind {
950                    let prop = prop.clone();
951                    self.advance();
952                    expr = Expression::Property {
953                        object: Box::new(expr),
954                        property: prop,
955                    };
956                } else {
957                    break;
958                }
959            } else {
960                break;
961            }
962        }
963
964        Ok(expr)
965    }
966
967    fn parse_primary(&mut self) -> ParseResult<Expression> {
968        match &self.peek().kind.clone() {
969            TokenKind::Integer(n) => {
970                let n = *n;
971                self.advance();
972                Ok(Expression::Integer(n))
973            }
974            TokenKind::Float(n) => {
975                let n = *n;
976                self.advance();
977                Ok(Expression::Float(n))
978            }
979            TokenKind::String(s) => {
980                let s = s.clone();
981                self.advance();
982                Ok(Expression::String(s))
983            }
984            TokenKind::True => {
985                self.advance();
986                Ok(Expression::Boolean(true))
987            }
988            TokenKind::False => {
989                self.advance();
990                Ok(Expression::Boolean(false))
991            }
992            TokenKind::Null => {
993                self.advance();
994                Ok(Expression::Null)
995            }
996            TokenKind::Identifier(name) => {
997                let name = name.clone();
998                self.advance();
999
1000                // Check for function call
1001                if self.match_token(&[TokenKind::LeftParen]) {
1002                    self.parse_function_call(name)
1003                } else {
1004                    Ok(Expression::Variable(name))
1005                }
1006            }
1007            TokenKind::LeftParen => {
1008                self.advance();
1009                let expr = self.parse_expression()?;
1010                self.consume(TokenKind::RightParen, ")")?;
1011                Ok(expr)
1012            }
1013            TokenKind::LeftBracket => {
1014                self.advance();
1015                let mut items = vec![];
1016
1017                if !self.check(&TokenKind::RightBracket) {
1018                    loop {
1019                        items.push(self.parse_expression()?);
1020                        if !self.match_token(&[TokenKind::Comma]) {
1021                            break;
1022                        }
1023                    }
1024                }
1025
1026                self.consume(TokenKind::RightBracket, "]")?;
1027                Ok(Expression::List(items))
1028            }
1029            TokenKind::LeftBrace => {
1030                // Map literal: {key1: value1, key2: value2}
1031                self.advance();
1032                // Pre-allocate with reasonable capacity to reduce reallocations
1033                let mut map = std::collections::HashMap::with_capacity(8);
1034
1035                if !self.check(&TokenKind::RightBrace) {
1036                    loop {
1037                        // Parse key (identifier or string)
1038                        let key = match &self.peek().kind {
1039                            TokenKind::Identifier(k) => {
1040                                let k = k.clone();
1041                                self.advance();
1042                                k
1043                            }
1044                            TokenKind::String(k) => {
1045                                let k = k.clone();
1046                                self.advance();
1047                                k
1048                            }
1049                            _ => {
1050                                let token = self.peek();
1051                                return Err(ParseError::UnexpectedToken {
1052                                    expected: "map key (identifier or string)".to_string(),
1053                                    found: token.kind.to_string(),
1054                                    line: token.position.line,
1055                                    column: token.position.column,
1056                                });
1057                            }
1058                        };
1059
1060                        self.consume(TokenKind::Colon, ":")?;
1061                        let value = self.parse_expression()?;
1062                        map.insert(key, value);
1063
1064                        if !self.match_token(&[TokenKind::Comma]) {
1065                            break;
1066                        }
1067                    }
1068                }
1069
1070                self.consume(TokenKind::RightBrace, "}")?;
1071                Ok(Expression::Map(map))
1072            }
1073            _ => {
1074                let token = self.peek();
1075                Err(ParseError::UnexpectedToken {
1076                    expected: "expression".to_string(),
1077                    found: token.kind.to_string(),
1078                    line: token.position.line,
1079                    column: token.position.column,
1080                })
1081            }
1082        }
1083    }
1084
1085    fn parse_function_call(&mut self, name: String) -> ParseResult<Expression> {
1086        let mut args = vec![];
1087
1088        if !self.check(&TokenKind::RightParen) {
1089            // Check for DISTINCT in aggregation
1090            let distinct = self.match_token(&[TokenKind::Distinct]);
1091
1092            loop {
1093                args.push(self.parse_expression()?);
1094                if !self.match_token(&[TokenKind::Comma]) {
1095                    break;
1096                }
1097            }
1098
1099            // Check if it's an aggregation function
1100            let agg_func = match name.to_uppercase().as_str() {
1101                "COUNT" => Some(AggregationFunction::Count),
1102                "SUM" => Some(AggregationFunction::Sum),
1103                "AVG" => Some(AggregationFunction::Avg),
1104                "MIN" => Some(AggregationFunction::Min),
1105                "MAX" => Some(AggregationFunction::Max),
1106                "COLLECT" => Some(AggregationFunction::Collect),
1107                _ => None,
1108            };
1109
1110            self.consume(TokenKind::RightParen, ")")?;
1111
1112            if let Some(func) = agg_func {
1113                if args.len() != 1 {
1114                    return Err(ParseError::InvalidSyntax(
1115                        "Aggregation function requires exactly one argument".to_string(),
1116                    ));
1117                }
1118                return Ok(Expression::Aggregation {
1119                    function: func,
1120                    expression: Box::new(args.into_iter().next().unwrap()),
1121                    distinct,
1122                });
1123            }
1124        } else {
1125            self.consume(TokenKind::RightParen, ")")?;
1126        }
1127
1128        Ok(Expression::FunctionCall { name, args })
1129    }
1130}
1131
1132/// Parse a Cypher query string into an AST
1133pub fn parse_cypher(input: &str) -> ParseResult<Query> {
1134    let tokens = tokenize(input)?;
1135    let mut parser = Parser::new(tokens);
1136    parser.parse_query()
1137}
1138
1139#[cfg(test)]
1140mod tests {
1141    use super::*;
1142
1143    #[test]
1144    fn test_parse_simple_match() {
1145        let query = "MATCH (n:Person) RETURN n";
1146        let result = parse_cypher(query);
1147        assert!(result.is_ok());
1148
1149        let ast = result.unwrap();
1150        assert_eq!(ast.statements.len(), 2);
1151    }
1152
1153    #[test]
1154    fn test_parse_match_with_where() {
1155        let query = "MATCH (n:Person) WHERE n.age > 30 RETURN n.name";
1156        let result = parse_cypher(query);
1157        assert!(result.is_ok());
1158    }
1159
1160    #[test]
1161    fn test_parse_relationship() {
1162        let query = "MATCH (a:Person)-[r:KNOWS]->(b:Person) RETURN a, r, b";
1163        let result = parse_cypher(query);
1164        assert!(result.is_ok());
1165    }
1166
1167    #[test]
1168    fn test_parse_create() {
1169        let query = "CREATE (n:Person {name: 'Alice', age: 30})";
1170        let result = parse_cypher(query);
1171        assert!(result.is_ok());
1172    }
1173
1174    #[test]
1175    #[ignore = "Hyperedge syntax not yet implemented in parser"]
1176    fn test_parse_hyperedge() {
1177        let query = "MATCH (a)-[r:TRANSACTION]->(b, c, d) RETURN a, r, b, c, d";
1178        let result = parse_cypher(query);
1179        assert!(result.is_ok());
1180
1181        let ast = result.unwrap();
1182        assert!(ast.has_hyperedges());
1183    }
1184
1185    #[test]
1186    fn test_parse_aggregation() {
1187        let query = "MATCH (n:Person) RETURN COUNT(n), AVG(n.age)";
1188        let result = parse_cypher(query);
1189        assert!(result.is_ok());
1190    }
1191
1192    // ============== Edge Case Tests for New Functionality ==============
1193
1194    #[test]
1195    fn test_empty_query_rejected() {
1196        // Empty string should fail
1197        let result = parse_cypher("");
1198        assert!(result.is_err());
1199        match result {
1200            Err(ParseError::InvalidSyntax(msg)) => {
1201                assert!(msg.contains("Empty query"));
1202            }
1203            _ => panic!("Expected InvalidSyntax error for empty query"),
1204        }
1205    }
1206
1207    #[test]
1208    fn test_whitespace_only_query_rejected() {
1209        // Whitespace-only should fail
1210        let result = parse_cypher("   \n\t  ");
1211        assert!(result.is_err());
1212    }
1213
1214    #[test]
1215    fn test_map_literal_in_return() {
1216        // Map literals in RETURN clause
1217        let query = "MATCH (n) RETURN {name: n.name, age: n.age}";
1218        let result = parse_cypher(query);
1219        assert!(result.is_ok());
1220    }
1221
1222    #[test]
1223    fn test_empty_map_literal() {
1224        // Empty map literal
1225        let query = "MATCH (n) RETURN {}";
1226        let result = parse_cypher(query);
1227        assert!(result.is_ok());
1228    }
1229
1230    #[test]
1231    #[ignore = "Nested map literals require recursive Expression parsing - future enhancement"]
1232    fn test_nested_map_literal() {
1233        // Nested map literals - currently not supported (needs recursive expression parsing)
1234        let query = "MATCH (n) RETURN {info: {name: n.name, details: {age: n.age}}}";
1235        let result = parse_cypher(query);
1236        assert!(result.is_ok());
1237    }
1238
1239    #[test]
1240    fn test_chained_relationship_outgoing() {
1241        // Chained outgoing relationships: (a)-[r]->(b)-[s]->(c)
1242        let query = "MATCH (a)-[r:KNOWS]->(b)-[s:WORKS_AT]->(c) RETURN a, b, c";
1243        let result = parse_cypher(query);
1244        assert!(result.is_ok());
1245    }
1246
1247    #[test]
1248    fn test_chained_relationship_mixed() {
1249        // Mixed direction chained relationships: (a)-[r]->(b)<-[s]-(c)
1250        let query =
1251            "MATCH (a:Person)-[r:KNOWS]->(b:Person)<-[s:MANAGES]-(c:Manager) RETURN a, b, c";
1252        let result = parse_cypher(query);
1253        assert!(result.is_ok());
1254    }
1255
1256    #[test]
1257    fn test_undirected_relationship() {
1258        // Undirected relationship: (a)-[r]-(b)
1259        let query = "MATCH (a:Person)-[r:FRIEND]-(b:Person) RETURN a, b";
1260        let result = parse_cypher(query);
1261        assert!(result.is_ok());
1262    }
1263
1264    #[test]
1265    fn test_remove_property() {
1266        // REMOVE statement for property
1267        let query = "MATCH (n:Person) REMOVE n.age RETURN n";
1268        let result = parse_cypher(query);
1269        assert!(result.is_ok());
1270    }
1271
1272    #[test]
1273    fn test_remove_label() {
1274        // REMOVE statement for label
1275        let query = "MATCH (n:Person:Employee) REMOVE n:Employee RETURN n";
1276        let result = parse_cypher(query);
1277        assert!(result.is_ok());
1278    }
1279
1280    #[test]
1281    fn test_map_with_string_keys() {
1282        // Map with string keys (quoted)
1283        let query = "MATCH (n) RETURN {'first-name': n.firstName}";
1284        let result = parse_cypher(query);
1285        assert!(result.is_ok());
1286    }
1287
1288    #[test]
1289    fn test_triple_chained_relationship() {
1290        // Triple chained relationship: (a)-[r]->(b)-[s]->(c)-[t]->(d)
1291        let query = "MATCH (a)-[r]->(b)-[s]->(c)-[t]->(d) RETURN a, d";
1292        let result = parse_cypher(query);
1293        assert!(result.is_ok());
1294    }
1295}