Skip to main content

harn_hostlib/code_index/
cypher.rs

1//! Minimal Cypher executor over [`super::SymbolGraph`].
2//!
3//! Supported subset (intentionally narrow — see issue #2434):
4//!
5//! ```text
6//! query     = MATCH pattern [, pattern]* [WHERE expr] RETURN proj [, proj]*
7//! pattern   = '(' var ':' label '{' kv (',' kv)* '}' ')'
8//!             [ ('-' | '<-') '[' ':' edge ('*' bounds)? ']' ('->' | '-') node ]*
9//! bounds    = INT? '..' INT?
10//! expr      = term [ ('AND' | 'OR') term ]*
11//! term      = path op literal | path op path | literal
12//! op        = '=' | '<>' | '!=' | '<' | '<=' | '>' | '>='
13//! proj      = path [AS alias]
14//! path      = ident '.' ident | ident
15//! literal   = STRING | INT
16//! ```
17//!
18//! Variable-length traversal up to depth 4 is supported via `*1..N` or
19//! the shorthand `*` (defaults to `1..3`). When the upper bound is
20//! omitted (e.g. `*1..` or `*2..`), it defaults to the executor depth
21//! cap (4) — matching the usual Cypher "open upper bound = capped by
22//! engine" semantics. The executor implements depth-first enumeration
23//! with a per-step visited set so cycles can't infinite-loop.
24//!
25//! Read-only; no MERGE/CREATE/SET. The result is a flat
26//! [`Vec<CypherRow>`] where each row maps the projected aliases to
27//! [`CypherValue`]s.
28//!
29//! Execution is bounded by two budgets enforced inside [`exec`] and
30//! [`parse`]:
31//! * [`MAX_ROWS`] — maximum number of intermediate bindings *and*
32//!   projected rows. The executor returns [`CypherError::ExecError`] as
33//!   soon as it tries to push past the cap, so a degenerate
34//!   `MATCH (a),(b),(c) RETURN ...` query cannot lock the symbol-graph
35//!   index for arbitrarily long.
36//! * [`MAX_PATTERNS`] — maximum number of comma-separated disjoint
37//!   patterns in one query. Beyond this, the parser raises
38//!   [`CypherError::ParseError`]; richer joins should use the edge
39//!   syntax (`-[:CALLS]->`) which is bounded by [`MAX_ROWS`] but does
40//!   not multiply pattern counts.
41
42use std::collections::{BTreeMap, HashMap, HashSet};
43use std::fmt;
44use std::rc::Rc;
45
46use harn_vm::VmValue;
47
48use super::symbol_graph::{EdgeKind, NodeId, NodeKind, SymbolGraph};
49
50/// Hard upper bound on the number of rows the executor will materialize
51/// before raising [`CypherError::ExecError`]. A query like
52/// `MATCH (a),(b),(c),(d) RETURN ...` over a workspace symbol graph can
53/// produce N⁴ rows in the worst case; without a cap a single Cypher call
54/// can lock the host's symbol-graph index for many seconds. This budget
55/// matches the schema description (`code_index/cypher.request.json`).
56pub const MAX_ROWS: usize = 10_000;
57
58/// Hard upper bound on the number of comma-separated `MATCH` patterns in
59/// one query. Each additional pattern multiplies the row count, so we
60/// refuse to even start enumerating beyond three disjoint patterns —
61/// scripts that need a richer join should chain patterns through the
62/// edge syntax (`-[:CALLS]->`) rather than relying on a cartesian
63/// product. Enforced at parse time so the executor never sees one.
64pub const MAX_PATTERNS: usize = 3;
65
66/// Maximum traversal depth for variable-length patterns (`*lo..hi`).
67/// Also used as the implicit upper bound when the high end is omitted
68/// (e.g. `*1..`), matching standard "open upper bound" Cypher semantics.
69const VAR_LENGTH_MAX_DEPTH: u32 = 4;
70
71/// Error variants the parser/executor raise. The host wraps these in
72/// [`crate::error::HostlibError`] before surfacing to scripts. Each
73/// variant carries a free-text message identifying the offending token
74/// or position.
75#[derive(Debug, Clone, Eq, PartialEq)]
76pub enum CypherError {
77    /// Tokenizer reached an unexpected character.
78    LexError(String),
79    /// Parser saw a token it didn't recognize at this position.
80    ParseError(String),
81    /// Executor saw something semantically invalid (unknown variable,
82    /// unknown edge label, var-length bound > 4, …).
83    ExecError(String),
84}
85
86impl fmt::Display for CypherError {
87    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88        match self {
89            CypherError::LexError(s) => write!(f, "lex error: {s}"),
90            CypherError::ParseError(s) => write!(f, "parse error: {s}"),
91            CypherError::ExecError(s) => write!(f, "exec error: {s}"),
92        }
93    }
94}
95
96impl std::error::Error for CypherError {}
97
98/// One projected value in a row.
99#[derive(Debug, Clone, Eq, PartialEq)]
100pub enum CypherValue {
101    /// SQL/Cypher `NULL`. Falls back to `VmValue::Nil`.
102    Null,
103    /// UTF-8 string.
104    String(String),
105    /// 64-bit signed integer.
106    Int(i64),
107    /// Boolean.
108    Bool(bool),
109}
110
111impl CypherValue {
112    /// Render as the corresponding [`VmValue`].
113    pub fn to_vm(&self) -> VmValue {
114        match self {
115            CypherValue::Null => VmValue::Nil,
116            CypherValue::String(s) => VmValue::String(Rc::from(s.as_str())),
117            CypherValue::Int(n) => VmValue::Int(*n),
118            CypherValue::Bool(b) => VmValue::Bool(*b),
119        }
120    }
121}
122
123/// One projected row.
124pub type CypherRow = BTreeMap<String, CypherValue>;
125
126/// Parse + execute `query` against `graph`. Returns one row per match.
127pub fn execute(query: &str, graph: &SymbolGraph) -> Result<Vec<CypherRow>, CypherError> {
128    let tokens = lex(query)?;
129    let ast = parse(&tokens)?;
130    exec(&ast, graph)
131}
132
133// ---------------------------------------------------------------------------
134// Lexer
135// ---------------------------------------------------------------------------
136
137#[derive(Debug, Clone, PartialEq)]
138enum Token {
139    /// Keyword (uppercase form, case-insensitive matching at lex time).
140    Keyword(String),
141    Ident(String),
142    Str(String),
143    Int(i64),
144    LParen,
145    RParen,
146    LBrace,
147    RBrace,
148    LBracket,
149    RBracket,
150    Colon,
151    Comma,
152    Dot,
153    Eq,
154    Neq,
155    Lt,
156    Le,
157    Gt,
158    Ge,
159    Star,
160    DotDot,
161    Dash,
162    Arrow,     // ->
163    LeftArrow, // <-
164}
165
166fn lex(input: &str) -> Result<Vec<Token>, CypherError> {
167    let mut out: Vec<Token> = Vec::new();
168    let bytes = input.as_bytes();
169    let mut i = 0;
170    while i < bytes.len() {
171        let b = bytes[i];
172        if b.is_ascii_whitespace() {
173            i += 1;
174            continue;
175        }
176        if b == b'(' {
177            out.push(Token::LParen);
178            i += 1;
179            continue;
180        }
181        if b == b')' {
182            out.push(Token::RParen);
183            i += 1;
184            continue;
185        }
186        if b == b'{' {
187            out.push(Token::LBrace);
188            i += 1;
189            continue;
190        }
191        if b == b'}' {
192            out.push(Token::RBrace);
193            i += 1;
194            continue;
195        }
196        if b == b'[' {
197            out.push(Token::LBracket);
198            i += 1;
199            continue;
200        }
201        if b == b']' {
202            out.push(Token::RBracket);
203            i += 1;
204            continue;
205        }
206        if b == b':' {
207            out.push(Token::Colon);
208            i += 1;
209            continue;
210        }
211        if b == b',' {
212            out.push(Token::Comma);
213            i += 1;
214            continue;
215        }
216        if b == b'*' {
217            out.push(Token::Star);
218            i += 1;
219            continue;
220        }
221        if b == b'=' {
222            out.push(Token::Eq);
223            i += 1;
224            continue;
225        }
226        if b == b'.' {
227            if i + 1 < bytes.len() && bytes[i + 1] == b'.' {
228                out.push(Token::DotDot);
229                i += 2;
230            } else {
231                out.push(Token::Dot);
232                i += 1;
233            }
234            continue;
235        }
236        if b == b'<' {
237            if i + 1 < bytes.len() && bytes[i + 1] == b'-' {
238                out.push(Token::LeftArrow);
239                i += 2;
240            } else if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
241                out.push(Token::Le);
242                i += 2;
243            } else if i + 1 < bytes.len() && bytes[i + 1] == b'>' {
244                out.push(Token::Neq);
245                i += 2;
246            } else {
247                out.push(Token::Lt);
248                i += 1;
249            }
250            continue;
251        }
252        if b == b'>' {
253            if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
254                out.push(Token::Ge);
255                i += 2;
256            } else {
257                out.push(Token::Gt);
258                i += 1;
259            }
260            continue;
261        }
262        if b == b'!' {
263            if i + 1 < bytes.len() && bytes[i + 1] == b'=' {
264                out.push(Token::Neq);
265                i += 2;
266                continue;
267            }
268            return Err(CypherError::LexError(format!(
269                "unexpected '!' at byte {i} (expected '!=')"
270            )));
271        }
272        if b == b'-' {
273            if i + 1 < bytes.len() && bytes[i + 1] == b'>' {
274                out.push(Token::Arrow);
275                i += 2;
276            } else {
277                out.push(Token::Dash);
278                i += 1;
279            }
280            continue;
281        }
282        if b == b'\'' || b == b'"' {
283            let quote = b;
284            let start = i + 1;
285            i += 1;
286            while i < bytes.len() && bytes[i] != quote {
287                if bytes[i] == b'\\' && i + 1 < bytes.len() {
288                    i += 2;
289                } else {
290                    i += 1;
291                }
292            }
293            if i >= bytes.len() {
294                return Err(CypherError::LexError("unterminated string literal".into()));
295            }
296            let raw = &input[start..i];
297            out.push(Token::Str(unescape(raw)));
298            i += 1;
299            continue;
300        }
301        if b.is_ascii_digit() {
302            let start = i;
303            while i < bytes.len() && bytes[i].is_ascii_digit() {
304                i += 1;
305            }
306            let n: i64 = input[start..i].parse().map_err(|err| {
307                CypherError::LexError(format!("bad integer at byte {start}: {err}"))
308            })?;
309            out.push(Token::Int(n));
310            continue;
311        }
312        if b.is_ascii_alphabetic() || b == b'_' {
313            let start = i;
314            while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
315                i += 1;
316            }
317            let word = &input[start..i];
318            let upper = word.to_ascii_uppercase();
319            if is_keyword(&upper) {
320                out.push(Token::Keyword(upper));
321            } else {
322                out.push(Token::Ident(word.to_string()));
323            }
324            continue;
325        }
326        return Err(CypherError::LexError(format!(
327            "unexpected character `{}` at byte {i}",
328            char::from(b)
329        )));
330    }
331    Ok(out)
332}
333
334fn is_keyword(word: &str) -> bool {
335    matches!(
336        word,
337        "MATCH" | "WHERE" | "RETURN" | "AND" | "OR" | "AS" | "NOT" | "TRUE" | "FALSE"
338    )
339}
340
341fn unescape(raw: &str) -> String {
342    let mut out = String::with_capacity(raw.len());
343    let mut iter = raw.chars();
344    while let Some(c) = iter.next() {
345        if c == '\\' {
346            if let Some(next) = iter.next() {
347                out.push(match next {
348                    'n' => '\n',
349                    't' => '\t',
350                    'r' => '\r',
351                    '\\' => '\\',
352                    '\'' => '\'',
353                    '"' => '"',
354                    other => other,
355                });
356            }
357        } else {
358            out.push(c);
359        }
360    }
361    out
362}
363
364// ---------------------------------------------------------------------------
365// Parser AST
366// ---------------------------------------------------------------------------
367
368#[derive(Debug, Clone)]
369struct Query {
370    matches: Vec<Pattern>,
371    where_clause: Option<Expr>,
372    projections: Vec<Projection>,
373}
374
375#[derive(Debug, Clone)]
376struct Pattern {
377    head: NodePat,
378    steps: Vec<RelStep>,
379}
380
381#[derive(Debug, Clone)]
382struct NodePat {
383    var: String,
384    label: Option<String>,
385    props: BTreeMap<String, Literal>,
386}
387
388#[derive(Debug, Clone)]
389struct RelStep {
390    /// Direction the arrow points. true = forward (`-[]->`), false = reverse (`<-[]-`).
391    forward: bool,
392    edge_label: String,
393    var_length: Option<(u32, u32)>,
394    target: NodePat,
395}
396
397#[derive(Debug, Clone)]
398enum Expr {
399    And(Box<Expr>, Box<Expr>),
400    Or(Box<Expr>, Box<Expr>),
401    Not(Box<Expr>),
402    Compare(Operand, CmpOp, Operand),
403    Bool(bool),
404}
405
406#[derive(Debug, Clone, Copy, PartialEq, Eq)]
407enum CmpOp {
408    Eq,
409    Neq,
410    Lt,
411    Le,
412    Gt,
413    Ge,
414}
415
416#[derive(Debug, Clone)]
417enum Operand {
418    /// `var.property` or just `var` (no property).
419    Path {
420        var: String,
421        property: Option<String>,
422    },
423    Literal(Literal),
424}
425
426#[derive(Debug, Clone, PartialEq, Eq)]
427enum Literal {
428    Str(String),
429    Int(i64),
430    Bool(bool),
431}
432
433#[derive(Debug, Clone)]
434struct Projection {
435    operand: Operand,
436    /// Final alias used in the projected row. Empty during parsing
437    /// when no explicit `AS` clause was supplied; filled in after the
438    /// query is fully parsed by [`resolve_projection_aliases`].
439    alias: String,
440    /// `true` if the user supplied an `AS <name>` clause for this
441    /// projection. Used to disambiguate default-alias collisions
442    /// (e.g. `RETURN 1, 2` would otherwise project two `value` keys
443    /// into the same `BTreeMap`).
444    explicit_alias: bool,
445}
446
447// ---------------------------------------------------------------------------
448// Parser
449// ---------------------------------------------------------------------------
450
451struct Parser<'a> {
452    tokens: &'a [Token],
453    pos: usize,
454}
455
456fn parse(tokens: &[Token]) -> Result<Query, CypherError> {
457    let mut p = Parser { tokens, pos: 0 };
458    let q = p.parse_query()?;
459    if p.pos != tokens.len() {
460        return Err(CypherError::ParseError(format!(
461            "trailing tokens after RETURN at position {} ({:?})",
462            p.pos, tokens[p.pos]
463        )));
464    }
465    Ok(q)
466}
467
468impl<'a> Parser<'a> {
469    fn peek(&self) -> Option<&Token> {
470        self.tokens.get(self.pos)
471    }
472
473    fn advance(&mut self) -> Option<&Token> {
474        let t = self.tokens.get(self.pos);
475        self.pos += 1;
476        t
477    }
478
479    fn expect_keyword(&mut self, word: &str) -> Result<(), CypherError> {
480        match self.advance() {
481            Some(Token::Keyword(k)) if k == word => Ok(()),
482            other => Err(CypherError::ParseError(format!(
483                "expected `{word}`, got {other:?}"
484            ))),
485        }
486    }
487
488    fn match_keyword(&mut self, word: &str) -> bool {
489        if matches!(self.peek(), Some(Token::Keyword(k)) if k == word) {
490            self.pos += 1;
491            true
492        } else {
493            false
494        }
495    }
496
497    fn expect(&mut self, want: &Token) -> Result<(), CypherError> {
498        let next = self.advance();
499        if next == Some(want) {
500            Ok(())
501        } else {
502            Err(CypherError::ParseError(format!(
503                "expected {want:?}, got {next:?}"
504            )))
505        }
506    }
507
508    fn parse_query(&mut self) -> Result<Query, CypherError> {
509        self.expect_keyword("MATCH")?;
510        let mut matches = vec![self.parse_pattern()?];
511        while matches!(self.peek(), Some(Token::Comma)) {
512            self.advance();
513            matches.push(self.parse_pattern()?);
514            if matches.len() > MAX_PATTERNS {
515                return Err(CypherError::ParseError(format!(
516                    "too many disjoint MATCH patterns (got {}, cap is {}); \
517                     join through edge syntax (`-[:KIND]->`) instead of a cartesian product",
518                    matches.len(),
519                    MAX_PATTERNS
520                )));
521            }
522        }
523        let where_clause = if self.match_keyword("WHERE") {
524            Some(self.parse_expr()?)
525        } else {
526            None
527        };
528        self.expect_keyword("RETURN")?;
529        let mut projections = vec![self.parse_projection()?];
530        while matches!(self.peek(), Some(Token::Comma)) {
531            self.advance();
532            projections.push(self.parse_projection()?);
533        }
534        resolve_projection_aliases(&mut projections)?;
535        Ok(Query {
536            matches,
537            where_clause,
538            projections,
539        })
540    }
541
542    fn parse_pattern(&mut self) -> Result<Pattern, CypherError> {
543        let head = self.parse_node_pat()?;
544        let mut steps: Vec<RelStep> = Vec::new();
545        loop {
546            // Detect either `-[:X]->` or `<-[:X]-`.
547            let forward = match self.peek() {
548                Some(Token::Dash) => true,
549                Some(Token::LeftArrow) => false,
550                _ => break,
551            };
552            self.advance(); // consume - or <-
553            self.expect(&Token::LBracket)?;
554            self.expect(&Token::Colon)?;
555            let edge_label = match self.advance() {
556                Some(Token::Ident(s)) => s.clone(),
557                other => {
558                    return Err(CypherError::ParseError(format!(
559                        "expected edge label, got {other:?}"
560                    )))
561                }
562            };
563            let var_length = if matches!(self.peek(), Some(Token::Star)) {
564                self.advance();
565                let lo = if matches!(self.peek(), Some(Token::Int(_))) {
566                    if let Some(Token::Int(n)) = self.advance() {
567                        Some(*n)
568                    } else {
569                        None
570                    }
571                } else {
572                    None
573                };
574                let bounds = if matches!(self.peek(), Some(Token::DotDot)) {
575                    self.advance();
576                    let hi = if matches!(self.peek(), Some(Token::Int(_))) {
577                        if let Some(Token::Int(n)) = self.advance() {
578                            Some(*n)
579                        } else {
580                            None
581                        }
582                    } else {
583                        None
584                    };
585                    // `*lo..` with no upper bound: default to the executor
586                    // depth cap so users opt into the maximum traversal
587                    // length without naming it explicitly.
588                    let hi_v = hi.map(|n| n as u32).unwrap_or(VAR_LENGTH_MAX_DEPTH);
589                    (lo.unwrap_or(1) as u32, hi_v)
590                } else {
591                    // bare `*`: 1..3
592                    let lo_v = lo.unwrap_or(1) as u32;
593                    (lo_v, lo_v.max(3))
594                };
595                Some(bounds)
596            } else {
597                None
598            };
599            self.expect(&Token::RBracket)?;
600            if forward {
601                self.expect(&Token::Arrow)?;
602            } else {
603                self.expect(&Token::Dash)?;
604            }
605            let target = self.parse_node_pat()?;
606            steps.push(RelStep {
607                forward,
608                edge_label,
609                var_length,
610                target,
611            });
612        }
613        Ok(Pattern { head, steps })
614    }
615
616    fn parse_node_pat(&mut self) -> Result<NodePat, CypherError> {
617        self.expect(&Token::LParen)?;
618        let var = match self.advance() {
619            Some(Token::Ident(s)) => s.clone(),
620            other => {
621                return Err(CypherError::ParseError(format!(
622                    "expected node variable, got {other:?}"
623                )))
624            }
625        };
626        let label = if matches!(self.peek(), Some(Token::Colon)) {
627            self.advance();
628            match self.advance() {
629                Some(Token::Ident(s)) => Some(s.clone()),
630                other => {
631                    return Err(CypherError::ParseError(format!(
632                        "expected node label, got {other:?}"
633                    )))
634                }
635            }
636        } else {
637            None
638        };
639        let props = if matches!(self.peek(), Some(Token::LBrace)) {
640            self.parse_props()?
641        } else {
642            BTreeMap::new()
643        };
644        self.expect(&Token::RParen)?;
645        Ok(NodePat { var, label, props })
646    }
647
648    fn parse_props(&mut self) -> Result<BTreeMap<String, Literal>, CypherError> {
649        self.expect(&Token::LBrace)?;
650        let mut props: BTreeMap<String, Literal> = BTreeMap::new();
651        loop {
652            let key = match self.advance() {
653                Some(Token::Ident(s)) => s.clone(),
654                other => {
655                    return Err(CypherError::ParseError(format!(
656                        "expected property key, got {other:?}"
657                    )))
658                }
659            };
660            self.expect(&Token::Colon)?;
661            let value = self.parse_literal()?;
662            props.insert(key, value);
663            match self.peek() {
664                Some(Token::Comma) => {
665                    self.advance();
666                }
667                Some(Token::RBrace) => break,
668                other => {
669                    return Err(CypherError::ParseError(format!(
670                        "expected ',' or '}}', got {other:?}"
671                    )))
672                }
673            }
674        }
675        self.expect(&Token::RBrace)?;
676        Ok(props)
677    }
678
679    fn parse_literal(&mut self) -> Result<Literal, CypherError> {
680        match self.advance() {
681            Some(Token::Str(s)) => Ok(Literal::Str(s.clone())),
682            Some(Token::Int(n)) => Ok(Literal::Int(*n)),
683            Some(Token::Keyword(k)) if k == "TRUE" => Ok(Literal::Bool(true)),
684            Some(Token::Keyword(k)) if k == "FALSE" => Ok(Literal::Bool(false)),
685            other => Err(CypherError::ParseError(format!(
686                "expected literal, got {other:?}"
687            ))),
688        }
689    }
690
691    fn parse_expr(&mut self) -> Result<Expr, CypherError> {
692        let mut left = self.parse_and_expr()?;
693        while self.match_keyword("OR") {
694            let right = self.parse_and_expr()?;
695            left = Expr::Or(Box::new(left), Box::new(right));
696        }
697        Ok(left)
698    }
699
700    fn parse_and_expr(&mut self) -> Result<Expr, CypherError> {
701        let mut left = self.parse_unary_expr()?;
702        while self.match_keyword("AND") {
703            let right = self.parse_unary_expr()?;
704            left = Expr::And(Box::new(left), Box::new(right));
705        }
706        Ok(left)
707    }
708
709    fn parse_unary_expr(&mut self) -> Result<Expr, CypherError> {
710        if self.match_keyword("NOT") {
711            let inner = self.parse_unary_expr()?;
712            return Ok(Expr::Not(Box::new(inner)));
713        }
714        self.parse_compare()
715    }
716
717    fn parse_compare(&mut self) -> Result<Expr, CypherError> {
718        if matches!(
719            self.peek(),
720            Some(Token::Keyword(k)) if k == "TRUE" || k == "FALSE"
721        ) {
722            let lit = self.parse_literal()?;
723            return Ok(match lit {
724                Literal::Bool(b) => Expr::Bool(b),
725                _ => unreachable!(),
726            });
727        }
728        let left = self.parse_operand()?;
729        let op = match self.peek() {
730            Some(Token::Eq) => Some(CmpOp::Eq),
731            Some(Token::Neq) => Some(CmpOp::Neq),
732            Some(Token::Lt) => Some(CmpOp::Lt),
733            Some(Token::Le) => Some(CmpOp::Le),
734            Some(Token::Gt) => Some(CmpOp::Gt),
735            Some(Token::Ge) => Some(CmpOp::Ge),
736            _ => None,
737        };
738        let Some(op) = op else {
739            return Err(CypherError::ParseError(
740                "expected comparison operator in WHERE clause".into(),
741            ));
742        };
743        self.advance();
744        let right = self.parse_operand()?;
745        Ok(Expr::Compare(left, op, right))
746    }
747
748    fn parse_operand(&mut self) -> Result<Operand, CypherError> {
749        match self.peek() {
750            Some(Token::Str(_)) | Some(Token::Int(_)) => {
751                let lit = self.parse_literal()?;
752                Ok(Operand::Literal(lit))
753            }
754            Some(Token::Keyword(k)) if k == "TRUE" || k == "FALSE" => {
755                let lit = self.parse_literal()?;
756                Ok(Operand::Literal(lit))
757            }
758            Some(Token::Ident(_)) => {
759                let var = if let Some(Token::Ident(s)) = self.advance() {
760                    s.clone()
761                } else {
762                    unreachable!()
763                };
764                let property = if matches!(self.peek(), Some(Token::Dot)) {
765                    self.advance();
766                    match self.advance() {
767                        Some(Token::Ident(s)) => Some(s.clone()),
768                        other => {
769                            return Err(CypherError::ParseError(format!(
770                                "expected property name after '.', got {other:?}"
771                            )))
772                        }
773                    }
774                } else {
775                    None
776                };
777                Ok(Operand::Path { var, property })
778            }
779            other => Err(CypherError::ParseError(format!(
780                "expected operand, got {other:?}"
781            ))),
782        }
783    }
784
785    fn parse_projection(&mut self) -> Result<Projection, CypherError> {
786        let operand = self.parse_operand()?;
787        let alias = if self.match_keyword("AS") {
788            match self.advance() {
789                Some(Token::Ident(s)) => Some(s.clone()),
790                other => {
791                    return Err(CypherError::ParseError(format!(
792                        "expected alias after AS, got {other:?}"
793                    )))
794                }
795            }
796        } else {
797            None
798        };
799        let explicit_alias = alias.is_some();
800        Ok(Projection {
801            operand,
802            alias: alias.unwrap_or_default(),
803            explicit_alias,
804        })
805    }
806}
807
808fn default_alias(operand: &Operand) -> String {
809    match operand {
810        Operand::Path {
811            var,
812            property: None,
813        } => var.clone(),
814        Operand::Path {
815            var,
816            property: Some(p),
817        } => format!("{var}.{p}"),
818        Operand::Literal(_) => "value".to_string(),
819    }
820}
821
822/// Assign default aliases to every projection that didn't get an
823/// explicit `AS <name>`. Default aliases for literal operands all start
824/// as `"value"`, so without deduplication a query like `RETURN 1, 2`
825/// would silently overwrite the first column. We resolve the collision
826/// deterministically by suffixing the second, third, … occurrence with
827/// `_2`, `_3`, … For non-literal default aliases (`var` or
828/// `var.property`) we leave names unchanged because they are already
829/// disambiguated by the underlying path. If a default-derived alias
830/// collides with an explicit alias, the default is suffixed; an
831/// explicit alias colliding with another explicit alias raises a
832/// `ParseError::ParseError` because that is a user authoring mistake.
833fn resolve_projection_aliases(projections: &mut [Projection]) -> Result<(), CypherError> {
834    use std::collections::HashSet;
835
836    // First pass: collect explicit aliases and detect duplicates among them.
837    let mut seen: HashSet<String> = HashSet::new();
838    for proj in projections.iter() {
839        if proj.explicit_alias && !seen.insert(proj.alias.clone()) {
840            return Err(CypherError::ParseError(format!(
841                "duplicate alias `{}` in RETURN clause",
842                proj.alias
843            )));
844        }
845    }
846
847    // Second pass: fill in default aliases with collision-suffixing.
848    let mut literal_count: usize = 0;
849    for proj in projections.iter_mut() {
850        if proj.explicit_alias {
851            continue;
852        }
853        let base = default_alias(&proj.operand);
854        let is_literal_default = matches!(proj.operand, Operand::Literal(_));
855        let mut candidate = base.clone();
856        if is_literal_default {
857            literal_count += 1;
858            if literal_count >= 2 {
859                candidate = format!("{base}_{literal_count}");
860            }
861        }
862        // Guard against any other accidental collision (default path
863        // alias clashing with an explicit alias or another default).
864        let mut bump: usize = 2;
865        while seen.contains(&candidate) {
866            candidate = format!("{base}_{bump}");
867            bump += 1;
868        }
869        seen.insert(candidate.clone());
870        proj.alias = candidate;
871    }
872    Ok(())
873}
874
875// ---------------------------------------------------------------------------
876// Executor
877// ---------------------------------------------------------------------------
878
879fn exec(query: &Query, graph: &SymbolGraph) -> Result<Vec<CypherRow>, CypherError> {
880    let mut all_rows: Vec<HashMap<String, NodeId>> = vec![HashMap::new()];
881    for pattern in &query.matches {
882        let mut new_rows: Vec<HashMap<String, NodeId>> = Vec::new();
883        for binding in &all_rows {
884            let candidates = match_pattern(pattern, graph, binding)?;
885            for cand in candidates {
886                let mut merged = binding.clone();
887                for (k, v) in cand {
888                    merged.insert(k, v);
889                }
890                new_rows.push(merged);
891                // Pre-projection budget: a cartesian explosion can
892                // balloon `new_rows` long before we ever start
893                // building projected rows, so we cap intermediate
894                // bindings too.
895                if new_rows.len() > MAX_ROWS {
896                    return Err(CypherError::ExecError(format!(
897                        "row budget exceeded ({} > {}); narrow the MATCH or add a WHERE clause",
898                        new_rows.len(),
899                        MAX_ROWS
900                    )));
901                }
902            }
903        }
904        all_rows = new_rows;
905    }
906
907    let mut out: Vec<CypherRow> = Vec::new();
908    for binding in all_rows {
909        if let Some(expr) = &query.where_clause {
910            if !eval_bool(expr, &binding, graph)? {
911                continue;
912            }
913        }
914        let mut row: CypherRow = BTreeMap::new();
915        for proj in &query.projections {
916            let v = eval_operand(&proj.operand, &binding, graph)?;
917            row.insert(proj.alias.clone(), v);
918        }
919        out.push(row);
920        // Post-WHERE budget: even if intermediate bindings stayed
921        // under the cap, a permissive projection on a wide graph can
922        // still hand back too many rows.
923        if out.len() > MAX_ROWS {
924            return Err(CypherError::ExecError(format!(
925                "row budget exceeded ({} > {}); narrow the MATCH or add a WHERE clause",
926                out.len(),
927                MAX_ROWS
928            )));
929        }
930    }
931    Ok(out)
932}
933
934fn match_pattern(
935    pattern: &Pattern,
936    graph: &SymbolGraph,
937    seed: &HashMap<String, NodeId>,
938) -> Result<Vec<HashMap<String, NodeId>>, CypherError> {
939    let head_candidates: Vec<NodeId> = if let Some(existing) = seed.get(&pattern.head.var) {
940        // Already bound — verify it matches the constraints.
941        if node_matches(graph, *existing, &pattern.head) {
942            vec![*existing]
943        } else {
944            vec![]
945        }
946    } else {
947        candidate_nodes(graph, &pattern.head)
948    };
949
950    let mut out: Vec<HashMap<String, NodeId>> = Vec::new();
951    for head_id in head_candidates {
952        let mut binding: HashMap<String, NodeId> = HashMap::new();
953        binding.insert(pattern.head.var.clone(), head_id);
954        extend_steps(graph, &pattern.steps, 0, head_id, binding, &mut out)?;
955    }
956    Ok(out)
957}
958
959fn extend_steps(
960    graph: &SymbolGraph,
961    steps: &[RelStep],
962    idx: usize,
963    cursor: NodeId,
964    binding: HashMap<String, NodeId>,
965    out: &mut Vec<HashMap<String, NodeId>>,
966) -> Result<(), CypherError> {
967    if idx == steps.len() {
968        out.push(binding);
969        return Ok(());
970    }
971    let step = &steps[idx];
972    let (kind, label_reversed) =
973        EdgeKind::parse_with_direction(&step.edge_label).ok_or_else(|| {
974            CypherError::ExecError(format!("unknown edge label `{}`", step.edge_label))
975        })?;
976    // Pattern arrow direction XOR label inversion = effective direction.
977    let forward = step.forward ^ label_reversed;
978    let (lo, hi) = step.var_length.unwrap_or((1, 1));
979    if hi > VAR_LENGTH_MAX_DEPTH {
980        return Err(CypherError::ExecError(format!(
981            "variable-length traversal capped at depth {VAR_LENGTH_MAX_DEPTH} (got `*{lo}..{hi}`)"
982        )));
983    }
984    let lo = lo.max(1);
985    let hi = hi.max(lo);
986
987    // Depth-first enumerate every reachable node at depths in [lo, hi].
988    let mut visited: HashSet<NodeId> = HashSet::new();
989    visited.insert(cursor);
990    let mut stack: Vec<(NodeId, u32, HashSet<NodeId>)> = vec![(cursor, 0, visited)];
991    while let Some((node, depth, visited)) = stack.pop() {
992        if depth >= hi {
993            continue;
994        }
995        let edges = if forward {
996            graph.outgoing(node)
997        } else {
998            graph.incoming(node)
999        };
1000        for edge in edges {
1001            if edge.kind != kind {
1002                continue;
1003            }
1004            let next = if forward { edge.to } else { edge.from };
1005            if visited.contains(&next) {
1006                continue;
1007            }
1008            let new_depth = depth + 1;
1009            if new_depth >= lo && node_matches(graph, next, &step.target) {
1010                let already_bound = binding.get(&step.target.var);
1011                if matches!(already_bound, Some(id) if *id != next) {
1012                    // Variable rebinds to a different id — skip.
1013                } else {
1014                    let mut next_binding = binding.clone();
1015                    next_binding.insert(step.target.var.clone(), next);
1016                    extend_steps(graph, steps, idx + 1, next, next_binding, out)?;
1017                }
1018            }
1019            if new_depth < hi {
1020                let mut next_visited = visited.clone();
1021                next_visited.insert(next);
1022                stack.push((next, new_depth, next_visited));
1023            }
1024        }
1025    }
1026    Ok(())
1027}
1028
1029fn candidate_nodes(graph: &SymbolGraph, pat: &NodePat) -> Vec<NodeId> {
1030    // Property-driven fast path: if `name` is constrained, scan by name.
1031    if let Some(Literal::Str(name)) = pat.props.get("name") {
1032        return graph
1033            .nodes_named(name)
1034            .iter()
1035            .copied()
1036            .filter(|nid| node_matches(graph, *nid, pat))
1037            .collect();
1038    }
1039    if let Some(label) = &pat.label {
1040        let kind = NodeKind::parse(label).unwrap_or(NodeKind::Module);
1041        return graph
1042            .nodes_of_kind(kind)
1043            .into_iter()
1044            .filter(|nid| node_matches(graph, *nid, pat))
1045            .collect();
1046    }
1047    graph
1048        .all_node_ids()
1049        .into_iter()
1050        .filter(|nid| node_matches(graph, *nid, pat))
1051        .collect()
1052}
1053
1054fn node_matches(graph: &SymbolGraph, id: NodeId, pat: &NodePat) -> bool {
1055    let Some(node) = graph.node(id) else {
1056        return false;
1057    };
1058    if let Some(label) = &pat.label {
1059        let Some(kind) = NodeKind::parse(label) else {
1060            return false;
1061        };
1062        if node.kind != kind {
1063            return false;
1064        }
1065    }
1066    for (key, expected) in &pat.props {
1067        let actual = property_value(node, key);
1068        if &actual != expected {
1069            return false;
1070        }
1071    }
1072    true
1073}
1074
1075fn property_value(node: &super::symbol_graph::Node, key: &str) -> Literal {
1076    match key {
1077        "name" => Literal::Str(node.name.clone()),
1078        "path" => Literal::Str(node.path.clone()),
1079        "language" => Literal::Str(node.language.clone()),
1080        "kind" => Literal::Str(node.kind.as_str().to_string()),
1081        "container" => Literal::Str(node.container.clone().unwrap_or_default()),
1082        "signature" => Literal::Str(node.signature.clone()),
1083        "line" => Literal::Int(node.line as i64),
1084        "file_id" => Literal::Int(node.file_id as i64),
1085        "id" => Literal::Int(node.id as i64),
1086        _ => Literal::Str(String::new()),
1087    }
1088}
1089
1090fn eval_bool(
1091    expr: &Expr,
1092    binding: &HashMap<String, NodeId>,
1093    graph: &SymbolGraph,
1094) -> Result<bool, CypherError> {
1095    match expr {
1096        Expr::Bool(b) => Ok(*b),
1097        Expr::And(a, b) => Ok(eval_bool(a, binding, graph)? && eval_bool(b, binding, graph)?),
1098        Expr::Or(a, b) => Ok(eval_bool(a, binding, graph)? || eval_bool(b, binding, graph)?),
1099        Expr::Not(inner) => Ok(!eval_bool(inner, binding, graph)?),
1100        Expr::Compare(l, op, r) => {
1101            let lv = lookup_operand(l, binding, graph)?;
1102            let rv = lookup_operand(r, binding, graph)?;
1103            Ok(apply_cmp(&lv, *op, &rv))
1104        }
1105    }
1106}
1107
1108fn apply_cmp(left: &Literal, op: CmpOp, right: &Literal) -> bool {
1109    match (left, right) {
1110        (Literal::Int(a), Literal::Int(b)) => match op {
1111            CmpOp::Eq => a == b,
1112            CmpOp::Neq => a != b,
1113            CmpOp::Lt => a < b,
1114            CmpOp::Le => a <= b,
1115            CmpOp::Gt => a > b,
1116            CmpOp::Ge => a >= b,
1117        },
1118        (Literal::Str(a), Literal::Str(b)) => match op {
1119            CmpOp::Eq => a == b,
1120            CmpOp::Neq => a != b,
1121            CmpOp::Lt => a < b,
1122            CmpOp::Le => a <= b,
1123            CmpOp::Gt => a > b,
1124            CmpOp::Ge => a >= b,
1125        },
1126        (Literal::Bool(a), Literal::Bool(b)) => match op {
1127            CmpOp::Eq => a == b,
1128            CmpOp::Neq => a != b,
1129            _ => false,
1130        },
1131        _ => matches!(op, CmpOp::Neq),
1132    }
1133}
1134
1135fn lookup_operand(
1136    op: &Operand,
1137    binding: &HashMap<String, NodeId>,
1138    graph: &SymbolGraph,
1139) -> Result<Literal, CypherError> {
1140    match op {
1141        Operand::Literal(lit) => Ok(lit.clone()),
1142        Operand::Path { var, property } => {
1143            let id = binding.get(var).copied().ok_or_else(|| {
1144                CypherError::ExecError(format!("unbound variable `{var}` in expression"))
1145            })?;
1146            let node = graph
1147                .node(id)
1148                .ok_or_else(|| CypherError::ExecError(format!("node id {id} not in graph")))?;
1149            match property {
1150                Some(p) => Ok(property_value(node, p)),
1151                None => Ok(Literal::Str(node.name.clone())),
1152            }
1153        }
1154    }
1155}
1156
1157fn eval_operand(
1158    op: &Operand,
1159    binding: &HashMap<String, NodeId>,
1160    graph: &SymbolGraph,
1161) -> Result<CypherValue, CypherError> {
1162    let lit = lookup_operand(op, binding, graph)?;
1163    Ok(match lit {
1164        Literal::Str(s) => {
1165            if s.is_empty() {
1166                CypherValue::Null
1167            } else {
1168                CypherValue::String(s)
1169            }
1170        }
1171        Literal::Int(n) => CypherValue::Int(n),
1172        Literal::Bool(b) => CypherValue::Bool(b),
1173    })
1174}
1175
1176#[cfg(test)]
1177mod tests {
1178    use super::*;
1179    use crate::ast::Language;
1180
1181    fn fixture() -> SymbolGraph {
1182        let mut g = SymbolGraph::new();
1183        g.rebuild_file(
1184            1,
1185            "src/a.rs",
1186            Language::Rust,
1187            "fn start() {}\nfn driver() { start(); }\n",
1188            &[],
1189        );
1190        g.rebuild_file(
1191            2,
1192            "src/b.rs",
1193            Language::Rust,
1194            "fn entry() { driver(); }\n",
1195            &[],
1196        );
1197        g
1198    }
1199
1200    #[test]
1201    fn lexer_recognises_arrows_and_keywords() {
1202        let toks = lex("MATCH (a)-[:CALLS]->(b) RETURN a.name").unwrap();
1203        assert!(toks.iter().any(|t| matches!(t, Token::Arrow)));
1204        assert!(toks
1205            .iter()
1206            .any(|t| matches!(t, Token::Keyword(k) if k == "MATCH")));
1207    }
1208
1209    #[test]
1210    fn returns_function_by_name() {
1211        let g = fixture();
1212        let rows = execute(
1213            "MATCH (f:Function {name: 'start'}) RETURN f.path AS path, f.line AS line",
1214            &g,
1215        )
1216        .unwrap();
1217        assert_eq!(rows.len(), 1);
1218        assert_eq!(
1219            rows[0].get("path"),
1220            Some(&CypherValue::String("src/a.rs".into()))
1221        );
1222    }
1223
1224    #[test]
1225    fn called_by_var_length_finds_indirect_callers() {
1226        let g = fixture();
1227        let rows = execute(
1228            "MATCH (f:Function {name: 'start'})<-[:CALLS*1..3]-(c:CallSite) RETURN c.path AS path",
1229            &g,
1230        )
1231        .unwrap();
1232        assert!(!rows.is_empty(), "expected at least one call-site caller");
1233    }
1234
1235    #[test]
1236    fn where_predicate_filters_results() {
1237        let g = fixture();
1238        let rows = execute(
1239            "MATCH (f:Function) WHERE f.name = 'driver' RETURN f.path AS path",
1240            &g,
1241        )
1242        .unwrap();
1243        assert_eq!(rows.len(), 1);
1244        assert_eq!(
1245            rows[0].get("path"),
1246            Some(&CypherValue::String("src/a.rs".into()))
1247        );
1248    }
1249
1250    #[test]
1251    fn literal_default_aliases_do_not_collide() {
1252        // `RETURN 1, 2, 3` previously projected all three literals under
1253        // the same `"value"` key, silently overwriting earlier columns.
1254        // The deduplicator suffixes the 2nd/3rd literals as `value_2`
1255        // / `value_3` so every literal makes it into the row.
1256        let g = fixture();
1257        let rows = execute("MATCH (f:Function {name: 'start'}) RETURN 1, 2, 3", &g).unwrap();
1258        assert_eq!(rows.len(), 1);
1259        let row = &rows[0];
1260        assert_eq!(row.get("value"), Some(&CypherValue::Int(1)));
1261        assert_eq!(row.get("value_2"), Some(&CypherValue::Int(2)));
1262        assert_eq!(row.get("value_3"), Some(&CypherValue::Int(3)));
1263    }
1264
1265    #[test]
1266    fn duplicate_explicit_aliases_are_rejected() {
1267        let g = fixture();
1268        let err = execute("MATCH (f:Function) RETURN f.name AS n, f.path AS n", &g).unwrap_err();
1269        assert!(
1270            matches!(err, CypherError::ParseError(_)),
1271            "expected ParseError for duplicate explicit alias, got {err:?}"
1272        );
1273    }
1274
1275    #[test]
1276    fn rejects_too_many_disjoint_patterns() {
1277        // Four disjoint patterns is one more than `MAX_PATTERNS`; the
1278        // parser refuses before the executor ever sees the cartesian
1279        // explosion.
1280        let g = fixture();
1281        let err = execute(
1282            "MATCH (a:Function),(b:Function),(c:Function),(d:Function) RETURN a.name",
1283            &g,
1284        )
1285        .unwrap_err();
1286        assert!(
1287            matches!(&err, CypherError::ParseError(msg) if msg.contains("too many disjoint MATCH patterns")),
1288            "expected ParseError for too many patterns, got {err:?}"
1289        );
1290    }
1291
1292    #[test]
1293    fn enforces_row_budget_on_cartesian_explosion() {
1294        // Build a synthetic graph with enough Function nodes that even
1295        // a three-pattern cartesian product blows the row budget.
1296        // 25 * 25 * 25 = 15,625 > MAX_ROWS (10,000).
1297        let mut g = SymbolGraph::new();
1298        let mut source = String::new();
1299        for i in 0..25 {
1300            source.push_str(&format!("fn f{i}() {{}}\n"));
1301        }
1302        g.rebuild_file(1, "src/big.rs", Language::Rust, &source, &[]);
1303
1304        let err = execute(
1305            "MATCH (a:Function),(b:Function),(c:Function) RETURN a.name AS n",
1306            &g,
1307        )
1308        .unwrap_err();
1309        assert!(
1310            matches!(&err, CypherError::ExecError(msg) if msg.contains("row budget exceeded")),
1311            "expected ExecError for row budget, got {err:?}"
1312        );
1313    }
1314
1315    #[test]
1316    fn open_upper_bound_defaults_to_depth_cap() {
1317        // `*1..` (no hi) should fall back to VAR_LENGTH_MAX_DEPTH, not 3.
1318        let toks = lex("MATCH (a:Function)-[:CALLS*1..]->(b:Function) RETURN a.name AS n").unwrap();
1319        let q = parse(&toks).unwrap();
1320        let step = &q.matches[0].steps[0];
1321        assert_eq!(step.var_length, Some((1, VAR_LENGTH_MAX_DEPTH)));
1322
1323        // `*2..` likewise.
1324        let toks = lex("MATCH (a:Function)-[:CALLS*2..]->(b:Function) RETURN a.name AS n").unwrap();
1325        let q = parse(&toks).unwrap();
1326        let step = &q.matches[0].steps[0];
1327        assert_eq!(step.var_length, Some((2, VAR_LENGTH_MAX_DEPTH)));
1328    }
1329
1330    #[test]
1331    fn rejects_depth_above_four() {
1332        let g = fixture();
1333        let err = execute(
1334            "MATCH (a:Function)-[:CALLS*1..6]->(b:Function) RETURN a.name AS n",
1335            &g,
1336        )
1337        .unwrap_err();
1338        assert!(
1339            matches!(err, CypherError::ExecError(_)),
1340            "expected ExecError, got {err:?}"
1341        );
1342    }
1343}