Skip to main content

lora_analyzer/
analyzer.rs

1use crate::{errors::*, resolved::*, scope::*, symbols::*};
2use lora_ast::{
3    Create, Delete, Document, Expr, InQueryCall, MapProjectionSelector, Match, Merge, NodePattern,
4    Pattern, PatternElement, PatternPart, ProjectionBody, ProjectionItem, Query, QueryPart,
5    ReadingClause, RelationshipPattern, Remove, RemoveItem, Return, Set, SetItem, SinglePartQuery,
6    SingleQuery, Statement, Unwind, UpdatingClause, With,
7};
8use lora_store::GraphStorage;
9use std::collections::{BTreeMap, BTreeSet};
10
/// Semantic analyzer that turns a parsed `Document` into a `ResolvedQuery`.
///
/// It carries a borrowed graph-storage handle plus the name-resolution state
/// (scope stack and symbol table) that is threaded through every
/// `analyze_*` method.
pub struct Analyzer<'a, S: GraphStorage + ?Sized> {
    /// Borrowed storage backend; presumably consulted when validating
    /// label / relationship-type / property names — confirm against the
    /// validation helpers defined elsewhere in this file.
    storage: &'a S,
    /// Stack of name scopes: supports `push`/`pop`, `declare`, `resolve`
    /// and `clear` as used by the analysis methods.
    scopes: ScopeStack,
    /// Source of fresh variable ids (`new_var`), including ids for
    /// anonymous nodes and relationships.
    symbols: SymbolTable,
}
16
/// Context in which a graph pattern is being analyzed; it is forwarded to
/// the label and relationship-type validation helpers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PatternContext {
    /// Pattern of a non-optional MATCH.
    Read,
    /// OPTIONAL MATCH — tolerate unknown labels/types (they just won't match).
    OptionalRead,
    /// Pattern of a CREATE or MERGE; also used when validating SET labels.
    Write,
}
24
25impl<'a, S: GraphStorage + ?Sized> Analyzer<'a, S> {
26    pub fn new(storage: &'a S) -> Self {
27        Self {
28            storage,
29            scopes: ScopeStack::new(),
30            symbols: SymbolTable::default(),
31        }
32    }
33
34    pub fn analyze(&mut self, doc: &Document) -> Result<ResolvedQuery, SemanticError> {
35        match &doc.statement {
36            Statement::Query(q) => self.analyze_query(q),
37        }
38    }
39
40    fn analyze_query(&mut self, query: &Query) -> Result<ResolvedQuery, SemanticError> {
41        let mut clauses = Vec::new();
42        let mut unions = Vec::new();
43
44        match query {
45            Query::Regular(r) => {
46                clauses.extend(self.analyze_single_query(&r.head)?);
47
48                for union_part in &r.unions {
49                    // Each UNION branch gets a fresh scope — variables from one
50                    // branch must not leak into another.
51                    self.scopes.clear();
52
53                    let branch_clauses = self.analyze_single_query(&union_part.query)?;
54                    unions.push(ResolvedUnionPart {
55                        all: union_part.all,
56                        clauses: branch_clauses,
57                    });
58                }
59
60                // Validate UNION column compatibility: all branches must
61                // have the same number of columns. Column names are taken
62                // from the first branch (standard Lora semantics).
63                if !unions.is_empty() {
64                    let head_cols = return_column_info(&clauses);
65                    for branch in &unions {
66                        let branch_cols = return_column_info(&branch.clauses);
67                        if let (Some(hc), Some(bc)) = (&head_cols, &branch_cols) {
68                            if hc.len() != bc.len() {
69                                return Err(SemanticError::UnionColumnCountMismatch(
70                                    hc.len(),
71                                    bc.len(),
72                                ));
73                            }
74                            // Validate column names when at least one side
75                            // uses an explicit AS alias.
76                            for ((h_name, h_explicit), (b_name, b_explicit)) in
77                                hc.iter().zip(bc.iter())
78                            {
79                                if (*h_explicit || *b_explicit) && h_name != b_name {
80                                    return Err(SemanticError::UnionColumnNameMismatch(
81                                        h_name.clone(),
82                                        b_name.clone(),
83                                    ));
84                                }
85                            }
86                        }
87                    }
88                }
89            }
90            Query::StandaloneCall(_) => {
91                return Err(SemanticError::UnsupportedFeature(
92                    "Standalone CALL is not yet supported by the analyzer".into(),
93                ));
94            }
95        }
96
97        Ok(ResolvedQuery { clauses, unions })
98    }
99
100    fn analyze_single_query(
101        &mut self,
102        q: &SingleQuery,
103    ) -> Result<Vec<ResolvedClause>, SemanticError> {
104        match q {
105            SingleQuery::SinglePart(sp) => self.analyze_single_part(sp),
106            SingleQuery::MultiPart(mp) => {
107                let mut clauses = Vec::new();
108
109                for part in &mp.parts {
110                    clauses.extend(self.analyze_query_part(part)?);
111                }
112
113                clauses.extend(self.analyze_single_part(&mp.tail)?);
114                Ok(clauses)
115            }
116        }
117    }
118
119    fn analyze_query_part(
120        &mut self,
121        part: &QueryPart,
122    ) -> Result<Vec<ResolvedClause>, SemanticError> {
123        let mut clauses = Vec::new();
124
125        for rc in &part.reading_clauses {
126            clauses.push(self.analyze_reading_clause(rc)?);
127        }
128
129        for uc in &part.updating_clauses {
130            clauses.push(self.analyze_updating_clause(uc)?);
131        }
132
133        clauses.push(ResolvedClause::With(self.analyze_with(&part.with_clause)?));
134        Ok(clauses)
135    }
136
137    fn analyze_single_part(
138        &mut self,
139        q: &SinglePartQuery,
140    ) -> Result<Vec<ResolvedClause>, SemanticError> {
141        let mut clauses = Vec::new();
142
143        for rc in &q.reading_clauses {
144            clauses.push(self.analyze_reading_clause(rc)?);
145        }
146
147        for uc in &q.updating_clauses {
148            clauses.push(self.analyze_updating_clause(uc)?);
149        }
150
151        if let Some(ret) = &q.return_clause {
152            clauses.push(ResolvedClause::Return(self.analyze_return(ret)?));
153        }
154
155        Ok(clauses)
156    }
157
158    fn analyze_reading_clause(
159        &mut self,
160        rc: &ReadingClause,
161    ) -> Result<ResolvedClause, SemanticError> {
162        match rc {
163            ReadingClause::Match(m) => Ok(ResolvedClause::Match(self.analyze_match(m)?)),
164            ReadingClause::Unwind(u) => Ok(ResolvedClause::Unwind(self.analyze_unwind(u)?)),
165            ReadingClause::InQueryCall(c) => self.analyze_in_query_call(c),
166        }
167    }
168
169    fn analyze_updating_clause(
170        &mut self,
171        uc: &UpdatingClause,
172    ) -> Result<ResolvedClause, SemanticError> {
173        match uc {
174            UpdatingClause::Create(c) => Ok(ResolvedClause::Create(self.analyze_create(c)?)),
175            UpdatingClause::Merge(m) => Ok(ResolvedClause::Merge(self.analyze_merge(m)?)),
176            UpdatingClause::Delete(d) => Ok(ResolvedClause::Delete(self.analyze_delete(d)?)),
177            UpdatingClause::Set(s) => Ok(ResolvedClause::Set(self.analyze_set(s)?)),
178            UpdatingClause::Remove(r) => Ok(ResolvedClause::Remove(self.analyze_remove(r)?)),
179        }
180    }
181
182    fn analyze_match(&mut self, m: &Match) -> Result<ResolvedMatch, SemanticError> {
183        let ctx = if m.optional {
184            PatternContext::OptionalRead
185        } else {
186            PatternContext::Read
187        };
188        let pattern = self.analyze_pattern(&m.pattern, ctx)?;
189        let where_ = m
190            .where_
191            .as_ref()
192            .map(|e| self.analyze_expr(e))
193            .transpose()?;
194
195        if let Some(ref w) = where_ {
196            if expr_contains_aggregate(w) {
197                return Err(SemanticError::AggregationInWhere);
198            }
199        }
200
201        Ok(ResolvedMatch {
202            optional: m.optional,
203            pattern,
204            where_,
205        })
206    }
207
208    fn analyze_unwind(&mut self, u: &Unwind) -> Result<ResolvedUnwind, SemanticError> {
209        let expr = self.analyze_expr(&u.expr)?;
210        let alias = self.declare_fresh_variable(&u.alias.name)?;
211
212        Ok(ResolvedUnwind { expr, alias })
213    }
214
215    fn analyze_in_query_call(
216        &mut self,
217        _call: &InQueryCall,
218    ) -> Result<ResolvedClause, SemanticError> {
219        Err(SemanticError::UnsupportedFeature(
220            "CALL ... YIELD is not yet supported by the analyzer".into(),
221        ))
222    }
223
224    fn analyze_create(&mut self, c: &Create) -> Result<ResolvedCreate, SemanticError> {
225        let pattern = self.analyze_pattern(&c.pattern, PatternContext::Write)?;
226        Ok(ResolvedCreate { pattern })
227    }
228
229    fn analyze_merge(&mut self, m: &Merge) -> Result<ResolvedMerge, SemanticError> {
230        let pattern_part = self.analyze_pattern_part(&m.pattern_part, PatternContext::Write)?;
231        let mut actions = Vec::with_capacity(m.actions.len());
232
233        for action in &m.actions {
234            actions.push(ResolvedMergeAction {
235                on_match: action.on_match,
236                set: self.analyze_set(&action.set)?,
237            });
238        }
239
240        Ok(ResolvedMerge {
241            pattern_part,
242            actions,
243        })
244    }
245
246    fn analyze_delete(&mut self, d: &Delete) -> Result<ResolvedDelete, SemanticError> {
247        let expressions = d
248            .expressions
249            .iter()
250            .map(|e| self.analyze_expr(e))
251            .collect::<Result<Vec<_>, _>>()?;
252
253        Ok(ResolvedDelete {
254            detach: d.detach,
255            expressions,
256        })
257    }
258
259    fn analyze_set(&mut self, s: &Set) -> Result<ResolvedSet, SemanticError> {
260        let mut items = Vec::with_capacity(s.items.len());
261
262        for item in &s.items {
263            match item {
264                SetItem::SetProperty { target, value, .. } => {
265                    // SET target (e.g. n.prop) allows new property names since
266                    // the SET is creating/updating properties.
267                    items.push(ResolvedSetItem::SetProperty {
268                        target: self.analyze_expr_write_property(target)?,
269                        value: self.analyze_expr(value)?,
270                    });
271                }
272                SetItem::SetVariable {
273                    variable, value, ..
274                } => {
275                    let var = self.resolve_required_variable(&variable.name)?;
276                    items.push(ResolvedSetItem::SetVariable {
277                        variable: var,
278                        value: self.analyze_expr(value)?,
279                    });
280                }
281                SetItem::MutateVariable {
282                    variable, value, ..
283                } => {
284                    let var = self.resolve_required_variable(&variable.name)?;
285                    items.push(ResolvedSetItem::MutateVariable {
286                        variable: var,
287                        value: self.analyze_expr(value)?,
288                    });
289                }
290                SetItem::SetLabels {
291                    variable, labels, ..
292                } => {
293                    let var = self.resolve_required_variable(&variable.name)?;
294                    for label in labels {
295                        self.validate_label_name(label, PatternContext::Write)?;
296                    }
297                    items.push(ResolvedSetItem::SetLabels {
298                        variable: var,
299                        labels: labels.clone(),
300                    });
301                }
302            }
303        }
304
305        Ok(ResolvedSet { items })
306    }
307
308    fn analyze_remove(&mut self, r: &Remove) -> Result<ResolvedRemove, SemanticError> {
309        let mut items = Vec::with_capacity(r.items.len());
310
311        for item in &r.items {
312            match item {
313                RemoveItem::Labels {
314                    variable, labels, ..
315                } => {
316                    let var = self.resolve_required_variable(&variable.name)?;
317                    items.push(ResolvedRemoveItem::Labels {
318                        variable: var,
319                        labels: labels.clone(),
320                    });
321                }
322                RemoveItem::Property { expr, .. } => {
323                    items.push(ResolvedRemoveItem::Property {
324                        expr: self.analyze_expr(expr)?,
325                    });
326                }
327            }
328        }
329
330        Ok(ResolvedRemove { items })
331    }
332
333    fn analyze_pattern(
334        &mut self,
335        p: &Pattern,
336        context: PatternContext,
337    ) -> Result<ResolvedPattern, SemanticError> {
338        let mut parts = Vec::with_capacity(p.parts.len());
339
340        // In read patterns, detect when the same node variable is used at
341        // multiple positions with conflicting labels (e.g. (n:X)-[r]->(n:Y)).
342        if matches!(context, PatternContext::Read | PatternContext::OptionalRead) {
343            let mut node_labels: BTreeMap<String, Vec<String>> = BTreeMap::new();
344            for part in &p.parts {
345                self.collect_node_var_labels(&part.element, &mut node_labels);
346            }
347            for (name, labels_list) in &node_labels {
348                // Only reject if the variable appears with distinct non-empty label sets
349                if labels_list.len() > 1 {
350                    let non_empty: Vec<&String> =
351                        labels_list.iter().filter(|l| !l.is_empty()).collect();
352                    let unique_labels: BTreeSet<&String> = non_empty.iter().copied().collect();
353                    if unique_labels.len() > 1 {
354                        return Err(SemanticError::DuplicateVariable(name.clone()));
355                    }
356                }
357            }
358        }
359
360        for part in &p.parts {
361            parts.push(self.analyze_pattern_part(part, context)?);
362        }
363
364        Ok(ResolvedPattern { parts })
365    }
366
367    /// Collect (variable_name, labels_string) for each node position in a pattern element.
368    fn collect_node_var_labels(
369        &self,
370        el: &PatternElement,
371        map: &mut BTreeMap<String, Vec<String>>,
372    ) {
373        match el {
374            PatternElement::NodeChain { head, chain, .. } => {
375                if let Some(ref v) = head.variable {
376                    let label_str = format_label_groups(&head.labels);
377                    map.entry(v.name.clone()).or_default().push(label_str);
378                }
379                for step in chain {
380                    if let Some(ref v) = step.node.variable {
381                        let label_str = format_label_groups(&step.node.labels);
382                        map.entry(v.name.clone()).or_default().push(label_str);
383                    }
384                }
385            }
386            PatternElement::Parenthesized(inner, _) => {
387                self.collect_node_var_labels(inner, map);
388            }
389            PatternElement::ShortestPath { element, .. } => {
390                self.collect_node_var_labels(element, map);
391            }
392        }
393    }
394
395    fn analyze_pattern_part(
396        &mut self,
397        part: &PatternPart,
398        context: PatternContext,
399    ) -> Result<ResolvedPatternPart, SemanticError> {
400        let binding = part
401            .binding
402            .as_ref()
403            .map(|v| self.declare_or_reuse_variable(&v.name))
404            .transpose()?;
405
406        let element = self.analyze_pattern_element(&part.element, context)?;
407
408        Ok(ResolvedPatternPart { binding, element })
409    }
410
411    fn analyze_pattern_element(
412        &mut self,
413        el: &PatternElement,
414        context: PatternContext,
415    ) -> Result<ResolvedPatternElement, SemanticError> {
416        match el {
417            PatternElement::NodeChain { head, chain, .. } => {
418                if chain.is_empty() {
419                    let node = self.analyze_node(head, context)?;
420                    return Ok(ResolvedPatternElement::Node {
421                        var: node.var,
422                        labels: node.labels,
423                        properties: node.properties,
424                    });
425                }
426
427                let head = self.analyze_node(head, context)?;
428                let mut resolved_chain = Vec::with_capacity(chain.len());
429
430                for step in chain {
431                    let rel = self.analyze_relationship(&step.relationship, context)?;
432                    let node = self.analyze_node(&step.node, context)?;
433                    resolved_chain.push(ResolvedChain { rel, node });
434                }
435
436                Ok(ResolvedPatternElement::NodeChain {
437                    head,
438                    chain: resolved_chain,
439                })
440            }
441
442            PatternElement::Parenthesized(inner, _) => self.analyze_pattern_element(inner, context),
443
444            PatternElement::ShortestPath { all, element, .. } => {
445                let resolved = self.analyze_pattern_element(element, context)?;
446                match resolved {
447                    ResolvedPatternElement::NodeChain { head, chain } => {
448                        Ok(ResolvedPatternElement::ShortestPath {
449                            all: *all,
450                            head,
451                            chain,
452                        })
453                    }
454                    other => Ok(other),
455                }
456            }
457        }
458    }
459
460    fn analyze_node(
461        &mut self,
462        node: &NodePattern,
463        context: PatternContext,
464    ) -> Result<ResolvedNode, SemanticError> {
465        let var = Some(match &node.variable {
466            // Named node — declare in scope so user code can reference it.
467            Some(v) => self.declare_or_reuse_variable(&v.name)?,
468            // Anonymous node (e.g. `(:Person)`) — allocate an internal VarId
469            // but do NOT declare it in the scope, so it cannot be referenced
470            // by user expressions and will not appear in projections.
471            None => self.symbols.new_var(),
472        });
473
474        let labels: Vec<Vec<String>> = node
475            .labels
476            .iter()
477            .map(|group| {
478                group
479                    .iter()
480                    .map(|l| {
481                        self.validate_label_name(l, context)?;
482                        Ok(l.clone())
483                    })
484                    .collect::<Result<Vec<_>, SemanticError>>()
485            })
486            .collect::<Result<Vec<_>, SemanticError>>()?;
487
488        let properties = node
489            .properties
490            .as_ref()
491            .map(|e| self.analyze_property_map_expr(e))
492            .transpose()?;
493
494        Ok(ResolvedNode {
495            var,
496            labels,
497            properties,
498        })
499    }
500
501    fn analyze_relationship(
502        &mut self,
503        rel: &RelationshipPattern,
504        context: PatternContext,
505    ) -> Result<ResolvedRel, SemanticError> {
506        if let Some(detail) = &rel.detail {
507            let var = Some(match &detail.variable {
508                Some(v) => self.declare_or_reuse_variable(&v.name)?,
509                // Anonymous relationship — allocate an internal VarId so the
510                // relationship value is stored in the row (needed for path
511                // materialization).
512                None => self.symbols.new_var(),
513            });
514
515            let types = detail
516                .types
517                .iter()
518                .map(|t| {
519                    self.validate_relationship_type_name(t, context)?;
520                    Ok(t.clone())
521                })
522                .collect::<Result<Vec<_>, SemanticError>>()?;
523
524            if let Some(range) = &detail.range {
525                if let (Some(start), Some(end)) = (range.start, range.end) {
526                    if start > end {
527                        return Err(SemanticError::InvalidRange(
528                            start,
529                            end,
530                            range.span.start,
531                            range.span.end,
532                        ));
533                    }
534                }
535            }
536
537            let properties = detail
538                .properties
539                .as_ref()
540                .map(|e| self.analyze_property_map_expr(e))
541                .transpose()?;
542
543            Ok(ResolvedRel {
544                var,
545                types,
546                direction: rel.direction,
547                range: detail.range.clone(),
548                properties,
549            })
550        } else {
551            Ok(ResolvedRel {
552                var: None,
553                types: Vec::new(),
554                direction: rel.direction,
555                range: None,
556                properties: None,
557            })
558        }
559    }
560
561    /// Analyze an expression, but allow resolution of projection aliases
562    /// (used for ORDER BY which can reference aliases from RETURN/WITH).
563    fn analyze_expr_with_aliases(
564        &mut self,
565        expr: &Expr,
566        aliases: &BTreeMap<String, VarId>,
567    ) -> Result<ResolvedExpr, SemanticError> {
568        match expr {
569            Expr::Variable(v) => {
570                // First try normal scope resolution; only fall back to
571                // projection aliases when the variable is not in scope.
572                if self.scopes.resolve(&v.name).is_some() {
573                    return self.analyze_expr(expr);
574                }
575                if let Some(&id) = aliases.get(&v.name) {
576                    return Ok(ResolvedExpr::Variable(id));
577                }
578                self.analyze_expr(expr)
579            }
580            // For property access like `alias.prop`, check if the base is an alias.
581            Expr::Property {
582                expr: inner,
583                key,
584                span,
585            } => {
586                let inner = self.analyze_expr_with_aliases(inner, aliases)?;
587                if self.property_access_allowed(&inner, key) {
588                    Ok(ResolvedExpr::Property {
589                        expr: Box::new(inner),
590                        property: key.clone(),
591                    })
592                } else {
593                    Err(SemanticError::UnknownPropertyAt(
594                        key.clone(),
595                        span.start,
596                        span.end,
597                    ))
598                }
599            }
600            // For function calls in ORDER BY (e.g. ORDER BY count(p))
601            Expr::FunctionCall {
602                name,
603                distinct,
604                args,
605                span,
606            } => {
607                let fn_name = name.join(".");
608                validate_function_name(&fn_name, span.start, span.end)?;
609                validate_function_arity(&fn_name, args.len())?;
610
611                let args = args
612                    .iter()
613                    .enumerate()
614                    .map(|(idx, a)| {
615                        if let Some(lit) = try_vector_enum_literal(&fn_name, idx, a) {
616                            Ok(lit)
617                        } else {
618                            self.analyze_expr_with_aliases(a, aliases)
619                        }
620                    })
621                    .collect::<Result<Vec<_>, _>>()?;
622
623                Ok(ResolvedExpr::Function {
624                    name: fn_name,
625                    distinct: *distinct,
626                    args,
627                })
628            }
629            _ => self.analyze_expr(expr),
630        }
631    }
632
633    fn analyze_expr(&mut self, expr: &Expr) -> Result<ResolvedExpr, SemanticError> {
634        match expr {
635            Expr::Variable(v) => {
636                let id = self.resolve_required_variable(&v.name)?;
637                Ok(ResolvedExpr::Variable(id))
638            }
639
640            Expr::Integer(v, _) => Ok(ResolvedExpr::Literal(LiteralValue::Integer(*v))),
641            Expr::Float(v, _) => Ok(ResolvedExpr::Literal(LiteralValue::Float(*v))),
642            Expr::String(v, _) => Ok(ResolvedExpr::Literal(LiteralValue::String(v.clone()))),
643            Expr::Bool(v, _) => Ok(ResolvedExpr::Literal(LiteralValue::Bool(*v))),
644            Expr::Null(_) => Ok(ResolvedExpr::Literal(LiteralValue::Null)),
645            Expr::Parameter(name, _) => Ok(ResolvedExpr::Parameter(name.clone())),
646
647            Expr::List(items, _) => {
648                let items = items
649                    .iter()
650                    .map(|e| self.analyze_expr(e))
651                    .collect::<Result<Vec<_>, _>>()?;
652                Ok(ResolvedExpr::List(items))
653            }
654
655            Expr::Map(items, _) => {
656                let mut seen = BTreeSet::new();
657                let mut out = Vec::with_capacity(items.len());
658
659                for (k, v) in items {
660                    if !seen.insert(k.clone()) {
661                        return Err(SemanticError::DuplicateMapKey(k.clone()));
662                    }
663                    out.push((k.clone(), self.analyze_expr(v)?));
664                }
665
666                Ok(ResolvedExpr::Map(out))
667            }
668
669            Expr::Property { expr, key, span } => {
670                let inner = self.analyze_expr(expr)?;
671
672                if self.property_access_allowed(&inner, key) {
673                    Ok(ResolvedExpr::Property {
674                        expr: Box::new(inner),
675                        property: key.clone(),
676                    })
677                } else {
678                    Err(SemanticError::UnknownPropertyAt(
679                        key.clone(),
680                        span.start,
681                        span.end,
682                    ))
683                }
684            }
685
686            Expr::Binary { lhs, op, rhs, .. } => {
687                let lhs = self.analyze_expr(lhs)?;
688                let rhs = self.analyze_expr(rhs)?;
689
690                Ok(ResolvedExpr::Binary {
691                    lhs: Box::new(lhs),
692                    op: *op,
693                    rhs: Box::new(rhs),
694                })
695            }
696
697            Expr::Unary { op, expr, .. } => {
698                let expr = self.analyze_expr(expr)?;
699                Ok(ResolvedExpr::Unary {
700                    op: *op,
701                    expr: Box::new(expr),
702                })
703            }
704
705            Expr::FunctionCall {
706                name,
707                distinct,
708                args,
709                span,
710                ..
711            } => {
712                let fn_name = name.join(".");
713                validate_function_name(&fn_name, span.start, span.end)?;
714                validate_function_arity(&fn_name, args.len())?;
715
716                let args = args
717                    .iter()
718                    .enumerate()
719                    .map(|(idx, a)| {
720                        if let Some(lit) = try_vector_enum_literal(&fn_name, idx, a) {
721                            Ok(lit)
722                        } else {
723                            self.analyze_expr(a)
724                        }
725                    })
726                    .collect::<Result<Vec<_>, _>>()?;
727
728                Ok(ResolvedExpr::Function {
729                    name: fn_name,
730                    distinct: *distinct,
731                    args,
732                })
733            }
734
735            Expr::ListPredicate {
736                kind,
737                variable,
738                list,
739                predicate,
740                ..
741            } => {
742                let list = self.analyze_expr(list)?;
743                let var_id = self.symbols.new_var();
744                self.scopes.push();
745                self.scopes.declare(variable.name.clone(), var_id);
746                let predicate = self.analyze_expr(predicate)?;
747                self.scopes.pop();
748
749                Ok(ResolvedExpr::ListPredicate {
750                    kind: *kind,
751                    variable: var_id,
752                    list: Box::new(list),
753                    predicate: Box::new(predicate),
754                })
755            }
756
757            Expr::ListComprehension {
758                variable,
759                list,
760                filter,
761                map_expr,
762                ..
763            } => {
764                let list = self.analyze_expr(list)?;
765                let var_id = self.symbols.new_var();
766                self.scopes.push();
767                self.scopes.declare(variable.name.clone(), var_id);
768                let filter = filter.as_ref().map(|e| self.analyze_expr(e)).transpose()?;
769                let map_expr = map_expr
770                    .as_ref()
771                    .map(|e| self.analyze_expr(e))
772                    .transpose()?;
773                self.scopes.pop();
774
775                Ok(ResolvedExpr::ListComprehension {
776                    variable: var_id,
777                    list: Box::new(list),
778                    filter: filter.map(Box::new),
779                    map_expr: map_expr.map(Box::new),
780                })
781            }
782
783            Expr::Reduce {
784                accumulator,
785                init,
786                variable,
787                list,
788                expr,
789                ..
790            } => {
791                let init = self.analyze_expr(init)?;
792                let list = self.analyze_expr(list)?;
793                let acc_id = self.symbols.new_var();
794                let var_id = self.symbols.new_var();
795                self.scopes.push();
796                self.scopes.declare(accumulator.name.clone(), acc_id);
797                self.scopes.declare(variable.name.clone(), var_id);
798                let expr = self.analyze_expr(expr)?;
799                self.scopes.pop();
800
801                Ok(ResolvedExpr::Reduce {
802                    accumulator: acc_id,
803                    init: Box::new(init),
804                    variable: var_id,
805                    list: Box::new(list),
806                    expr: Box::new(expr),
807                })
808            }
809
810            Expr::Index {
811                expr: inner, index, ..
812            } => {
813                let expr = self.analyze_expr(inner)?;
814                let index = self.analyze_expr(index)?;
815                Ok(ResolvedExpr::Index {
816                    expr: Box::new(expr),
817                    index: Box::new(index),
818                })
819            }
820
821            Expr::Slice {
822                expr: inner,
823                from,
824                to,
825                ..
826            } => {
827                let expr = self.analyze_expr(inner)?;
828                let from = from
829                    .as_ref()
830                    .map(|e| self.analyze_expr(e))
831                    .transpose()?
832                    .map(Box::new);
833                let to = to
834                    .as_ref()
835                    .map(|e| self.analyze_expr(e))
836                    .transpose()?
837                    .map(Box::new);
838                Ok(ResolvedExpr::Slice {
839                    expr: Box::new(expr),
840                    from,
841                    to,
842                })
843            }
844
845            Expr::MapProjection {
846                base, selectors, ..
847            } => {
848                let base = self.analyze_expr(base)?;
849                let mut resolved_selectors = Vec::new();
850                for sel in selectors {
851                    match sel {
852                        MapProjectionSelector::Property(name) => {
853                            resolved_selectors.push(ResolvedMapSelector::Property(name.clone()));
854                        }
855                        MapProjectionSelector::AllProperties => {
856                            resolved_selectors.push(ResolvedMapSelector::AllProperties);
857                        }
858                        MapProjectionSelector::Literal(key, expr) => {
859                            let resolved = self.analyze_expr(expr)?;
860                            resolved_selectors
861                                .push(ResolvedMapSelector::Literal(key.clone(), resolved));
862                        }
863                    }
864                }
865                Ok(ResolvedExpr::MapProjection {
866                    base: Box::new(base),
867                    selectors: resolved_selectors,
868                })
869            }
870
871            Expr::Case {
872                input,
873                alternatives,
874                else_expr,
875                ..
876            } => {
877                let input = input
878                    .as_ref()
879                    .map(|e| self.analyze_expr(e))
880                    .transpose()?
881                    .map(Box::new);
882
883                let alternatives = alternatives
884                    .iter()
885                    .map(|(when, then)| Ok((self.analyze_expr(when)?, self.analyze_expr(then)?)))
886                    .collect::<Result<Vec<_>, SemanticError>>()?;
887
888                let else_expr = else_expr
889                    .as_ref()
890                    .map(|e| self.analyze_expr(e))
891                    .transpose()?
892                    .map(Box::new);
893
894                Ok(ResolvedExpr::Case {
895                    input,
896                    alternatives,
897                    else_expr,
898                })
899            }
900
901            Expr::ExistsSubquery {
902                pattern, where_, ..
903            } => {
904                let resolved_pattern =
905                    self.analyze_pattern(pattern, PatternContext::OptionalRead)?;
906                let resolved_where = where_.as_ref().map(|e| self.analyze_expr(e)).transpose()?;
907                Ok(ResolvedExpr::ExistsSubquery {
908                    pattern: resolved_pattern,
909                    where_: resolved_where.map(Box::new),
910                })
911            }
912
913            Expr::PatternComprehension {
914                pattern: pat_element,
915                where_,
916                map_expr,
917                ..
918            } => {
919                // Wrap pattern_element in a PatternPart/Pattern for analysis
920                let pat = Pattern {
921                    parts: vec![PatternPart {
922                        binding: None,
923                        element: (**pat_element).clone(),
924                        span: map_expr.span(),
925                    }],
926                    span: map_expr.span(),
927                };
928                let resolved_pattern = self.analyze_pattern(&pat, PatternContext::OptionalRead)?;
929                let resolved_where = where_.as_ref().map(|e| self.analyze_expr(e)).transpose()?;
930                let resolved_map = self.analyze_expr(map_expr)?;
931                Ok(ResolvedExpr::PatternComprehension {
932                    pattern: resolved_pattern,
933                    where_: resolved_where.map(Box::new),
934                    map_expr: Box::new(resolved_map),
935                })
936            }
937        }
938    }
939
940    fn analyze_property_map_expr(&mut self, expr: &Expr) -> Result<ResolvedExpr, SemanticError> {
941        match expr {
942            Expr::Map(_, _) | Expr::Parameter(_, _) => self.analyze_expr(expr),
943            _ => Err(SemanticError::ExpectedPropertyMap(
944                expr.span().start,
945                expr.span().end,
946            )),
947        }
948    }
949
950    fn analyze_return(&mut self, r: &Return) -> Result<ResolvedReturn, SemanticError> {
951        let analyzed = self.analyze_projection_body(&r.body)?;
952
953        Ok(ResolvedReturn {
954            distinct: r.body.distinct,
955            items: analyzed.items,
956            include_existing: analyzed.include_existing,
957            order: analyzed.order,
958            skip: analyzed.skip,
959            limit: analyzed.limit,
960        })
961    }
962
963    fn analyze_with(&mut self, w: &With) -> Result<ResolvedWith, SemanticError> {
964        let old_scope = self.visible_bindings();
965        let analyzed = self.analyze_projection_body(&w.body)?;
966
967        let mut new_scope = BTreeMap::<String, VarId>::new();
968
969        if analyzed.include_existing {
970            for (name, id) in old_scope {
971                new_scope.insert(name, id);
972            }
973        }
974
975        for exported in &analyzed.exported_aliases {
976            new_scope.insert(exported.name.clone(), exported.id);
977        }
978
979        self.replace_scope(new_scope);
980
981        let where_ = w
982            .where_
983            .as_ref()
984            .map(|e| self.analyze_expr(e))
985            .transpose()?;
986
987        Ok(ResolvedWith {
988            distinct: w.body.distinct,
989            items: analyzed.items,
990            include_existing: analyzed.include_existing,
991            order: analyzed.order,
992            skip: analyzed.skip,
993            limit: analyzed.limit,
994            where_,
995        })
996    }
997
998    fn analyze_projection_body(
999        &mut self,
1000        body: &ProjectionBody,
1001    ) -> Result<AnalyzedProjectionBody, SemanticError> {
1002        let mut items = Vec::new();
1003        let mut include_existing = false;
1004        let mut exported_aliases = Vec::new();
1005        let mut seen_alias_names = BTreeSet::new();
1006
1007        for item in &body.items {
1008            match item {
1009                ProjectionItem::Expr { expr, alias, span } => {
1010                    let resolved = self.analyze_expr(expr)?;
1011
1012                    let explicit = alias.is_some();
1013                    let name = if let Some(var) = alias {
1014                        if !seen_alias_names.insert(var.name.clone()) {
1015                            return Err(SemanticError::DuplicateProjectionAlias(var.name.clone()));
1016                        }
1017                        var.name.clone()
1018                    } else {
1019                        projection_name(expr)
1020                    };
1021
1022                    let output = self.symbols.new_var();
1023
1024                    exported_aliases.push(ExportedAlias {
1025                        name: name.clone(),
1026                        id: output,
1027                    });
1028
1029                    items.push(ResolvedProjection {
1030                        expr: resolved,
1031                        output,
1032                        name,
1033                        explicit_alias: explicit,
1034                        span: *span,
1035                    });
1036                }
1037
1038                ProjectionItem::Star { .. } => {
1039                    include_existing = true;
1040                }
1041            }
1042        }
1043
1044        // Build a lookup from alias names to their output VarIds so ORDER BY
1045        // can reference projection aliases (e.g. ORDER BY name when RETURN p.name AS name).
1046        let alias_map: BTreeMap<String, VarId> = exported_aliases
1047            .iter()
1048            .map(|a| (a.name.clone(), a.id))
1049            .collect();
1050
1051        let order = body
1052            .order
1053            .iter()
1054            .map(|item| {
1055                let expr = self.analyze_expr_with_aliases(&item.expr, &alias_map)?;
1056                Ok(ResolvedSortItem {
1057                    expr,
1058                    direction: item.direction,
1059                })
1060            })
1061            .collect::<Result<Vec<_>, SemanticError>>()?;
1062
1063        let skip = body
1064            .skip
1065            .as_ref()
1066            .map(|e| self.analyze_expr(e))
1067            .transpose()?;
1068        let limit = body
1069            .limit
1070            .as_ref()
1071            .map(|e| self.analyze_expr(e))
1072            .transpose()?;
1073
1074        Ok(AnalyzedProjectionBody {
1075            items,
1076            include_existing,
1077            exported_aliases,
1078            order,
1079            skip,
1080            limit,
1081        })
1082    }
1083
1084    fn resolve_required_variable(&self, name: &str) -> Result<VarId, SemanticError> {
1085        self.scopes
1086            .resolve(name)
1087            .ok_or_else(|| SemanticError::UnknownVariable(name.to_string()))
1088    }
1089
1090    fn declare_fresh_variable(&mut self, name: &str) -> Result<VarId, SemanticError> {
1091        if self.scopes.resolve(name).is_some() {
1092            return Err(SemanticError::DuplicateVariable(name.to_string()));
1093        }
1094
1095        let id = self.symbols.new_var();
1096        self.scopes.declare(name.to_string(), id);
1097        Ok(id)
1098    }
1099
1100    fn declare_or_reuse_variable(&mut self, name: &str) -> Result<VarId, SemanticError> {
1101        if let Some(id) = self.scopes.resolve(name) {
1102            Ok(id)
1103        } else {
1104            let id = self.symbols.new_var();
1105            self.scopes.declare(name.to_string(), id);
1106            Ok(id)
1107        }
1108    }
1109
1110    fn validate_label_name(
1111        &self,
1112        label: &str,
1113        context: PatternContext,
1114    ) -> Result<(), SemanticError> {
1115        if matches!(
1116            context,
1117            PatternContext::Write | PatternContext::OptionalRead
1118        ) || self.storage.has_label_name(label)
1119            || self.storage.node_count() == 0
1120        {
1121            Ok(())
1122        } else {
1123            Err(SemanticError::UnknownLabel(label.to_string()))
1124        }
1125    }
1126
1127    fn validate_relationship_type_name(
1128        &self,
1129        rel_type: &str,
1130        context: PatternContext,
1131    ) -> Result<(), SemanticError> {
1132        if matches!(
1133            context,
1134            PatternContext::Write | PatternContext::OptionalRead
1135        ) || self.storage.has_relationship_type_name(rel_type)
1136            || self.storage.relationship_count() == 0
1137        {
1138            Ok(())
1139        } else {
1140            Err(SemanticError::UnknownRelationshipType(rel_type.to_string()))
1141        }
1142    }
1143
1144    /// Analyze an expression that is the target of a SET operation.
1145    /// Property names on the left side of SET are always allowed (new property creation).
1146    fn analyze_expr_write_property(&mut self, expr: &Expr) -> Result<ResolvedExpr, SemanticError> {
1147        match expr {
1148            Expr::Property {
1149                expr: inner, key, ..
1150            } => {
1151                let inner_resolved = self.analyze_expr(inner)?;
1152                Ok(ResolvedExpr::Property {
1153                    expr: Box::new(inner_resolved),
1154                    property: key.clone(),
1155                })
1156            }
1157            // Fallback to normal analysis for non-property expressions
1158            other => self.analyze_expr(other),
1159        }
1160    }
1161
1162    fn property_access_allowed(&self, base: &ResolvedExpr, key: &str) -> bool {
1163        match base {
1164            ResolvedExpr::Map(_) => true,
1165            _ => {
1166                self.storage.has_property_key(key)
1167                    || (self.storage.node_count() == 0 && self.storage.relationship_count() == 0)
1168            }
1169        }
1170    }
1171
1172    fn visible_bindings(&self) -> BTreeMap<String, VarId> {
1173        self.scopes.visible_bindings()
1174    }
1175
1176    fn replace_scope(&mut self, bindings: BTreeMap<String, VarId>) {
1177        self.scopes.clear();
1178        for (name, id) in bindings {
1179            self.scopes.declare(name, id);
1180        }
1181    }
1182}
1183
/// Known scalar and aggregate function names accepted by the engine.
///
/// Every entry is lowercase ASCII; callers are expected to normalise case
/// before looking a name up. Namespaced functions are stored with their
/// dotted name (e.g. `"date.truncate"`).
#[rustfmt::skip]
const KNOWN_FUNCTIONS: &[&str] = &[
    // Aggregate
    "count", "sum", "avg", "min", "max", "collect",
    "stdev", "stdevp", "percentilecont", "percentiledisc",
    // Entity introspection
    "id", "type", "labels", "keys", "properties",
    // Path functions
    "nodes", "relationships",
    // String
    "tolower", "toupper", "trim", "ltrim", "rtrim",
    "replace", "split", "substring", "reverse",
    "left", "right", "lpad", "rpad", "char_length", "normalize",
    // Type conversion / introspection
    "tostring", "tointeger", "toint", "tofloat",
    "toboolean", "tobooleanornull", "valuetype",
    // Math — basic
    "abs", "ceil", "floor", "round", "sqrt", "sign",
    // Math — trigonometric / logarithmic
    "log", "ln", "log10", "exp",
    "sin", "cos", "tan", "asin", "acos", "atan", "atan2",
    "degrees", "radians",
    // Math — zero-argument (constants and random)
    "pi", "e", "rand",
    // List / size
    "size", "length", "head", "tail", "last", "range",
    // Other
    "coalesce", "timestamp",
    // Temporal
    "date", "datetime", "time", "localtime", "localdatetime", "duration",
    "date.truncate", "datetime.truncate", "duration.between", "duration.indays",
    // Spatial
    "point", "distance",
    // Vector
    "vector", "tointegerlist", "tofloatlist",
    "vector_dimension_count", "vector_distance", "vector_norm",
    "vector.similarity.cosine", "vector.similarity.euclidean",
];
1289
/// Lowercase names of the functions treated as aggregates.
///
/// Each of these also appears in `KNOWN_FUNCTIONS`.
#[rustfmt::skip]
const AGGREGATE_FUNCTIONS: &[&str] = &[
    "count", "sum", "avg", "min", "max", "collect",
    "stdev", "stdevp",
    "percentilecont", "percentiledisc",
];
1302
/// Returns `(min_args, max_args)` for a known function name.
///
/// `name` must already be lowercase; matching is exact. A `max_args` of
/// `None` means there is no upper bound (variadic). Names without an entry
/// yield `None`.
fn function_arity(name: &str) -> Option<(usize, Option<usize>)> {
    /// Exactly `n` arguments.
    const fn exactly(n: usize) -> (usize, Option<usize>) {
        (n, Some(n))
    }
    /// Between `lo` and `hi` arguments, inclusive.
    const fn between(lo: usize, hi: usize) -> (usize, Option<usize>) {
        (lo, Some(hi))
    }

    let arity = match name {
        // No arguments: math constants, rand, timestamp.
        "pi" | "e" | "rand" | "timestamp" => exactly(0),

        // Optional single argument: count (count(*) passes none) and the
        // temporal constructors.
        "count" | "date" | "datetime" | "time" | "localtime" | "localdatetime" => between(0, 1),

        // Exactly one argument.
        // Aggregates
        "sum" | "avg" | "min" | "max" | "collect" | "stdev" | "stdevp"
        // Entity introspection / path
        | "id" | "type" | "labels" | "keys" | "properties" | "nodes" | "relationships"
        // String
        | "tolower" | "toupper" | "trim" | "ltrim" | "rtrim" | "reverse"
        | "char_length" | "normalize"
        // Type conversion / introspection
        | "tostring" | "tointeger" | "toint" | "tofloat" | "toboolean" | "tobooleanornull"
        | "valuetype"
        // Math
        | "abs" | "ceil" | "floor" | "round" | "sqrt" | "sign"
        | "log" | "ln" | "log10" | "exp" | "sin" | "cos" | "tan" | "asin" | "acos" | "atan"
        | "degrees" | "radians"
        // List / size
        | "size" | "length" | "head" | "tail" | "last"
        // Temporal / spatial / vector
        | "duration" | "point" | "tointegerlist" | "tofloatlist"
        | "vector_dimension_count" => exactly(1),

        // Exactly two arguments.
        "percentilecont" | "percentiledisc"
        | "split" | "left" | "right"
        | "atan2"
        | "date.truncate" | "datetime.truncate" | "duration.between" | "duration.indays"
        | "distance"
        | "vector_norm"
        | "vector.similarity.cosine" | "vector.similarity.euclidean" => exactly(2),

        // Two or three arguments.
        "substring" | "range" => between(2, 3),

        // Exactly three arguments.
        "replace" | "lpad" | "rpad" | "vector" | "vector_distance" => exactly(3),

        // One or more arguments.
        "coalesce" => (1, None),

        _ => return None,
    };

    Some(arity)
}
1367
1368/// Special-case the literal-enum arguments of vector construction and
1369/// metric functions. Bare identifiers like `INTEGER` or `COSINE` parse
1370/// as `Expr::Variable` today; for these specific slots we treat a bare
1371/// identifier as a string literal rather than resolving it against the
1372/// scope. Strings are passed through untouched, and any other expression
1373/// shape falls through to the normal analyzer so runtime type errors
1374/// still surface cleanly.
1375fn try_vector_enum_literal(fn_name: &str, arg_idx: usize, expr: &Expr) -> Option<ResolvedExpr> {
1376    let fn_lower = fn_name.to_ascii_lowercase();
1377    let takes_enum_here = match fn_lower.as_str() {
1378        "vector" => arg_idx == 2,
1379        "vector_distance" => arg_idx == 2,
1380        "vector_norm" => arg_idx == 1,
1381        _ => false,
1382    };
1383    if !takes_enum_here {
1384        return None;
1385    }
1386    if let Expr::Variable(v) = expr {
1387        return Some(ResolvedExpr::Literal(LiteralValue::String(v.name.clone())));
1388    }
1389    None
1390}
1391
1392fn is_aggregate_function(name: &str) -> bool {
1393    AGGREGATE_FUNCTIONS.contains(&name.to_ascii_lowercase().as_str())
1394}
1395
1396fn validate_function_name(name: &str, start: usize, end: usize) -> Result<(), SemanticError> {
1397    let lower = name.to_ascii_lowercase();
1398    if KNOWN_FUNCTIONS.contains(&lower.as_str()) {
1399        Ok(())
1400    } else {
1401        Err(SemanticError::UnknownFunction(name.to_string(), start, end))
1402    }
1403}
1404
1405fn validate_function_arity(name: &str, arg_count: usize) -> Result<(), SemanticError> {
1406    let lower = name.to_ascii_lowercase();
1407    if let Some((min, max)) = function_arity(&lower) {
1408        if arg_count < min {
1409            let expected = if max == Some(min) {
1410                format!("{min}")
1411            } else if let Some(mx) = max {
1412                format!("{min}..{mx}")
1413            } else {
1414                format!("at least {min}")
1415            };
1416            return Err(SemanticError::WrongArity(
1417                name.to_string(),
1418                expected,
1419                arg_count,
1420            ));
1421        }
1422        if let Some(mx) = max {
1423            if arg_count > mx {
1424                let expected = if mx == min {
1425                    format!("{min}")
1426                } else {
1427                    format!("{min}..{mx}")
1428                };
1429                return Err(SemanticError::WrongArity(
1430                    name.to_string(),
1431                    expected,
1432                    arg_count,
1433                ));
1434            }
1435        }
1436    }
1437    Ok(())
1438}
1439
/// Renders label groups as one string for duplicate-variable detection.
///
/// Labels inside a group are joined with `|`; the groups themselves are
/// joined with `:`. An empty input yields an empty string.
fn format_label_groups(groups: &[impl AsRef<[String]>]) -> String {
    let mut rendered = String::new();
    for (position, group) in groups.iter().enumerate() {
        if position > 0 {
            rendered.push(':');
        }
        rendered.push_str(&group.as_ref().join("|"));
    }
    rendered
}
1448
1449/// Extract column names and explicit-alias flags from the RETURN clause.
1450fn return_column_info(clauses: &[ResolvedClause]) -> Option<Vec<(String, bool)>> {
1451    for clause in clauses.iter().rev() {
1452        if let ResolvedClause::Return(ret) = clause {
1453            return Some(
1454                ret.items
1455                    .iter()
1456                    .map(|p| (p.name.clone(), p.explicit_alias))
1457                    .collect(),
1458            );
1459        }
1460    }
1461    None
1462}
1463
1464/// Returns true if the resolved expression contains any aggregate function call.
1465fn expr_contains_aggregate(expr: &ResolvedExpr) -> bool {
1466    match expr {
1467        ResolvedExpr::Function { name, args, .. } => {
1468            if is_aggregate_function(name) {
1469                return true;
1470            }
1471            args.iter().any(expr_contains_aggregate)
1472        }
1473        ResolvedExpr::Binary { lhs, rhs, .. } => {
1474            expr_contains_aggregate(lhs) || expr_contains_aggregate(rhs)
1475        }
1476        ResolvedExpr::Unary { expr, .. } => expr_contains_aggregate(expr),
1477        ResolvedExpr::Property { expr, .. } => expr_contains_aggregate(expr),
1478        ResolvedExpr::List(items) => items.iter().any(expr_contains_aggregate),
1479        ResolvedExpr::Map(items) => items.iter().any(|(_, v)| expr_contains_aggregate(v)),
1480        ResolvedExpr::Case {
1481            input,
1482            alternatives,
1483            else_expr,
1484        } => {
1485            input.as_ref().is_some_and(|e| expr_contains_aggregate(e))
1486                || alternatives
1487                    .iter()
1488                    .any(|(w, t)| expr_contains_aggregate(w) || expr_contains_aggregate(t))
1489                || else_expr
1490                    .as_ref()
1491                    .is_some_and(|e| expr_contains_aggregate(e))
1492        }
1493        ResolvedExpr::ListPredicate {
1494            list, predicate, ..
1495        } => expr_contains_aggregate(list) || expr_contains_aggregate(predicate),
1496        ResolvedExpr::ListComprehension {
1497            list,
1498            filter,
1499            map_expr,
1500            ..
1501        } => {
1502            expr_contains_aggregate(list)
1503                || filter.as_ref().is_some_and(|e| expr_contains_aggregate(e))
1504                || map_expr
1505                    .as_ref()
1506                    .is_some_and(|e| expr_contains_aggregate(e))
1507        }
1508        ResolvedExpr::Reduce {
1509            init, list, expr, ..
1510        } => {
1511            expr_contains_aggregate(init)
1512                || expr_contains_aggregate(list)
1513                || expr_contains_aggregate(expr)
1514        }
1515        ResolvedExpr::Index { expr, index } => {
1516            expr_contains_aggregate(expr) || expr_contains_aggregate(index)
1517        }
1518        ResolvedExpr::Slice { expr, from, to } => {
1519            expr_contains_aggregate(expr)
1520                || from.as_ref().is_some_and(|e| expr_contains_aggregate(e))
1521                || to.as_ref().is_some_and(|e| expr_contains_aggregate(e))
1522        }
1523        ResolvedExpr::MapProjection { base, selectors } => expr_contains_aggregate(base)
1524            || selectors.iter().any(
1525                |s| matches!(s, ResolvedMapSelector::Literal(_, e) if expr_contains_aggregate(e)),
1526            ),
1527        ResolvedExpr::ExistsSubquery { .. } | ResolvedExpr::PatternComprehension { .. } => false,
1528        ResolvedExpr::Variable(_) | ResolvedExpr::Literal(_) | ResolvedExpr::Parameter(_) => false,
1529    }
1530}
1531
1532fn projection_name(expr: &Expr) -> String {
1533    match expr {
1534        Expr::Variable(v) => v.name.clone(),
1535        Expr::Property { key, .. } => key.clone(),
1536        Expr::FunctionCall { name, .. } => {
1537            name.last().cloned().unwrap_or_else(|| "expr".to_string())
1538        }
1539        _ => "expr".to_string(),
1540    }
1541}
1542
1543#[cfg(test)]
1544mod tests {
1545    use super::*;
1546    use lora_parser::parse_query;
1547    use lora_store::{GraphStorageMut, InMemoryGraph, Properties};
1548
/// A relationship type that does not exist yet is accepted in a CREATE
/// pattern but rejected in a plain MATCH once the graph has relationships.
#[test]
fn create_allows_new_relationship_type_when_graph_is_not_empty() {
    // Three User nodes and a single FOLLOWS relationship, so the graph is
    // non-empty and KNOWS is an unknown relationship type.
    let mut graph = InMemoryGraph::new();
    let follower = graph.create_node(vec!["User".into()], Properties::new());
    let followee = graph.create_node(vec!["User".into()], Properties::new());
    let _bystander = graph.create_node(vec!["User".into()], Properties::new());
    graph
        .create_relationship(follower.id, followee.id, "FOLLOWS", Properties::new())
        .unwrap();

    let create_doc = parse_query(
        "MATCH (a:User {id: 2}), (b:User {id: 3}) CREATE (a)-[:KNOWS]->(b) RETURN a, b",
    )
    .unwrap();
    let create_outcome = Analyzer::new(&graph).analyze(&create_doc);
    assert!(create_outcome.is_ok());

    let read_doc = parse_query("MATCH (a)-[:KNOWS]->(b) RETURN a, b").unwrap();
    let read_outcome = Analyzer::new(&graph).analyze(&read_doc);
    assert!(matches!(
        read_outcome,
        Err(SemanticError::UnknownRelationshipType(rel_type)) if rel_type == "KNOWS"
    ));
}
1575
1576    // --- Vector function analyzer tests ----------------------------------
1577
/// `vector([1,2,3], 3, INTEGER)` must not try to resolve INTEGER as a
/// variable: the third argument is an enum-like type literal.
#[test]
fn vector_rewrites_bare_coordinate_type_to_string_literal() {
    let graph = InMemoryGraph::new();
    let doc = parse_query("RETURN vector([1, 2, 3], 3, INTEGER) AS v").unwrap();

    let resolved = Analyzer::new(&graph)
        .analyze(&doc)
        .expect("INTEGER should be rewritten as a string literal, not a variable");

    // The third argument of the returned function call must have come
    // through as a String literal.
    let args = match return_expr(&resolved.clauses) {
        ResolvedExpr::Function { args, .. } => args,
        _ => panic!("expected function call"),
    };
    assert!(matches!(
        args.get(2),
        Some(ResolvedExpr::Literal(LiteralValue::String(s))) if s == "INTEGER"
    ));
}
1601
/// The metric slot (third argument) of `vector_distance` turns a bare
/// identifier into a string literal.
#[test]
fn vector_distance_rewrites_bare_metric_identifier() {
    let graph = InMemoryGraph::new();
    let doc = parse_query(
        "RETURN vector_distance(vector([1,2], 2, INT), vector([3,4], 2, INT), EUCLIDEAN) AS d",
    )
    .unwrap();

    let resolved = Analyzer::new(&graph)
        .analyze(&doc)
        .expect("EUCLIDEAN should be rewritten as a string literal");

    let args = match return_expr(&resolved.clauses) {
        ResolvedExpr::Function { args, .. } => args,
        _ => panic!("expected function call"),
    };
    assert!(matches!(
        args.get(2),
        Some(ResolvedExpr::Literal(LiteralValue::String(s))) if s == "EUCLIDEAN"
    ));
}
1624
/// The metric slot (second argument) of `vector_norm` turns a bare
/// identifier into a string literal.
///
/// Besides checking that analysis succeeds, this inspects the resolved
/// argument so the test fails if the identifier is accepted by any route
/// other than the string-literal rewrite.
#[test]
fn vector_norm_rewrites_bare_metric_identifier() {
    let graph = InMemoryGraph::new();
    let doc =
        parse_query("RETURN vector_norm(vector([1,2,3], 3, FLOAT32), MANHATTAN) AS n").unwrap();
    let mut analyzer = Analyzer::new(&graph);
    let resolved = analyzer
        .analyze(&doc)
        .expect("MANHATTAN should be rewritten as a string literal");

    let Some(ResolvedClause::Return(ret)) = resolved.clauses.last() else {
        panic!("expected RETURN clause");
    };
    let ResolvedExpr::Function { args, .. } = &ret.items[0].expr else {
        panic!("expected function call");
    };
    // For vector_norm the enum slot is argument index 1.
    assert!(matches!(
        args.get(1),
        Some(ResolvedExpr::Literal(LiteralValue::String(s))) if s == "MANHATTAN"
    ));
}
1633
/// Outside an enum slot, INTEGER is an ordinary variable reference; this
/// guards against the string-literal rewrite leaking into other positions.
#[test]
fn bare_identifier_outside_enum_slot_still_resolves_as_variable() {
    let graph = InMemoryGraph::new();
    let doc = parse_query("RETURN INTEGER AS v").unwrap();

    let outcome = Analyzer::new(&graph).analyze(&doc);

    assert!(matches!(
        outcome,
        Err(SemanticError::UnknownVariable(name)) if name == "INTEGER"
    ));
}
1646
/// `vector()` takes exactly three arguments; a two-argument call is
/// reported as a wrong-arity error naming the function and the actual count.
#[test]
fn vector_function_arity_is_validated() {
    let graph = InMemoryGraph::new();
    let doc = parse_query("RETURN vector([1, 2, 3], 3) AS v").unwrap();

    let outcome = Analyzer::new(&graph).analyze(&doc);

    assert!(matches!(
        outcome,
        Err(SemanticError::WrongArity(name, _, 2)) if name == "vector"
    ));
}
1658
/// A namespaced function that is not in the known-function table is
/// rejected, and the error reports the full dotted name.
#[test]
fn unknown_vector_function_is_rejected() {
    let graph = InMemoryGraph::new();
    let doc = parse_query("RETURN vector.bogus([1,2,3], 3, INTEGER) AS v").unwrap();

    let outcome = Analyzer::new(&graph).analyze(&doc);

    assert!(matches!(
        outcome,
        Err(SemanticError::UnknownFunction(name, _, _)) if name == "vector.bogus"
    ));
}
1669
1670    // --- Enum-literal rewrite scope --------------------------------------
1671
1672    /// Walk a ResolvedClause list, return the last RETURN's first item.
1673    fn return_expr(clauses: &[ResolvedClause]) -> &ResolvedExpr {
1674        let Some(ResolvedClause::Return(ret)) = clauses.last() else {
1675            panic!("expected RETURN clause");
1676        };
1677        &ret.items[0].expr
1678    }
1679
/// Only slot 2 of `vector()` is the enum-type slot: INTEGER in slot 0 or
/// slot 1 is still resolved as a variable and therefore reported as unknown.
#[test]
fn vector_does_not_rewrite_first_or_second_argument() {
    let graph = InMemoryGraph::new();

    // Bare identifier in slot 0.
    let first_slot = parse_query("RETURN vector(INTEGER, 3, INTEGER) AS v").unwrap();
    let first_outcome = Analyzer::new(&graph).analyze(&first_slot);
    assert!(matches!(
        first_outcome,
        Err(SemanticError::UnknownVariable(name)) if name == "INTEGER"
    ));

    // Bare identifier in slot 1.
    let second_slot = parse_query("RETURN vector([1, 2, 3], INTEGER, INTEGER) AS v").unwrap();
    let second_outcome = Analyzer::new(&graph).analyze(&second_slot);
    assert!(matches!(
        second_outcome,
        Err(SemanticError::UnknownVariable(name)) if name == "INTEGER"
    ));
}
1700
#[test]
fn vector_distance_does_not_rewrite_first_or_second_argument() {
    // EUCLIDEAN in the leading argument position must be resolved as a
    // variable, so analysis is expected to fail with UnknownVariable.
    let query = "RETURN vector_distance(EUCLIDEAN, vector([1,2], 2, INT), EUCLIDEAN) AS d";
    let store = InMemoryGraph::new();
    let parsed = parse_query(query).unwrap();
    let outcome = Analyzer::new(&store).analyze(&parsed);
    assert!(matches!(
        outcome,
        Err(SemanticError::UnknownVariable(name)) if name == "EUCLIDEAN"
    ));
}
1713
#[test]
fn vector_norm_does_not_rewrite_first_argument() {
    // MANHATTAN in the first argument position is expected to be looked up
    // as a variable and reported as unknown.
    let store = InMemoryGraph::new();
    let parsed = parse_query("RETURN vector_norm(MANHATTAN, EUCLIDEAN) AS n").unwrap();
    let outcome = Analyzer::new(&store).analyze(&parsed);
    assert!(matches!(
        outcome,
        Err(SemanticError::UnknownVariable(name)) if name == "MANHATTAN"
    ));
}
1724
#[test]
fn parameter_in_enum_slot_is_preserved_as_parameter() {
    // A `$param` placed in the enum slot must stay a parameter rather than
    // being rewritten, so callers can pass coordinate/metric names
    // dynamically.
    let store = InMemoryGraph::new();
    let parsed = parse_query("RETURN vector([1, 2, 3], 3, $type) AS v").unwrap();
    let resolved = Analyzer::new(&store)
        .analyze(&parsed)
        .expect("parameter should be kept");

    match return_expr(&resolved.clauses) {
        ResolvedExpr::Function { args, .. } => {
            assert!(matches!(args.get(2), Some(ResolvedExpr::Parameter(p)) if p == "type"));
        }
        _ => panic!("expected function"),
    }
}
1738
#[test]
fn parameter_in_vector_norm_metric_slot_is_preserved() {
    // The second argument of vector_norm() is given as `$metric`; it must
    // come out of analysis as a parameter named "metric".
    let query = "RETURN vector_norm(vector([1,2,3], 3, FLOAT32), $metric) AS n";
    let store = InMemoryGraph::new();
    let parsed = parse_query(query).unwrap();
    let resolved = Analyzer::new(&store)
        .analyze(&parsed)
        .expect("parameter should be kept");

    match return_expr(&resolved.clauses) {
        ResolvedExpr::Function { args, .. } => {
            assert!(matches!(args.get(1), Some(ResolvedExpr::Parameter(p)) if p == "metric"));
        }
        _ => panic!("expected function"),
    }
}
1751
#[test]
fn variable_named_like_metric_outside_enum_slot_is_not_rewritten() {
    // UNWIND binds a variable literally named COSINE, which is also a
    // metric name. Referring to it outside an enum slot must bind to that
    // variable as usual, so analysis succeeds.
    let query = "UNWIND [1.0, 2.0, 3.0] AS COSINE RETURN COSINE AS val";
    let store = InMemoryGraph::new();
    let parsed = parse_query(query).unwrap();
    let outcome = Analyzer::new(&store).analyze(&parsed);
    assert!(outcome.is_ok());
}
1761
#[test]
fn string_literal_in_enum_slot_remains_string_literal() {
    // A quoted string in slot 2 of vector() must reach the resolved tree
    // unchanged, as a string literal with the same text.
    let store = InMemoryGraph::new();
    let parsed = parse_query("RETURN vector([1, 2, 3], 3, 'INTEGER32') AS v").unwrap();
    let resolved = Analyzer::new(&store)
        .analyze(&parsed)
        .expect("string literal must be passed through");

    match return_expr(&resolved.clauses) {
        ResolvedExpr::Function { args, .. } => {
            let third = args.get(2);
            assert!(matches!(
                third,
                Some(ResolvedExpr::Literal(LiteralValue::String(s))) if s == "INTEGER32"
            ));
        }
        _ => panic!("expected function"),
    }
}
1778
1779    // --- Arity coverage for every vector function ------------------------
1780
#[test]
fn every_vector_function_has_arity_guard() {
    // Each case is (query text, number of arguments the query passes to the
    // function under test, function name expected in the WrongArity error).
    let cases = &[
        ("RETURN vector([1], 1) AS v", 2, "vector"),
        ("RETURN vector([1], 1, INTEGER, INTEGER) AS v", 4, "vector"),
        (
            "RETURN vector.similarity.cosine([1]) AS s",
            1,
            "vector.similarity.cosine",
        ),
        (
            "RETURN vector.similarity.cosine([1],[2],[3]) AS s",
            3,
            "vector.similarity.cosine",
        ),
        (
            "RETURN vector.similarity.euclidean([1]) AS s",
            1,
            "vector.similarity.euclidean",
        ),
        (
            "RETURN vector_distance(vector([1],1,INT)) AS d",
            1,
            "vector_distance",
        ),
        (
            "RETURN vector_norm(vector([1],1,INT)) AS n",
            1,
            "vector_norm",
        ),
        (
            "RETURN vector_dimension_count() AS n",
            0,
            "vector_dimension_count",
        ),
        ("RETURN toIntegerList() AS l", 0, "toIntegerList"),
        ("RETURN toFloatList() AS l", 0, "toFloatList"),
    ];
    for &(query, expected_args, name) in cases {
        // A fresh graph and analyzer per case keeps the cases independent.
        let graph = InMemoryGraph::new();
        let doc = parse_query(query).unwrap();
        let mut analyzer = Analyzer::new(&graph);
        match analyzer.analyze(&doc) {
            Err(SemanticError::WrongArity(got_name, _, got_args)) => {
                // Names are compared case-insensitively; the query is
                // attached so a failure identifies the offending case.
                assert_eq!(
                    got_name.to_ascii_lowercase(),
                    name.to_ascii_lowercase(),
                    "query {query:?}"
                );
                assert_eq!(got_args, expected_args, "query {query:?}");
            }
            other => panic!("query {query:?} expected WrongArity, got {other:?}"),
        }
    }
}
1834
#[test]
fn dotted_similarity_typo_is_rejected() {
    // `vector.similarity.manhattan` is expected to be reported as an
    // unknown function under its full dotted name.
    let query = "RETURN vector.similarity.manhattan([1,2],[3,4]) AS s";
    let store = InMemoryGraph::new();
    let parsed = parse_query(query).unwrap();
    let outcome = Analyzer::new(&store).analyze(&parsed);
    assert!(matches!(
        outcome,
        Err(SemanticError::UnknownFunction(name, _, _))
            if name == "vector.similarity.manhattan"
    ));
}
1846}
1847
1848#[derive(Debug, Clone)]
1849struct ExportedAlias {
1850    name: String,
1851    id: VarId,
1852}
1853
1854#[derive(Debug, Clone)]
1855struct AnalyzedProjectionBody {
1856    items: Vec<ResolvedProjection>,
1857    include_existing: bool,
1858    exported_aliases: Vec<ExportedAlias>,
1859    order: Vec<ResolvedSortItem>,
1860    skip: Option<ResolvedExpr>,
1861    limit: Option<ResolvedExpr>,
1862}