sparrowdb_execution/
engine.rs

1//! Query execution engine.
2//!
3//! Converts a bound Cypher AST into an operator tree and executes it,
4//! returning a materialized `QueryResult`.
5
6use std::collections::{HashMap, HashSet};
7use std::path::Path;
8
9use tracing::info_span;
10
11use sparrowdb_catalog::catalog::{Catalog, LabelId};
12use sparrowdb_common::{col_id_of, NodeId, Result};
13use sparrowdb_cypher::ast::{
14    BinOpKind, CallStatement, CreateStatement, Expr, ListPredicateKind, Literal,
15    MatchCreateStatement, MatchMergeRelStatement, MatchMutateStatement,
16    MatchOptionalMatchStatement, MatchStatement, MatchWithStatement, Mutation,
17    OptionalMatchStatement, PathPattern, PipelineStage, PipelineStatement, ReturnItem, SortDir,
18    Statement, UnionStatement, UnwindStatement, WithClause,
19};
20use sparrowdb_cypher::{bind, parse};
21use sparrowdb_storage::csr::{CsrBackward, CsrForward};
22use sparrowdb_storage::edge_store::{DeltaRecord, EdgeStore, RelTableId};
23use sparrowdb_storage::fulltext_index::FulltextIndex;
24use sparrowdb_storage::node_store::{NodeStore, Value as StoreValue};
25use sparrowdb_storage::property_index::PropertyIndex;
26use sparrowdb_storage::text_index::TextIndex;
27use sparrowdb_storage::wal::WalReplayer;
28
29use crate::types::{QueryResult, Value};
30
31// ── DegreeCache (SPA-272) ─────────────────────────────────────────────────────
32
/// Pre-computed out-degree for every node slot across all relationship types.
///
/// Built **lazily** on first call to [`Engine::top_k_by_degree`] or
/// [`Engine::out_degree`] by scanning:
/// 1. CSR forward files (checkpointed edges) — each slot contributes the
///    length of its neighbor slice.
/// 2. Delta log records (uncheckpointed edges) — each `DeltaRecord.src` increments
///    the source slot's count.
///
/// Keyed by the lower-32-bit slot extracted from `NodeId.0`
/// (i.e. `node_id & 0xFFFF_FFFF`).
///
/// NOTE(review): the key carries no label component, so nodes of different
/// labels that share a slot number are tallied into the same entry — confirm
/// this is intended by the callers.
///
/// Lookup is a single hash-map probe.  [`Engine::top_k_by_degree`] uses this
/// cache so that ranking the nodes of a label costs one lookup per slot
/// (up to the label's HWM) instead of re-scanning edges for every node.
///
/// Queries that never call `top_k_by_degree` or `out_degree` (e.g. point
/// lookups, scans, hop traversals) pay zero cost: no CSR iteration, no
/// delta-log reads.
#[derive(Debug, Default)]
pub struct DegreeCache {
    /// Maps slot → total out-degree across all relationship types.
    /// Slots with no outgoing edges are simply absent.
    inner: HashMap<u64, u32>,
}
55
56impl DegreeCache {
57    /// Return the total out-degree for `slot` across all relationship types.
58    ///
59    /// Returns `0` for slots that have no outgoing edges.
60    pub fn out_degree(&self, slot: u64) -> u32 {
61        self.inner.get(&slot).copied().unwrap_or(0)
62    }
63
64    /// Increment the out-degree counter for `slot` by 1.
65    fn increment(&mut self, slot: u64) {
66        *self.inner.entry(slot).or_insert(0) += 1;
67    }
68
69    /// Build a `DegreeCache` from a set of CSR forward files and delta records.
70    ///
71    /// `csrs` — all per-rel-type CSR forward files loaded at engine open.
72    /// `delta` — all delta-log records (uncommitted/uncheckpointed edges).
73    fn build(csrs: &HashMap<u32, CsrForward>, delta: &[DeltaRecord]) -> Self {
74        let mut cache = DegreeCache::default();
75
76        // 1. Accumulate from CSR: for each rel type, for each src slot, add
77        //    the slot's out-degree (= neighbors slice length).
78        for csr in csrs.values() {
79            for slot in 0..csr.n_nodes() {
80                let deg = csr.neighbors(slot).len() as u32;
81                if deg > 0 {
82                    *cache.inner.entry(slot).or_insert(0) += deg;
83                }
84            }
85        }
86
87        // 2. Accumulate from delta log: each record increments src's slot.
88        //    Lower 32 bits of NodeId = within-label slot number.
89        for rec in delta {
90            let src_slot = rec.src.0 & 0xFFFF_FFFF;
91            cache.increment(src_slot);
92        }
93
94        cache
95    }
96}
97
98// ── DegreeStats (SPA-273) ─────────────────────────────────────────────────────
99
/// Out-degree summary for a single relationship type.
///
/// [`ReadSnapshot::rel_degree_stats`] produces one value per CSR forward
/// file, lazily on first access, by visiting every source slot and folding in
/// each **non-zero** out-degree.  Only the CSR files are consulted there, so
/// edges that exist solely in the delta log are not reflected.
///
/// Intended for join-order heuristics: `mean()` estimates how many neighbors
/// a traversal over this relationship type yields per source node.
#[derive(Debug, Default, Clone)]
pub struct DegreeStats {
    /// Smallest non-zero out-degree observed (`0` when nothing was observed).
    pub min: u32,
    /// Largest out-degree observed (`0` when nothing was observed).
    pub max: u32,
    /// Sum of the observed out-degrees — the numerator of the mean.
    pub total: u64,
    /// Number of source slots that contributed to `total` — the denominator
    /// of the mean.
    pub count: u64,
}

impl DegreeStats {
    /// Average out-degree, `total / count`.
    ///
    /// When `count` is zero the result is `1.0`: this sidesteps the division
    /// by zero and treats an unknown degree as "at least one".
    pub fn mean(&self) -> f64 {
        match self.count {
            0 => 1.0,
            contributors => self.total as f64 / contributors as f64,
        }
    }
}
133
/// Tri-state result for relationship table lookup.
///
/// Separates "no rel-type filter" from "rel type not in the catalog".  Both
/// of those cases previously came back as `Option::None` from
/// `resolve_rel_table_id`, which made typed queries fall back to scanning
/// all edge stores when the rel type was not yet in the catalog (SPA-185).
#[derive(Debug, Clone, Copy)]
enum RelTableLookup {
    /// The query has no rel-type filter — scan all rel types.
    All,
    /// The rel type was found in the catalog; use this specific store.
    /// The payload is the relationship table id widened to `u32`.
    Found(u32),
    /// The rel type was specified but not found in the catalog — the
    /// edge cannot exist, so return empty results immediately.
    NotFound,
}
149
/// Immutable snapshot of storage state required to execute a read query.
///
/// Groups the fields that are needed for read-only access to the graph so they
/// can eventually be cloned/shared across parallel executor threads without
/// bundling the mutable per-query state that lives in [`Engine`].
pub struct ReadSnapshot {
    /// Node storage; consulted for per-label high-water marks via
    /// `hwm_for_label`.
    pub store: NodeStore,
    /// Schema catalog: labels, relationship tables and their ids.
    pub catalog: Catalog,
    /// Per-relationship-type CSR forward files, keyed by `RelTableId` (u32).
    pub csrs: HashMap<u32, CsrForward>,
    /// Database root directory; edge-store delta logs and full-text indexes
    /// are opened relative to it.
    pub db_root: std::path::PathBuf,
    /// Cached node count per label.
    ///
    /// Seeded in [`Engine::new_with_cached_index`] from each label's
    /// high-water mark; labels whose HWM is `0` (or cannot be read) are left
    /// out of the map.
    ///
    /// Used by the planner to estimate cardinality without re-scanning the
    /// node store's high-water-mark file on every query.
    ///
    /// NOTE(review): the original comment says this is a *live* count that is
    /// updated on every node creation; the write path is not visible in this
    /// section, and an HWM is not necessarily a live count — confirm.
    pub label_row_counts: HashMap<LabelId, usize>,
    /// Per-relationship-type out-degree statistics (SPA-273).
    ///
    /// Keyed by `RelTableId` (u32).  Initialized **lazily** on first access
    /// via [`ReadSnapshot::rel_degree_stats`].  Simple traversal queries
    /// (Q3, Q4) that never consult the planner heuristics pay zero CSR-scan
    /// cost.  The scan is only triggered when a query actually needs degree
    /// statistics (e.g. join-order planning).
    ///
    /// `OnceLock` is used instead of `OnceCell` so that `ReadSnapshot` remains
    /// `Sync` and can safely be shared across parallel BFS threads.
    rel_degree_stats: std::sync::OnceLock<HashMap<u32, DegreeStats>>,
}
178
179impl ReadSnapshot {
180    /// Return per-relationship-type out-degree statistics, computing them on
181    /// first call and caching the result for all subsequent calls.
182    ///
183    /// The CSR forward scan is only triggered once per `ReadSnapshot` instance,
184    /// and only when a caller actually needs degree statistics.  Queries that
185    /// never access this (e.g. simple traversals Q3/Q4) pay zero overhead.
186    pub fn rel_degree_stats(&self) -> &HashMap<u32, DegreeStats> {
187        self.rel_degree_stats.get_or_init(|| {
188            self.csrs
189                .iter()
190                .map(|(&rel_table_id, csr)| {
191                    let mut stats = DegreeStats::default();
192                    let mut first = true;
193                    for slot in 0..csr.n_nodes() {
194                        let deg = csr.neighbors(slot).len() as u32;
195                        if deg > 0 {
196                            if first {
197                                stats.min = deg;
198                                stats.max = deg;
199                                first = false;
200                            } else {
201                                if deg < stats.min {
202                                    stats.min = deg;
203                                }
204                                if deg > stats.max {
205                                    stats.max = deg;
206                                }
207                            }
208                            stats.total += deg as u64;
209                            stats.count += 1;
210                        }
211                    }
212                    (rel_table_id, stats)
213                })
214                .collect()
215        })
216    }
217}
218
/// The execution engine: owns a [`ReadSnapshot`] of the storage layer plus
/// the per-query state (parameters, deadline) and lazily built indexes/caches
/// used while executing a statement.
pub struct Engine {
    /// Storage state the engine reads from (node store, catalog, CSRs, …).
    pub snapshot: ReadSnapshot,
    /// Runtime query parameters supplied by the caller (e.g. `$name` → Value).
    pub params: HashMap<String, Value>,
    /// In-memory B-tree property equality index (SPA-249).
    ///
    /// Loaded **lazily** on first use for each `(label_id, col_id)` pair that a
    /// query actually filters on.  Queries with no property filter (e.g.
    /// `COUNT(*)`, hop traversals) never touch this and pay zero build cost.
    /// `RefCell` provides interior mutability so that `build_for` can be called
    /// from `&self` scan helpers without changing every method signature.
    ///
    /// May be pre-seeded from a shared cache at construction time and merged
    /// back into it with [`Engine::write_back_prop_index`].
    pub prop_index: std::cell::RefCell<PropertyIndex>,
    /// In-memory text search index for CONTAINS and STARTS WITH (SPA-251, SPA-274).
    ///
    /// Loaded **lazily** — only when a query has a CONTAINS or STARTS WITH
    /// predicate on a specific `(label_id, col_id)` pair, via
    /// `TextIndex::build_for`.  Queries with no text predicates (e.g.
    /// `COUNT(*)`, hop traversals) never trigger any TextIndex I/O.
    /// `RefCell` provides interior mutability so that `build_for` can be called
    /// from `&self` scan helpers without changing every method signature.
    /// Stores sorted `(decoded_string, slot)` pairs per `(label_id, col_id)`.
    /// - CONTAINS: linear scan avoids per-slot property-decode overhead.
    /// - STARTS WITH: binary-search prefix range — O(log n + k).
    pub text_index: std::cell::RefCell<TextIndex>,
    /// Optional per-query deadline (SPA-254).
    ///
    /// When `Some`, the engine checks this deadline at the top of each hot
    /// scan / traversal loop iteration.  If `Instant::now() >= deadline`,
    /// `Error::QueryTimeout` is returned immediately.  `None` means no
    /// deadline (backward-compatible default).
    pub deadline: Option<std::time::Instant>,
    /// Pre-computed out-degree for every node slot across all relationship types
    /// (SPA-272).
    ///
    /// Starts as `None` and is initialized **lazily** on first call to
    /// [`Engine::top_k_by_degree`] or [`Engine::out_degree`].  Queries that
    /// never need degree information (point lookups, full scans, hop
    /// traversals) pay zero cost at engine-open time: no CSR iteration, no
    /// delta-log I/O.
    ///
    /// `RefCell` provides interior mutability so the cache can be populated
    /// from `&self` methods without changing the signature of `top_k_by_degree`.
    pub degree_cache: std::cell::RefCell<Option<DegreeCache>>,
    /// Set of `(label_id, col_id)` pairs that carry a UNIQUE constraint (SPA-234).
    ///
    /// Populated by `CREATE CONSTRAINT ON (n:Label) ASSERT n.property IS UNIQUE`.
    /// Checked in `execute_create` before writing each node: if the property
    /// value already exists in `prop_index` for that `(label_id, col_id)`, the
    /// insert is rejected with `Error::InvalidArgument`.
    ///
    /// Empty on a freshly constructed engine.
    pub unique_constraints: HashSet<(u32, u32)>,
}
270
271impl Engine {
272    /// Create an engine with a pre-built per-type CSR map.
273    ///
274    /// The `csrs` map associates each `RelTableId` (u32) with its forward CSR.
275    /// Use [`Engine::with_single_csr`] in tests or legacy code that only has
276    /// one CSR.
277    pub fn new(
278        store: NodeStore,
279        catalog: Catalog,
280        csrs: HashMap<u32, CsrForward>,
281        db_root: &Path,
282    ) -> Self {
283        Self::new_with_cached_index(store, catalog, csrs, db_root, None)
284    }
285
286    /// Create an engine, optionally seeding the property index from a shared
287    /// cache.  When `cached_index` is `Some`, the index is cloned out of the
288    /// `RwLock` at construction time so the engine can use `RefCell` internally
289    /// without holding the lock.
290    pub fn new_with_cached_index(
291        store: NodeStore,
292        catalog: Catalog,
293        csrs: HashMap<u32, CsrForward>,
294        db_root: &Path,
295        cached_index: Option<&std::sync::RwLock<PropertyIndex>>,
296    ) -> Self {
297        // SPA-249 (lazy fix): property index is built on demand per
298        // (label_id, col_id) pair via PropertyIndex::build_for, called from
299        // execute_scan just before the first lookup for that pair.  Queries
300        // with no property filter (COUNT(*), hop traversals) never trigger
301        // any index I/O at all.
302        //
303        // SPA-274 (lazy text index): text search index is now also built lazily,
304        // mirroring the PropertyIndex pattern.  Only (label_id, col_id) pairs
305        // that appear in an actual CONTAINS or STARTS WITH predicate are loaded.
306        // Queries with no text predicates (COUNT(*), hop traversals, property
307        // lookups) pay zero TextIndex I/O cost.
308        //
309        // SPA-272 / perf fix: DegreeCache is now initialized lazily on first
310        // call to top_k_by_degree() or out_degree().  Queries that never need
311        // degree information (point lookups, full scans, hop traversals) pay
312        // zero cost at engine-open time: no CSR iteration, no delta-log I/O.
313        //
314        // SPA-Q1-perf: build label_row_counts ONCE at Engine::new() by reading
315        // each label's high-water-mark from the NodeStore HWM file (an O(1)
316        // in-memory map lookup after the first call per label).  Previously this
317        // map was left empty and only populated via the write path, so every
318        // read query started with an empty map, forcing the planner to fall back
319        // to per-scan HWM reads on every execution.  Now the snapshot carries a
320        // pre-populated map; the write path continues to increment it on creation.
321        let label_row_counts: HashMap<LabelId, usize> = catalog
322            .list_labels()
323            .unwrap_or_default()
324            .into_iter()
325            .filter_map(|(lid, _name)| {
326                let hwm = store.hwm_for_label(lid as u32).unwrap_or(0);
327                if hwm > 0 {
328                    Some((lid, hwm as usize))
329                } else {
330                    None
331                }
332            })
333            .collect();
334
335        // SPA-273 (lazy): rel_degree_stats is now computed on first access via
336        // ReadSnapshot::rel_degree_stats().  Simple traversal queries (Q3/Q4)
337        // that never consult degree statistics pay zero CSR-scan overhead here.
338        let snapshot = ReadSnapshot {
339            store,
340            catalog,
341            csrs,
342            db_root: db_root.to_path_buf(),
343            label_row_counts,
344            rel_degree_stats: std::sync::OnceLock::new(),
345        };
346
347        // If a shared cached index was provided, clone it out so we start
348        // with pre-loaded columns.  Otherwise start fresh.
349        let idx = cached_index
350            .and_then(|lock| lock.read().ok())
351            .map(|guard| guard.clone())
352            .unwrap_or_default();
353
354        Engine {
355            snapshot,
356            params: HashMap::new(),
357            prop_index: std::cell::RefCell::new(idx),
358            text_index: std::cell::RefCell::new(TextIndex::new()),
359            deadline: None,
360            degree_cache: std::cell::RefCell::new(None),
361            unique_constraints: HashSet::new(),
362        }
363    }
364
365    /// Convenience constructor for tests and legacy callers that have a single
366    /// [`CsrForward`] (stored at `RelTableId(0)`).
367    ///
368    /// SPA-185: prefer `Engine::new` with a full `HashMap<u32, CsrForward>` for
369    /// production use so that per-type filtering is correct.
370    pub fn with_single_csr(
371        store: NodeStore,
372        catalog: Catalog,
373        csr: CsrForward,
374        db_root: &Path,
375    ) -> Self {
376        let mut csrs = HashMap::new();
377        csrs.insert(0u32, csr);
378        Self::new(store, catalog, csrs, db_root)
379    }
380
381    /// Attach runtime query parameters to this engine instance.
382    ///
383    /// Parameters are looked up when evaluating `$name` expressions (e.g. in
384    /// `UNWIND $items AS x`).
385    pub fn with_params(mut self, params: HashMap<String, Value>) -> Self {
386        self.params = params;
387        self
388    }
389
390    /// Set a per-query deadline (SPA-254).
391    ///
392    /// The engine will return [`sparrowdb_common::Error::QueryTimeout`] if
393    /// `Instant::now() >= deadline` during any hot scan or traversal loop.
394    pub fn with_deadline(mut self, deadline: std::time::Instant) -> Self {
395        self.deadline = Some(deadline);
396        self
397    }
398
399    /// Merge the engine's lazily-populated property index into the shared cache
400    /// so that future read queries can skip I/O for columns we already loaded.
401    ///
402    /// Uses union/merge semantics: only columns not yet present in the shared
403    /// cache are added.  This prevents last-writer-wins races when multiple
404    /// concurrent read queries write back to the shared cache simultaneously.
405    ///
406    /// Called from `GraphDb` read paths after `execute_statement`.
407    pub fn write_back_prop_index(&self, shared: &std::sync::RwLock<PropertyIndex>) {
408        if let Ok(mut guard) = shared.write() {
409            guard.merge_from(&self.prop_index.borrow());
410        }
411    }
412
413    /// Check whether the per-query deadline has passed (SPA-254).
414    ///
415    /// Returns `Err(QueryTimeout)` if a deadline is set and has expired,
416    /// `Ok(())` otherwise.  Inline so the hot-path cost when `deadline` is
417    /// `None` compiles down to a single branch-not-taken.
418    #[inline]
419    fn check_deadline(&self) -> sparrowdb_common::Result<()> {
420        if let Some(dl) = self.deadline {
421            if std::time::Instant::now() >= dl {
422                return Err(sparrowdb_common::Error::QueryTimeout);
423            }
424        }
425        Ok(())
426    }
427
428    // ── Per-type CSR / delta helpers ─────────────────────────────────────────
429
430    /// Return the relationship table lookup state for `(src_label_id, dst_label_id, rel_type)`.
431    ///
432    /// - Empty `rel_type` → [`RelTableLookup::All`] (no type filter).
433    /// - Rel type found in catalog → [`RelTableLookup::Found(id)`].
434    /// - Rel type specified but not in catalog → [`RelTableLookup::NotFound`]
435    ///   (the typed edge cannot exist; callers must return empty results).
436    fn resolve_rel_table_id(
437        &self,
438        src_label_id: u32,
439        dst_label_id: u32,
440        rel_type: &str,
441    ) -> RelTableLookup {
442        if rel_type.is_empty() {
443            return RelTableLookup::All;
444        }
445        match self
446            .snapshot
447            .catalog
448            .get_rel_table(src_label_id as u16, dst_label_id as u16, rel_type)
449            .ok()
450            .flatten()
451        {
452            Some(id) => RelTableLookup::Found(id as u32),
453            None => RelTableLookup::NotFound,
454        }
455    }
456
457    /// Read delta records for a specific relationship type.
458    ///
459    /// Returns an empty `Vec` if the rel type has not been registered yet, or
460    /// if the delta file does not exist.
461    fn read_delta_for(&self, rel_table_id: u32) -> Vec<sparrowdb_storage::edge_store::DeltaRecord> {
462        EdgeStore::open(&self.snapshot.db_root, RelTableId(rel_table_id))
463            .and_then(|s| s.read_delta())
464            .unwrap_or_default()
465    }
466
467    /// Read delta records across **all** registered rel types.
468    ///
469    /// Used by code paths that traverse edges without a type filter.
470    fn read_delta_all(&self) -> Vec<sparrowdb_storage::edge_store::DeltaRecord> {
471        let ids = self.snapshot.catalog.list_rel_table_ids();
472        if ids.is_empty() {
473            // No rel types in catalog yet; fall back to table-id 0 (legacy).
474            return EdgeStore::open(&self.snapshot.db_root, RelTableId(0))
475                .and_then(|s| s.read_delta())
476                .unwrap_or_default();
477        }
478        ids.into_iter()
479            .flat_map(|(id, _, _, _)| {
480                EdgeStore::open(&self.snapshot.db_root, RelTableId(id as u32))
481                    .and_then(|s| s.read_delta())
482                    .unwrap_or_default()
483            })
484            .collect()
485    }
486
487    /// Return neighbor slots from the CSR for a given src slot and rel table.
488    fn csr_neighbors(&self, rel_table_id: u32, src_slot: u64) -> Vec<u64> {
489        self.snapshot
490            .csrs
491            .get(&rel_table_id)
492            .map(|csr| csr.neighbors(src_slot).to_vec())
493            .unwrap_or_default()
494    }
495
496    /// Return neighbor slots merged across **all** registered rel types.
497    fn csr_neighbors_all(&self, src_slot: u64) -> Vec<u64> {
498        let mut out: Vec<u64> = Vec::new();
499        for csr in self.snapshot.csrs.values() {
500            out.extend_from_slice(csr.neighbors(src_slot));
501        }
502        out
503    }
504
505    /// Ensure the [`DegreeCache`] is populated, building it lazily on first call.
506    ///
507    /// Reads all delta-log records for every known rel type and scans every CSR
508    /// forward file to tally out-degrees per source slot.  Subsequent calls are
509    /// O(1) — the cache is stored in `self.degree_cache` and reused.
510    ///
511    /// Called automatically by [`top_k_by_degree`] and [`out_degree`].
512    /// Queries that never call those methods (point lookups, full scans,
513    /// hop traversals) pay **zero** cost.
514    fn ensure_degree_cache(&self) {
515        let mut guard = self.degree_cache.borrow_mut();
516        if guard.is_some() {
517            return; // already built
518        }
519
520        // Read all delta-log records (uncheckpointed edges).
521        let delta_all: Vec<DeltaRecord> = {
522            let ids = self.snapshot.catalog.list_rel_table_ids();
523            if ids.is_empty() {
524                EdgeStore::open(&self.snapshot.db_root, RelTableId(0))
525                    .and_then(|s| s.read_delta())
526                    .unwrap_or_default()
527            } else {
528                ids.into_iter()
529                    .flat_map(|(id, _, _, _)| {
530                        EdgeStore::open(&self.snapshot.db_root, RelTableId(id as u32))
531                            .and_then(|s| s.read_delta())
532                            .unwrap_or_default()
533                    })
534                    .collect()
535            }
536        };
537
538        *guard = Some(DegreeCache::build(&self.snapshot.csrs, &delta_all));
539    }
540
541    /// Return the total out-degree for `slot` across all relationship types.
542    ///
543    /// Triggers lazy initialization of the [`DegreeCache`] on first call.
544    /// Returns `0` for slots with no outgoing edges.
545    pub fn out_degree(&self, slot: u64) -> u32 {
546        self.ensure_degree_cache();
547        self.degree_cache
548            .borrow()
549            .as_ref()
550            .expect("degree_cache populated by ensure_degree_cache")
551            .out_degree(slot)
552    }
553
554    /// Return the top-`k` nodes of `label_id` ordered by out-degree descending.
555    ///
556    /// Each element of the returned `Vec` is `(slot, out_degree)`.  Ties in
557    /// degree are broken by slot number (lower slot first) for determinism.
558    ///
559    /// Returns an empty `Vec` when `k == 0` or the label has no nodes.
560    ///
561    /// Uses [`DegreeCache`] for O(1) per-node lookups (SPA-272).
562    /// The cache is built lazily on first call — queries that never call this
563    /// method pay zero cost.
564    pub fn top_k_by_degree(&self, label_id: u32, k: usize) -> Result<Vec<(u64, u32)>> {
565        if k == 0 {
566            return Ok(vec![]);
567        }
568        let hwm = self.snapshot.store.hwm_for_label(label_id)?;
569        if hwm == 0 {
570            return Ok(vec![]);
571        }
572
573        self.ensure_degree_cache();
574        let cache = self.degree_cache.borrow();
575        let cache = cache
576            .as_ref()
577            .expect("degree_cache populated by ensure_degree_cache");
578
579        let mut pairs: Vec<(u64, u32)> = (0..hwm)
580            .map(|slot| (slot, cache.out_degree(slot)))
581            .collect();
582
583        // Sort descending by degree; break ties by ascending slot for determinism.
584        pairs.sort_unstable_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
585        pairs.truncate(k);
586        Ok(pairs)
587    }
588
589    /// Parse, bind, plan, and execute a Cypher query.
590    ///
591    /// Takes `&mut self` because `CREATE` statements auto-register labels in
592    /// the catalog and write nodes to the node store (SPA-156).
593    pub fn execute(&mut self, cypher: &str) -> Result<QueryResult> {
594        let stmt = {
595            let _parse_span = info_span!("sparrowdb.parse", cypher = cypher).entered();
596            parse(cypher)?
597        };
598
599        let bound = {
600            let _bind_span = info_span!("sparrowdb.bind").entered();
601            bind(stmt, &self.snapshot.catalog)?
602        };
603
604        {
605            let _plan_span = info_span!("sparrowdb.plan_execute").entered();
606            self.execute_bound(bound.inner)
607        }
608    }
609
610    /// Execute an already-bound [`Statement`] directly.
611    ///
612    /// Useful for callers (e.g. `WriteTx`) that have already parsed and bound
613    /// the statement and want to dispatch CHECKPOINT/OPTIMIZE themselves.
614    pub fn execute_statement(&mut self, stmt: Statement) -> Result<QueryResult> {
615        self.execute_bound(stmt)
616    }
617
618    fn execute_bound(&mut self, stmt: Statement) -> Result<QueryResult> {
619        match stmt {
620            Statement::Match(m) => self.execute_match(&m),
621            Statement::MatchWith(mw) => self.execute_match_with(&mw),
622            Statement::Unwind(u) => self.execute_unwind(&u),
623            Statement::Create(c) => self.execute_create(&c),
624            // Mutation statements require a write transaction owned by the
625            // caller (GraphDb). They are dispatched via the public helpers
626            // below and should not reach execute_bound in normal use.
627            Statement::Merge(_)
628            | Statement::MatchMergeRel(_)
629            | Statement::MatchMutate(_)
630            | Statement::MatchCreate(_) => Err(sparrowdb_common::Error::InvalidArgument(
631                "mutation statements must be executed via execute_mutation".into(),
632            )),
633            Statement::OptionalMatch(om) => self.execute_optional_match(&om),
634            Statement::MatchOptionalMatch(mom) => self.execute_match_optional_match(&mom),
635            Statement::Union(u) => self.execute_union(u),
636            Statement::Checkpoint | Statement::Optimize => Ok(QueryResult::empty(vec![])),
637            Statement::Call(c) => self.execute_call(&c),
638            Statement::Pipeline(p) => self.execute_pipeline(&p),
639            Statement::CreateIndex { label, property } => {
640                self.execute_create_index(&label, &property)
641            }
642            Statement::CreateConstraint { label, property } => {
643                self.execute_create_constraint(&label, &property)
644            }
645        }
646    }
647
648    // ── CALL procedure dispatch ──────────────────────────────────────────────
649
650    /// Dispatch a `CALL` statement to the appropriate built-in procedure.
651    ///
652    /// Currently implemented procedures:
653    /// - `db.index.fulltext.queryNodes(indexName, query)` — full-text search
654    fn execute_call(&self, c: &CallStatement) -> Result<QueryResult> {
655        match c.procedure.as_str() {
656            "db.index.fulltext.queryNodes" => self.call_fulltext_query_nodes(c),
657            "db.schema" => self.call_db_schema(c),
658            "db.stats" => self.call_db_stats(c),
659            other => Err(sparrowdb_common::Error::InvalidArgument(format!(
660                "unknown procedure: {other}"
661            ))),
662        }
663    }
664
665    /// Implementation of `CALL db.index.fulltext.queryNodes(indexName, query)`.
666    ///
667    /// Args:
668    ///   0 — index name (string literal or param)
669    ///   1 — query string (string literal or param)
670    ///
671    /// Returns one row per matching node with columns declared in YIELD
672    /// (typically `node`).  Each `node` value is a `NodeRef`.
673    fn call_fulltext_query_nodes(&self, c: &CallStatement) -> Result<QueryResult> {
674        // Validate argument count — must be exactly 2.
675        if c.args.len() != 2 {
676            return Err(sparrowdb_common::Error::InvalidArgument(
677                "db.index.fulltext.queryNodes requires exactly 2 arguments: (indexName, query)"
678                    .into(),
679            ));
680        }
681
682        // Evaluate arg 0 → index name.
683        let index_name = eval_expr_to_string(&c.args[0])?;
684        // Evaluate arg 1 → query string.
685        let query = eval_expr_to_string(&c.args[1])?;
686
687        // Open the fulltext index (read-only; no flush on this path).
688        // `FulltextIndex::open` validates the name for path traversal.
689        let index = FulltextIndex::open(&self.snapshot.db_root, &index_name)?;
690
691        // Determine which column names to project.
692        // Default to ["node"] when no YIELD clause was specified.
693        let yield_cols: Vec<String> = if c.yield_columns.is_empty() {
694            vec!["node".to_owned()]
695        } else {
696            c.yield_columns.clone()
697        };
698
699        // Validate YIELD columns — "node" and "score" are supported.
700        if let Some(bad_col) = yield_cols
701            .iter()
702            .find(|c| c.as_str() != "node" && c.as_str() != "score")
703        {
704            return Err(sparrowdb_common::Error::InvalidArgument(format!(
705                "unsupported YIELD column for db.index.fulltext.queryNodes: {bad_col}"
706            )));
707        }
708
709        // Build result rows: one per matching node.
710        // Use search_with_scores so we can populate the `score` YIELD column.
711        let node_ids_with_scores = index.search_with_scores(&query);
712        let mut rows: Vec<Vec<Value>> = Vec::new();
713        for (raw_id, score) in node_ids_with_scores {
714            let node_id = sparrowdb_common::NodeId(raw_id);
715            let row: Vec<Value> = yield_cols
716                .iter()
717                .map(|col| match col.as_str() {
718                    "node" => Value::NodeRef(node_id),
719                    "score" => Value::Float64(score),
720                    _ => Value::Null,
721                })
722                .collect();
723            rows.push(row);
724        }
725
726        // If a RETURN clause follows, project its items over the YIELD rows.
727        let (columns, rows) = if let Some(ref ret) = c.return_clause {
728            self.project_call_return(ret, &yield_cols, rows)?
729        } else {
730            (yield_cols, rows)
731        };
732
733        Ok(QueryResult { columns, rows })
734    }
735
736    /// Implementation of `CALL db.schema()`.
737    ///
738    /// Returns one row per node label and one row per relationship type with
739    /// columns `["type", "name", "properties"]` where:
740    ///   - `type` is `"node"` or `"relationship"`
741    ///   - `name` is the label or rel-type string
742    ///   - `properties` is a `List` of property name strings (sorted, may be empty)
743    ///
744    /// Property names are collected by scanning committed WAL records so the
745    /// caller does not need to have created any nodes yet for labels to appear.
746    fn call_db_schema(&self, c: &CallStatement) -> Result<QueryResult> {
747        if !c.args.is_empty() {
748            return Err(sparrowdb_common::Error::InvalidArgument(
749                "db.schema requires exactly 0 arguments".into(),
750            ));
751        }
752        let columns = vec![
753            "type".to_owned(),
754            "name".to_owned(),
755            "properties".to_owned(),
756        ];
757
758        // Collect property names per label_id and rel_type from the WAL.
759        let wal_dir = self.snapshot.db_root.join("wal");
760        let schema = WalReplayer::scan_schema(&wal_dir)?;
761
762        let mut rows: Vec<Vec<Value>> = Vec::new();
763
764        // Node labels — from catalog.
765        let labels = self.snapshot.catalog.list_labels()?;
766        for (label_id, label_name) in &labels {
767            let mut prop_names: Vec<String> = schema
768                .node_props
769                .get(&(*label_id as u32))
770                .map(|s| s.iter().cloned().collect())
771                .unwrap_or_default();
772            prop_names.sort();
773            let props_value = Value::List(prop_names.into_iter().map(Value::String).collect());
774            rows.push(vec![
775                Value::String("node".to_owned()),
776                Value::String(label_name.clone()),
777                props_value,
778            ]);
779        }
780
781        // Relationship types — from catalog.
782        let rel_tables = self.snapshot.catalog.list_rel_tables()?;
783        // Deduplicate by rel_type name since the same type can appear across multiple src/dst pairs.
784        let mut seen_rel_types: std::collections::HashSet<String> =
785            std::collections::HashSet::new();
786        for (_, _, rel_type) in &rel_tables {
787            if seen_rel_types.insert(rel_type.clone()) {
788                let mut prop_names: Vec<String> = schema
789                    .rel_props
790                    .get(rel_type)
791                    .map(|s| s.iter().cloned().collect())
792                    .unwrap_or_default();
793                prop_names.sort();
794                let props_value = Value::List(prop_names.into_iter().map(Value::String).collect());
795                rows.push(vec![
796                    Value::String("relationship".to_owned()),
797                    Value::String(rel_type.clone()),
798                    props_value,
799                ]);
800            }
801        }
802
803        Ok(QueryResult { columns, rows })
804    }
805
806    /// Implementation of `CALL db.stats()` (SPA-171).
807    ///
808    /// Returns metric/value rows for `total_bytes`, `wal_bytes`, `edge_count`,
809    /// and per-label `nodes.<Label>` and `label_bytes.<Label>`.
810    ///
811    /// `nodes.<Label>` reports the per-label high-water mark (HWM), not the
812    /// live-node count.  Tombstoned slots are included until compaction runs.
813    ///
814    /// Filesystem entries that cannot be read are silently skipped; values may
815    /// be lower bounds when the database directory is partially inaccessible.
816    fn call_db_stats(&self, c: &CallStatement) -> Result<QueryResult> {
817        if !c.args.is_empty() {
818            return Err(sparrowdb_common::Error::InvalidArgument(
819                "db.stats requires exactly 0 arguments".into(),
820            ));
821        }
822        let db_root = &self.snapshot.db_root;
823        let mut rows: Vec<Vec<Value>> = Vec::new();
824
825        rows.push(vec![
826            Value::String("total_bytes".to_owned()),
827            Value::Int64(dir_size_bytes(db_root) as i64),
828        ]);
829
830        let mut wal_bytes: u64 = 0;
831        if let Ok(es) = std::fs::read_dir(db_root.join("wal")) {
832            for e in es.flatten() {
833                let n = e.file_name();
834                let ns = n.to_string_lossy();
835                if ns.starts_with("segment-") && ns.ends_with(".wal") {
836                    if let Ok(m) = e.metadata() {
837                        wal_bytes += m.len();
838                    }
839                }
840            }
841        }
842        rows.push(vec![
843            Value::String("wal_bytes".to_owned()),
844            Value::Int64(wal_bytes as i64),
845        ]);
846
847        const DR: u64 = 20; // DeltaRecord: src(8) + dst(8) + rel_id(4) = 20 bytes
848        let mut edge_count: u64 = 0;
849        if let Ok(ts) = std::fs::read_dir(db_root.join("edges")) {
850            for t in ts.flatten() {
851                if !t.file_type().map(|ft| ft.is_dir()).unwrap_or(false) {
852                    continue;
853                }
854                let rd = t.path();
855                if let Ok(m) = std::fs::metadata(rd.join("delta.log")) {
856                    edge_count += m.len().checked_div(DR).unwrap_or(0);
857                }
858                let fp = rd.join("base.fwd.csr");
859                if fp.exists() {
860                    if let Ok(b) = std::fs::read(&fp) {
861                        if let Ok(csr) = sparrowdb_storage::csr::CsrForward::decode(&b) {
862                            edge_count += csr.n_edges();
863                        }
864                    }
865                }
866            }
867        }
868        rows.push(vec![
869            Value::String("edge_count".to_owned()),
870            Value::Int64(edge_count as i64),
871        ]);
872
873        for (label_id, label_name) in self.snapshot.catalog.list_labels()? {
874            let lid = label_id as u32;
875            let hwm = self.snapshot.store.hwm_for_label(lid).unwrap_or(0);
876            rows.push(vec![
877                Value::String(format!("nodes.{label_name}")),
878                Value::Int64(hwm as i64),
879            ]);
880            let mut lb: u64 = 0;
881            if let Ok(es) = std::fs::read_dir(db_root.join("nodes").join(lid.to_string())) {
882                for e in es.flatten() {
883                    if let Ok(m) = e.metadata() {
884                        lb += m.len();
885                    }
886                }
887            }
888            rows.push(vec![
889                Value::String(format!("label_bytes.{label_name}")),
890                Value::Int64(lb as i64),
891            ]);
892        }
893
894        let columns = vec!["metric".to_owned(), "value".to_owned()];
895        let yield_cols: Vec<String> = if c.yield_columns.is_empty() {
896            columns.clone()
897        } else {
898            c.yield_columns.clone()
899        };
900        for col in &yield_cols {
901            if col != "metric" && col != "value" {
902                return Err(sparrowdb_common::Error::InvalidArgument(format!(
903                    "unsupported YIELD column for db.stats: {col}"
904                )));
905            }
906        }
907        let idxs: Vec<usize> = yield_cols
908            .iter()
909            .map(|c| if c == "metric" { 0 } else { 1 })
910            .collect();
911        let projected: Vec<Vec<Value>> = rows
912            .into_iter()
913            .map(|r| idxs.iter().map(|&i| r[i].clone()).collect())
914            .collect();
915        let (fc, fr) = if let Some(ref ret) = c.return_clause {
916            self.project_call_return(ret, &yield_cols, projected)?
917        } else {
918            (yield_cols, projected)
919        };
920        Ok(QueryResult {
921            columns: fc,
922            rows: fr,
923        })
924    }
925
926    /// Project a RETURN clause over rows produced by a CALL statement.
927    ///
928    /// The YIELD columns from the CALL become the row environment.  Each
929    /// return item is evaluated against those columns:
930    ///   - `Var(name)` — returns the raw yield-column value
931    ///   - `PropAccess { var, prop }` — reads a property from the NodeRef
932    ///
933    /// This covers the primary KMS pattern:
934    /// `CALL … YIELD node RETURN node.content, node.title`
935    fn project_call_return(
936        &self,
937        ret: &sparrowdb_cypher::ast::ReturnClause,
938        yield_cols: &[String],
939        rows: Vec<Vec<Value>>,
940    ) -> Result<(Vec<String>, Vec<Vec<Value>>)> {
941        // Column names from return items.
942        let out_cols: Vec<String> = ret
943            .items
944            .iter()
945            .map(|item| {
946                item.alias
947                    .clone()
948                    .unwrap_or_else(|| expr_to_col_name(&item.expr))
949            })
950            .collect();
951
952        let mut out_rows = Vec::new();
953        for row in rows {
954            // Build a name → Value map for this row.
955            let env: HashMap<String, Value> = yield_cols
956                .iter()
957                .zip(row.iter())
958                .map(|(k, v)| (k.clone(), v.clone()))
959                .collect();
960
961            let projected: Vec<Value> = ret
962                .items
963                .iter()
964                .map(|item| eval_call_expr(&item.expr, &env, &self.snapshot.store))
965                .collect();
966            out_rows.push(projected);
967        }
968        Ok((out_cols, out_rows))
969    }
970
971    /// Returns `true` if `stmt` is a mutation (MERGE, MATCH+SET, MATCH+DELETE,
972    /// MATCH+CREATE edge).
973    ///
974    /// Used by `GraphDb::execute` to route the statement to the write path.
975    pub fn is_mutation(stmt: &Statement) -> bool {
976        match stmt {
977            Statement::Merge(_)
978            | Statement::MatchMergeRel(_)
979            | Statement::MatchMutate(_)
980            | Statement::MatchCreate(_) => true,
981            // All standalone CREATE statements must go through the
982            // write-transaction path to ensure WAL durability and correct
983            // single-writer semantics, regardless of whether edges are present.
984            Statement::Create(_) => true,
985            _ => false,
986        }
987    }
988
989    // ── Mutation execution (called by GraphDb with a write transaction) ────────
990
991    /// Scan nodes matching the MATCH patterns in a `MatchMutate` statement and
992    /// return the list of matching `NodeId`s.  The caller is responsible for
993    /// applying the actual mutations inside a write transaction.
994    pub fn scan_match_mutate(&self, mm: &MatchMutateStatement) -> Result<Vec<NodeId>> {
995        if mm.match_patterns.is_empty() {
996            return Ok(vec![]);
997        }
998
999        // Guard: only single-node patterns (no multi-pattern, no relationship hops)
1000        // are supported.  Silently ignoring extra patterns would mutate the wrong
1001        // nodes; instead we surface a clear error.
1002        if mm.match_patterns.len() != 1 || !mm.match_patterns[0].rels.is_empty() {
1003            return Err(sparrowdb_common::Error::InvalidArgument(
1004                "MATCH...SET/DELETE currently supports only single-node patterns (no relationships)"
1005                    .into(),
1006            ));
1007        }
1008
1009        let pat = &mm.match_patterns[0];
1010        if pat.nodes.is_empty() {
1011            return Ok(vec![]);
1012        }
1013        let node_pat = &pat.nodes[0];
1014        let label = node_pat.labels.first().cloned().unwrap_or_default();
1015
1016        let label_id = match self.snapshot.catalog.get_label(&label)? {
1017            Some(id) => id as u32,
1018            // SPA-266: unknown label → no nodes can match; return empty result.
1019            None => return Ok(vec![]),
1020        };
1021
1022        let hwm = self.snapshot.store.hwm_for_label(label_id)?;
1023
1024        // Collect prop filter col_ids.
1025        let filter_col_ids: Vec<u32> = node_pat
1026            .props
1027            .iter()
1028            .map(|pe| prop_name_to_col_id(&pe.key))
1029            .collect();
1030
1031        // Col_ids referenced by the WHERE clause.
1032        let mut all_col_ids: Vec<u32> = filter_col_ids;
1033        if let Some(ref where_expr) = mm.where_clause {
1034            collect_col_ids_from_expr(where_expr, &mut all_col_ids);
1035        }
1036
1037        let var_name = node_pat.var.as_str();
1038        let mut matching_ids = Vec::new();
1039
1040        for slot in 0..hwm {
1041            let node_id = NodeId(((label_id as u64) << 32) | slot);
1042
1043            // SPA-216: skip tombstoned nodes so that already-deleted nodes are
1044            // not re-deleted and are not matched by SET mutations either.
1045            if self.is_node_tombstoned(node_id) {
1046                continue;
1047            }
1048
1049            let props = read_node_props(&self.snapshot.store, node_id, &all_col_ids)?;
1050
1051            if !matches_prop_filter_static(
1052                &props,
1053                &node_pat.props,
1054                &self.dollar_params(),
1055                &self.snapshot.store,
1056            ) {
1057                continue;
1058            }
1059
1060            if let Some(ref where_expr) = mm.where_clause {
1061                let mut row_vals =
1062                    build_row_vals(&props, var_name, &all_col_ids, &self.snapshot.store);
1063                row_vals.extend(self.dollar_params());
1064                if !self.eval_where_graph(where_expr, &row_vals) {
1065                    continue;
1066                }
1067            }
1068
1069            matching_ids.push(node_id);
1070        }
1071
1072        Ok(matching_ids)
1073    }
1074
1075    /// Return the mutation carried by a `MatchMutate` statement, exposing it
1076    /// to the caller (GraphDb) so it can apply it inside a write transaction.
1077    pub fn mutation_from_match_mutate(mm: &MatchMutateStatement) -> &Mutation {
1078        &mm.mutation
1079    }
1080
1081    // ── Node-scan helpers (shared by scan_match_create and scan_match_create_rows) ──
1082
1083    /// Returns `true` if the given node has been tombstoned (col 0 == u64::MAX).
1084    ///
1085    /// `NotFound` is expected for new/sparse nodes where col_0 has not been
1086    /// written yet and is treated as "not tombstoned".  All other errors are
1087    /// logged as warnings and also treated as "not tombstoned" so that
1088    /// transient storage issues do not suppress valid nodes during a scan.
1089    fn is_node_tombstoned(&self, node_id: NodeId) -> bool {
1090        match self.snapshot.store.get_node_raw(node_id, &[0u32]) {
1091            Ok(col0) => col0.iter().any(|&(c, v)| c == 0 && v == u64::MAX),
1092            Err(sparrowdb_common::Error::NotFound) => false,
1093            Err(e) => {
1094                tracing::warn!(
1095                    node_id = node_id.0,
1096                    error = ?e,
1097                    "tombstone check failed; treating node as not tombstoned"
1098                );
1099                false
1100            }
1101        }
1102    }
1103
1104    /// Returns `true` if `node_id` satisfies every inline prop predicate in
1105    /// `filter_col_ids` / `props`.
1106    ///
1107    /// `filter_col_ids` must be pre-computed from `props` with
1108    /// `prop_name_to_col_id`.  Pass an empty slice when there are no filters
1109    /// (the method returns `true` immediately).
1110    fn node_matches_prop_filter(
1111        &self,
1112        node_id: NodeId,
1113        filter_col_ids: &[u32],
1114        props: &[sparrowdb_cypher::ast::PropEntry],
1115    ) -> bool {
1116        if props.is_empty() {
1117            return true;
1118        }
1119        match self.snapshot.store.get_node_raw(node_id, filter_col_ids) {
1120            Ok(raw_props) => matches_prop_filter_static(
1121                &raw_props,
1122                props,
1123                &self.dollar_params(),
1124                &self.snapshot.store,
1125            ),
1126            Err(_) => false,
1127        }
1128    }
1129
1130    // ── Scan for MATCH…CREATE (called by GraphDb with a write transaction) ──────
1131
1132    /// Scan nodes matching the MATCH patterns in a `MatchCreateStatement` and
1133    /// return a map of variable name → Vec<NodeId> for each named node pattern.
1134    ///
1135    /// The caller (GraphDb) uses this to resolve variable bindings before
1136    /// calling `WriteTx::create_edge` for each edge in the CREATE clause.
1137    pub fn scan_match_create(
1138        &self,
1139        mc: &MatchCreateStatement,
1140    ) -> Result<HashMap<String, Vec<NodeId>>> {
1141        let mut var_candidates: HashMap<String, Vec<NodeId>> = HashMap::new();
1142
1143        for pat in &mc.match_patterns {
1144            for node_pat in &pat.nodes {
1145                if node_pat.var.is_empty() {
1146                    continue;
1147                }
1148                // Skip if already resolved (same var can appear in multiple patterns).
1149                if var_candidates.contains_key(&node_pat.var) {
1150                    continue;
1151                }
1152
1153                let label = node_pat.labels.first().cloned().unwrap_or_default();
1154                let label_id: u32 = match self.snapshot.catalog.get_label(&label)? {
1155                    Some(id) => id as u32,
1156                    None => {
1157                        // Label not found → no matching nodes for this variable.
1158                        var_candidates.insert(node_pat.var.clone(), vec![]);
1159                        continue;
1160                    }
1161                };
1162
1163                let hwm = self.snapshot.store.hwm_for_label(label_id)?;
1164
1165                // Collect col_ids needed for inline prop filtering.
1166                let filter_col_ids: Vec<u32> = node_pat
1167                    .props
1168                    .iter()
1169                    .map(|p| prop_name_to_col_id(&p.key))
1170                    .collect();
1171
1172                let mut matching_ids: Vec<NodeId> = Vec::new();
1173                for slot in 0..hwm {
1174                    let node_id = NodeId(((label_id as u64) << 32) | slot);
1175
1176                    // Skip tombstoned nodes (col_0 == u64::MAX).
1177                    // Treat a missing-file error as "not tombstoned".
1178                    match self.snapshot.store.get_node_raw(node_id, &[0u32]) {
1179                        Ok(col0) if col0.iter().any(|&(c, v)| c == 0 && v == u64::MAX) => {
1180                            continue;
1181                        }
1182                        Ok(_) | Err(_) => {}
1183                    }
1184
1185                    // Apply inline prop filter if any.
1186                    if !node_pat.props.is_empty() {
1187                        match self.snapshot.store.get_node_raw(node_id, &filter_col_ids) {
1188                            Ok(props) => {
1189                                if !matches_prop_filter_static(
1190                                    &props,
1191                                    &node_pat.props,
1192                                    &self.dollar_params(),
1193                                    &self.snapshot.store,
1194                                ) {
1195                                    continue;
1196                                }
1197                            }
1198                            // If a filter column doesn't exist on disk, the node
1199                            // cannot satisfy the filter.
1200                            Err(_) => continue,
1201                        }
1202                    }
1203
1204                    matching_ids.push(node_id);
1205                }
1206
1207                var_candidates.insert(node_pat.var.clone(), matching_ids);
1208            }
1209        }
1210
1211        Ok(var_candidates)
1212    }
1213
1214    /// Execute the MATCH portion of a `MatchCreateStatement` and return one
1215    /// binding map per matched row.
1216    ///
1217    /// Each element of the returned `Vec` is a `HashMap<variable_name, NodeId>`
1218    /// that represents one fully-correlated result row from the MATCH clause.
1219    /// The caller uses these to drive `WriteTx::create_edge` — one call per row.
1220    ///
1221    /// # Algorithm
1222    ///
1223    /// For each `PathPattern` in `match_patterns`:
1224    /// - **No relationships** (node-only pattern): scan the node store applying
1225    ///   inline prop filters; collect one candidate set per named variable.
1226    ///   Cross-join these sets with the rows accumulated so far.
1227    /// - **One relationship hop** (`(a)-[:R]->(b)`): traverse the CSR + delta
1228    ///   log to enumerate actual (src, dst) pairs that are connected by an edge,
1229    ///   then filter each node against its inline prop predicates.  Only
1230    ///   correlated pairs are yielded — this is the key difference from the old
1231    ///   `scan_match_create` which treated every node as an independent
1232    ///   candidate and then took a full Cartesian product.
1233    ///
1234    /// Patterns beyond a single hop are not yet supported and return an error.
1235    pub fn scan_match_create_rows(
1236        &self,
1237        mc: &MatchCreateStatement,
1238    ) -> Result<Vec<HashMap<String, NodeId>>> {
1239        // Start with a single empty row (identity for cross-join).
1240        let mut accumulated: Vec<HashMap<String, NodeId>> = vec![HashMap::new()];
1241
1242        for pat in &mc.match_patterns {
1243            if pat.rels.is_empty() {
1244                // ── Node-only pattern: collect candidates per variable, then
1245                //    cross-join into accumulated rows. ──────────────────────
1246                //
1247                // Collect each named node variable's candidate list.
1248                let mut per_var: Vec<(String, Vec<NodeId>)> = Vec::new();
1249
1250                for node_pat in &pat.nodes {
1251                    if node_pat.var.is_empty() {
1252                        continue;
1253                    }
1254
1255                    // SPA-211: when no label is specified, scan all registered
1256                    // labels so that unlabeled MATCH patterns find nodes of
1257                    // any type (instead of silently returning empty).
1258                    let scan_label_ids: Vec<u32> = if node_pat.labels.is_empty() {
1259                        self.snapshot
1260                            .catalog
1261                            .list_labels()?
1262                            .into_iter()
1263                            .map(|(id, _)| id as u32)
1264                            .collect()
1265                    } else {
1266                        let label = node_pat.labels.first().cloned().unwrap_or_default();
1267                        match self.snapshot.catalog.get_label(&label)? {
1268                            Some(id) => vec![id as u32],
1269                            None => {
1270                                // No nodes can match → entire MATCH yields nothing.
1271                                return Ok(vec![]);
1272                            }
1273                        }
1274                    };
1275
1276                    let filter_col_ids: Vec<u32> = node_pat
1277                        .props
1278                        .iter()
1279                        .map(|p| prop_name_to_col_id(&p.key))
1280                        .collect();
1281
1282                    let mut matching_ids: Vec<NodeId> = Vec::new();
1283                    for label_id in scan_label_ids {
1284                        let hwm = self.snapshot.store.hwm_for_label(label_id)?;
1285                        for slot in 0..hwm {
1286                            let node_id = NodeId(((label_id as u64) << 32) | slot);
1287
1288                            if self.is_node_tombstoned(node_id) {
1289                                continue;
1290                            }
1291                            if !self.node_matches_prop_filter(
1292                                node_id,
1293                                &filter_col_ids,
1294                                &node_pat.props,
1295                            ) {
1296                                continue;
1297                            }
1298
1299                            matching_ids.push(node_id);
1300                        }
1301                    }
1302
1303                    if matching_ids.is_empty() {
1304                        // No matching nodes → entire MATCH is empty.
1305                        return Ok(vec![]);
1306                    }
1307
1308                    per_var.push((node_pat.var.clone(), matching_ids));
1309                }
1310
1311                // Cross-join the per_var candidates into accumulated.
1312                // `candidates` is guaranteed non-empty (checked above), so the result
1313                // will be non-empty as long as `accumulated` is non-empty.
1314                for (var, candidates) in per_var {
1315                    let mut next: Vec<HashMap<String, NodeId>> = Vec::new();
1316                    for row in &accumulated {
1317                        for &node_id in &candidates {
1318                            let mut new_row = row.clone();
1319                            new_row.insert(var.clone(), node_id);
1320                            next.push(new_row);
1321                        }
1322                    }
1323                    accumulated = next;
1324                }
1325            } else if pat.rels.len() == 1 && pat.nodes.len() == 2 {
1326                // ── Single-hop relationship pattern: traverse CSR + delta edges
1327                //    to produce correlated (src, dst) pairs. ─────────────────
1328                let src_node_pat = &pat.nodes[0];
1329                let dst_node_pat = &pat.nodes[1];
1330                let rel_pat = &pat.rels[0];
1331
1332                // Only outgoing direction is supported for MATCH…CREATE traversal.
1333                if rel_pat.dir != sparrowdb_cypher::ast::EdgeDir::Outgoing {
1334                    return Err(sparrowdb_common::Error::Unimplemented);
1335                }
1336
1337                let src_label = src_node_pat.labels.first().cloned().unwrap_or_default();
1338                let dst_label = dst_node_pat.labels.first().cloned().unwrap_or_default();
1339
1340                let src_label_id: u32 = match self.snapshot.catalog.get_label(&src_label)? {
1341                    Some(id) => id as u32,
1342                    None => return Ok(vec![]),
1343                };
1344                let dst_label_id: u32 = match self.snapshot.catalog.get_label(&dst_label)? {
1345                    Some(id) => id as u32,
1346                    None => return Ok(vec![]),
1347                };
1348
1349                let src_filter_cols: Vec<u32> = src_node_pat
1350                    .props
1351                    .iter()
1352                    .map(|p| prop_name_to_col_id(&p.key))
1353                    .collect();
1354                let dst_filter_cols: Vec<u32> = dst_node_pat
1355                    .props
1356                    .iter()
1357                    .map(|p| prop_name_to_col_id(&p.key))
1358                    .collect();
1359
1360                // SPA-185: resolve per-type rel table for delta and CSR reads.
1361                let rel_lookup =
1362                    self.resolve_rel_table_id(src_label_id, dst_label_id, &rel_pat.rel_type);
1363                if matches!(rel_lookup, RelTableLookup::NotFound) {
1364                    return Ok(vec![]);
1365                }
1366
1367                // Build a src_slot → Vec<dst_slot> adjacency map from the delta log once,
1368                // filtering by src_label to avoid O(N*M) scanning inside the outer loop.
1369                let delta_adj: HashMap<u64, Vec<u64>> = {
1370                    let records: Vec<DeltaRecord> = match rel_lookup {
1371                        RelTableLookup::Found(rtid) => self.read_delta_for(rtid),
1372                        _ => self.read_delta_all(),
1373                    };
1374                    let mut adj: HashMap<u64, Vec<u64>> = HashMap::new();
1375                    for r in records {
1376                        let s = r.src.0;
1377                        let s_label = (s >> 32) as u32;
1378                        if s_label == src_label_id {
1379                            let s_slot = s & 0xFFFF_FFFF;
1380                            adj.entry(s_slot).or_default().push(r.dst.0 & 0xFFFF_FFFF);
1381                        }
1382                    }
1383                    adj
1384                };
1385
1386                let hwm_src = self.snapshot.store.hwm_for_label(src_label_id)?;
1387
1388                // Pairs yielded by this pattern for cross-join below.
1389                let mut pattern_rows: Vec<HashMap<String, NodeId>> = Vec::new();
1390
1391                for src_slot in 0..hwm_src {
1392                    // SPA-254: check per-query deadline at every slot boundary.
1393                    self.check_deadline()?;
1394
1395                    let src_node = NodeId(((src_label_id as u64) << 32) | src_slot);
1396
1397                    if self.is_node_tombstoned(src_node) {
1398                        continue;
1399                    }
1400                    if !self.node_matches_prop_filter(
1401                        src_node,
1402                        &src_filter_cols,
1403                        &src_node_pat.props,
1404                    ) {
1405                        continue;
1406                    }
1407
1408                    // Collect outgoing neighbours (CSR + delta adjacency map).
1409                    let csr_neighbors_vec: Vec<u64> = match rel_lookup {
1410                        RelTableLookup::Found(rtid) => self.csr_neighbors(rtid, src_slot),
1411                        _ => self.csr_neighbors_all(src_slot),
1412                    };
1413                    let empty: Vec<u64> = Vec::new();
1414                    let delta_neighbors: &[u64] =
1415                        delta_adj.get(&src_slot).map_or(&empty, |v| v.as_slice());
1416
1417                    let mut seen: HashSet<u64> = HashSet::new();
1418                    for &dst_slot in csr_neighbors_vec.iter().chain(delta_neighbors.iter()) {
1419                        if !seen.insert(dst_slot) {
1420                            continue;
1421                        }
1422                        let dst_node = NodeId(((dst_label_id as u64) << 32) | dst_slot);
1423
1424                        if self.is_node_tombstoned(dst_node) {
1425                            continue;
1426                        }
1427                        if !self.node_matches_prop_filter(
1428                            dst_node,
1429                            &dst_filter_cols,
1430                            &dst_node_pat.props,
1431                        ) {
1432                            continue;
1433                        }
1434
1435                        let mut row: HashMap<String, NodeId> = HashMap::new();
1436
1437                        // When src and dst use the same variable (self-loop pattern),
1438                        // the edge must actually be a self-loop (src == dst).
1439                        if !src_node_pat.var.is_empty()
1440                            && !dst_node_pat.var.is_empty()
1441                            && src_node_pat.var == dst_node_pat.var
1442                        {
1443                            if src_node != dst_node {
1444                                continue;
1445                            }
1446                            row.insert(src_node_pat.var.clone(), src_node);
1447                        } else {
1448                            if !src_node_pat.var.is_empty() {
1449                                row.insert(src_node_pat.var.clone(), src_node);
1450                            }
1451                            if !dst_node_pat.var.is_empty() {
1452                                row.insert(dst_node_pat.var.clone(), dst_node);
1453                            }
1454                        }
1455                        pattern_rows.push(row);
1456                    }
1457                }
1458
1459                if pattern_rows.is_empty() {
1460                    return Ok(vec![]);
1461                }
1462
1463                // Cross-join pattern_rows into accumulated, enforcing shared-variable
1464                // constraints: if a variable appears in both acc_row and pat_row, only
1465                // keep combinations where they agree on the same NodeId.
1466                let mut next: Vec<HashMap<String, NodeId>> = Vec::new();
1467                for acc_row in &accumulated {
1468                    'outer: for pat_row in &pattern_rows {
1469                        // Reject combinations where shared variables disagree.
1470                        for (k, v) in pat_row {
1471                            if let Some(existing) = acc_row.get(k) {
1472                                if existing != v {
1473                                    continue 'outer;
1474                                }
1475                            }
1476                        }
1477                        let mut new_row = acc_row.clone();
1478                        new_row.extend(pat_row.iter().map(|(k, v)| (k.clone(), *v)));
1479                        next.push(new_row);
1480                    }
1481                }
1482                accumulated = next;
1483            } else {
1484                // Multi-hop patterns not yet supported for MATCH…CREATE.
1485                return Err(sparrowdb_common::Error::Unimplemented);
1486            }
1487        }
1488
1489        Ok(accumulated)
1490    }
1491
1492    /// Scan the MATCH patterns of a `MatchMergeRelStatement` and return
1493    /// correlated `(variable → NodeId)` binding rows — identical semantics to
1494    /// `scan_match_create_rows` but taking the MERGE form's match patterns (SPA-233).
1495    pub fn scan_match_merge_rel_rows(
1496        &self,
1497        mm: &MatchMergeRelStatement,
1498    ) -> Result<Vec<HashMap<String, NodeId>>> {
1499        // Reuse scan_match_create_rows by wrapping the MERGE patterns in a
1500        // MatchCreateStatement with an empty (no-op) CREATE body.
1501        let proxy = MatchCreateStatement {
1502            match_patterns: mm.match_patterns.clone(),
1503            match_props: vec![],
1504            create: CreateStatement {
1505                nodes: vec![],
1506                edges: vec![],
1507            },
1508        };
1509        self.scan_match_create_rows(&proxy)
1510    }
1511
1512    // ── UNWIND ─────────────────────────────────────────────────────────────────
1513
1514    fn execute_unwind(&self, u: &UnwindStatement) -> Result<QueryResult> {
1515        use crate::operators::{Operator, UnwindOperator};
1516
1517        // Evaluate the list expression to a Vec<Value>.
1518        let values = eval_list_expr(&u.expr, &self.params)?;
1519
1520        // Determine the output column name from the RETURN clause.
1521        let column_names = extract_return_column_names(&u.return_clause.items);
1522
1523        if values.is_empty() {
1524            return Ok(QueryResult::empty(column_names));
1525        }
1526
1527        let mut op = UnwindOperator::new(u.alias.clone(), values);
1528        let chunks = op.collect_all()?;
1529
1530        // Materialize: for each chunk/group/row, project the RETURN columns.
1531        //
1532        // Only fall back to the UNWIND alias value when the output column
1533        // actually corresponds to the alias variable.  Returning a value for
1534        // an unrelated variable (e.g. `RETURN y` when alias is `x`) would
1535        // silently produce wrong results instead of NULL.
1536        let mut rows: Vec<Vec<Value>> = Vec::new();
1537        for chunk in &chunks {
1538            for group in &chunk.groups {
1539                let n = group.len();
1540                for row_idx in 0..n {
1541                    let row = u
1542                        .return_clause
1543                        .items
1544                        .iter()
1545                        .map(|item| {
1546                            // Determine whether this RETURN item refers to the
1547                            // alias variable produced by UNWIND.
1548                            let is_alias = match &item.expr {
1549                                Expr::Var(name) => name == &u.alias,
1550                                _ => false,
1551                            };
1552                            if is_alias {
1553                                group.get_value(&u.alias, row_idx).unwrap_or(Value::Null)
1554                            } else {
1555                                // Variable is not in scope for this UNWIND —
1556                                // return NULL rather than leaking the alias value.
1557                                Value::Null
1558                            }
1559                        })
1560                        .collect();
1561                    rows.push(row);
1562                }
1563            }
1564        }
1565
1566        Ok(QueryResult {
1567            columns: column_names,
1568            rows,
1569        })
1570    }
1571
1572    // ── CREATE node execution ─────────────────────────────────────────────────
1573
1574    /// Execute a `CREATE` statement, auto-registering labels as needed (SPA-156).
1575    ///
1576    /// For each node in the CREATE clause:
1577    /// 1. Look up (or create) its primary label in the catalog.
1578    /// 2. Convert inline properties to `(col_id, StoreValue)` pairs using the
1579    ///    same FNV-1a hash used by `WriteTx::merge_node`.
1580    /// 3. Write the node to the node store.
1581    fn execute_create(&mut self, create: &CreateStatement) -> Result<QueryResult> {
1582        for node in &create.nodes {
1583            // Resolve the primary label, creating it if absent.
1584            let label = node.labels.first().cloned().unwrap_or_default();
1585
1586            // SPA-208: reject reserved __SO_ label prefix.
1587            if is_reserved_label(&label) {
1588                return Err(sparrowdb_common::Error::InvalidArgument(format!(
1589                    "invalid argument: label \"{label}\" is reserved — the __SO_ prefix is for internal use only"
1590                )));
1591            }
1592
1593            let label_id: u32 = match self.snapshot.catalog.get_label(&label)? {
1594                Some(id) => id as u32,
1595                None => self.snapshot.catalog.create_label(&label)? as u32,
1596            };
1597
1598            // Convert AST props to (col_id, StoreValue) pairs.
1599            // Property values are full expressions (e.g. `datetime()`),
1600            // evaluated with an empty binding map.
1601            let empty_bindings: HashMap<String, Value> = HashMap::new();
1602            let props: Vec<(u32, StoreValue)> = node
1603                .props
1604                .iter()
1605                .map(|entry| {
1606                    let col_id = prop_name_to_col_id(&entry.key);
1607                    let val = eval_expr(&entry.value, &empty_bindings);
1608                    let store_val = value_to_store_value(val);
1609                    (col_id, store_val)
1610                })
1611                .collect();
1612
1613            // SPA-234: enforce UNIQUE constraints declared via
1614            // `CREATE CONSTRAINT ON (n:Label) ASSERT n.property IS UNIQUE`.
1615            // For each constrained (label_id, col_id) pair, check whether the
1616            // incoming value already exists in the property index.  If so,
1617            // return a constraint-violation error before writing the node.
1618            //
1619            // Only inline-encodable types (Int64 and short Bytes ≤ 7 bytes)
1620            // are checked via the prop_index fast path.  Float values and
1621            // long strings require heap storage and cannot be encoded with
1622            // to_u64(); for those types we return an explicit error rather
1623            // than panicking (StoreValue::Float::to_u64 is documented to
1624            // panic for heap-backed values).
1625            for (col_id, store_val) in &props {
1626                if self.unique_constraints.contains(&(label_id, *col_id)) {
1627                    let raw = match store_val {
1628                        StoreValue::Int64(_) => store_val.to_u64(),
1629                        StoreValue::Bytes(b) if b.len() <= 7 => store_val.to_u64(),
1630                        StoreValue::Bytes(_) => {
1631                            return Err(sparrowdb_common::Error::InvalidArgument(
1632                                "UNIQUE constraints on string values longer than 7 bytes are not yet supported".into(),
1633                            ));
1634                        }
1635                        StoreValue::Float(_) => {
1636                            return Err(sparrowdb_common::Error::InvalidArgument(
1637                                "UNIQUE constraints on float values are not yet supported".into(),
1638                            ));
1639                        }
1640                    };
1641                    if !self
1642                        .prop_index
1643                        .borrow()
1644                        .lookup(label_id, *col_id, raw)
1645                        .is_empty()
1646                    {
1647                        return Err(sparrowdb_common::Error::InvalidArgument(format!(
1648                            "unique constraint violation: label \"{label}\" already has a node with the same value for this property"
1649                        )));
1650                    }
1651                }
1652            }
1653
1654            let node_id = self.snapshot.store.create_node(label_id, &props)?;
1655            // SPA-234: after writing, insert new values into the prop_index so
1656            // that subsequent creates in the same session also respect the
1657            // UNIQUE constraint (the index may be stale if built before this
1658            // node was written).
1659            {
1660                let slot =
1661                    sparrowdb_storage::property_index::PropertyIndex::node_id_to_slot(node_id);
1662                let mut idx = self.prop_index.borrow_mut();
1663                for (col_id, store_val) in &props {
1664                    if self.unique_constraints.contains(&(label_id, *col_id)) {
1665                        // Only insert inline-encodable values; Float/long Bytes
1666                        // were already rejected above before create_node was called.
1667                        let raw = match store_val {
1668                            StoreValue::Int64(_) => store_val.to_u64(),
1669                            StoreValue::Bytes(b) if b.len() <= 7 => store_val.to_u64(),
1670                            _ => continue,
1671                        };
1672                        idx.insert(label_id, *col_id, slot, raw);
1673                    }
1674                }
1675            }
1676            // Update cached row count for the planner (SPA-new).
1677            *self
1678                .snapshot
1679                .label_row_counts
1680                .entry(label_id as LabelId)
1681                .or_insert(0) += 1;
1682        }
1683        Ok(QueryResult::empty(vec![]))
1684    }
1685
1686    fn execute_create_index(&mut self, label: &str, property: &str) -> Result<QueryResult> {
1687        let label_id: u32 = match self.snapshot.catalog.get_label(label)? {
1688            Some(id) => id as u32,
1689            None => return Ok(QueryResult::empty(vec![])),
1690        };
1691        let col_id = col_id_of(property);
1692        self.prop_index
1693            .borrow_mut()
1694            .build_for(&self.snapshot.store, label_id, col_id)?;
1695        Ok(QueryResult::empty(vec![]))
1696    }
1697
1698    /// Execute `CREATE CONSTRAINT ON (n:Label) ASSERT n.property IS UNIQUE` (SPA-234).
1699    ///
1700    /// Records `(label_id, col_id)` in `self.unique_constraints` so that
1701    /// subsequent `execute_create` calls reject duplicate values.  Also builds
1702    /// the backing prop-index for that pair (needed to check existence cheaply).
1703    /// If the label does not yet exist in the catalog it is auto-created so that
1704    /// later `CREATE` statements can register against the constraint.
1705    fn execute_create_constraint(&mut self, label: &str, property: &str) -> Result<QueryResult> {
1706        let label_id: u32 = match self.snapshot.catalog.get_label(label)? {
1707            Some(id) => id as u32,
1708            None => self.snapshot.catalog.create_label(label)? as u32,
1709        };
1710        let col_id = col_id_of(property);
1711
1712        // Build the property index for this (label_id, col_id) pair so that
1713        // uniqueness checks in execute_create can use O(log n) lookups.
1714        self.prop_index
1715            .borrow_mut()
1716            .build_for(&self.snapshot.store, label_id, col_id)?;
1717
1718        // Register the constraint.
1719        self.unique_constraints.insert((label_id, col_id));
1720
1721        Ok(QueryResult::empty(vec![]))
1722    }
1723
1724    // ── UNION ─────────────────────────────────────────────────────────────────
1725
1726    /// Execute `stmt1 UNION [ALL] stmt2`.
1727    ///
1728    /// Concatenates the row sets from both sides.  When `!all`, duplicate rows
1729    /// are eliminated using the same `deduplicate_rows` logic used by DISTINCT.
1730    /// Both sides must produce the same number of columns; column names are taken
1731    /// from the left side.
1732    fn execute_union(&mut self, u: UnionStatement) -> Result<QueryResult> {
1733        let left_result = self.execute_bound(*u.left)?;
1734        let right_result = self.execute_bound(*u.right)?;
1735
1736        // Validate column counts match.
1737        if !left_result.columns.is_empty()
1738            && !right_result.columns.is_empty()
1739            && left_result.columns.len() != right_result.columns.len()
1740        {
1741            return Err(sparrowdb_common::Error::InvalidArgument(format!(
1742                "UNION: left side has {} columns, right side has {}",
1743                left_result.columns.len(),
1744                right_result.columns.len()
1745            )));
1746        }
1747
1748        let columns = if !left_result.columns.is_empty() {
1749            left_result.columns.clone()
1750        } else {
1751            right_result.columns.clone()
1752        };
1753
1754        let mut rows = left_result.rows;
1755        rows.extend(right_result.rows);
1756
1757        if !u.all {
1758            deduplicate_rows(&mut rows);
1759        }
1760
1761        Ok(QueryResult { columns, rows })
1762    }
1763
1764    // ── WITH clause pipeline ──────────────────────────────────────────────────
1765
1766    /// Execute `MATCH … WITH expr AS alias [WHERE pred] … RETURN …`.
1767    ///
1768    /// 1. Scan MATCH patterns → collect intermediate rows as `Vec<HashMap<String, Value>>`.
1769    /// 2. Project each row through the WITH items (evaluate expr, bind to alias).
1770    /// 3. Apply WITH WHERE predicate on the projected map.
1771    /// 4. Evaluate RETURN expressions against the projected map.
1772    fn execute_match_with(&self, m: &MatchWithStatement) -> Result<QueryResult> {
1773        // Step 1: collect intermediate rows from MATCH scan.
1774        let intermediate = self.collect_match_rows_for_with(
1775            &m.match_patterns,
1776            m.match_where.as_ref(),
1777            &m.with_clause,
1778        )?;
1779
1780        // Step 2: check if WITH clause has aggregate expressions.
1781        // If so, we aggregate the intermediate rows first, producing one output row
1782        // per unique grouping key.
1783        let has_agg = m
1784            .with_clause
1785            .items
1786            .iter()
1787            .any(|item| is_aggregate_expr(&item.expr));
1788
1789        let projected: Vec<HashMap<String, Value>> = if has_agg {
1790            // Aggregate the intermediate rows into a set of projected rows.
1791            let agg_rows = self.aggregate_with_items(&intermediate, &m.with_clause.items);
1792            // Apply WHERE filter on the aggregated rows.
1793            agg_rows
1794                .into_iter()
1795                .filter(|with_vals| {
1796                    if let Some(ref where_expr) = m.with_clause.where_clause {
1797                        let mut with_vals_p = with_vals.clone();
1798                        with_vals_p.extend(self.dollar_params());
1799                        self.eval_where_graph(where_expr, &with_vals_p)
1800                    } else {
1801                        true
1802                    }
1803                })
1804                .map(|mut with_vals| {
1805                    with_vals.extend(self.dollar_params());
1806                    with_vals
1807                })
1808                .collect()
1809        } else {
1810            // Non-aggregate path: project each row through the WITH items.
1811            let mut projected: Vec<HashMap<String, Value>> = Vec::new();
1812            for row_vals in &intermediate {
1813                let mut with_vals: HashMap<String, Value> = HashMap::new();
1814                for item in &m.with_clause.items {
1815                    let val = self.eval_expr_graph(&item.expr, row_vals);
1816                    with_vals.insert(item.alias.clone(), val);
1817                    // SPA-134: if the WITH item is a bare Var (e.g. `n AS person`),
1818                    // also inject the NodeRef under the alias so that EXISTS subqueries
1819                    // in a subsequent WHERE clause can resolve the source node.
1820                    if let sparrowdb_cypher::ast::Expr::Var(ref src_var) = item.expr {
1821                        if let Some(node_ref) = row_vals.get(src_var) {
1822                            if matches!(node_ref, Value::NodeRef(_)) {
1823                                with_vals.insert(item.alias.clone(), node_ref.clone());
1824                                with_vals.insert(
1825                                    format!("{}.__node_id__", item.alias),
1826                                    node_ref.clone(),
1827                                );
1828                            }
1829                        }
1830                        // Also check __node_id__ key.
1831                        let nid_key = format!("{src_var}.__node_id__");
1832                        if let Some(node_ref) = row_vals.get(&nid_key) {
1833                            with_vals
1834                                .insert(format!("{}.__node_id__", item.alias), node_ref.clone());
1835                        }
1836                    }
1837                }
1838                if let Some(ref where_expr) = m.with_clause.where_clause {
1839                    let mut with_vals_p = with_vals.clone();
1840                    with_vals_p.extend(self.dollar_params());
1841                    if !self.eval_where_graph(where_expr, &with_vals_p) {
1842                        continue;
1843                    }
1844                }
1845                // Merge dollar_params into the projected row so that downstream
1846                // RETURN/ORDER-BY/SKIP/LIMIT expressions can resolve $param references.
1847                with_vals.extend(self.dollar_params());
1848                projected.push(with_vals);
1849            }
1850            projected
1851        };
1852
1853        // Step 3: project RETURN from the WITH-projected rows.
1854        let column_names = extract_return_column_names(&m.return_clause.items);
1855
1856        // Apply ORDER BY on the projected rows (which still have all WITH aliases)
1857        // before projecting down to RETURN columns — this allows ORDER BY on columns
1858        // that are not in the RETURN clause (e.g. ORDER BY age when only name is returned).
1859        let mut ordered_projected = projected;
1860        if !m.order_by.is_empty() {
1861            ordered_projected.sort_by(|a, b| {
1862                for (expr, dir) in &m.order_by {
1863                    let val_a = eval_expr(expr, a);
1864                    let val_b = eval_expr(expr, b);
1865                    let cmp = compare_values(&val_a, &val_b);
1866                    let cmp = if *dir == SortDir::Desc {
1867                        cmp.reverse()
1868                    } else {
1869                        cmp
1870                    };
1871                    if cmp != std::cmp::Ordering::Equal {
1872                        return cmp;
1873                    }
1874                }
1875                std::cmp::Ordering::Equal
1876            });
1877        }
1878
1879        // Apply SKIP / LIMIT before final projection.
1880        if let Some(skip) = m.skip {
1881            let skip = (skip as usize).min(ordered_projected.len());
1882            ordered_projected.drain(0..skip);
1883        }
1884        if let Some(lim) = m.limit {
1885            ordered_projected.truncate(lim as usize);
1886        }
1887
1888        let mut rows: Vec<Vec<Value>> = ordered_projected
1889            .iter()
1890            .map(|with_vals| {
1891                m.return_clause
1892                    .items
1893                    .iter()
1894                    .map(|item| self.eval_expr_graph(&item.expr, with_vals))
1895                    .collect()
1896            })
1897            .collect();
1898
1899        if m.distinct {
1900            deduplicate_rows(&mut rows);
1901        }
1902
1903        Ok(QueryResult {
1904            columns: column_names,
1905            rows,
1906        })
1907    }
1908
1909    /// Aggregate a set of raw scan rows through a list of WITH items that
1910    /// include aggregate expressions (COUNT(*), collect(), etc.).
1911    ///
1912    /// Returns one `HashMap<String, Value>` per unique grouping key.
1913    fn aggregate_with_items(
1914        &self,
1915        rows: &[HashMap<String, Value>],
1916        items: &[sparrowdb_cypher::ast::WithItem],
1917    ) -> Vec<HashMap<String, Value>> {
1918        // Classify each WITH item as key or aggregate.
1919        let key_indices: Vec<usize> = items
1920            .iter()
1921            .enumerate()
1922            .filter(|(_, item)| !is_aggregate_expr(&item.expr))
1923            .map(|(i, _)| i)
1924            .collect();
1925        let agg_indices: Vec<usize> = items
1926            .iter()
1927            .enumerate()
1928            .filter(|(_, item)| is_aggregate_expr(&item.expr))
1929            .map(|(i, _)| i)
1930            .collect();
1931
1932        // Build groups.
1933        let mut group_keys: Vec<Vec<Value>> = Vec::new();
1934        let mut group_accum: Vec<Vec<Vec<Value>>> = Vec::new(); // [group][agg_pos] → values
1935
1936        for row_vals in rows {
1937            let key: Vec<Value> = key_indices
1938                .iter()
1939                .map(|&i| eval_expr(&items[i].expr, row_vals))
1940                .collect();
1941            let group_idx = if let Some(pos) = group_keys.iter().position(|k| k == &key) {
1942                pos
1943            } else {
1944                group_keys.push(key);
1945                group_accum.push(vec![vec![]; agg_indices.len()]);
1946                group_keys.len() - 1
1947            };
1948            for (ai, &ri) in agg_indices.iter().enumerate() {
1949                match &items[ri].expr {
1950                    sparrowdb_cypher::ast::Expr::CountStar => {
1951                        group_accum[group_idx][ai].push(Value::Int64(1));
1952                    }
1953                    sparrowdb_cypher::ast::Expr::FnCall { name, args }
1954                        if name.to_lowercase() == "collect" =>
1955                    {
1956                        let val = if !args.is_empty() {
1957                            eval_expr(&args[0], row_vals)
1958                        } else {
1959                            Value::Null
1960                        };
1961                        if !matches!(val, Value::Null) {
1962                            group_accum[group_idx][ai].push(val);
1963                        }
1964                    }
1965                    sparrowdb_cypher::ast::Expr::FnCall { name, args }
1966                        if matches!(
1967                            name.to_lowercase().as_str(),
1968                            "count" | "sum" | "avg" | "min" | "max"
1969                        ) =>
1970                    {
1971                        let val = if !args.is_empty() {
1972                            eval_expr(&args[0], row_vals)
1973                        } else {
1974                            Value::Null
1975                        };
1976                        if !matches!(val, Value::Null) {
1977                            group_accum[group_idx][ai].push(val);
1978                        }
1979                    }
1980                    _ => {}
1981                }
1982            }
1983        }
1984
1985        // If no rows were seen, still produce one output row for global aggregates
1986        // (e.g. COUNT(*) over an empty scan returns 0).
1987        if rows.is_empty() && key_indices.is_empty() {
1988            let mut out_row: HashMap<String, Value> = HashMap::new();
1989            for &ri in &agg_indices {
1990                let val = match &items[ri].expr {
1991                    sparrowdb_cypher::ast::Expr::CountStar => Value::Int64(0),
1992                    sparrowdb_cypher::ast::Expr::FnCall { name, .. }
1993                        if name.to_lowercase() == "collect" =>
1994                    {
1995                        Value::List(vec![])
1996                    }
1997                    _ => Value::Int64(0),
1998                };
1999                out_row.insert(items[ri].alias.clone(), val);
2000            }
2001            return vec![out_row];
2002        }
2003
2004        // Finalize each group.
2005        let mut result: Vec<HashMap<String, Value>> = Vec::new();
2006        for (gi, key_vals) in group_keys.iter().enumerate() {
2007            let mut out_row: HashMap<String, Value> = HashMap::new();
2008            // Insert key values.
2009            for (ki, &ri) in key_indices.iter().enumerate() {
2010                out_row.insert(items[ri].alias.clone(), key_vals[ki].clone());
2011            }
2012            // Finalize aggregates.
2013            for (ai, &ri) in agg_indices.iter().enumerate() {
2014                let accum = &group_accum[gi][ai];
2015                let val = match &items[ri].expr {
2016                    sparrowdb_cypher::ast::Expr::CountStar => Value::Int64(accum.len() as i64),
2017                    sparrowdb_cypher::ast::Expr::FnCall { name, .. }
2018                        if name.to_lowercase() == "collect" =>
2019                    {
2020                        Value::List(accum.clone())
2021                    }
2022                    sparrowdb_cypher::ast::Expr::FnCall { name, .. }
2023                        if name.to_lowercase() == "count" =>
2024                    {
2025                        Value::Int64(accum.len() as i64)
2026                    }
2027                    sparrowdb_cypher::ast::Expr::FnCall { name, .. }
2028                        if name.to_lowercase() == "sum" =>
2029                    {
2030                        let sum: i64 = accum
2031                            .iter()
2032                            .filter_map(|v| {
2033                                if let Value::Int64(n) = v {
2034                                    Some(*n)
2035                                } else {
2036                                    None
2037                                }
2038                            })
2039                            .sum();
2040                        Value::Int64(sum)
2041                    }
2042                    sparrowdb_cypher::ast::Expr::FnCall { name, .. }
2043                        if name.to_lowercase() == "min" =>
2044                    {
2045                        accum
2046                            .iter()
2047                            .min_by(|a, b| compare_values(a, b))
2048                            .cloned()
2049                            .unwrap_or(Value::Null)
2050                    }
2051                    sparrowdb_cypher::ast::Expr::FnCall { name, .. }
2052                        if name.to_lowercase() == "max" =>
2053                    {
2054                        accum
2055                            .iter()
2056                            .max_by(|a, b| compare_values(a, b))
2057                            .cloned()
2058                            .unwrap_or(Value::Null)
2059                    }
2060                    _ => Value::Null,
2061                };
2062                out_row.insert(items[ri].alias.clone(), val);
2063            }
2064            result.push(out_row);
2065        }
2066        result
2067    }
2068
2069    /// Execute a multi-clause Cypher pipeline (SPA-134).
2070    ///
2071    /// Executes stages left-to-right, passing the intermediate row set from
2072    /// one stage to the next, then projects the final RETURN clause.
2073    fn execute_pipeline(&self, p: &PipelineStatement) -> Result<QueryResult> {
2074        // Step 1: Produce the initial row set from the leading clause.
2075        let mut current_rows: Vec<HashMap<String, Value>> =
2076            if let Some((expr, alias)) = &p.leading_unwind {
2077                // UNWIND-led pipeline: expand the list into individual rows.
2078                let values = eval_list_expr(expr, &self.params)?;
2079                values
2080                    .into_iter()
2081                    .map(|v| {
2082                        let mut m = HashMap::new();
2083                        m.insert(alias.clone(), v);
2084                        m
2085                    })
2086                    .collect()
2087            } else if let Some(ref patterns) = p.leading_match {
2088                // MATCH-led pipeline: scan the graph.
2089                // For the pipeline we need a dummy WithClause (scan will collect all
2090                // col IDs needed by subsequent stages).  Use a wide scan that includes
2091                // NodeRefs for EXISTS support.
2092                self.collect_pipeline_match_rows(patterns, p.leading_where.as_ref())?
2093            } else {
2094                vec![HashMap::new()]
2095            };
2096
2097        // Step 2: Execute pipeline stages in order.
2098        for stage in &p.stages {
2099            match stage {
2100                PipelineStage::With {
2101                    clause,
2102                    order_by,
2103                    skip,
2104                    limit,
2105                } => {
2106                    // SPA-134: ORDER BY in a WITH clause can reference variables from the
2107                    // PRECEDING stage (before projection).  Apply ORDER BY / SKIP / LIMIT
2108                    // on current_rows (pre-projection) first, then project.
2109                    if !order_by.is_empty() {
2110                        current_rows.sort_by(|a, b| {
2111                            for (expr, dir) in order_by {
2112                                let va = eval_expr(expr, a);
2113                                let vb = eval_expr(expr, b);
2114                                let cmp = compare_values(&va, &vb);
2115                                let cmp = if *dir == SortDir::Desc {
2116                                    cmp.reverse()
2117                                } else {
2118                                    cmp
2119                                };
2120                                if cmp != std::cmp::Ordering::Equal {
2121                                    return cmp;
2122                                }
2123                            }
2124                            std::cmp::Ordering::Equal
2125                        });
2126                    }
2127                    if let Some(s) = skip {
2128                        let s = (*s as usize).min(current_rows.len());
2129                        current_rows.drain(0..s);
2130                    }
2131                    if let Some(l) = limit {
2132                        current_rows.truncate(*l as usize);
2133                    }
2134
2135                    // Check for aggregates.
2136                    let has_agg = clause
2137                        .items
2138                        .iter()
2139                        .any(|item| is_aggregate_expr(&item.expr));
2140                    let next_rows: Vec<HashMap<String, Value>> = if has_agg {
2141                        let agg_rows = self.aggregate_with_items(&current_rows, &clause.items);
2142                        agg_rows
2143                            .into_iter()
2144                            .filter(|with_vals| {
2145                                if let Some(ref where_expr) = clause.where_clause {
2146                                    let mut wv = with_vals.clone();
2147                                    wv.extend(self.dollar_params());
2148                                    self.eval_where_graph(where_expr, &wv)
2149                                } else {
2150                                    true
2151                                }
2152                            })
2153                            .map(|mut with_vals| {
2154                                with_vals.extend(self.dollar_params());
2155                                with_vals
2156                            })
2157                            .collect()
2158                    } else {
2159                        let mut next_rows: Vec<HashMap<String, Value>> = Vec::new();
2160                        for row_vals in &current_rows {
2161                            let mut with_vals: HashMap<String, Value> = HashMap::new();
2162                            for item in &clause.items {
2163                                let val = self.eval_expr_graph(&item.expr, row_vals);
2164                                with_vals.insert(item.alias.clone(), val);
2165                                // Propagate NodeRef for bare variable aliases.
2166                                if let sparrowdb_cypher::ast::Expr::Var(ref src_var) = item.expr {
2167                                    if let Some(nr @ Value::NodeRef(_)) = row_vals.get(src_var) {
2168                                        with_vals.insert(item.alias.clone(), nr.clone());
2169                                        with_vals.insert(
2170                                            format!("{}.__node_id__", item.alias),
2171                                            nr.clone(),
2172                                        );
2173                                    }
2174                                    let nid_key = format!("{src_var}.__node_id__");
2175                                    if let Some(nr) = row_vals.get(&nid_key) {
2176                                        with_vals.insert(
2177                                            format!("{}.__node_id__", item.alias),
2178                                            nr.clone(),
2179                                        );
2180                                    }
2181                                }
2182                            }
2183                            if let Some(ref where_expr) = clause.where_clause {
2184                                let mut wv = with_vals.clone();
2185                                wv.extend(self.dollar_params());
2186                                if !self.eval_where_graph(where_expr, &wv) {
2187                                    continue;
2188                                }
2189                            }
2190                            with_vals.extend(self.dollar_params());
2191                            next_rows.push(with_vals);
2192                        }
2193                        next_rows
2194                    };
2195                    current_rows = next_rows;
2196                }
2197                PipelineStage::Match {
2198                    patterns,
2199                    where_clause,
2200                } => {
2201                    // Re-traverse the graph for each row in current_rows,
2202                    // substituting WITH-projected values for inline prop filters.
2203                    let mut next_rows: Vec<HashMap<String, Value>> = Vec::new();
2204                    for binding in &current_rows {
2205                        let new_rows = self.execute_pipeline_match_stage(
2206                            patterns,
2207                            where_clause.as_ref(),
2208                            binding,
2209                        )?;
2210                        next_rows.extend(new_rows);
2211                    }
2212                    current_rows = next_rows;
2213                }
2214                PipelineStage::Unwind { alias, new_alias } => {
2215                    // Unwind a list variable from the current row set.
2216                    let mut next_rows: Vec<HashMap<String, Value>> = Vec::new();
2217                    for row_vals in &current_rows {
2218                        let list_val = row_vals.get(alias.as_str()).cloned().unwrap_or(Value::Null);
2219                        let items = match list_val {
2220                            Value::List(v) => v,
2221                            other => vec![other],
2222                        };
2223                        for item in items {
2224                            let mut new_row = row_vals.clone();
2225                            new_row.insert(new_alias.clone(), item);
2226                            next_rows.push(new_row);
2227                        }
2228                    }
2229                    current_rows = next_rows;
2230                }
2231            }
2232        }
2233
2234        // Step 3: PROJECT the RETURN clause.
2235        let column_names = extract_return_column_names(&p.return_clause.items);
2236
2237        // Apply ORDER BY on the fully-projected rows before narrowing to RETURN columns.
2238        if !p.return_order_by.is_empty() {
2239            current_rows.sort_by(|a, b| {
2240                for (expr, dir) in &p.return_order_by {
2241                    let va = eval_expr(expr, a);
2242                    let vb = eval_expr(expr, b);
2243                    let cmp = compare_values(&va, &vb);
2244                    let cmp = if *dir == SortDir::Desc {
2245                        cmp.reverse()
2246                    } else {
2247                        cmp
2248                    };
2249                    if cmp != std::cmp::Ordering::Equal {
2250                        return cmp;
2251                    }
2252                }
2253                std::cmp::Ordering::Equal
2254            });
2255        }
2256
2257        if let Some(skip) = p.return_skip {
2258            let skip = (skip as usize).min(current_rows.len());
2259            current_rows.drain(0..skip);
2260        }
2261        if let Some(lim) = p.return_limit {
2262            current_rows.truncate(lim as usize);
2263        }
2264
2265        let mut rows: Vec<Vec<Value>> = current_rows
2266            .iter()
2267            .map(|row_vals| {
2268                p.return_clause
2269                    .items
2270                    .iter()
2271                    .map(|item| self.eval_expr_graph(&item.expr, row_vals))
2272                    .collect()
2273            })
2274            .collect();
2275
2276        if p.distinct {
2277            deduplicate_rows(&mut rows);
2278        }
2279
2280        Ok(QueryResult {
2281            columns: column_names,
2282            rows,
2283        })
2284    }
2285
2286    /// Collect all rows for a leading MATCH in a pipeline without a bound WithClause.
2287    ///
2288    /// Unlike `collect_match_rows_for_with`, this performs a wide scan that includes
2289    /// all stored column IDs for each label, and always injects NodeRef entries so
2290    /// EXISTS subqueries and subsequent MATCH stages can resolve node references.
2291    fn collect_pipeline_match_rows(
2292        &self,
2293        patterns: &[PathPattern],
2294        where_clause: Option<&Expr>,
2295    ) -> Result<Vec<HashMap<String, Value>>> {
2296        if patterns.is_empty() {
2297            return Ok(vec![HashMap::new()]);
2298        }
2299
2300        // For simplicity handle single-node pattern (no relationship hops in leading MATCH).
2301        let pat = &patterns[0];
2302        let node = &pat.nodes[0];
2303        let var_name = node.var.as_str();
2304        let label = node.labels.first().cloned().unwrap_or_default();
2305
2306        let label_id = match self.snapshot.catalog.get_label(&label)? {
2307            Some(id) => id as u32,
2308            None => return Ok(vec![]),
2309        };
2310        let hwm = self.snapshot.store.hwm_for_label(label_id)?;
2311        let col_ids: Vec<u32> = self
2312            .snapshot
2313            .store
2314            .col_ids_for_label(label_id)
2315            .unwrap_or_default();
2316
2317        let mut result: Vec<HashMap<String, Value>> = Vec::new();
2318        for slot in 0..hwm {
2319            let node_id = NodeId(((label_id as u64) << 32) | slot);
2320            if self.is_node_tombstoned(node_id) {
2321                continue;
2322            }
2323            let props = match self.snapshot.store.get_node_raw(node_id, &col_ids) {
2324                Ok(p) => p,
2325                Err(_) => continue,
2326            };
2327            if !self.matches_prop_filter(&props, &node.props) {
2328                continue;
2329            }
2330            let mut row_vals = build_row_vals(&props, var_name, &col_ids, &self.snapshot.store);
2331            // Always inject NodeRef for EXISTS and next-stage MATCH.
2332            row_vals.insert(var_name.to_string(), Value::NodeRef(node_id));
2333            row_vals.insert(format!("{var_name}.__node_id__"), Value::NodeRef(node_id));
2334
2335            if let Some(wexpr) = where_clause {
2336                let mut row_vals_p = row_vals.clone();
2337                row_vals_p.extend(self.dollar_params());
2338                if !self.eval_where_graph(wexpr, &row_vals_p) {
2339                    continue;
2340                }
2341            }
2342            result.push(row_vals);
2343        }
2344        Ok(result)
2345    }
2346
2347    /// Execute a MATCH stage within a pipeline, given a set of variable bindings
2348    /// from the preceding WITH stage.
2349    ///
2350    /// For each node pattern in `patterns`:
2351    /// - Scan the label.
2352    /// - Filter by inline prop filters, substituting any value that matches
2353    ///   a variable name from `binding` (e.g. `{name: pname}` where `pname`
2354    ///   is bound in the preceding WITH).
2355    fn execute_pipeline_match_stage(
2356        &self,
2357        patterns: &[PathPattern],
2358        where_clause: Option<&Expr>,
2359        binding: &HashMap<String, Value>,
2360    ) -> Result<Vec<HashMap<String, Value>>> {
2361        if patterns.is_empty() {
2362            return Ok(vec![binding.clone()]);
2363        }
2364
2365        let pat = &patterns[0];
2366
2367        // Check if this is a relationship hop pattern.
2368        if !pat.rels.is_empty() {
2369            // Relationship traversal in a pipeline MATCH stage.
2370            // Currently supports single-hop: (src)-[:REL]->(dst)
2371            return self.execute_pipeline_match_hop(pat, where_clause, binding);
2372        }
2373
2374        let node = &pat.nodes[0];
2375        let var_name = node.var.as_str();
2376        let label = node.labels.first().cloned().unwrap_or_default();
2377
2378        let label_id = match self.snapshot.catalog.get_label(&label)? {
2379            Some(id) => id as u32,
2380            None => return Ok(vec![]),
2381        };
2382        let hwm = self.snapshot.store.hwm_for_label(label_id)?;
2383        let col_ids: Vec<u32> = self
2384            .snapshot
2385            .store
2386            .col_ids_for_label(label_id)
2387            .unwrap_or_default();
2388
2389        let mut result: Vec<HashMap<String, Value>> = Vec::new();
2390        let params = self.dollar_params();
2391        for slot in 0..hwm {
2392            let node_id = NodeId(((label_id as u64) << 32) | slot);
2393            if self.is_node_tombstoned(node_id) {
2394                continue;
2395            }
2396            let props = match self.snapshot.store.get_node_raw(node_id, &col_ids) {
2397                Ok(p) => p,
2398                Err(_) => continue,
2399            };
2400
2401            // Evaluate inline prop filters, resolving variable references from binding.
2402            if !self.matches_prop_filter_with_binding(&props, &node.props, binding, &params) {
2403                continue;
2404            }
2405
2406            let mut row_vals = build_row_vals(&props, var_name, &col_ids, &self.snapshot.store);
2407            // Merge binding variables so upstream aliases remain in scope.
2408            row_vals.extend(binding.clone());
2409            row_vals.insert(var_name.to_string(), Value::NodeRef(node_id));
2410            row_vals.insert(format!("{var_name}.__node_id__"), Value::NodeRef(node_id));
2411
2412            if let Some(wexpr) = where_clause {
2413                let mut row_vals_p = row_vals.clone();
2414                row_vals_p.extend(params.clone());
2415                if !self.eval_where_graph(wexpr, &row_vals_p) {
2416                    continue;
2417                }
2418            }
2419            result.push(row_vals);
2420        }
2421        Ok(result)
2422    }
2423
2424    /// Execute a single-hop relationship traversal in a pipeline MATCH stage.
2425    ///
2426    /// Handles `(src:Label {props})-[:REL]->(dst:Label {props})` where `src` or `dst`
2427    /// variable names may already be bound in `binding`.
2428    fn execute_pipeline_match_hop(
2429        &self,
2430        pat: &sparrowdb_cypher::ast::PathPattern,
2431        where_clause: Option<&Expr>,
2432        binding: &HashMap<String, Value>,
2433    ) -> Result<Vec<HashMap<String, Value>>> {
2434        if pat.nodes.len() < 2 || pat.rels.is_empty() {
2435            return Ok(vec![]);
2436        }
2437        let src_pat = &pat.nodes[0];
2438        let dst_pat = &pat.nodes[1];
2439        let rel_pat = &pat.rels[0];
2440
2441        let src_label = src_pat.labels.first().cloned().unwrap_or_default();
2442        let dst_label = dst_pat.labels.first().cloned().unwrap_or_default();
2443
2444        let src_label_id = match self.snapshot.catalog.get_label(&src_label)? {
2445            Some(id) => id as u32,
2446            None => return Ok(vec![]),
2447        };
2448        let dst_label_id = match self.snapshot.catalog.get_label(&dst_label)? {
2449            Some(id) => id as u32,
2450            None => return Ok(vec![]),
2451        };
2452
2453        let src_col_ids: Vec<u32> = self
2454            .snapshot
2455            .store
2456            .col_ids_for_label(src_label_id)
2457            .unwrap_or_default();
2458        let dst_col_ids: Vec<u32> = self
2459            .snapshot
2460            .store
2461            .col_ids_for_label(dst_label_id)
2462            .unwrap_or_default();
2463        let params = self.dollar_params();
2464
2465        // Find candidate src nodes.
2466        let src_candidates: Vec<NodeId> = {
2467            // If the src var is already bound as a NodeRef, use that directly.
2468            let bound_src = binding
2469                .get(&src_pat.var)
2470                .or_else(|| binding.get(&format!("{}.__node_id__", src_pat.var)));
2471            if let Some(Value::NodeRef(nid)) = bound_src {
2472                vec![*nid]
2473            } else {
2474                let hwm = self.snapshot.store.hwm_for_label(src_label_id)?;
2475                let mut cands = Vec::new();
2476                for slot in 0..hwm {
2477                    let node_id = NodeId(((src_label_id as u64) << 32) | slot);
2478                    if self.is_node_tombstoned(node_id) {
2479                        continue;
2480                    }
2481                    if let Ok(props) = self.snapshot.store.get_node_raw(node_id, &src_col_ids) {
2482                        if self.matches_prop_filter_with_binding(
2483                            &props,
2484                            &src_pat.props,
2485                            binding,
2486                            &params,
2487                        ) {
2488                            cands.push(node_id);
2489                        }
2490                    }
2491                }
2492                cands
2493            }
2494        };
2495
2496        let rel_table_id = self.resolve_rel_table_id(src_label_id, dst_label_id, &rel_pat.rel_type);
2497
2498        let mut result: Vec<HashMap<String, Value>> = Vec::new();
2499        for src_id in src_candidates {
2500            let src_slot = src_id.0 & 0xFFFF_FFFF;
2501            let dst_slots: Vec<u64> = match &rel_table_id {
2502                RelTableLookup::Found(rtid) => self.csr_neighbors(*rtid, src_slot),
2503                RelTableLookup::NotFound => continue,
2504                RelTableLookup::All => self.csr_neighbors_all(src_slot),
2505            };
2506            // Also check the delta.
2507            let delta_slots: Vec<u64> = self
2508                .read_delta_all()
2509                .into_iter()
2510                .filter(|r| {
2511                    let r_src_label = (r.src.0 >> 32) as u32;
2512                    let r_src_slot = r.src.0 & 0xFFFF_FFFF;
2513                    r_src_label == src_label_id && r_src_slot == src_slot
2514                })
2515                .map(|r| r.dst.0 & 0xFFFF_FFFF)
2516                .collect();
2517            let all_slots: std::collections::HashSet<u64> =
2518                dst_slots.into_iter().chain(delta_slots).collect();
2519
2520            for dst_slot in all_slots {
2521                let dst_id = NodeId(((dst_label_id as u64) << 32) | dst_slot);
2522                if self.is_node_tombstoned(dst_id) {
2523                    continue;
2524                }
2525                if let Ok(dst_props) = self.snapshot.store.get_node_raw(dst_id, &dst_col_ids) {
2526                    if !self.matches_prop_filter_with_binding(
2527                        &dst_props,
2528                        &dst_pat.props,
2529                        binding,
2530                        &params,
2531                    ) {
2532                        continue;
2533                    }
2534                    let src_props = self
2535                        .snapshot
2536                        .store
2537                        .get_node_raw(src_id, &src_col_ids)
2538                        .unwrap_or_default();
2539                    let mut row_vals = build_row_vals(
2540                        &src_props,
2541                        &src_pat.var,
2542                        &src_col_ids,
2543                        &self.snapshot.store,
2544                    );
2545                    row_vals.extend(build_row_vals(
2546                        &dst_props,
2547                        &dst_pat.var,
2548                        &dst_col_ids,
2549                        &self.snapshot.store,
2550                    ));
2551                    // Merge upstream bindings.
2552                    row_vals.extend(binding.clone());
2553                    row_vals.insert(src_pat.var.clone(), Value::NodeRef(src_id));
2554                    row_vals.insert(
2555                        format!("{}.__node_id__", src_pat.var),
2556                        Value::NodeRef(src_id),
2557                    );
2558                    row_vals.insert(dst_pat.var.clone(), Value::NodeRef(dst_id));
2559                    row_vals.insert(
2560                        format!("{}.__node_id__", dst_pat.var),
2561                        Value::NodeRef(dst_id),
2562                    );
2563
2564                    if let Some(wexpr) = where_clause {
2565                        let mut row_vals_p = row_vals.clone();
2566                        row_vals_p.extend(params.clone());
2567                        if !self.eval_where_graph(wexpr, &row_vals_p) {
2568                            continue;
2569                        }
2570                    }
2571                    result.push(row_vals);
2572                }
2573            }
2574        }
2575        Ok(result)
2576    }
2577
2578    /// Filter a node's props against a set of PropEntry filters, resolving variable
2579    /// references from `binding` before comparing.
2580    ///
2581    /// For example, `{name: pname}` where `pname` is a variable in `binding` will
2582    /// look up `binding["pname"]` and use it as the expected value.
2583    fn matches_prop_filter_with_binding(
2584        &self,
2585        props: &[(u32, u64)],
2586        filters: &[sparrowdb_cypher::ast::PropEntry],
2587        binding: &HashMap<String, Value>,
2588        params: &HashMap<String, Value>,
2589    ) -> bool {
2590        for f in filters {
2591            let col_id = prop_name_to_col_id(&f.key);
2592            let stored_raw = props.iter().find(|(c, _)| *c == col_id).map(|(_, v)| *v);
2593
2594            // Evaluate the filter expression, first substituting from binding.
2595            let filter_val = match &f.value {
2596                sparrowdb_cypher::ast::Expr::Var(v) => {
2597                    // Variable reference — look up in binding.
2598                    binding.get(v).cloned().unwrap_or(Value::Null)
2599                }
2600                other => eval_expr(other, params),
2601            };
2602
2603            let stored_val = stored_raw.map(|raw| decode_raw_val(raw, &self.snapshot.store));
2604            let matches = match (stored_val, &filter_val) {
2605                (Some(Value::String(a)), Value::String(b)) => &a == b,
2606                (Some(Value::Int64(a)), Value::Int64(b)) => a == *b,
2607                (Some(Value::Bool(a)), Value::Bool(b)) => a == *b,
2608                (Some(Value::Float64(a)), Value::Float64(b)) => a == *b,
2609                (None, Value::Null) => true,
2610                _ => false,
2611            };
2612            if !matches {
2613                return false;
2614            }
2615        }
2616        true
2617    }
2618
2619    /// Scan a MATCH pattern and return one `HashMap<String, Value>` per matching row.
2620    ///
2621    /// Only simple single-node scans (no relationship hops) are supported for
2622    /// the WITH pipeline; complex patterns return `Err(Unimplemented)`.
2623    ///
2624    /// Keys in the returned map follow the `build_row_vals` convention:
2625    /// `"{var}.col_{col_id}"` → `Value::Int64(raw)`, plus any `"{var}.{prop}"` entries
2626    /// added for direct lookup in WITH expressions.
2627    fn collect_match_rows_for_with(
2628        &self,
2629        patterns: &[PathPattern],
2630        where_clause: Option<&Expr>,
2631        with_clause: &WithClause,
2632    ) -> Result<Vec<HashMap<String, Value>>> {
2633        if patterns.is_empty() || patterns[0].rels.is_empty() {
2634            let pat = &patterns[0];
2635            let node = &pat.nodes[0];
2636            let var_name = node.var.as_str();
2637            let label = node.labels.first().cloned().unwrap_or_default();
2638            let label_id = self
2639                .snapshot
2640                .catalog
2641                .get_label(&label)?
2642                .ok_or(sparrowdb_common::Error::NotFound)?;
2643            let label_id_u32 = label_id as u32;
2644            let hwm = self.snapshot.store.hwm_for_label(label_id_u32)?;
2645
2646            // Collect col_ids needed by WHERE + WITH projections + inline prop filters.
2647            let mut all_col_ids: Vec<u32> = Vec::new();
2648            if let Some(wexpr) = &where_clause {
2649                collect_col_ids_from_expr(wexpr, &mut all_col_ids);
2650            }
2651            for item in &with_clause.items {
2652                collect_col_ids_from_expr(&item.expr, &mut all_col_ids);
2653            }
2654            for p in &node.props {
2655                let col_id = prop_name_to_col_id(&p.key);
2656                if !all_col_ids.contains(&col_id) {
2657                    all_col_ids.push(col_id);
2658                }
2659            }
2660
2661            let mut result: Vec<HashMap<String, Value>> = Vec::new();
2662            for slot in 0..hwm {
2663                let node_id = NodeId(((label_id_u32 as u64) << 32) | slot);
2664                // SPA-216: use is_node_tombstoned() to avoid spurious NotFound
2665                // when tombstone_node() wrote col_0 only for the deleted slot.
2666                if self.is_node_tombstoned(node_id) {
2667                    continue;
2668                }
2669                let props = read_node_props(&self.snapshot.store, node_id, &all_col_ids)?;
2670                if !self.matches_prop_filter(&props, &node.props) {
2671                    continue;
2672                }
2673                let mut row_vals =
2674                    build_row_vals(&props, var_name, &all_col_ids, &self.snapshot.store);
2675                // SPA-134: inject NodeRef so eval_exists_subquery can resolve the
2676                // source node ID when EXISTS { } appears in MATCH WHERE or WITH WHERE.
2677                row_vals.insert(var_name.to_string(), Value::NodeRef(node_id));
2678                row_vals.insert(format!("{var_name}.__node_id__"), Value::NodeRef(node_id));
2679                if let Some(wexpr) = &where_clause {
2680                    let mut row_vals_p = row_vals.clone();
2681                    row_vals_p.extend(self.dollar_params());
2682                    if !self.eval_where_graph(wexpr, &row_vals_p) {
2683                        continue;
2684                    }
2685                }
2686                result.push(row_vals);
2687            }
2688            Ok(result)
2689        } else {
2690            Err(sparrowdb_common::Error::Unimplemented)
2691        }
2692    }
2693
2694    fn execute_match(&self, m: &MatchStatement) -> Result<QueryResult> {
2695        if m.pattern.is_empty() {
2696            // Standalone RETURN with no MATCH: evaluate each item as a scalar expression.
2697            let column_names = extract_return_column_names(&m.return_clause.items);
2698            let empty_vals: HashMap<String, Value> = HashMap::new();
2699            let row: Vec<Value> = m
2700                .return_clause
2701                .items
2702                .iter()
2703                .map(|item| eval_expr(&item.expr, &empty_vals))
2704                .collect();
2705            return Ok(QueryResult {
2706                columns: column_names,
2707                rows: vec![row],
2708            });
2709        }
2710
2711        // Determine if this is a 2-hop query.
2712        let is_two_hop = m.pattern.len() == 1 && m.pattern[0].rels.len() == 2;
2713        let is_one_hop = m.pattern.len() == 1 && m.pattern[0].rels.len() == 1;
2714        // N-hop (3+): generalised iterative traversal (SPA-252).
2715        let is_n_hop = m.pattern.len() == 1 && m.pattern[0].rels.len() >= 3;
2716        // Detect variable-length path: single pattern with exactly 1 rel that has min_hops set.
2717        let is_var_len = m.pattern.len() == 1
2718            && m.pattern[0].rels.len() == 1
2719            && m.pattern[0].rels[0].min_hops.is_some();
2720
2721        let column_names = extract_return_column_names(&m.return_clause.items);
2722
2723        // SPA-136: multi-node-pattern MATCH (e.g. MATCH (a), (b) RETURN shortestPath(...))
2724        // requires a cross-product join across all patterns.
2725        let is_multi_pattern = m.pattern.len() > 1 && m.pattern.iter().all(|p| p.rels.is_empty());
2726
2727        // ── Q7 degree-cache fast-path (SPA-272 wiring) ────────────────────────
2728        // Detect `MATCH (n:Label) RETURN … ORDER BY out_degree(n) DESC LIMIT k`
2729        // and short-circuit to `top_k_by_degree` — O(N log k) vs full edge scan.
2730        // Preconditions: single node pattern, no rels, no WHERE, DESC LIMIT set.
2731        if !is_var_len
2732            && !is_two_hop
2733            && !is_one_hop
2734            && !is_n_hop
2735            && !is_multi_pattern
2736            && m.pattern.len() == 1
2737            && m.pattern[0].rels.is_empty()
2738        {
2739            // ── Q6 COUNT label fast-path (SPA-197) ──────────────────────
2740            // MATCH (n:Label) RETURN COUNT(n) AS total  →  O(1) lookup
2741            if let Some(result) = self.try_count_label_fastpath(m, &column_names)? {
2742                return Ok(result);
2743            }
2744
2745            if let Some(result) = self.try_degree_sort_fastpath(m, &column_names)? {
2746                return Ok(result);
2747            }
2748        }
2749
2750        if is_var_len {
2751            self.execute_variable_length(m, &column_names)
2752        } else if is_two_hop {
2753            self.execute_two_hop(m, &column_names)
2754        } else if is_one_hop {
2755            self.execute_one_hop(m, &column_names)
2756        } else if is_n_hop {
2757            self.execute_n_hop(m, &column_names)
2758        } else if is_multi_pattern {
2759            self.execute_multi_pattern_scan(m, &column_names)
2760        } else if m.pattern[0].rels.is_empty() {
2761            self.execute_scan(m, &column_names)
2762        } else {
2763            // Multi-pattern or complex query — fallback to sequential execution.
2764            self.execute_scan(m, &column_names)
2765        }
2766    }
2767
2768    // ── Q6 COUNT label fast-path (SPA-197) ─────────────────────────────────────
2769    //
2770    // Detects `MATCH (n:Label) RETURN COUNT(n) AS alias` (or COUNT(*)) and
2771    // answers it from the pre-populated `label_row_counts` HashMap in O(1)
2772    // instead of scanning every node slot.
2773    //
2774    // Qualifying conditions:
2775    //   1. Exactly one label on the node pattern.
2776    //   2. No WHERE clause.
2777    //   3. No inline prop filters on the node pattern.
2778    //   4. RETURN has exactly one item: COUNT(*) or COUNT(var) where var
2779    //      matches the node pattern variable.
2780    //   5. No ORDER BY, SKIP, or LIMIT (single scalar result).
2781    fn try_count_label_fastpath(
2782        &self,
2783        m: &MatchStatement,
2784        column_names: &[String],
2785    ) -> Result<Option<QueryResult>> {
2786        let pat = &m.pattern[0];
2787        let node = &pat.nodes[0];
2788
2789        // Condition 1: exactly one label.
2790        let label = match &node.labels[..] {
2791            [l] => l.clone(),
2792            _ => return Ok(None),
2793        };
2794
2795        // Condition 2: no WHERE clause.
2796        if m.where_clause.is_some() {
2797            return Ok(None);
2798        }
2799
2800        // Condition 3: no inline prop filters.
2801        if !node.props.is_empty() {
2802            return Ok(None);
2803        }
2804
2805        // Condition 4: exactly one RETURN item that is COUNT(*) or COUNT(var).
2806        if m.return_clause.items.len() != 1 {
2807            return Ok(None);
2808        }
2809        let item = &m.return_clause.items[0];
2810        let is_count = match &item.expr {
2811            Expr::CountStar => true,
2812            Expr::FnCall { name, args } => {
2813                name == "count"
2814                    && args.len() == 1
2815                    && matches!(&args[0], Expr::Var(v) if v == &node.var)
2816            }
2817            _ => false,
2818        };
2819        if !is_count {
2820            return Ok(None);
2821        }
2822
2823        // Condition 5: no ORDER BY / SKIP / LIMIT.
2824        if !m.order_by.is_empty() || m.skip.is_some() || m.limit.is_some() {
2825            return Ok(None);
2826        }
2827
2828        // All conditions met — resolve label → count from the cached map.
2829        let count = match self.snapshot.catalog.get_label(&label)? {
2830            Some(id) => *self.snapshot.label_row_counts.get(&id).unwrap_or(&0),
2831            None => 0,
2832        };
2833
2834        tracing::debug!(label = %label, count = count, "Q6 COUNT label fastpath hit");
2835
2836        Ok(Some(QueryResult {
2837            columns: column_names.to_vec(),
2838            rows: vec![vec![Value::Int64(count as i64)]],
2839        }))
2840    }
2841
2842    // ── Q7 degree-cache fast-path (SPA-272 Cypher wiring) ─────────────────────
2843    //
2844    // Detects `MATCH (n:Label) RETURN … ORDER BY out_degree(n) DESC LIMIT k`
2845    // and answers it directly from the pre-computed DegreeCache without scanning
2846    // edges.  Returns `None` when the pattern does not qualify; the caller then
2847    // falls through to the normal execution path.
2848    //
2849    // Qualifying conditions:
2850    //   1. Exactly one label on the node pattern.
2851    //   2. No WHERE clause (no post-filter that would change cardinality).
2852    //   3. No inline prop filters on the node pattern.
2853    //   4. ORDER BY has exactly one key: `out_degree(n)` or `degree(n)` DESC.
2854    //   5. LIMIT is Some(k) with k > 0.
2855    //   6. The variable in the ORDER BY call matches the node pattern variable.
2856    fn try_degree_sort_fastpath(
2857        &self,
2858        m: &MatchStatement,
2859        column_names: &[String],
2860    ) -> Result<Option<QueryResult>> {
2861        use sparrowdb_cypher::ast::SortDir;
2862
2863        let pat = &m.pattern[0];
2864        let node = &pat.nodes[0];
2865
2866        // Condition 1: exactly one label.
2867        let label = match &node.labels[..] {
2868            [l] => l.clone(),
2869            _ => return Ok(None),
2870        };
2871
2872        // Condition 2: no WHERE clause.
2873        if m.where_clause.is_some() {
2874            return Ok(None);
2875        }
2876
2877        // Condition 3: no inline prop filters.
2878        if !node.props.is_empty() {
2879            return Ok(None);
2880        }
2881
2882        // Condition 4: ORDER BY has exactly one key that is out_degree(var) or degree(var) DESC.
2883        if m.order_by.len() != 1 {
2884            return Ok(None);
2885        }
2886        let (sort_expr, sort_dir) = &m.order_by[0];
2887        if *sort_dir != SortDir::Desc {
2888            return Ok(None);
2889        }
2890        let order_var = match sort_expr {
2891            Expr::FnCall { name, args } => {
2892                let name_lc = name.to_lowercase();
2893                if name_lc != "out_degree" && name_lc != "degree" {
2894                    return Ok(None);
2895                }
2896                match args.first() {
2897                    Some(Expr::Var(v)) => v.clone(),
2898                    _ => return Ok(None),
2899                }
2900            }
2901            _ => return Ok(None),
2902        };
2903
2904        // Condition 5: LIMIT must be set and > 0.
2905        let k = match m.limit {
2906            Some(k) if k > 0 => k as usize,
2907            _ => return Ok(None),
2908        };
2909
2910        // Condition 6: ORDER BY variable must match the node pattern variable.
2911        let node_var = node.var.as_str();
2912        if !order_var.is_empty() && !node_var.is_empty() && order_var != node_var {
2913            return Ok(None);
2914        }
2915
2916        // All conditions met — resolve label_id and call the cache.
2917        let label_id = match self.snapshot.catalog.get_label(&label)? {
2918            Some(id) => id as u32,
2919            None => {
2920                return Ok(Some(QueryResult {
2921                    columns: column_names.to_vec(),
2922                    rows: vec![],
2923                }))
2924            }
2925        };
2926
2927        tracing::debug!(
2928            label = %label,
2929            k = k,
2930            "SPA-272: degree-cache fast-path activated"
2931        );
2932
2933        let top_k = self.top_k_by_degree(label_id, k)?;
2934
2935        // Apply SKIP if present.
2936        let skip = m.skip.unwrap_or(0) as usize;
2937        let top_k = if skip >= top_k.len() {
2938            &[][..]
2939        } else {
2940            &top_k[skip..]
2941        };
2942
2943        // Build result rows.  For each (slot, degree) project the RETURN clause.
2944        let mut rows: Vec<Vec<Value>> = Vec::with_capacity(top_k.len());
2945        for &(slot, degree) in top_k {
2946            let node_id = NodeId(((label_id as u64) << 32) | slot);
2947
2948            // Skip tombstoned nodes (deleted nodes may still appear in cache).
2949            if self.is_node_tombstoned(node_id) {
2950                continue;
2951            }
2952
2953            // Fetch all properties we might need for RETURN projection.
2954            let all_col_ids: Vec<u32> = collect_col_ids_from_columns(column_names);
2955            let nullable_props = self
2956                .snapshot
2957                .store
2958                .get_node_raw_nullable(node_id, &all_col_ids)?;
2959            let props: Vec<(u32, u64)> = nullable_props
2960                .iter()
2961                .filter_map(|&(col_id, opt)| opt.map(|v| (col_id, v)))
2962                .collect();
2963
2964            // Project the RETURN columns.
2965            let row: Vec<Value> = column_names
2966                .iter()
2967                .map(|col_name| {
2968                    // Resolve out_degree(var) / degree(var) → degree value.
2969                    let degree_col_name_out = format!("out_degree({node_var})");
2970                    let degree_col_name_deg = format!("degree({node_var})");
2971                    if col_name == &degree_col_name_out
2972                        || col_name == &degree_col_name_deg
2973                        || col_name == "degree"
2974                        || col_name == "out_degree"
2975                    {
2976                        return Value::Int64(degree as i64);
2977                    }
2978                    // Resolve property accesses: "var.prop" or "prop".
2979                    let prop = col_name
2980                        .split_once('.')
2981                        .map(|(_, p)| p)
2982                        .unwrap_or(col_name.as_str());
2983                    let col_id = prop_name_to_col_id(prop);
2984                    props
2985                        .iter()
2986                        .find(|(c, _)| *c == col_id)
2987                        .map(|(_, v)| decode_raw_val(*v, &self.snapshot.store))
2988                        .unwrap_or(Value::Null)
2989                })
2990                .collect();
2991
2992            rows.push(row);
2993        }
2994
2995        Ok(Some(QueryResult {
2996            columns: column_names.to_vec(),
2997            rows,
2998        }))
2999    }
3000
3001    // ── COUNT(f) + ORDER BY alias DESC LIMIT k fast-path (SPA-272 / Q7) ────────
3002    //
3003    // Detects 1-hop aggregation queries of the shape:
3004    //
3005    //   MATCH (n:Label)-[:TYPE]->(f:Label2)
3006    //   RETURN n.prop, COUNT(f) AS alias
3007    //   ORDER BY alias DESC LIMIT k
3008    //
3009    // and answers them directly from the pre-computed DegreeCache without
3010    // scanning edges or grouping rows.  Returns `None` when the pattern does
3011    // not qualify; the caller falls through to the normal execution path.
3012    //
3013    // Qualifying conditions:
3014    //   1. Single 1-hop pattern with outgoing direction, no WHERE clause,
3015    //      no inline prop filters on either node.
3016    //   2. Exactly 2 RETURN items: one property access `n.prop` (group key)
3017    //      and one `COUNT(var)` where `var` matches the destination variable.
3018    //   3. ORDER BY is `Expr::Var(alias)` DESC where alias == COUNT's alias.
3019    //   4. LIMIT is Some(k) with k > 0.
3020    fn try_count_agg_degree_fastpath(
3021        &self,
3022        m: &MatchStatement,
3023        column_names: &[String],
3024    ) -> Result<Option<QueryResult>> {
3025        use sparrowdb_cypher::ast::EdgeDir;
3026
3027        let pat = &m.pattern[0];
3028        // Must be a 1-hop pattern.
3029        if pat.nodes.len() != 2 || pat.rels.len() != 1 {
3030            return Ok(None);
3031        }
3032        let src_node = &pat.nodes[0];
3033        let dst_node = &pat.nodes[1];
3034        let rel = &pat.rels[0];
3035
3036        // Outgoing direction only.
3037        if rel.dir != EdgeDir::Outgoing {
3038            return Ok(None);
3039        }
3040
3041        // No WHERE clause.
3042        if m.where_clause.is_some() {
3043            return Ok(None);
3044        }
3045
3046        // No inline prop filters on either node.
3047        if !src_node.props.is_empty() || !dst_node.props.is_empty() {
3048            return Ok(None);
3049        }
3050
3051        // Source must have a label.
3052        let src_label = match src_node.labels.first() {
3053            Some(l) if !l.is_empty() => l.clone(),
3054            _ => return Ok(None),
3055        };
3056
3057        // Exactly 2 RETURN items.
3058        let items = &m.return_clause.items;
3059        if items.len() != 2 {
3060            return Ok(None);
3061        }
3062
3063        // Identify which item is COUNT(dst_var) and which is the group key (n.prop).
3064        let dst_var = &dst_node.var;
3065        let src_var = &src_node.var;
3066
3067        let (prop_col_name, count_alias) = {
3068            let mut prop_col: Option<String> = None;
3069            let mut count_al: Option<String> = None;
3070
3071            for item in items {
3072                match &item.expr {
3073                    Expr::FnCall { name, args }
3074                        if name.to_lowercase() == "count" && args.len() == 1 =>
3075                    {
3076                        // COUNT(f) — arg must be the destination variable.
3077                        if let Some(Expr::Var(v)) = args.first() {
3078                            if v == dst_var {
3079                                count_al =
3080                                    item.alias.clone().or_else(|| Some(format!("COUNT({})", v)));
3081                            } else {
3082                                return Ok(None);
3083                            }
3084                        } else {
3085                            return Ok(None);
3086                        }
3087                    }
3088                    Expr::PropAccess { var, prop } => {
3089                        // n.prop — must reference the source variable.
3090                        if var == src_var {
3091                            prop_col = Some(prop.clone());
3092                        } else {
3093                            return Ok(None);
3094                        }
3095                    }
3096                    _ => return Ok(None),
3097                }
3098            }
3099
3100            match (prop_col, count_al) {
3101                (Some(pc), Some(ca)) => (pc, ca),
3102                _ => return Ok(None),
3103            }
3104        };
3105
3106        // ORDER BY must be a single Var matching the COUNT alias, DESC.
3107        if m.order_by.len() != 1 {
3108            return Ok(None);
3109        }
3110        let (sort_expr, sort_dir) = &m.order_by[0];
3111        if *sort_dir != SortDir::Desc {
3112            return Ok(None);
3113        }
3114        match sort_expr {
3115            Expr::Var(v) if *v == count_alias => {}
3116            _ => return Ok(None),
3117        }
3118
3119        // LIMIT must be set and > 0.
3120        let k = match m.limit {
3121            Some(k) if k > 0 => k as usize,
3122            _ => return Ok(None),
3123        };
3124
3125        // ── All conditions met — execute via DegreeCache. ──────────────────
3126
3127        let label_id = match self.snapshot.catalog.get_label(&src_label)? {
3128            Some(id) => id as u32,
3129            None => {
3130                return Ok(Some(QueryResult {
3131                    columns: column_names.to_vec(),
3132                    rows: vec![],
3133                }));
3134            }
3135        };
3136
3137        tracing::debug!(
3138            label = %src_label,
3139            k = k,
3140            count_alias = %count_alias,
3141            "SPA-272: COUNT-agg degree-cache fast-path activated (Q7 shape)"
3142        );
3143
3144        let top_k = self.top_k_by_degree(label_id, k)?;
3145
3146        // Apply SKIP if present.
3147        let skip = m.skip.unwrap_or(0) as usize;
3148        let top_k = if skip >= top_k.len() {
3149            &[][..]
3150        } else {
3151            &top_k[skip..]
3152        };
3153
3154        // Resolve the property column ID for the group key.
3155        let prop_col_id = prop_name_to_col_id(&prop_col_name);
3156
3157        // Build result rows. For each (slot, degree), look up n.prop and emit.
3158        // Skip degree-0 nodes: a 1-hop MATCH only produces rows for nodes with
3159        // at least one neighbor, so COUNT(f) is always >= 1 in the normal path.
3160        let mut rows: Vec<Vec<Value>> = Vec::with_capacity(top_k.len());
3161        for &(slot, degree) in top_k {
3162            if degree == 0 {
3163                continue;
3164            }
3165
3166            let node_id = NodeId(((label_id as u64) << 32) | slot);
3167
3168            // Skip tombstoned nodes.
3169            if self.is_node_tombstoned(node_id) {
3170                continue;
3171            }
3172
3173            // Fetch the property for the group key (nullable path so missing
3174            // columns return NULL instead of a NotFound error).
3175            let prop_raw = read_node_props(&self.snapshot.store, node_id, &[prop_col_id])?;
3176            let prop_val = prop_raw
3177                .iter()
3178                .find(|(c, _)| *c == prop_col_id)
3179                .map(|(_, v)| decode_raw_val(*v, &self.snapshot.store))
3180                .unwrap_or(Value::Null);
3181
3182            // Project in the same order as column_names.
3183            let row: Vec<Value> = column_names
3184                .iter()
3185                .map(|col| {
3186                    if col == &count_alias {
3187                        Value::Int64(degree as i64)
3188                    } else {
3189                        prop_val.clone()
3190                    }
3191                })
3192                .collect();
3193
3194            rows.push(row);
3195        }
3196
3197        Ok(Some(QueryResult {
3198            columns: column_names.to_vec(),
3199            rows,
3200        }))
3201    }
3202
3203    // ── OPTIONAL MATCH (standalone) ───────────────────────────────────────────
3204
3205    /// Execute `OPTIONAL MATCH pattern RETURN …`.
3206    ///
3207    /// Left-outer-join semantics: if the scan finds zero rows (label missing or
3208    /// no nodes), return exactly one row with NULL for every RETURN column.
3209    fn execute_optional_match(&self, om: &OptionalMatchStatement) -> Result<QueryResult> {
3210        use sparrowdb_common::Error;
3211
3212        // Re-use execute_match by constructing a temporary MatchStatement.
3213        let match_stmt = MatchStatement {
3214            pattern: om.pattern.clone(),
3215            where_clause: om.where_clause.clone(),
3216            return_clause: om.return_clause.clone(),
3217            order_by: om.order_by.clone(),
3218            skip: om.skip,
3219            limit: om.limit,
3220            distinct: om.distinct,
3221        };
3222
3223        let column_names = extract_return_column_names(&om.return_clause.items);
3224
3225        let result = self.execute_match(&match_stmt);
3226
3227        match result {
3228            Ok(qr) if !qr.rows.is_empty() => Ok(qr),
3229            // Empty result or label-not-found → one NULL row.
3230            Ok(_) | Err(Error::NotFound) | Err(Error::InvalidArgument(_)) => {
3231                let null_row = vec![Value::Null; column_names.len()];
3232                Ok(QueryResult {
3233                    columns: column_names,
3234                    rows: vec![null_row],
3235                })
3236            }
3237            Err(e) => Err(e),
3238        }
3239    }
3240
3241    // ── MATCH … OPTIONAL MATCH … RETURN ──────────────────────────────────────
3242
3243    /// Execute `MATCH (n) OPTIONAL MATCH (n)-[:R]->(m) RETURN …`.
3244    ///
3245    /// For each row produced by the leading MATCH, attempt to join against the
3246    /// OPTIONAL MATCH sub-pattern.  Rows with no join hits contribute one row
3247    /// with NULL values for the OPTIONAL MATCH variables.
3248    fn execute_match_optional_match(
3249        &self,
3250        mom: &MatchOptionalMatchStatement,
3251    ) -> Result<QueryResult> {
3252        let column_names = extract_return_column_names(&mom.return_clause.items);
3253
3254        // ── Step 1: scan the leading MATCH to get all left-side rows ─────────
3255        // Build a temporary MatchStatement for the leading MATCH.
3256        let lead_return_items: Vec<ReturnItem> = mom
3257            .return_clause
3258            .items
3259            .iter()
3260            .filter(|item| {
3261                // Include items whose var is defined by the leading MATCH patterns.
3262                let lead_vars: Vec<&str> = mom
3263                    .match_patterns
3264                    .iter()
3265                    .flat_map(|p| p.nodes.iter().map(|n| n.var.as_str()))
3266                    .collect();
3267                match &item.expr {
3268                    Expr::PropAccess { var, .. } => lead_vars.contains(&var.as_str()),
3269                    Expr::Var(v) => lead_vars.contains(&v.as_str()),
3270                    _ => false,
3271                }
3272            })
3273            .cloned()
3274            .collect();
3275
3276        // We need all column names from leading MATCH variables for the scan.
3277        // Collect all column names referenced by lead-side return items.
3278        let lead_col_names = extract_return_column_names(&lead_return_items);
3279
3280        // Check that the leading MATCH label exists.
3281        if mom.match_patterns.is_empty() || mom.match_patterns[0].nodes.is_empty() {
3282            let null_row = vec![Value::Null; column_names.len()];
3283            return Ok(QueryResult {
3284                columns: column_names,
3285                rows: vec![null_row],
3286            });
3287        }
3288        let lead_node_pat = &mom.match_patterns[0].nodes[0];
3289        let lead_label = lead_node_pat.labels.first().cloned().unwrap_or_default();
3290        let lead_label_id = match self.snapshot.catalog.get_label(&lead_label)? {
3291            Some(id) => id as u32,
3292            None => {
3293                // The leading MATCH is non-optional: unknown label → 0 rows (not null).
3294                return Ok(QueryResult {
3295                    columns: column_names,
3296                    rows: vec![],
3297                });
3298            }
3299        };
3300
3301        // Collect all col_ids needed for lead scan.
3302        let lead_all_col_ids: Vec<u32> = {
3303            let mut ids = collect_col_ids_from_columns(&lead_col_names);
3304            if let Some(ref wexpr) = mom.match_where {
3305                collect_col_ids_from_expr(wexpr, &mut ids);
3306            }
3307            for p in &lead_node_pat.props {
3308                let col_id = prop_name_to_col_id(&p.key);
3309                if !ids.contains(&col_id) {
3310                    ids.push(col_id);
3311                }
3312            }
3313            ids
3314        };
3315
3316        let lead_hwm = self.snapshot.store.hwm_for_label(lead_label_id)?;
3317        let lead_var = lead_node_pat.var.as_str();
3318
3319        // Collect lead rows as (slot, props) pairs.
3320        let mut lead_rows: Vec<(u64, Vec<(u32, u64)>)> = Vec::new();
3321        for slot in 0..lead_hwm {
3322            let node_id = NodeId(((lead_label_id as u64) << 32) | slot);
3323            // SPA-216: use is_node_tombstoned() to avoid spurious NotFound
3324            // when tombstone_node() wrote col_0 only for the deleted slot.
3325            if self.is_node_tombstoned(node_id) {
3326                continue;
3327            }
3328            let props = read_node_props(&self.snapshot.store, node_id, &lead_all_col_ids)?;
3329            if !self.matches_prop_filter(&props, &lead_node_pat.props) {
3330                continue;
3331            }
3332            if let Some(ref wexpr) = mom.match_where {
3333                let mut row_vals =
3334                    build_row_vals(&props, lead_var, &lead_all_col_ids, &self.snapshot.store);
3335                row_vals.extend(self.dollar_params());
3336                if !self.eval_where_graph(wexpr, &row_vals) {
3337                    continue;
3338                }
3339            }
3340            lead_rows.push((slot, props));
3341        }
3342
3343        // ── Step 2: for each lead row, run the optional sub-pattern ──────────
3344
3345        // Determine optional-side node variable and label.
3346        let opt_patterns = &mom.optional_patterns;
3347
3348        // Determine optional-side variables from return clause.
3349        let opt_vars: Vec<String> = opt_patterns
3350            .iter()
3351            .flat_map(|p| p.nodes.iter().map(|n| n.var.clone()))
3352            .filter(|v| !v.is_empty())
3353            .collect();
3354
3355        let mut result_rows: Vec<Vec<Value>> = Vec::new();
3356
3357        for (lead_slot, lead_props) in &lead_rows {
3358            let lead_row_vals = build_row_vals(
3359                lead_props,
3360                lead_var,
3361                &lead_all_col_ids,
3362                &self.snapshot.store,
3363            );
3364
3365            // Attempt the optional sub-pattern.
3366            // We only support the common case:
3367            //   (lead_var)-[:REL_TYPE]->(opt_var:Label)
3368            // where opt_patterns has exactly one path with one rel hop.
3369            let opt_sub_rows: Vec<HashMap<String, Value>> = if opt_patterns.len() == 1
3370                && opt_patterns[0].rels.len() == 1
3371                && opt_patterns[0].nodes.len() == 2
3372            {
3373                let opt_pat = &opt_patterns[0];
3374                let opt_src_pat = &opt_pat.nodes[0];
3375                let opt_dst_pat = &opt_pat.nodes[1];
3376                let opt_rel_pat = &opt_pat.rels[0];
3377
3378                // Destination label — if not found, treat as 0 (no matches).
3379                let opt_dst_label = opt_dst_pat.labels.first().cloned().unwrap_or_default();
3380                let opt_dst_label_id: Option<u32> =
3381                    match self.snapshot.catalog.get_label(&opt_dst_label) {
3382                        Ok(Some(id)) => Some(id as u32),
3383                        _ => None,
3384                    };
3385
3386                self.optional_one_hop_sub_rows(
3387                    *lead_slot,
3388                    lead_label_id,
3389                    opt_dst_label_id,
3390                    opt_src_pat,
3391                    opt_dst_pat,
3392                    opt_rel_pat,
3393                    &opt_vars,
3394                    &column_names,
3395                )
3396                .unwrap_or_default()
3397            } else {
3398                // Unsupported optional pattern → treat as no matches.
3399                vec![]
3400            };
3401
3402            if opt_sub_rows.is_empty() {
3403                // No matches: emit lead row with NULLs for optional vars.
3404                let row: Vec<Value> = mom
3405                    .return_clause
3406                    .items
3407                    .iter()
3408                    .map(|item| {
3409                        let v = eval_expr(&item.expr, &lead_row_vals);
3410                        if v == Value::Null {
3411                            // Check if it's a lead-side expr that returned null
3412                            // because we don't have the value, vs an opt-side expr.
3413                            match &item.expr {
3414                                Expr::PropAccess { var, .. } | Expr::Var(var) => {
3415                                    if opt_vars.contains(var) {
3416                                        Value::Null
3417                                    } else {
3418                                        eval_expr(&item.expr, &lead_row_vals)
3419                                    }
3420                                }
3421                                _ => eval_expr(&item.expr, &lead_row_vals),
3422                            }
3423                        } else {
3424                            v
3425                        }
3426                    })
3427                    .collect();
3428                result_rows.push(row);
3429            } else {
3430                // Matches: emit one row per match with both sides populated.
3431                for opt_row_vals in opt_sub_rows {
3432                    let mut combined = lead_row_vals.clone();
3433                    combined.extend(opt_row_vals);
3434                    let row: Vec<Value> = mom
3435                        .return_clause
3436                        .items
3437                        .iter()
3438                        .map(|item| eval_expr(&item.expr, &combined))
3439                        .collect();
3440                    result_rows.push(row);
3441                }
3442            }
3443        }
3444
3445        if mom.distinct {
3446            deduplicate_rows(&mut result_rows);
3447        }
3448        if let Some(skip) = mom.skip {
3449            let skip = (skip as usize).min(result_rows.len());
3450            result_rows.drain(0..skip);
3451        }
3452        if let Some(lim) = mom.limit {
3453            result_rows.truncate(lim as usize);
3454        }
3455
3456        Ok(QueryResult {
3457            columns: column_names,
3458            rows: result_rows,
3459        })
3460    }
3461
3462    /// Scan neighbors of `src_slot` via delta log + CSR for the optional 1-hop,
3463    /// returning one `HashMap<String,Value>` per matching destination node.
3464    #[allow(clippy::too_many_arguments)]
3465    fn optional_one_hop_sub_rows(
3466        &self,
3467        src_slot: u64,
3468        src_label_id: u32,
3469        dst_label_id: Option<u32>,
3470        _src_pat: &sparrowdb_cypher::ast::NodePattern,
3471        dst_node_pat: &sparrowdb_cypher::ast::NodePattern,
3472        rel_pat: &sparrowdb_cypher::ast::RelPattern,
3473        opt_vars: &[String],
3474        column_names: &[String],
3475    ) -> Result<Vec<HashMap<String, Value>>> {
3476        let dst_label_id = match dst_label_id {
3477            Some(id) => id,
3478            None => return Ok(vec![]),
3479        };
3480
3481        let dst_var = dst_node_pat.var.as_str();
3482        let col_ids_dst = collect_col_ids_for_var(dst_var, column_names, dst_label_id);
3483        let _ = opt_vars;
3484
3485        // SPA-185: resolve rel-type lookup once; use for both delta and CSR reads.
3486        let rel_lookup = self.resolve_rel_table_id(src_label_id, dst_label_id, &rel_pat.rel_type);
3487
3488        // If the rel type was specified but not registered, no edges can exist.
3489        if matches!(rel_lookup, RelTableLookup::NotFound) {
3490            return Ok(vec![]);
3491        }
3492
3493        let delta_neighbors: Vec<u64> = {
3494            let records: Vec<DeltaRecord> = match rel_lookup {
3495                RelTableLookup::Found(rtid) => self.read_delta_for(rtid),
3496                _ => self.read_delta_all(),
3497            };
3498            records
3499                .into_iter()
3500                .filter(|r| {
3501                    let r_src_label = (r.src.0 >> 32) as u32;
3502                    let r_src_slot = r.src.0 & 0xFFFF_FFFF;
3503                    r_src_label == src_label_id && r_src_slot == src_slot
3504                })
3505                .map(|r| r.dst.0 & 0xFFFF_FFFF)
3506                .collect()
3507        };
3508
3509        let csr_neighbors = match rel_lookup {
3510            RelTableLookup::Found(rtid) => self.csr_neighbors(rtid, src_slot),
3511            _ => self.csr_neighbors_all(src_slot),
3512        };
3513        let all_neighbors: Vec<u64> = csr_neighbors.into_iter().chain(delta_neighbors).collect();
3514
3515        let mut seen: HashSet<u64> = HashSet::new();
3516        let mut sub_rows: Vec<HashMap<String, Value>> = Vec::new();
3517
3518        for dst_slot in all_neighbors {
3519            if !seen.insert(dst_slot) {
3520                continue;
3521            }
3522            let dst_node = NodeId(((dst_label_id as u64) << 32) | dst_slot);
3523            let dst_props = read_node_props(&self.snapshot.store, dst_node, &col_ids_dst)?;
3524            if !self.matches_prop_filter(&dst_props, &dst_node_pat.props) {
3525                continue;
3526            }
3527            let row_vals = build_row_vals(&dst_props, dst_var, &col_ids_dst, &self.snapshot.store);
3528            sub_rows.push(row_vals);
3529        }
3530
3531        Ok(sub_rows)
3532    }
3533
3534    // ── Node-only scan (no relationships) ─────────────────────────────────────
3535
3536    /// Execute a multi-pattern node-only MATCH by cross-joining each pattern's candidates.
3537    ///
3538    /// `MATCH (a:Person {name:'Alice'}), (b:Person {name:'Bob'}) RETURN shortestPath(...)`
3539    /// produces one merged row per combination of matching nodes.  Each row contains both
3540    /// `"{var}" → Value::NodeRef(node_id)` (for `resolve_node_id_from_var`) and
3541    /// `"{var}.col_{hash}" → Value` entries (for property access via `eval_expr`).
3542    fn execute_multi_pattern_scan(
3543        &self,
3544        m: &MatchStatement,
3545        column_names: &[String],
3546    ) -> Result<QueryResult> {
3547        // Collect candidate NodeIds per variable across all patterns.
3548        let mut per_var: Vec<(String, u32, Vec<NodeId>)> = Vec::new(); // (var, label_id, candidates)
3549
3550        for pat in &m.pattern {
3551            if pat.nodes.is_empty() {
3552                continue;
3553            }
3554            let node = &pat.nodes[0];
3555            if node.var.is_empty() {
3556                continue;
3557            }
3558            let label = node.labels.first().cloned().unwrap_or_default();
3559            let label_id = match self.snapshot.catalog.get_label(&label)? {
3560                Some(id) => id as u32,
3561                None => return Ok(QueryResult::empty(column_names.to_vec())),
3562            };
3563            let filter_col_ids: Vec<u32> = node
3564                .props
3565                .iter()
3566                .map(|p| prop_name_to_col_id(&p.key))
3567                .collect();
3568            let params = self.dollar_params();
3569            let hwm = self.snapshot.store.hwm_for_label(label_id)?;
3570            let mut candidates: Vec<NodeId> = Vec::new();
3571            for slot in 0..hwm {
3572                let node_id = NodeId(((label_id as u64) << 32) | slot);
3573                if self.is_node_tombstoned(node_id) {
3574                    continue;
3575                }
3576                if filter_col_ids.is_empty() {
3577                    candidates.push(node_id);
3578                } else if let Ok(raw_props) =
3579                    self.snapshot.store.get_node_raw(node_id, &filter_col_ids)
3580                {
3581                    if matches_prop_filter_static(
3582                        &raw_props,
3583                        &node.props,
3584                        &params,
3585                        &self.snapshot.store,
3586                    ) {
3587                        candidates.push(node_id);
3588                    }
3589                }
3590            }
3591            if candidates.is_empty() {
3592                return Ok(QueryResult::empty(column_names.to_vec()));
3593            }
3594            per_var.push((node.var.clone(), label_id, candidates));
3595        }
3596
3597        // Cross-product all candidates into row_vals maps.
3598        let mut accumulated: Vec<HashMap<String, Value>> = vec![HashMap::new()];
3599        for (var, _label_id, candidates) in &per_var {
3600            let mut next: Vec<HashMap<String, Value>> = Vec::new();
3601            for base_row in &accumulated {
3602                for &node_id in candidates {
3603                    let mut row = base_row.clone();
3604                    // Bind var as NodeRef (needed by resolve_node_id_from_var for shortestPath).
3605                    row.insert(var.clone(), Value::NodeRef(node_id));
3606                    row.insert(format!("{var}.__node_id__"), Value::NodeRef(node_id));
3607                    // Also store properties under "var.col_N" keys for eval_expr PropAccess.
3608                    let label_id = (node_id.0 >> 32) as u32;
3609                    let label_col_ids = self
3610                        .snapshot
3611                        .store
3612                        .col_ids_for_label(label_id)
3613                        .unwrap_or_default();
3614                    let nullable = self
3615                        .snapshot
3616                        .store
3617                        .get_node_raw_nullable(node_id, &label_col_ids)
3618                        .unwrap_or_default();
3619                    for &(col_id, opt_raw) in &nullable {
3620                        if let Some(raw) = opt_raw {
3621                            row.insert(
3622                                format!("{var}.col_{col_id}"),
3623                                decode_raw_val(raw, &self.snapshot.store),
3624                            );
3625                        }
3626                    }
3627                    next.push(row);
3628                }
3629            }
3630            accumulated = next;
3631        }
3632
3633        // Apply WHERE clause.
3634        if let Some(ref where_expr) = m.where_clause {
3635            accumulated.retain(|row| self.eval_where_graph(where_expr, row));
3636        }
3637
3638        // Inject runtime params into each row before projection.
3639        let dollar_params = self.dollar_params();
3640        if !dollar_params.is_empty() {
3641            for row in &mut accumulated {
3642                row.extend(dollar_params.clone());
3643            }
3644        }
3645
3646        let mut rows = self.aggregate_rows_graph(&accumulated, &m.return_clause.items);
3647
3648        // ORDER BY / LIMIT / SKIP.
3649        apply_order_by(&mut rows, m, column_names);
3650        if let Some(skip) = m.skip {
3651            let skip = (skip as usize).min(rows.len());
3652            rows.drain(0..skip);
3653        }
3654        if let Some(limit) = m.limit {
3655            rows.truncate(limit as usize);
3656        }
3657
3658        Ok(QueryResult {
3659            columns: column_names.to_vec(),
3660            rows,
3661        })
3662    }
3663
3664    fn execute_scan(&self, m: &MatchStatement, column_names: &[String]) -> Result<QueryResult> {
3665        let pat = &m.pattern[0];
3666        let node = &pat.nodes[0];
3667
3668        // SPA-192/SPA-194: when no label is specified, scan ALL known labels and union
3669        // the results.  Delegate to the per-label helper for each label.
3670        if node.labels.is_empty() {
3671            return self.execute_scan_all_labels(m, column_names);
3672        }
3673
3674        let label = node.labels.first().cloned().unwrap_or_default();
3675        // SPA-245: unknown label → 0 rows (standard Cypher semantics, not an error).
3676        let label_id = match self.snapshot.catalog.get_label(&label)? {
3677            Some(id) => id as u32,
3678            None => {
3679                return Ok(QueryResult {
3680                    columns: column_names.to_vec(),
3681                    rows: vec![],
3682                })
3683            }
3684        };
3685        let label_id_u32 = label_id;
3686
3687        let hwm = self.snapshot.store.hwm_for_label(label_id_u32)?;
3688        tracing::debug!(label = %label, hwm = hwm, "node scan start");
3689
3690        // Collect all col_ids we need: RETURN columns + WHERE clause columns +
3691        // inline prop filter columns.
3692        let col_ids = collect_col_ids_from_columns(column_names);
3693        let mut all_col_ids: Vec<u32> = col_ids.clone();
3694        // Add col_ids referenced by the WHERE clause.
3695        if let Some(ref where_expr) = m.where_clause {
3696            collect_col_ids_from_expr(where_expr, &mut all_col_ids);
3697        }
3698        // Add col_ids for inline prop filters on the node pattern.
3699        for p in &node.props {
3700            let col_id = prop_name_to_col_id(&p.key);
3701            if !all_col_ids.contains(&col_id) {
3702                all_col_ids.push(col_id);
3703            }
3704        }
3705
3706        let use_agg = has_aggregate_in_return(&m.return_clause.items);
3707        // SPA-196: id(n) requires a NodeRef in the row map.  The fast
3708        // project_row path only stores individual property columns, so it
3709        // cannot evaluate id().  Force the eval path whenever id() appears in
3710        // any RETURN item, even when no aggregation is requested.
3711        // SPA-213: bare variable projection also requires the eval path.
3712        let use_eval_path = use_agg || needs_node_ref_in_return(&m.return_clause.items);
3713        if use_eval_path {
3714            // Aggregate / eval expressions reference properties not captured by
3715            // column_names (e.g. collect(p.name) -> column "collect(p.name)").
3716            // Extract col_ids from every RETURN expression so the scan reads
3717            // all necessary columns.
3718            for item in &m.return_clause.items {
3719                collect_col_ids_from_expr(&item.expr, &mut all_col_ids);
3720            }
3721        }
3722
3723        // SPA-213: bare node variable projection needs ALL stored columns for the label.
3724        // Collect them once before the scan loop so we can build a Value::Map per node.
3725        let bare_vars = bare_var_names_in_return(&m.return_clause.items);
3726        let all_label_col_ids: Vec<u32> = if !bare_vars.is_empty() {
3727            self.snapshot.store.col_ids_for_label(label_id_u32)?
3728        } else {
3729            vec![]
3730        };
3731
3732        let mut raw_rows: Vec<HashMap<String, Value>> = Vec::new();
3733        let mut rows: Vec<Vec<Value>> = Vec::new();
3734
3735        // SPA-249 (lazy build): ensure the property index is loaded for every
3736        // column referenced by inline prop filters before attempting a lookup.
3737        // Each build_for call is a cache-hit no-op after the first time.
3738        // We acquire and drop the mutable borrow before the immutable lookup below.
3739        for p in &node.props {
3740            let col_id = sparrowdb_common::col_id_of(&p.key);
3741            // Errors are suppressed inside build_for; index falls back to full scan.
3742            let _ =
3743                self.prop_index
3744                    .borrow_mut()
3745                    .build_for(&self.snapshot.store, label_id_u32, col_id);
3746        }
3747
3748        // SPA-273: selectivity threshold — if the index would return more than
3749        // 10% of all rows for this label, it's cheaper to do a full scan and
3750        // avoid the extra slot-set construction overhead.  We use `hwm` as the
3751        // denominator (high-water mark = total allocated slots, which is an
3752        // upper bound on live row count).  When hwm == 0 the threshold never
3753        // fires (no rows exist).
3754        let selectivity_threshold: u64 = if hwm > 0 { (hwm / 10).max(1) } else { u64::MAX };
3755
3756        // SPA-249: try to use the property equality index when there is exactly
3757        // one inline prop filter with an inline-encodable literal value.
3758        // Overflow strings (> 7 bytes) cannot be indexed, so they fall back to
3759        // full scan.  A WHERE clause is always applied per-slot afterward.
3760        //
3761        // SPA-273: discard candidates when they exceed the selectivity threshold
3762        // (index would scan >10% of rows — full scan is preferred).
3763        let index_candidate_slots: Option<Vec<u32>> = {
3764            let prop_index_ref = self.prop_index.borrow();
3765            let candidates = try_index_lookup_for_props(&node.props, label_id_u32, &prop_index_ref);
3766            match candidates {
3767                Some(ref slots) if slots.len() as u64 > selectivity_threshold => {
3768                    tracing::debug!(
3769                        label = %label,
3770                        candidates = slots.len(),
3771                        threshold = selectivity_threshold,
3772                        "SPA-273: index exceeds selectivity threshold — falling back to full scan"
3773                    );
3774                    None
3775                }
3776                other => other,
3777            }
3778        };
3779
3780        // SPA-249 Phase 1b: when the inline-prop index has no candidates, try to
3781        // use the property index for a WHERE-clause equality predicate
3782        // (`WHERE n.prop = literal`).  The WHERE clause is still re-evaluated
3783        // per slot for correctness.
3784        //
3785        // We pre-build the index for any single-equality WHERE prop so the lazy
3786        // cache is populated before the immutable borrow below.
3787        if index_candidate_slots.is_none() {
3788            if let Some(wexpr) = m.where_clause.as_ref() {
3789                for prop_name in where_clause_eq_prop_names(wexpr, node.var.as_str()) {
3790                    let col_id = sparrowdb_common::col_id_of(prop_name);
3791                    let _ = self.prop_index.borrow_mut().build_for(
3792                        &self.snapshot.store,
3793                        label_id_u32,
3794                        col_id,
3795                    );
3796                }
3797            }
3798        }
3799        // SPA-273: apply the same selectivity threshold to WHERE-clause equality
3800        // index candidates.
3801        let where_eq_candidate_slots: Option<Vec<u32>> = if index_candidate_slots.is_none() {
3802            let prop_index_ref = self.prop_index.borrow();
3803            let candidates = m.where_clause.as_ref().and_then(|wexpr| {
3804                try_where_eq_index_lookup(wexpr, node.var.as_str(), label_id_u32, &prop_index_ref)
3805            });
3806            match candidates {
3807                Some(ref slots) if slots.len() as u64 > selectivity_threshold => {
3808                    tracing::debug!(
3809                        label = %label,
3810                        candidates = slots.len(),
3811                        threshold = selectivity_threshold,
3812                        "SPA-273: WHERE-eq index exceeds selectivity threshold — falling back to full scan"
3813                    );
3814                    None
3815                }
3816                other => other,
3817            }
3818        } else {
3819            None
3820        };
3821
3822        // SPA-249 Phase 2: when neither equality path fired, try to use the
3823        // property index for a WHERE-clause range predicate (`>`, `>=`, `<`, `<=`,
3824        // or a compound AND of two half-open bounds on the same property).
3825        //
3826        // Pre-build for any range-predicate WHERE props before the immutable borrow.
3827        if index_candidate_slots.is_none() && where_eq_candidate_slots.is_none() {
3828            if let Some(wexpr) = m.where_clause.as_ref() {
3829                for prop_name in where_clause_range_prop_names(wexpr, node.var.as_str()) {
3830                    let col_id = sparrowdb_common::col_id_of(prop_name);
3831                    let _ = self.prop_index.borrow_mut().build_for(
3832                        &self.snapshot.store,
3833                        label_id_u32,
3834                        col_id,
3835                    );
3836                }
3837            }
3838        }
3839        let where_range_candidate_slots: Option<Vec<u32>> =
3840            if index_candidate_slots.is_none() && where_eq_candidate_slots.is_none() {
3841                let prop_index_ref = self.prop_index.borrow();
3842                m.where_clause.as_ref().and_then(|wexpr| {
3843                    try_where_range_index_lookup(
3844                        wexpr,
3845                        node.var.as_str(),
3846                        label_id_u32,
3847                        &prop_index_ref,
3848                    )
3849                })
3850            } else {
3851                None
3852            };
3853
3854        // SPA-251 / SPA-274 (lazy text index): when the equality index has no
3855        // candidates (None), check whether the WHERE clause is a simple CONTAINS
3856        // or STARTS WITH predicate on a labeled node property, and use the text
3857        // index to narrow the slot set.  The WHERE clause is always re-evaluated
3858        // per slot afterward for correctness (tombstone filtering, compound
3859        // predicates, etc.).
3860        //
3861        // Pre-warm the text index for any text-predicate columns before the
3862        // immutable borrow below, mirroring the PropertyIndex lazy pattern.
3863        // Queries with no text predicates never call build_for and pay zero I/O.
3864        if index_candidate_slots.is_none()
3865            && where_eq_candidate_slots.is_none()
3866            && where_range_candidate_slots.is_none()
3867        {
3868            if let Some(wexpr) = m.where_clause.as_ref() {
3869                for prop_name in where_clause_text_prop_names(wexpr, node.var.as_str()) {
3870                    let col_id = sparrowdb_common::col_id_of(prop_name);
3871                    self.text_index.borrow_mut().build_for(
3872                        &self.snapshot.store,
3873                        label_id_u32,
3874                        col_id,
3875                    );
3876                }
3877            }
3878        }
3879        let text_candidate_slots: Option<Vec<u32>> = if index_candidate_slots.is_none()
3880            && where_eq_candidate_slots.is_none()
3881            && where_range_candidate_slots.is_none()
3882        {
3883            m.where_clause.as_ref().and_then(|wexpr| {
3884                let text_index_ref = self.text_index.borrow();
3885                try_text_index_lookup(wexpr, node.var.as_str(), label_id_u32, &text_index_ref)
3886            })
3887        } else {
3888            None
3889        };
3890
3891        // Build an iterator over candidate slot values.  When the equality index
3892        // or text index narrows the set, iterate only those slots; otherwise
3893        // iterate 0..hwm.
3894        let slot_iter: Box<dyn Iterator<Item = u64>> =
3895            if let Some(ref slots) = index_candidate_slots {
3896                tracing::debug!(
3897                    label = %label,
3898                    candidates = slots.len(),
3899                    "SPA-249: property index fast path"
3900                );
3901                Box::new(slots.iter().map(|&s| s as u64))
3902            } else if let Some(ref slots) = where_eq_candidate_slots {
3903                tracing::debug!(
3904                    label = %label,
3905                    candidates = slots.len(),
3906                    "SPA-249 Phase 1b: WHERE equality index fast path"
3907                );
3908                Box::new(slots.iter().map(|&s| s as u64))
3909            } else if let Some(ref slots) = where_range_candidate_slots {
3910                tracing::debug!(
3911                    label = %label,
3912                    candidates = slots.len(),
3913                    "SPA-249 Phase 2: WHERE range index fast path"
3914                );
3915                Box::new(slots.iter().map(|&s| s as u64))
3916            } else if let Some(ref slots) = text_candidate_slots {
3917                tracing::debug!(
3918                    label = %label,
3919                    candidates = slots.len(),
3920                    "SPA-251: text index fast path"
3921                );
3922                Box::new(slots.iter().map(|&s| s as u64))
3923            } else {
3924                Box::new(0..hwm)
3925            };
3926
3927        // SPA-198: LIMIT pushdown — compute an early-exit cap so we can break
3928        // out of the scan loop once we have enough rows.  This is only safe
3929        // when there is no aggregation, no ORDER BY, and no DISTINCT (all of
3930        // which require the full result set before they can operate).
3931        let scan_cap: usize = if !use_eval_path && !m.distinct && m.order_by.is_empty() {
3932            match (m.skip, m.limit) {
3933                (Some(s), Some(l)) => (s as usize).saturating_add(l as usize),
3934                (None, Some(l)) => l as usize,
3935                _ => usize::MAX,
3936            }
3937        } else {
3938            usize::MAX
3939        };
3940
3941        for slot in slot_iter {
3942            // SPA-254: check per-query deadline at every slot boundary.
3943            self.check_deadline()?;
3944
3945            let node_id = NodeId(((label_id_u32 as u64) << 32) | slot);
3946            if slot < 1024 || slot % 10_000 == 0 {
3947                tracing::trace!(slot = slot, node_id = node_id.0, "scan emit");
3948            }
3949
3950            // SPA-164/SPA-216: skip tombstoned nodes.  delete_node writes
3951            // u64::MAX into col_0 as the deletion sentinel; nodes in that state
3952            // must not appear in scan results.  Use is_node_tombstoned() rather
3953            // than a raw `get_node_raw(...)?` so that a short col_0 file (e.g.
3954            // when tombstone_node only wrote the deleted slot and did not
3955            // zero-pad up to the HWM) does not propagate a spurious NotFound
3956            // error for un-deleted nodes whose slots are beyond the file end.
3957            if self.is_node_tombstoned(node_id) {
3958                continue;
3959            }
3960
3961            // Use nullable reads so that absent columns (property never written
3962            // for this node) are omitted from the row map rather than surfacing
3963            // as Err(NotFound).  Absent columns will evaluate to Value::Null in
3964            // eval_expr, enabling correct IS NULL / IS NOT NULL semantics.
3965            let nullable_props = self
3966                .snapshot
3967                .store
3968                .get_node_raw_nullable(node_id, &all_col_ids)?;
3969            let props: Vec<(u32, u64)> = nullable_props
3970                .iter()
3971                .filter_map(|&(col_id, opt)| opt.map(|v| (col_id, v)))
3972                .collect();
3973
3974            // Apply inline prop filter from the pattern.
3975            if !self.matches_prop_filter(&props, &node.props) {
3976                continue;
3977            }
3978
3979            // Apply WHERE clause.
3980            let var_name = node.var.as_str();
3981            if let Some(ref where_expr) = m.where_clause {
3982                let mut row_vals =
3983                    build_row_vals(&props, var_name, &all_col_ids, &self.snapshot.store);
3984                // Inject label metadata so labels(n) works in WHERE.
3985                if !var_name.is_empty() && !label.is_empty() {
3986                    row_vals.insert(
3987                        format!("{}.__labels__", var_name),
3988                        Value::List(vec![Value::String(label.clone())]),
3989                    );
3990                }
3991                // SPA-196: inject NodeRef so id(n) works in WHERE clauses.
3992                if !var_name.is_empty() {
3993                    row_vals.insert(var_name.to_string(), Value::NodeRef(node_id));
3994                }
3995                // Inject runtime params so $param references in WHERE work.
3996                row_vals.extend(self.dollar_params());
3997                if !self.eval_where_graph(where_expr, &row_vals) {
3998                    continue;
3999                }
4000            }
4001
4002            if use_eval_path {
4003                // Build eval_expr-compatible map for aggregation / id() path.
4004                let mut row_vals =
4005                    build_row_vals(&props, var_name, &all_col_ids, &self.snapshot.store);
4006                // Inject label metadata for aggregation.
4007                if !var_name.is_empty() && !label.is_empty() {
4008                    row_vals.insert(
4009                        format!("{}.__labels__", var_name),
4010                        Value::List(vec![Value::String(label.clone())]),
4011                    );
4012                }
4013                if !var_name.is_empty() {
4014                    // SPA-213: when this variable is returned bare, read all properties
4015                    // for the node and expose them as a Value::Map under the var key.
4016                    // Also keep NodeRef under __node_id__ so id(n) continues to work.
4017                    if bare_vars.contains(&var_name.to_string()) && !all_label_col_ids.is_empty() {
4018                        let all_nullable = self
4019                            .snapshot
4020                            .store
4021                            .get_node_raw_nullable(node_id, &all_label_col_ids)?;
4022                        let all_props: Vec<(u32, u64)> = all_nullable
4023                            .iter()
4024                            .filter_map(|&(col_id, opt)| opt.map(|v| (col_id, v)))
4025                            .collect();
4026                        row_vals.insert(
4027                            var_name.to_string(),
4028                            build_node_map(&all_props, &self.snapshot.store),
4029                        );
4030                    } else {
4031                        row_vals.insert(var_name.to_string(), Value::NodeRef(node_id));
4032                    }
4033                    // Always store NodeRef under __node_id__ so id(n) works even when
4034                    // the var itself is a Map (SPA-213).
4035                    row_vals.insert(format!("{}.__node_id__", var_name), Value::NodeRef(node_id));
4036                }
4037                raw_rows.push(row_vals);
4038            } else {
4039                // Project RETURN columns directly (fast path).
4040                let row = project_row(
4041                    &props,
4042                    column_names,
4043                    &all_col_ids,
4044                    var_name,
4045                    &label,
4046                    &self.snapshot.store,
4047                );
4048                rows.push(row);
4049                // SPA-198: early exit when we have enough rows for SKIP+LIMIT.
4050                if rows.len() >= scan_cap {
4051                    break;
4052                }
4053            }
4054        }
4055
4056        if use_eval_path {
4057            rows = self.aggregate_rows_graph(&raw_rows, &m.return_clause.items);
4058        } else {
4059            if m.distinct {
4060                deduplicate_rows(&mut rows);
4061            }
4062
4063            // ORDER BY
4064            apply_order_by(&mut rows, m, column_names);
4065
4066            // SKIP
4067            if let Some(skip) = m.skip {
4068                let skip = (skip as usize).min(rows.len());
4069                rows.drain(0..skip);
4070            }
4071
4072            // LIMIT
4073            if let Some(lim) = m.limit {
4074                rows.truncate(lim as usize);
4075            }
4076        }
4077
4078        tracing::debug!(rows = rows.len(), "node scan complete");
4079        Ok(QueryResult {
4080            columns: column_names.to_vec(),
4081            rows,
4082        })
4083    }
4084
4085    // ── Label-less full scan: MATCH (n) RETURN … — SPA-192/SPA-194 ─────────
4086    //
4087    // When the node pattern carries no label filter we must scan every label
4088    // that is registered in the catalog and union the results.  Aggregation,
4089    // ORDER BY and LIMIT are applied once after the union so that e.g.
4090    // `count(n)` counts all nodes and `LIMIT k` returns exactly k rows across
4091    // all labels rather than k rows per label.
4092
4093    fn execute_scan_all_labels(
4094        &self,
4095        m: &MatchStatement,
4096        column_names: &[String],
4097    ) -> Result<QueryResult> {
4098        let all_labels = self.snapshot.catalog.list_labels()?;
4099        tracing::debug!(label_count = all_labels.len(), "label-less full scan start");
4100
4101        let pat = &m.pattern[0];
4102        let node = &pat.nodes[0];
4103        let var_name = node.var.as_str();
4104
4105        // Collect col_ids needed across all labels (same set for every label).
4106        let mut all_col_ids: Vec<u32> = collect_col_ids_from_columns(column_names);
4107        if let Some(ref where_expr) = m.where_clause {
4108            collect_col_ids_from_expr(where_expr, &mut all_col_ids);
4109        }
4110        for p in &node.props {
4111            let col_id = prop_name_to_col_id(&p.key);
4112            if !all_col_ids.contains(&col_id) {
4113                all_col_ids.push(col_id);
4114            }
4115        }
4116
4117        let use_agg = has_aggregate_in_return(&m.return_clause.items);
4118        // SPA-213: bare variable also needs the eval path in label-less scan.
4119        let use_eval_path_all = use_agg || needs_node_ref_in_return(&m.return_clause.items);
4120        if use_eval_path_all {
4121            for item in &m.return_clause.items {
4122                collect_col_ids_from_expr(&item.expr, &mut all_col_ids);
4123            }
4124        }
4125
4126        // SPA-213: detect bare var names for property-map projection.
4127        let bare_vars_all = bare_var_names_in_return(&m.return_clause.items);
4128
4129        let mut raw_rows: Vec<HashMap<String, Value>> = Vec::new();
4130        let mut rows: Vec<Vec<Value>> = Vec::new();
4131
4132        for (label_id, label_name) in &all_labels {
4133            let label_id_u32 = *label_id as u32;
4134            let hwm = self.snapshot.store.hwm_for_label(label_id_u32)?;
4135            tracing::debug!(label = %label_name, hwm = hwm, "label-less scan: label slot");
4136
4137            // SPA-213: read all col_ids for this label once per label.
4138            let all_label_col_ids_here: Vec<u32> = if !bare_vars_all.is_empty() {
4139                self.snapshot.store.col_ids_for_label(label_id_u32)?
4140            } else {
4141                vec![]
4142            };
4143
4144            for slot in 0..hwm {
4145                // SPA-254: check per-query deadline at every slot boundary.
4146                self.check_deadline()?;
4147
4148                let node_id = NodeId(((label_id_u32 as u64) << 32) | slot);
4149
4150                // Skip tombstoned nodes (SPA-164/SPA-216): use
4151                // is_node_tombstoned() to avoid spurious NotFound when
4152                // tombstone_node() wrote col_0 only for the deleted slot.
4153                if self.is_node_tombstoned(node_id) {
4154                    continue;
4155                }
4156
4157                let nullable_props = self
4158                    .snapshot
4159                    .store
4160                    .get_node_raw_nullable(node_id, &all_col_ids)?;
4161                let props: Vec<(u32, u64)> = nullable_props
4162                    .iter()
4163                    .filter_map(|&(col_id, opt)| opt.map(|v| (col_id, v)))
4164                    .collect();
4165
4166                // Apply inline prop filter.
4167                if !self.matches_prop_filter(&props, &node.props) {
4168                    continue;
4169                }
4170
4171                // Apply WHERE clause.
4172                if let Some(ref where_expr) = m.where_clause {
4173                    let mut row_vals =
4174                        build_row_vals(&props, var_name, &all_col_ids, &self.snapshot.store);
4175                    if !var_name.is_empty() {
4176                        row_vals.insert(
4177                            format!("{}.__labels__", var_name),
4178                            Value::List(vec![Value::String(label_name.clone())]),
4179                        );
4180                        row_vals.insert(var_name.to_string(), Value::NodeRef(node_id));
4181                    }
4182                    row_vals.extend(self.dollar_params());
4183                    if !self.eval_where_graph(where_expr, &row_vals) {
4184                        continue;
4185                    }
4186                }
4187
4188                if use_eval_path_all {
4189                    let mut row_vals =
4190                        build_row_vals(&props, var_name, &all_col_ids, &self.snapshot.store);
4191                    if !var_name.is_empty() {
4192                        row_vals.insert(
4193                            format!("{}.__labels__", var_name),
4194                            Value::List(vec![Value::String(label_name.clone())]),
4195                        );
4196                        // SPA-213: bare variable → Value::Map; otherwise NodeRef.
4197                        if bare_vars_all.contains(&var_name.to_string())
4198                            && !all_label_col_ids_here.is_empty()
4199                        {
4200                            let all_nullable = self
4201                                .snapshot
4202                                .store
4203                                .get_node_raw_nullable(node_id, &all_label_col_ids_here)?;
4204                            let all_props: Vec<(u32, u64)> = all_nullable
4205                                .iter()
4206                                .filter_map(|&(col_id, opt)| opt.map(|v| (col_id, v)))
4207                                .collect();
4208                            row_vals.insert(
4209                                var_name.to_string(),
4210                                build_node_map(&all_props, &self.snapshot.store),
4211                            );
4212                        } else {
4213                            row_vals.insert(var_name.to_string(), Value::NodeRef(node_id));
4214                        }
4215                        row_vals
4216                            .insert(format!("{}.__node_id__", var_name), Value::NodeRef(node_id));
4217                    }
4218                    raw_rows.push(row_vals);
4219                } else {
4220                    let row = project_row(
4221                        &props,
4222                        column_names,
4223                        &all_col_ids,
4224                        var_name,
4225                        label_name,
4226                        &self.snapshot.store,
4227                    );
4228                    rows.push(row);
4229                }
4230            }
4231        }
4232
4233        if use_eval_path_all {
4234            rows = self.aggregate_rows_graph(&raw_rows, &m.return_clause.items);
4235        }
4236
4237        // DISTINCT / ORDER BY / SKIP / LIMIT apply regardless of which path
4238        // built the rows (eval or fast path).
4239        if m.distinct {
4240            deduplicate_rows(&mut rows);
4241        }
4242        apply_order_by(&mut rows, m, column_names);
4243        if let Some(skip) = m.skip {
4244            let skip = (skip as usize).min(rows.len());
4245            rows.drain(0..skip);
4246        }
4247        if let Some(lim) = m.limit {
4248            rows.truncate(lim as usize);
4249        }
4250
4251        tracing::debug!(rows = rows.len(), "label-less full scan complete");
4252        Ok(QueryResult {
4253            columns: column_names.to_vec(),
4254            rows,
4255        })
4256    }
4257
4258    // ── 1-hop traversal: (a)-[:R]->(f) ───────────────────────────────────────
4259
4260    fn execute_one_hop(&self, m: &MatchStatement, column_names: &[String]) -> Result<QueryResult> {
4261        // ── Q7 COUNT-agg degree-cache fast-path (SPA-272) ─────────────────────
4262        // Try to short-circuit `MATCH (n)-[:R]->(f) RETURN n.prop, COUNT(f) AS
4263        // alias ORDER BY alias DESC LIMIT k` via DegreeCache before falling
4264        // through to the full scan + aggregate path.
4265        if let Some(result) = self.try_count_agg_degree_fastpath(m, column_names)? {
4266            return Ok(result);
4267        }
4268
4269        let pat = &m.pattern[0];
4270        let src_node_pat = &pat.nodes[0];
4271        let dst_node_pat = &pat.nodes[1];
4272        let rel_pat = &pat.rels[0];
4273
4274        let dir = &rel_pat.dir;
4275        // Incoming-only: swap the logical src/dst and recurse as Outgoing by
4276        // swapping pattern roles.  We handle it by falling through with the
4277        // node patterns in swapped order below.
4278        // Both (undirected): handled by running forward + backward passes.
4279        // Unknown directions remain unimplemented.
4280        use sparrowdb_cypher::ast::EdgeDir;
4281
4282        let src_label = src_node_pat.labels.first().cloned().unwrap_or_default();
4283        let dst_label = dst_node_pat.labels.first().cloned().unwrap_or_default();
4284        // Resolve src/dst label IDs.  Either may be absent (unlabeled pattern node).
4285        let src_label_id_opt: Option<u32> = if src_label.is_empty() {
4286            None
4287        } else {
4288            self.snapshot
4289                .catalog
4290                .get_label(&src_label)?
4291                .map(|id| id as u32)
4292        };
4293        let dst_label_id_opt: Option<u32> = if dst_label.is_empty() {
4294            None
4295        } else {
4296            self.snapshot
4297                .catalog
4298                .get_label(&dst_label)?
4299                .map(|id| id as u32)
4300        };
4301
4302        // Build the list of rel tables to scan.
4303        //
4304        // Each entry is (catalog_rel_table_id, effective_src_label_id,
4305        // effective_dst_label_id, rel_type_name).
4306        //
4307        // * If the pattern specifies a rel type, filter to matching tables only.
4308        // * If src/dst labels are given, filter to matching label IDs.
4309        // * Otherwise include all registered rel tables.
4310        //
4311        // SPA-195: this also fixes the previous hardcoded RelTableId(0) bug —
4312        // every rel table now reads from its own correctly-named delta log file.
4313        let all_rel_tables = self.snapshot.catalog.list_rel_tables_with_ids();
4314        let rel_tables_to_scan: Vec<(u64, u32, u32, String)> = all_rel_tables
4315            .into_iter()
4316            .filter(|(_, sid, did, rt)| {
4317                let type_ok = rel_pat.rel_type.is_empty() || rt == &rel_pat.rel_type;
4318                let src_ok = src_label_id_opt.map(|id| id == *sid as u32).unwrap_or(true);
4319                let dst_ok = dst_label_id_opt.map(|id| id == *did as u32).unwrap_or(true);
4320                type_ok && src_ok && dst_ok
4321            })
4322            .map(|(catalog_id, sid, did, rt)| (catalog_id, sid as u32, did as u32, rt))
4323            .collect();
4324
4325        let use_agg = has_aggregate_in_return(&m.return_clause.items);
4326        let mut raw_rows: Vec<HashMap<String, Value>> = Vec::new();
4327        let mut rows: Vec<Vec<Value>> = Vec::new();
4328        // For undirected (Both), track seen (src_slot, dst_slot) pairs from the
4329        // forward pass so we don't re-emit them in the backward pass.
4330        let mut seen_undirected: HashSet<(u64, u64)> = HashSet::new();
4331
4332        // Pre-compute label name lookup for unlabeled patterns.
4333        let label_id_to_name: Vec<(u16, String)> = if src_label.is_empty() || dst_label.is_empty() {
4334            self.snapshot.catalog.list_labels().unwrap_or_default()
4335        } else {
4336            vec![]
4337        };
4338
4339        // Iterate each qualifying rel table.
4340        for (catalog_rel_id, tbl_src_label_id, tbl_dst_label_id, tbl_rel_type) in
4341            &rel_tables_to_scan
4342        {
4343            let storage_rel_id = RelTableId(*catalog_rel_id as u32);
4344            let effective_src_label_id = *tbl_src_label_id;
4345            let effective_dst_label_id = *tbl_dst_label_id;
4346
4347            // SPA-195: the rel type name for this edge comes from the catalog
4348            // entry, not from rel_pat.rel_type (which may be empty for [r]).
4349            let effective_rel_type: &str = tbl_rel_type.as_str();
4350
4351            // Compute the effective src/dst label names for metadata injection.
4352            let effective_src_label: &str = if src_label.is_empty() {
4353                label_id_to_name
4354                    .iter()
4355                    .find(|(id, _)| *id as u32 == effective_src_label_id)
4356                    .map(|(_, name)| name.as_str())
4357                    .unwrap_or("")
4358            } else {
4359                src_label.as_str()
4360            };
4361            let effective_dst_label: &str = if dst_label.is_empty() {
4362                label_id_to_name
4363                    .iter()
4364                    .find(|(id, _)| *id as u32 == effective_dst_label_id)
4365                    .map(|(_, name)| name.as_str())
4366                    .unwrap_or("")
4367            } else {
4368                dst_label.as_str()
4369            };
4370
4371            let hwm_src = match self.snapshot.store.hwm_for_label(effective_src_label_id) {
4372                Ok(h) => h,
4373                Err(_) => continue,
4374            };
4375            tracing::debug!(
4376                src_label = %effective_src_label,
4377                dst_label = %effective_dst_label,
4378                rel_type = %effective_rel_type,
4379                hwm_src = hwm_src,
4380                "one-hop traversal start"
4381            );
4382
4383            let mut col_ids_src =
4384                collect_col_ids_for_var(&src_node_pat.var, column_names, effective_src_label_id);
4385            let mut col_ids_dst =
4386                collect_col_ids_for_var(&dst_node_pat.var, column_names, effective_dst_label_id);
4387            if use_agg {
4388                for item in &m.return_clause.items {
4389                    collect_col_ids_from_expr(&item.expr, &mut col_ids_src);
4390                    collect_col_ids_from_expr(&item.expr, &mut col_ids_dst);
4391                }
4392            }
4393            // Ensure WHERE-only columns are fetched so predicates can evaluate them.
4394            if let Some(ref where_expr) = m.where_clause {
4395                collect_col_ids_from_expr(where_expr, &mut col_ids_src);
4396                collect_col_ids_from_expr(where_expr, &mut col_ids_dst);
4397            }
4398
4399            // Read ALL delta records for this specific rel table once (outside
4400            // the per-src-slot loop) so we open the file only once per table.
4401            let delta_records_all = {
4402                let edge_store = EdgeStore::open(&self.snapshot.db_root, storage_rel_id);
4403                edge_store.and_then(|s| s.read_delta()).unwrap_or_default()
4404            };
4405
4406            // SPA-178: Build (src_slot, dst_slot) → edge_id map for delta edges.
4407            // This lets us look up which edge a (src, dst) pair corresponds to
4408            // when reading edge properties.
4409            let delta_edge_id_map: std::collections::HashMap<(u64, u64), u64> =
4410                delta_records_all
4411                    .iter()
4412                    .enumerate()
4413                    .map(|(idx, r)| {
4414                        let s = r.src.0 & 0xFFFF_FFFF;
4415                        let d = r.dst.0 & 0xFFFF_FFFF;
4416                        ((s, d), idx as u64)
4417                    })
4418                    .collect();
4419
4420            // SPA-178: Pre-read all edge props for this rel table if any edge
4421            // property access is needed (inline filter or projection).
4422            let needs_edge_props = !rel_pat.props.is_empty()
4423                || (!rel_pat.var.is_empty()
4424                    && column_names.iter().any(|c| {
4425                        c.split_once('.')
4426                            .map_or(false, |(v, _)| v == rel_pat.var.as_str())
4427                    }));
4428            let all_edge_props_raw: Vec<(u64, u32, u64)> = if needs_edge_props {
4429                EdgeStore::open(&self.snapshot.db_root, storage_rel_id)
4430                    .and_then(|s| s.read_all_edge_props())
4431                    .unwrap_or_default()
4432            } else {
4433                vec![]
4434            };
4435            // Group by edge_id: last-write-wins per col_id.
4436            let mut edge_props_by_id: std::collections::HashMap<u64, Vec<(u32, u64)>> =
4437                std::collections::HashMap::new();
4438            for (edge_id, col_id, value) in &all_edge_props_raw {
4439                let entry = edge_props_by_id.entry(*edge_id).or_default();
4440                if let Some(existing) = entry.iter_mut().find(|(c, _)| *c == *col_id) {
4441                    existing.1 = *value;
4442                } else {
4443                    entry.push((*col_id, *value));
4444                }
4445            }
4446
4447            // Scan source nodes for this label.
4448            for src_slot in 0..hwm_src {
4449                // SPA-254: check per-query deadline at every slot boundary.
4450                self.check_deadline()?;
4451
4452                let src_node = NodeId(((effective_src_label_id as u64) << 32) | src_slot);
4453                let src_props = if !col_ids_src.is_empty() || !src_node_pat.props.is_empty() {
4454                    let all_needed: Vec<u32> = {
4455                        let mut v = col_ids_src.clone();
4456                        for p in &src_node_pat.props {
4457                            let col_id = prop_name_to_col_id(&p.key);
4458                            if !v.contains(&col_id) {
4459                                v.push(col_id);
4460                            }
4461                        }
4462                        v
4463                    };
4464                    self.snapshot.store.get_node_raw(src_node, &all_needed)?
4465                } else {
4466                    vec![]
4467                };
4468
4469                // Apply src inline prop filter.
4470                if !self.matches_prop_filter(&src_props, &src_node_pat.props) {
4471                    continue;
4472                }
4473
4474                // SPA-163 / SPA-195: read delta edges for this src node from
4475                // the correct per-rel-table delta log (no longer hardcoded to 0).
4476                let delta_neighbors: Vec<u64> = delta_records_all
4477                    .iter()
4478                    .filter(|r| {
4479                        let r_src_label = (r.src.0 >> 32) as u32;
4480                        let r_src_slot = r.src.0 & 0xFFFF_FFFF;
4481                        r_src_label == effective_src_label_id && r_src_slot == src_slot
4482                    })
4483                    .map(|r| r.dst.0 & 0xFFFF_FFFF)
4484                    .collect();
4485
4486                // Look up the CSR for this specific rel table.  open_csr_map
4487                // builds a per-table map keyed by catalog_rel_id, so each rel
4488                // type's checkpointed edges are found under its own key.
4489                let csr_neighbors: &[u64] = self
4490                    .snapshot
4491                    .csrs
4492                    .get(&u32::try_from(*catalog_rel_id).expect("rel_table_id overflowed u32"))
4493                    .map(|c| c.neighbors(src_slot))
4494                    .unwrap_or(&[]);
4495                let all_neighbors: Vec<u64> = csr_neighbors
4496                    .iter()
4497                    .copied()
4498                    .chain(delta_neighbors.into_iter())
4499                    .collect();
4500
4501                // ── SPA-200: batch-read dst properties — O(cols) fs::read() calls
4502                // instead of O(neighbors × cols). ─────────────────────────────────
4503                // Compute the full column-id list needed for dst (same for every
4504                // neighbor in this src → * traversal).
4505                let all_needed_dst: Vec<u32> = if !col_ids_dst.is_empty()
4506                    || !dst_node_pat.props.is_empty()
4507                {
4508                    let mut v = col_ids_dst.clone();
4509                    for p in &dst_node_pat.props {
4510                        let col_id = prop_name_to_col_id(&p.key);
4511                        if !v.contains(&col_id) {
4512                            v.push(col_id);
4513                        }
4514                    }
4515                    v
4516                } else {
4517                    vec![]
4518                };
4519
4520                // Deduplicate neighbor slots for the batch read (same set we
4521                // visit in the inner loop; duplicates are skipped there anyway).
4522                let unique_dst_slots: Vec<u32> = {
4523                    let mut seen: HashSet<u64> = HashSet::new();
4524                    all_neighbors
4525                        .iter()
4526                        .filter_map(|&s| if seen.insert(s) { Some(s as u32) } else { None })
4527                        .collect()
4528                };
4529
4530                // Batch-read: one fs::read() per column for all neighbors.
4531                // dst_batch[i] = raw column values for unique_dst_slots[i].
4532                let dst_batch: Vec<Vec<u64>> = if !all_needed_dst.is_empty() {
4533                    self.snapshot.store.batch_read_node_props(
4534                        effective_dst_label_id,
4535                        &unique_dst_slots,
4536                        &all_needed_dst,
4537                    )?
4538                } else {
4539                    vec![]
4540                };
4541                // Build a slot → batch-row index map for O(1) lookup.
4542                let dst_slot_to_idx: HashMap<u64, usize> = unique_dst_slots
4543                    .iter()
4544                    .enumerate()
4545                    .map(|(i, &s)| (s as u64, i))
4546                    .collect();
4547
4548                let mut seen_neighbors: HashSet<u64> = HashSet::new();
4549                for &dst_slot in &all_neighbors {
4550                    if !seen_neighbors.insert(dst_slot) {
4551                        continue;
4552                    }
4553                    // For undirected (Both) track emitted (src,dst) pairs so the
4554                    // backward pass can skip them to avoid double-emission.
4555                    if *dir == EdgeDir::Both {
4556                        seen_undirected.insert((src_slot, dst_slot));
4557                    }
4558                    let dst_node = NodeId(((effective_dst_label_id as u64) << 32) | dst_slot);
4559                    // Use the batch-prefetched result; fall back to per-node
4560                    // read only when the slot was not in the batch (shouldn't
4561                    // happen, but keeps the code correct under all conditions).
4562                    let dst_props: Vec<(u32, u64)> = if !all_needed_dst.is_empty() {
4563                        if let Some(&idx) = dst_slot_to_idx.get(&dst_slot) {
4564                            all_needed_dst
4565                                .iter()
4566                                .copied()
4567                                .zip(dst_batch[idx].iter().copied())
4568                                .collect()
4569                        } else {
4570                            // Fallback: individual read (e.g. delta-only slot).
4571                            self.snapshot.store.get_node_raw(dst_node, &all_needed_dst)?
4572                        }
4573                    } else {
4574                        vec![]
4575                    };
4576
4577                    // Apply dst inline prop filter.
4578                    if !self.matches_prop_filter(&dst_props, &dst_node_pat.props) {
4579                        continue;
4580                    }
4581
4582                    // SPA-178: look up edge props for this (src_slot, dst_slot) pair.
4583                    let current_edge_props: Vec<(u32, u64)> =
4584                        if needs_edge_props {
4585                            let eid = delta_edge_id_map.get(&(src_slot, dst_slot)).copied();
4586                            eid.and_then(|id| edge_props_by_id.get(&id))
4587                                .cloned()
4588                                .unwrap_or_default()
4589                        } else {
4590                            vec![]
4591                        };
4592
4593                    // Apply inline edge prop filter from rel pattern: [r:TYPE {prop: val}].
4594                    if !rel_pat.props.is_empty()
4595                        && !self.matches_prop_filter(&current_edge_props, &rel_pat.props)
4596                    {
4597                        continue;
4598                    }
4599
4600                    // For undirected (Both), record (src_slot, dst_slot) so the
4601                    // backward pass skips already-emitted pairs.
4602                    if *dir == EdgeDir::Both {
4603                        seen_undirected.insert((src_slot, dst_slot));
4604                    }
4605
4606                    // Apply WHERE clause.
4607                    if let Some(ref where_expr) = m.where_clause {
4608                        let mut row_vals = build_row_vals(
4609                            &src_props,
4610                            &src_node_pat.var,
4611                            &col_ids_src,
4612                            &self.snapshot.store,
4613                        );
4614                        row_vals.extend(build_row_vals(
4615                            &dst_props,
4616                            &dst_node_pat.var,
4617                            &col_ids_dst,
4618                            &self.snapshot.store,
4619                        ));
4620                        // Inject relationship metadata so type(r) works in WHERE.
4621                        if !rel_pat.var.is_empty() {
4622                            row_vals.insert(
4623                                format!("{}.__type__", rel_pat.var),
4624                                Value::String(effective_rel_type.to_string()),
4625                            );
4626                        }
4627                        // Inject node label metadata so labels(n) works in WHERE.
4628                        if !src_node_pat.var.is_empty() && !effective_src_label.is_empty() {
4629                            row_vals.insert(
4630                                format!("{}.__labels__", src_node_pat.var),
4631                                Value::List(vec![Value::String(effective_src_label.to_string())]),
4632                            );
4633                        }
4634                        if !dst_node_pat.var.is_empty() && !effective_dst_label.is_empty() {
4635                            row_vals.insert(
4636                                format!("{}.__labels__", dst_node_pat.var),
4637                                Value::List(vec![Value::String(effective_dst_label.to_string())]),
4638                            );
4639                        }
4640                        row_vals.extend(self.dollar_params());
4641                        if !self.eval_where_graph(where_expr, &row_vals) {
4642                            continue;
4643                        }
4644                    }
4645
4646                    if use_agg {
4647                        let mut row_vals = build_row_vals(
4648                            &src_props,
4649                            &src_node_pat.var,
4650                            &col_ids_src,
4651                            &self.snapshot.store,
4652                        );
4653                        row_vals.extend(build_row_vals(
4654                            &dst_props,
4655                            &dst_node_pat.var,
4656                            &col_ids_dst,
4657                            &self.snapshot.store,
4658                        ));
4659                        // Inject relationship and label metadata for aggregate path.
4660                        if !rel_pat.var.is_empty() {
4661                            row_vals.insert(
4662                                format!("{}.__type__", rel_pat.var),
4663                                Value::String(effective_rel_type.to_string()),
4664                            );
4665                        }
4666                        if !src_node_pat.var.is_empty() && !effective_src_label.is_empty() {
4667                            row_vals.insert(
4668                                format!("{}.__labels__", src_node_pat.var),
4669                                Value::List(vec![Value::String(effective_src_label.to_string())]),
4670                            );
4671                        }
4672                        if !dst_node_pat.var.is_empty() && !effective_dst_label.is_empty() {
4673                            row_vals.insert(
4674                                format!("{}.__labels__", dst_node_pat.var),
4675                                Value::List(vec![Value::String(effective_dst_label.to_string())]),
4676                            );
4677                        }
4678                        if !src_node_pat.var.is_empty() {
4679                            row_vals.insert(src_node_pat.var.clone(), Value::NodeRef(src_node));
4680                        }
4681                        if !dst_node_pat.var.is_empty() {
4682                            row_vals.insert(dst_node_pat.var.clone(), Value::NodeRef(dst_node));
4683                        }
4684                        // SPA-242: bind the relationship variable as a non-null
4685                        // EdgeRef so COUNT(r) counts matched edges correctly.
4686                        if !rel_pat.var.is_empty() {
4687                            // Encode a unique edge identity: high 32 bits = rel
4688                            // table id, low 32 bits = dst_slot.  src_slot is
4689                            // already implicit in the traversal nesting order but
4690                            // we mix it in via XOR to keep uniqueness within the
4691                            // same rel table.
4692                            let edge_id = sparrowdb_common::EdgeId(
4693                                (*catalog_rel_id << 32) | (src_slot ^ dst_slot) & 0xFFFF_FFFF,
4694                            );
4695                            row_vals.insert(rel_pat.var.clone(), Value::EdgeRef(edge_id));
4696                        }
4697                        raw_rows.push(row_vals);
4698                    } else {
4699                        // Build result row.
4700                        // SPA-195: use effective_rel_type (from the catalog per
4701                        // rel table) so unlabeled / untyped patterns return the
4702                        // correct relationship type name rather than empty string.
4703                        let rel_var_type = if !rel_pat.var.is_empty() {
4704                            Some((rel_pat.var.as_str(), effective_rel_type))
4705                        } else {
4706                            None
4707                        };
4708                        let src_label_meta =
4709                            if !src_node_pat.var.is_empty() && !effective_src_label.is_empty() {
4710                                Some((src_node_pat.var.as_str(), effective_src_label))
4711                            } else {
4712                                None
4713                            };
4714                        let dst_label_meta =
4715                            if !dst_node_pat.var.is_empty() && !effective_dst_label.is_empty() {
4716                                Some((dst_node_pat.var.as_str(), effective_dst_label))
4717                            } else {
4718                                None
4719                            };
4720                        // SPA-178: build edge_props arg for project_hop_row.
4721                        let rel_edge_props_arg = if !rel_pat.var.is_empty()
4722                            && !current_edge_props.is_empty()
4723                        {
4724                            Some((rel_pat.var.as_str(), current_edge_props.as_slice()))
4725                        } else {
4726                            None
4727                        };
4728                        let row = project_hop_row(
4729                            &src_props,
4730                            &dst_props,
4731                            column_names,
4732                            &src_node_pat.var,
4733                            &dst_node_pat.var,
4734                            rel_var_type,
4735                            src_label_meta,
4736                            dst_label_meta,
4737                            &self.snapshot.store,
4738                            rel_edge_props_arg,
4739                        );
4740                        rows.push(row);
4741                    }
4742                }
4743            }
4744        }
4745
4746        // ── Backward pass for undirected (Both) — SPA-193 ───────────────────
4747        // For (a)-[r]-(b), the forward pass emitted rows for edges a→b.
4748        // Now scan each rel table in reverse (dst→src) to find backward edges
4749        // (b→a) that were not already emitted in the forward pass.
4750        if *dir == EdgeDir::Both {
4751            for (catalog_rel_id, tbl_src_label_id, tbl_dst_label_id, tbl_rel_type) in
4752                &rel_tables_to_scan
4753            {
4754                let storage_rel_id = RelTableId(*catalog_rel_id as u32);
4755                // In the backward pass, scan "dst" label nodes (b-side) as src.
4756                let bwd_scan_label_id = *tbl_dst_label_id;
4757                let bwd_dst_label_id = *tbl_src_label_id;
4758                let effective_rel_type: &str = tbl_rel_type.as_str();
4759
4760                let effective_src_label: &str = if src_label.is_empty() {
4761                    label_id_to_name
4762                        .iter()
4763                        .find(|(id, _)| *id as u32 == bwd_scan_label_id)
4764                        .map(|(_, name)| name.as_str())
4765                        .unwrap_or("")
4766                } else {
4767                    src_label.as_str()
4768                };
4769                let effective_dst_label: &str = if dst_label.is_empty() {
4770                    label_id_to_name
4771                        .iter()
4772                        .find(|(id, _)| *id as u32 == bwd_dst_label_id)
4773                        .map(|(_, name)| name.as_str())
4774                        .unwrap_or("")
4775                } else {
4776                    dst_label.as_str()
4777                };
4778
4779                let hwm_bwd = match self.snapshot.store.hwm_for_label(bwd_scan_label_id) {
4780                    Ok(h) => h,
4781                    Err(_) => continue,
4782                };
4783
4784                let mut col_ids_src =
4785                    collect_col_ids_for_var(&src_node_pat.var, column_names, bwd_scan_label_id);
4786                let mut col_ids_dst =
4787                    collect_col_ids_for_var(&dst_node_pat.var, column_names, bwd_dst_label_id);
4788                if use_agg {
4789                    for item in &m.return_clause.items {
4790                        collect_col_ids_from_expr(&item.expr, &mut col_ids_src);
4791                        collect_col_ids_from_expr(&item.expr, &mut col_ids_dst);
4792                    }
4793                }
4794
4795                // Read delta records for this rel table (physical edges stored
4796                // as src=a, dst=b that we want to traverse in reverse b→a).
4797                let delta_records_bwd = EdgeStore::open(&self.snapshot.db_root, storage_rel_id)
4798                    .and_then(|s| s.read_delta())
4799                    .unwrap_or_default();
4800
4801                // Load the backward CSR for this rel table (written by
4802                // checkpoint).  Falls back to None gracefully when no
4803                // checkpoint has been run yet so pre-checkpoint databases
4804                // still return correct results via the delta log path.
4805                let csr_bwd: Option<CsrBackward> =
4806                    EdgeStore::open(&self.snapshot.db_root, storage_rel_id)
4807                        .and_then(|s| s.open_bwd())
4808                        .ok();
4809
4810                // Scan the b-side (physical dst label = tbl_dst_label_id).
4811                for b_slot in 0..hwm_bwd {
4812                    let b_node = NodeId(((bwd_scan_label_id as u64) << 32) | b_slot);
4813                    let b_props = if !col_ids_src.is_empty() || !src_node_pat.props.is_empty() {
4814                        let all_needed: Vec<u32> = {
4815                            let mut v = col_ids_src.clone();
4816                            for p in &src_node_pat.props {
4817                                let col_id = prop_name_to_col_id(&p.key);
4818                                if !v.contains(&col_id) {
4819                                    v.push(col_id);
4820                                }
4821                            }
4822                            v
4823                        };
4824                        self.snapshot.store.get_node_raw(b_node, &all_needed)?
4825                    } else {
4826                        vec![]
4827                    };
4828                    // Apply src-side (a-side pattern) prop filter — note: in the
4829                    // undirected backward pass the pattern variables are swapped,
4830                    // so src_node_pat corresponds to the "a" role which is the
4831                    // b-slot we are scanning.
4832                    if !self.matches_prop_filter(&b_props, &src_node_pat.props) {
4833                        continue;
4834                    }
4835
4836                    // Find edges in delta log where b_slot is the *destination*
4837                    // (physical edge: some_src → b_slot), giving us predecessors.
4838                    let delta_predecessors: Vec<u64> = delta_records_bwd
4839                        .iter()
4840                        .filter(|r| {
4841                            let r_dst_label = (r.dst.0 >> 32) as u32;
4842                            let r_dst_slot = r.dst.0 & 0xFFFF_FFFF;
4843                            r_dst_label == bwd_scan_label_id && r_dst_slot == b_slot
4844                        })
4845                        .map(|r| r.src.0 & 0xFFFF_FFFF)
4846                        .collect();
4847
4848                    // Also include checkpointed predecessors from the backward
4849                    // CSR (populated after checkpoint; empty/None before first
4850                    // checkpoint).  Combine with delta predecessors so that
4851                    // undirected matching works for both pre- and post-checkpoint
4852                    // databases.
4853                    let csr_predecessors: &[u64] = csr_bwd
4854                        .as_ref()
4855                        .map(|c| c.predecessors(b_slot))
4856                        .unwrap_or(&[]);
4857                    let all_predecessors: Vec<u64> = csr_predecessors
4858                        .iter()
4859                        .copied()
4860                        .chain(delta_predecessors.into_iter())
4861                        .collect();
4862
4863                    let mut seen_preds: HashSet<u64> = HashSet::new();
4864                    for a_slot in all_predecessors {
4865                        if !seen_preds.insert(a_slot) {
4866                            continue;
4867                        }
4868                        // Skip pairs already emitted in the forward pass.
4869                        // The backward row being emitted is (b_slot, a_slot) --
4870                        // b is the node being scanned (physical dst of the edge),
4871                        // a is its predecessor (physical src).
4872                        // Only suppress this row if that exact reversed pair was
4873                        // already produced by the forward pass (i.e. a physical
4874                        // b->a edge was stored and traversed).
4875                        // SPA-257: using (a_slot, b_slot) was wrong -- it
4876                        // suppressed the legitimate backward traversal of a->b.
4877                        if seen_undirected.contains(&(b_slot, a_slot)) {
4878                            continue;
4879                        }
4880
4881                        let a_node = NodeId(((bwd_dst_label_id as u64) << 32) | a_slot);
4882                        let a_props = if !col_ids_dst.is_empty() || !dst_node_pat.props.is_empty() {
4883                            let all_needed: Vec<u32> = {
4884                                let mut v = col_ids_dst.clone();
4885                                for p in &dst_node_pat.props {
4886                                    let col_id = prop_name_to_col_id(&p.key);
4887                                    if !v.contains(&col_id) {
4888                                        v.push(col_id);
4889                                    }
4890                                }
4891                                v
4892                            };
4893                            self.snapshot.store.get_node_raw(a_node, &all_needed)?
4894                        } else {
4895                            vec![]
4896                        };
4897
4898                        if !self.matches_prop_filter(&a_props, &dst_node_pat.props) {
4899                            continue;
4900                        }
4901
4902                        // Apply WHERE clause.
4903                        if let Some(ref where_expr) = m.where_clause {
4904                            let mut row_vals = build_row_vals(
4905                                &b_props,
4906                                &src_node_pat.var,
4907                                &col_ids_src,
4908                                &self.snapshot.store,
4909                            );
4910                            row_vals.extend(build_row_vals(
4911                                &a_props,
4912                                &dst_node_pat.var,
4913                                &col_ids_dst,
4914                                &self.snapshot.store,
4915                            ));
4916                            if !rel_pat.var.is_empty() {
4917                                row_vals.insert(
4918                                    format!("{}.__type__", rel_pat.var),
4919                                    Value::String(effective_rel_type.to_string()),
4920                                );
4921                            }
4922                            if !src_node_pat.var.is_empty() && !effective_src_label.is_empty() {
4923                                row_vals.insert(
4924                                    format!("{}.__labels__", src_node_pat.var),
4925                                    Value::List(vec![Value::String(
4926                                        effective_src_label.to_string(),
4927                                    )]),
4928                                );
4929                            }
4930                            if !dst_node_pat.var.is_empty() && !effective_dst_label.is_empty() {
4931                                row_vals.insert(
4932                                    format!("{}.__labels__", dst_node_pat.var),
4933                                    Value::List(vec![Value::String(
4934                                        effective_dst_label.to_string(),
4935                                    )]),
4936                                );
4937                            }
4938                            row_vals.extend(self.dollar_params());
4939                            if !self.eval_where_graph(where_expr, &row_vals) {
4940                                continue;
4941                            }
4942                        }
4943
4944                        if use_agg {
4945                            let mut row_vals = build_row_vals(
4946                                &b_props,
4947                                &src_node_pat.var,
4948                                &col_ids_src,
4949                                &self.snapshot.store,
4950                            );
4951                            row_vals.extend(build_row_vals(
4952                                &a_props,
4953                                &dst_node_pat.var,
4954                                &col_ids_dst,
4955                                &self.snapshot.store,
4956                            ));
4957                            if !rel_pat.var.is_empty() {
4958                                row_vals.insert(
4959                                    format!("{}.__type__", rel_pat.var),
4960                                    Value::String(effective_rel_type.to_string()),
4961                                );
4962                            }
4963                            if !src_node_pat.var.is_empty() && !effective_src_label.is_empty() {
4964                                row_vals.insert(
4965                                    format!("{}.__labels__", src_node_pat.var),
4966                                    Value::List(vec![Value::String(
4967                                        effective_src_label.to_string(),
4968                                    )]),
4969                                );
4970                            }
4971                            if !dst_node_pat.var.is_empty() && !effective_dst_label.is_empty() {
4972                                row_vals.insert(
4973                                    format!("{}.__labels__", dst_node_pat.var),
4974                                    Value::List(vec![Value::String(
4975                                        effective_dst_label.to_string(),
4976                                    )]),
4977                                );
4978                            }
4979                            if !src_node_pat.var.is_empty() {
4980                                row_vals.insert(src_node_pat.var.clone(), Value::NodeRef(b_node));
4981                            }
4982                            if !dst_node_pat.var.is_empty() {
4983                                row_vals.insert(dst_node_pat.var.clone(), Value::NodeRef(a_node));
4984                            }
4985                            // SPA-242: bind the relationship variable as a non-null
4986                            // EdgeRef so COUNT(r) counts matched edges correctly.
4987                            if !rel_pat.var.is_empty() {
4988                                let edge_id = sparrowdb_common::EdgeId(
4989                                    (*catalog_rel_id << 32) | (b_slot ^ a_slot) & 0xFFFF_FFFF,
4990                                );
4991                                row_vals.insert(rel_pat.var.clone(), Value::EdgeRef(edge_id));
4992                            }
4993                            raw_rows.push(row_vals);
4994                        } else {
4995                            let rel_var_type = if !rel_pat.var.is_empty() {
4996                                Some((rel_pat.var.as_str(), effective_rel_type))
4997                            } else {
4998                                None
4999                            };
5000                            let src_label_meta = if !src_node_pat.var.is_empty()
5001                                && !effective_src_label.is_empty()
5002                            {
5003                                Some((src_node_pat.var.as_str(), effective_src_label))
5004                            } else {
5005                                None
5006                            };
5007                            let dst_label_meta = if !dst_node_pat.var.is_empty()
5008                                && !effective_dst_label.is_empty()
5009                            {
5010                                Some((dst_node_pat.var.as_str(), effective_dst_label))
5011                            } else {
5012                                None
5013                            };
5014                            let row = project_hop_row(
5015                                &b_props,
5016                                &a_props,
5017                                column_names,
5018                                &src_node_pat.var,
5019                                &dst_node_pat.var,
5020                                rel_var_type,
5021                                src_label_meta,
5022                                dst_label_meta,
5023                                &self.snapshot.store,
5024                                None, // edge props not available in backward pass
5025                            );
5026                            rows.push(row);
5027                        }
5028                    }
5029                }
5030            }
5031        }
5032
5033        if use_agg {
5034            rows = self.aggregate_rows_graph(&raw_rows, &m.return_clause.items);
5035        } else {
5036            // DISTINCT
5037            if m.distinct {
5038                deduplicate_rows(&mut rows);
5039            }
5040
5041            // ORDER BY
5042            apply_order_by(&mut rows, m, column_names);
5043
5044            // SKIP
5045            if let Some(skip) = m.skip {
5046                let skip = (skip as usize).min(rows.len());
5047                rows.drain(0..skip);
5048            }
5049
5050            // LIMIT
5051            if let Some(lim) = m.limit {
5052                rows.truncate(lim as usize);
5053            }
5054        }
5055
5056        tracing::debug!(rows = rows.len(), "one-hop traversal complete");
5057        Ok(QueryResult {
5058            columns: column_names.to_vec(),
5059            rows,
5060        })
5061    }
5062
5063    // ── 2-hop traversal: (a)-[:R]->()-[:R]->(fof) ────────────────────────────
5064
5065    fn execute_two_hop(&self, m: &MatchStatement, column_names: &[String]) -> Result<QueryResult> {
5066        use crate::join::AspJoin;
5067
5068        let pat = &m.pattern[0];
5069        let src_node_pat = &pat.nodes[0];
5070        // nodes[1] is the mid node (may be named, e.g. `m` in Q8 mutual-friends)
5071        let mid_node_pat = &pat.nodes[1];
5072        // nodes[2] is the fof (friend-of-friend) / anchor-B in Q8
5073        let fof_node_pat = &pat.nodes[2];
5074
5075        let src_label = src_node_pat.labels.first().cloned().unwrap_or_default();
5076        let fof_label = fof_node_pat.labels.first().cloned().unwrap_or_default();
5077        let src_label_id = self
5078            .snapshot
5079            .catalog
5080            .get_label(&src_label)?
5081            .ok_or(sparrowdb_common::Error::NotFound)? as u32;
5082        let fof_label_id = self
5083            .snapshot
5084            .catalog
5085            .get_label(&fof_label)?
5086            .ok_or(sparrowdb_common::Error::NotFound)? as u32;
5087
5088        let hwm_src = self.snapshot.store.hwm_for_label(src_label_id)?;
5089        tracing::debug!(src_label = %src_label, fof_label = %fof_label, hwm_src = hwm_src, "two-hop traversal start");
5090
5091        // Collect col_ids for fof: projected columns plus any columns referenced by prop filters.
5092        // Also include any columns referenced by the WHERE clause, scoped to the fof variable so
5093        // that src-only predicates do not cause spurious column fetches from fof nodes.
5094        let col_ids_fof = {
5095            let mut ids = collect_col_ids_for_var(&fof_node_pat.var, column_names, fof_label_id);
5096            for p in &fof_node_pat.props {
5097                let col_id = prop_name_to_col_id(&p.key);
5098                if !ids.contains(&col_id) {
5099                    ids.push(col_id);
5100                }
5101            }
5102            if let Some(ref where_expr) = m.where_clause {
5103                collect_col_ids_from_expr_for_var(where_expr, &fof_node_pat.var, &mut ids);
5104            }
5105            ids
5106        };
5107
5108        // Collect col_ids for src: columns referenced in RETURN (for projection)
5109        // plus columns referenced in WHERE for the src variable.
5110        // SPA-252: projection columns must be included so that project_fof_row
5111        // can resolve src-variable columns (e.g. `RETURN a.name` when src_var = "a").
5112        let col_ids_src_where: Vec<u32> = {
5113            let mut ids = collect_col_ids_for_var(&src_node_pat.var, column_names, src_label_id);
5114            if let Some(ref where_expr) = m.where_clause {
5115                collect_col_ids_from_expr_for_var(where_expr, &src_node_pat.var, &mut ids);
5116            }
5117            ids
5118        };
5119
5120        // SPA-201: detect if the second relationship hop is Incoming FIRST,
5121        // because col_ids_mid is only populated for the incoming case.
5122        // For patterns like (a)-[:R]->(m)<-[:R]-(b), rels[1].dir == Incoming,
5123        // meaning we need the PREDECESSORS of mid (nodes that have an edge TO mid)
5124        // rather than the SUCCESSORS (forward neighbors of mid).
5125        let second_hop_incoming = pat
5126            .rels
5127            .get(1)
5128            .map(|r| r.dir == sparrowdb_cypher::ast::EdgeDir::Incoming)
5129            .unwrap_or(false);
5130
5131        // SPA-201: collect col_ids for the mid node (nodes[1] = m in Q8).
5132        // For the Incoming second-hop case the mid is the projected "common neighbor"
5133        // (e.g. `RETURN m.uid`), so we must read its properties.
5134        let mid_label = mid_node_pat.labels.first().cloned().unwrap_or_default();
5135        let mid_label_id: u32 = if mid_label.is_empty() {
5136            src_label_id // fall back to src label when mid has no label annotation
5137        } else {
5138            self.snapshot
5139                .catalog
5140                .get_label(&mid_label)
5141                .ok()
5142                .flatten()
5143                .map(|id| id as u32)
5144                .unwrap_or(src_label_id)
5145        };
5146        let col_ids_mid: Vec<u32> = if second_hop_incoming && !mid_node_pat.var.is_empty() {
5147            let mut ids = collect_col_ids_for_var(&mid_node_pat.var, column_names, mid_label_id);
5148            for p in &mid_node_pat.props {
5149                let col_id = prop_name_to_col_id(&p.key);
5150                if !ids.contains(&col_id) {
5151                    ids.push(col_id);
5152                }
5153            }
5154            if let Some(ref where_expr) = m.where_clause {
5155                collect_col_ids_from_expr_for_var(where_expr, &mid_node_pat.var, &mut ids);
5156            }
5157            ids
5158        } else {
5159            vec![]
5160        };
5161
5162        // SPA-163 + SPA-185: build a slot-level adjacency map from all delta
5163        // logs so that edges written since the last checkpoint are visible for
5164        // 2-hop queries.  We aggregate across all rel types here because the
5165        // 2-hop executor does not currently filter on rel_type.
5166        // Map: src_slot → Vec<dst_slot> (only records whose src label matches).
5167        let delta_adj: HashMap<u64, Vec<u64>> = {
5168            let mut adj: HashMap<u64, Vec<u64>> = HashMap::new();
5169            for r in self.read_delta_all() {
5170                let r_src_label = (r.src.0 >> 32) as u32;
5171                let r_src_slot = r.src.0 & 0xFFFF_FFFF;
5172                if r_src_label == src_label_id {
5173                    adj.entry(r_src_slot)
5174                        .or_default()
5175                        .push(r.dst.0 & 0xFFFF_FFFF);
5176                }
5177            }
5178            adj
5179        };
5180
5181        // SPA-185: build a merged CSR that union-combines edges from all
5182        // per-type CSRs so the 2-hop traversal sees paths through any rel type.
5183        // AspJoin requires a single &CsrForward; we construct a combined one
5184        // rather than using an arbitrary first entry.
5185        let merged_csr = {
5186            let max_nodes = self
5187                .snapshot
5188                .csrs
5189                .values()
5190                .map(|c| c.n_nodes())
5191                .max()
5192                .unwrap_or(0);
5193            let mut edges: Vec<(u64, u64)> = Vec::new();
5194            for csr in self.snapshot.csrs.values() {
5195                for src in 0..csr.n_nodes() {
5196                    for &dst in csr.neighbors(src) {
5197                        edges.push((src, dst));
5198                    }
5199                }
5200            }
5201            // CsrForward::build requires a sorted edge list.
5202            edges.sort_unstable();
5203            edges.dedup();
5204            CsrForward::build(max_nodes, &edges)
5205        };
5206
5207        // SPA-201: build a merged backward CSR when the second hop is Incoming.
5208        // For (a)-[:R]->(m)<-[:R]-(b) we need predecessors(mid) to find b-nodes.
5209        // We derive this from the already-loaded forward CSRs (no extra disk I/O)
5210        // by building CsrBackward from the same forward edge list used for merged_csr.
5211        // CsrBackward::build takes (src, dst) forward edges and stores them reversed.
5212        let merged_bwd_csr: Option<CsrBackward> = if second_hop_incoming {
5213            let max_nodes = self
5214                .snapshot
5215                .csrs
5216                .values()
5217                .map(|c| c.n_nodes())
5218                .max()
5219                .unwrap_or(0);
5220            // Re-use the same sorted+deduped edge list already in merged_csr.
5221            // We rebuild it here because CsrForward doesn't expose its edge list,
5222            // but this construction is O(E) and the merged_csr build already did it.
5223            let mut fwd_edges: Vec<(u64, u64)> = Vec::new();
5224            for csr in self.snapshot.csrs.values() {
5225                for src in 0..csr.n_nodes() {
5226                    for &dst in csr.neighbors(src) {
5227                        fwd_edges.push((src, dst));
5228                    }
5229                }
5230            }
5231            fwd_edges.sort_unstable();
5232            fwd_edges.dedup();
5233            if fwd_edges.is_empty() {
5234                None
5235            } else {
5236                Some(CsrBackward::build(max_nodes, &fwd_edges))
5237            }
5238        } else {
5239            None
5240        };
5241
5242        // SPA-201: build a delta adjacency map for the backward (incoming) direction.
5243        // Maps dst_slot → Vec<src_slot> for edges in the delta log (written since checkpoint).
5244        let delta_adj_bwd: HashMap<u64, Vec<u64>> = if second_hop_incoming {
5245            let mut adj: HashMap<u64, Vec<u64>> = HashMap::new();
5246            for r in self.read_delta_all() {
5247                let r_dst_slot = r.dst.0 & 0xFFFF_FFFF;
5248                let r_src_slot = r.src.0 & 0xFFFF_FFFF;
5249                adj.entry(r_dst_slot).or_default().push(r_src_slot);
5250            }
5251            adj
5252        } else {
5253            HashMap::new()
5254        };
5255
5256        let join = AspJoin::new(&merged_csr);
5257        let mut rows = Vec::new();
5258
5259        // Scan source nodes.
5260        for src_slot in 0..hwm_src {
5261            // SPA-254: check per-query deadline at every slot boundary.
5262            self.check_deadline()?;
5263
5264            let src_node = NodeId(((src_label_id as u64) << 32) | src_slot);
5265            let src_needed: Vec<u32> = {
5266                let mut v = vec![];
5267                for p in &src_node_pat.props {
5268                    let col_id = prop_name_to_col_id(&p.key);
5269                    if !v.contains(&col_id) {
5270                        v.push(col_id);
5271                    }
5272                }
5273                for &col_id in &col_ids_src_where {
5274                    if !v.contains(&col_id) {
5275                        v.push(col_id);
5276                    }
5277                }
5278                v
5279            };
5280
5281            let src_props = read_node_props(&self.snapshot.store, src_node, &src_needed)?;
5282
5283            // Apply src inline prop filter.
5284            if !self.matches_prop_filter(&src_props, &src_node_pat.props) {
5285                continue;
5286            }
5287
5288            if second_hop_incoming {
5289                // SPA-201: Incoming second hop — pattern (a)-[:R]->(m)<-[:R]-(b).
5290                //
5291                // Semantics: find all mid-nodes M such that (a→M) AND (b→M) where
5292                // b matches the fof_node_pat filter.  The result rows project M
5293                // (the common neighbor / mutual friend), not B.
5294                //
5295                // Algorithm:
5296                //   1. First-hop forward: candidate M slots = CSR neighbors of src + delta.
5297                //   2. For each M: collect B slots = predecessors of M (bwd CSR + delta).
5298                //   3. Read B props, apply fof_node_pat filter — if any B passes, M is valid.
5299                //   4. For valid M: read mid props, apply mid prop filter, build result row.
5300
5301                // Collect all candidate M slots from the forward first hop.
5302                let mid_slots: Vec<u64> = {
5303                    let mut csr_mids: Vec<u64> = merged_csr.neighbors(src_slot).to_vec();
5304                    // Delta first hop from src.
5305                    if let Some(delta_first) = delta_adj.get(&src_slot) {
5306                        for &mid in delta_first {
5307                            if !csr_mids.contains(&mid) {
5308                                csr_mids.push(mid);
5309                            }
5310                        }
5311                    }
5312                    csr_mids
5313                };
5314
5315                for mid_slot in mid_slots {
5316                    // Read mid props for projection (and mid prop filter).
5317                    let mid_node = NodeId(((mid_label_id as u64) << 32) | mid_slot);
5318                    let mid_props = if !col_ids_mid.is_empty() {
5319                        read_node_props(&self.snapshot.store, mid_node, &col_ids_mid)?
5320                    } else {
5321                        vec![]
5322                    };
5323
5324                    // Apply mid inline prop filter (e.g. `m:User`).
5325                    if !self.matches_prop_filter(&mid_props, &mid_node_pat.props) {
5326                        continue;
5327                    }
5328
5329                    // Collect B slots = predecessors of M (bwd CSR + delta bwd).
5330                    let mut found_valid_fof = false;
5331                    let csr_preds: &[u64] = merged_bwd_csr
5332                        .as_ref()
5333                        .map(|bwd| bwd.predecessors(mid_slot))
5334                        .unwrap_or(&[]);
5335                    let delta_preds_opt = delta_adj_bwd.get(&mid_slot);
5336
5337                    let all_b_slots: Vec<u64> = {
5338                        let mut v: Vec<u64> = csr_preds.to_vec();
5339                        if let Some(delta_preds) = delta_preds_opt {
5340                            for &b in delta_preds {
5341                                if !v.contains(&b) {
5342                                    v.push(b);
5343                                }
5344                            }
5345                        }
5346                        v
5347                    };
5348
5349                    for b_slot in &all_b_slots {
5350                        let b_node = NodeId(((fof_label_id as u64) << 32) | *b_slot);
5351                        let b_props =
5352                            read_node_props(&self.snapshot.store, b_node, &col_ids_fof)?;
5353
5354                        // Apply fof (b) inline prop filter.
5355                        if !self.matches_prop_filter(&b_props, &fof_node_pat.props) {
5356                            continue;
5357                        }
5358
5359                        // Apply WHERE clause for this (src=a, mid=m, fof=b) binding.
5360                        if let Some(ref where_expr) = m.where_clause {
5361                            let mut row_vals = build_row_vals(
5362                                &src_props,
5363                                &src_node_pat.var,
5364                                &col_ids_src_where,
5365                                &self.snapshot.store,
5366                            );
5367                            row_vals.extend(build_row_vals(
5368                                &mid_props,
5369                                &mid_node_pat.var,
5370                                &col_ids_mid,
5371                                &self.snapshot.store,
5372                            ));
5373                            row_vals.extend(build_row_vals(
5374                                &b_props,
5375                                &fof_node_pat.var,
5376                                &col_ids_fof,
5377                                &self.snapshot.store,
5378                            ));
5379                            // Label metadata.
5380                            if !src_node_pat.var.is_empty() && !src_label.is_empty() {
5381                                row_vals.insert(
5382                                    format!("{}.__labels__", src_node_pat.var),
5383                                    Value::List(vec![Value::String(src_label.clone())]),
5384                                );
5385                            }
5386                            if !mid_node_pat.var.is_empty() && !mid_label.is_empty() {
5387                                row_vals.insert(
5388                                    format!("{}.__labels__", mid_node_pat.var),
5389                                    Value::List(vec![Value::String(mid_label.clone())]),
5390                                );
5391                            }
5392                            if !fof_node_pat.var.is_empty() && !fof_label.is_empty() {
5393                                row_vals.insert(
5394                                    format!("{}.__labels__", fof_node_pat.var),
5395                                    Value::List(vec![Value::String(fof_label.clone())]),
5396                                );
5397                            }
5398                            if !pat.rels[0].var.is_empty() {
5399                                row_vals.insert(
5400                                    format!("{}.__type__", pat.rels[0].var),
5401                                    Value::String(pat.rels[0].rel_type.clone()),
5402                                );
5403                            }
5404                            if !pat.rels[1].var.is_empty() {
5405                                row_vals.insert(
5406                                    format!("{}.__type__", pat.rels[1].var),
5407                                    Value::String(pat.rels[1].rel_type.clone()),
5408                                );
5409                            }
5410                            row_vals.extend(self.dollar_params());
5411                            if !self.eval_where_graph(where_expr, &row_vals) {
5412                                continue;
5413                            }
5414                        }
5415
5416                        // Project a row: src (a) + mid (m) + fof (b) columns.
5417                        // We build the row using a 3-var aware helper here so that
5418                        // `RETURN m.name`, `RETURN a.name`, and `RETURN b.name` all
5419                        // resolve correctly.
5420                        let row = project_three_var_row(
5421                            &src_props,
5422                            &mid_props,
5423                            &b_props,
5424                            column_names,
5425                            &src_node_pat.var,
5426                            &mid_node_pat.var,
5427                            &self.snapshot.store,
5428                        );
5429                        rows.push(row);
5430                        found_valid_fof = true;
5431                        // Continue — multiple b nodes may match (emit one row per match).
5432                    }
5433                    let _ = found_valid_fof; // suppress unused warning
5434                }
5435                // Skip the rest of the per-src-slot processing for the Incoming case.
5436                continue;
5437            }
5438
5439            // ── Forward-forward path (both hops Outgoing) ─────────────────────
5440            let mut fof_slots: Vec<u64> = {
5441                // Use ASP-Join.
5442                join.two_hop(src_slot)?
5443            };
5444
5445            // SPA-163: extend with delta-log 2-hop paths (forward-forward only).
5446            {
5447                let first_hop_delta = delta_adj
5448                    .get(&src_slot)
5449                    .map(|v| v.as_slice())
5450                    .unwrap_or(&[]);
5451                if !first_hop_delta.is_empty() {
5452                    let mut delta_fof: HashSet<u64> = HashSet::new();
5453                    for &mid_slot in first_hop_delta {
5454                        // CSR second hop from mid (use merged multi-type CSR):
5455                        for &fof in merged_csr.neighbors(mid_slot) {
5456                            delta_fof.insert(fof);
5457                        }
5458                        // Delta second hop from mid:
5459                        if let Some(mid_neighbors) = delta_adj.get(&mid_slot) {
5460                            for &fof in mid_neighbors {
5461                                delta_fof.insert(fof);
5462                            }
5463                        }
5464                    }
5465                    fof_slots.extend(delta_fof);
5466                    // Re-deduplicate the combined set.
5467                    let unique: HashSet<u64> = fof_slots.into_iter().collect();
5468                    fof_slots = unique.into_iter().collect();
5469                    fof_slots.sort_unstable();
5470                }
5471            }
5472
5473            // ── SPA-200: batch-read fof properties — O(cols) fs::read() calls
5474            // instead of O(fof_slots × cols). ────────────────────────────────
5475            // `col_ids_fof` is constant for this src_slot iteration (determined
5476            // by the query structure, not by the specific fof node).
5477            let fof_slots_u32: Vec<u32> = fof_slots.iter().map(|&s| s as u32).collect();
5478            let fof_batch: Vec<Vec<u64>> = if !col_ids_fof.is_empty() {
5479                self.snapshot.store.batch_read_node_props(
5480                    fof_label_id,
5481                    &fof_slots_u32,
5482                    &col_ids_fof,
5483                )?
5484            } else {
5485                vec![]
5486            };
5487            // Build slot → batch-row index map for O(1) lookup in the inner loop.
5488            let fof_slot_to_idx: HashMap<u64, usize> = fof_slots
5489                .iter()
5490                .enumerate()
5491                .map(|(i, &s)| (s, i))
5492                .collect();
5493
5494            for fof_slot in fof_slots {
5495                let fof_node = NodeId(((fof_label_id as u64) << 32) | fof_slot);
5496                // Build fof_props in the same Vec<(col_id, u64)> format as
5497                // read_node_props returns: filter out 0-sentinel (absent) values.
5498                let fof_props: Vec<(u32, u64)> = if !col_ids_fof.is_empty() {
5499                    if let Some(&idx) = fof_slot_to_idx.get(&fof_slot) {
5500                        col_ids_fof
5501                            .iter()
5502                            .copied()
5503                            .zip(fof_batch[idx].iter().copied())
5504                            .filter(|&(_, v)| v != 0)
5505                            .collect()
5506                    } else {
5507                        // Fallback: individual read (delta-only slot not in batch).
5508                        read_node_props(&self.snapshot.store, fof_node, &col_ids_fof)?
5509                    }
5510                } else {
5511                    vec![]
5512                };
5513
5514                // Apply fof inline prop filter.
5515                if !self.matches_prop_filter(&fof_props, &fof_node_pat.props) {
5516                    continue;
5517                }
5518
5519                // Apply WHERE clause predicate.
5520                if let Some(ref where_expr) = m.where_clause {
5521                    let mut row_vals = build_row_vals(
5522                        &src_props,
5523                        &src_node_pat.var,
5524                        &col_ids_src_where,
5525                        &self.snapshot.store,
5526                    );
5527                    row_vals.extend(build_row_vals(
5528                        &fof_props,
5529                        &fof_node_pat.var,
5530                        &col_ids_fof,
5531                        &self.snapshot.store,
5532                    ));
5533                    // Inject label metadata so labels(n) works in WHERE.
5534                    if !src_node_pat.var.is_empty() && !src_label.is_empty() {
5535                        row_vals.insert(
5536                            format!("{}.__labels__", src_node_pat.var),
5537                            Value::List(vec![Value::String(src_label.clone())]),
5538                        );
5539                    }
5540                    if !fof_node_pat.var.is_empty() && !fof_label.is_empty() {
5541                        row_vals.insert(
5542                            format!("{}.__labels__", fof_node_pat.var),
5543                            Value::List(vec![Value::String(fof_label.clone())]),
5544                        );
5545                    }
5546                    // Inject relationship type metadata so type(r) works in WHERE.
5547                    if !pat.rels[0].var.is_empty() {
5548                        row_vals.insert(
5549                            format!("{}.__type__", pat.rels[0].var),
5550                            Value::String(pat.rels[0].rel_type.clone()),
5551                        );
5552                    }
5553                    if !pat.rels[1].var.is_empty() {
5554                        row_vals.insert(
5555                            format!("{}.__type__", pat.rels[1].var),
5556                            Value::String(pat.rels[1].rel_type.clone()),
5557                        );
5558                    }
5559                    row_vals.extend(self.dollar_params());
5560                    if !self.eval_where_graph(where_expr, &row_vals) {
5561                        continue;
5562                    }
5563                }
5564
5565                let row = project_fof_row(
5566                    &src_props,
5567                    &fof_props,
5568                    column_names,
5569                    &src_node_pat.var,
5570                    &self.snapshot.store,
5571                );
5572                rows.push(row);
5573            }
5574        }
5575
5576        // DISTINCT
5577        if m.distinct {
5578            deduplicate_rows(&mut rows);
5579        }
5580
5581        // ORDER BY
5582        apply_order_by(&mut rows, m, column_names);
5583
5584        // SKIP
5585        if let Some(skip) = m.skip {
5586            let skip = (skip as usize).min(rows.len());
5587            rows.drain(0..skip);
5588        }
5589
5590        // LIMIT
5591        if let Some(lim) = m.limit {
5592            rows.truncate(lim as usize);
5593        }
5594
5595        tracing::debug!(rows = rows.len(), "two-hop traversal complete");
5596        Ok(QueryResult {
5597            columns: column_names.to_vec(),
5598            rows,
5599        })
5600    }
5601
5602    // ── N-hop traversal (SPA-252): (a)-[:R]->(b)-[:R]->...-(z) ──────────────
5603
5604    /// General N-hop traversal for inline chains with 3 or more relationship
5605    /// hops in a single MATCH pattern, e.g.:
5606    ///   MATCH (a)-[:R]->(b)-[:R]->(c)-[:R]->(d) RETURN a.name, b.name, c.name, d.name
5607    ///
5608    /// The algorithm iterates forward hop by hop.  At each level it maintains
5609    /// a "frontier" of `(slot, props)` tuples for the current boundary nodes,
5610    /// plus an accumulated `row_vals` map that records all variable→property
5611    /// bindings seen so far.  When the frontier advances to the final node, a
5612    /// result row is projected from the accumulated map.
5613    ///
5614    /// This replaces the previous fallthrough to `execute_scan` which only
5615    /// scanned the first node and ignored all relationship hops.
5616    fn execute_n_hop(&self, m: &MatchStatement, column_names: &[String]) -> Result<QueryResult> {
5617        let pat = &m.pattern[0];
5618        let n_nodes = pat.nodes.len();
5619        let n_rels = pat.rels.len();
5620
5621        // Sanity: nodes.len() == rels.len() + 1 always holds for a linear chain.
5622        if n_nodes != n_rels + 1 {
5623            return Err(sparrowdb_common::Error::Unimplemented);
5624        }
5625
5626        // Pre-compute col_ids needed per node variable so we only read the
5627        // property columns that are actually projected or filtered.
5628        let col_ids_per_node: Vec<Vec<u32>> = (0..n_nodes)
5629            .map(|i| {
5630                let node_pat = &pat.nodes[i];
5631                let var = &node_pat.var;
5632                let mut ids = if var.is_empty() {
5633                    vec![]
5634                } else {
5635                    collect_col_ids_for_var(var, column_names, 0)
5636                };
5637                // Include columns required by WHERE predicates for this var.
5638                if let Some(ref where_expr) = m.where_clause {
5639                    if !var.is_empty() {
5640                        collect_col_ids_from_expr_for_var(where_expr, var, &mut ids);
5641                    }
5642                }
5643                // Include columns required by inline prop filters.
5644                for p in &node_pat.props {
5645                    let col_id = prop_name_to_col_id(&p.key);
5646                    if !ids.contains(&col_id) {
5647                        ids.push(col_id);
5648                    }
5649                }
5650                // Always read at least col_0 so the node can be identified.
5651                if ids.is_empty() {
5652                    ids.push(0);
5653                }
5654                ids
5655            })
5656            .collect();
5657
5658        // Resolve label_ids for all node positions.
5659        let label_ids_per_node: Vec<Option<u32>> = (0..n_nodes)
5660            .map(|i| {
5661                let label = pat.nodes[i].labels.first().cloned().unwrap_or_default();
5662                if label.is_empty() {
5663                    None
5664                } else {
5665                    self.snapshot
5666                        .catalog
5667                        .get_label(&label)
5668                        .ok()
5669                        .flatten()
5670                        .map(|id| id as u32)
5671                }
5672            })
5673            .collect();
5674
5675        // Scan the first (source) node and kick off the recursive hop chain.
5676        let src_label_id = match label_ids_per_node[0] {
5677            Some(id) => id,
5678            None => return Err(sparrowdb_common::Error::Unimplemented),
5679        };
5680        let hwm_src = self.snapshot.store.hwm_for_label(src_label_id)?;
5681
5682        // We read all delta edges once up front to avoid repeated file I/O.
5683        let delta_all = self.read_delta_all();
5684
5685        let mut rows: Vec<Vec<Value>> = Vec::new();
5686
5687        for src_slot in 0..hwm_src {
5688            // SPA-254: check per-query deadline at every slot boundary.
5689            self.check_deadline()?;
5690
5691            let src_node_id = NodeId(((src_label_id as u64) << 32) | src_slot);
5692
5693            // Skip tombstoned nodes.
5694            if self.is_node_tombstoned(src_node_id) {
5695                continue;
5696            }
5697
5698            let src_props =
5699                read_node_props(&self.snapshot.store, src_node_id, &col_ids_per_node[0])?;
5700
5701            // Apply inline prop filter for the source node.
5702            if !self.matches_prop_filter(&src_props, &pat.nodes[0].props) {
5703                continue;
5704            }
5705
5706            // Seed the frontier with the source node binding.
5707            let mut row_vals: HashMap<String, Value> = HashMap::new();
5708            if !pat.nodes[0].var.is_empty() {
5709                for &(col_id, raw) in &src_props {
5710                    let key = format!("{}.col_{col_id}", pat.nodes[0].var);
5711                    row_vals.insert(key, decode_raw_val(raw, &self.snapshot.store));
5712                }
5713            }
5714
5715            // `frontier` holds (slot, accumulated_vals) pairs for the current
5716            // boundary of the traversal.  Each entry represents one in-progress
5717            // path; cloning ensures bindings are isolated across branches.
5718            let mut frontier: Vec<(u64, HashMap<String, Value>)> = vec![(src_slot, row_vals)];
5719
5720            for hop_idx in 0..n_rels {
5721                let next_node_pat = &pat.nodes[hop_idx + 1];
5722                let next_label_id_opt = label_ids_per_node[hop_idx + 1];
5723                let next_col_ids = &col_ids_per_node[hop_idx + 1];
5724                let cur_label_id = label_ids_per_node[hop_idx].unwrap_or(src_label_id);
5725
5726                let mut next_frontier: Vec<(u64, HashMap<String, Value>)> = Vec::new();
5727
5728                for (cur_slot, cur_vals) in frontier {
5729                    // Gather neighbors from CSR + delta for this hop.
5730                    let csr_nb: Vec<u64> = self.csr_neighbors_all(cur_slot);
5731                    let delta_nb: Vec<u64> = delta_all
5732                        .iter()
5733                        .filter(|r| {
5734                            let r_src_label = (r.src.0 >> 32) as u32;
5735                            let r_src_slot = r.src.0 & 0xFFFF_FFFF;
5736                            r_src_label == cur_label_id && r_src_slot == cur_slot
5737                        })
5738                        .map(|r| r.dst.0 & 0xFFFF_FFFF)
5739                        .collect();
5740
5741                    let mut seen: HashSet<u64> = HashSet::new();
5742                    let all_nb: Vec<u64> = csr_nb
5743                        .into_iter()
5744                        .chain(delta_nb)
5745                        .filter(|&nb| seen.insert(nb))
5746                        .collect();
5747
5748                    for next_slot in all_nb {
5749                        let next_node_id = if let Some(lbl_id) = next_label_id_opt {
5750                            NodeId(((lbl_id as u64) << 32) | next_slot)
5751                        } else {
5752                            NodeId(next_slot)
5753                        };
5754
5755                        let next_props =
5756                            read_node_props(&self.snapshot.store, next_node_id, next_col_ids)?;
5757
5758                        // Apply inline prop filter for this hop's destination node.
5759                        if !self.matches_prop_filter(&next_props, &next_node_pat.props) {
5760                            continue;
5761                        }
5762
5763                        // Clone the accumulated bindings and extend with this node's
5764                        // properties, keyed under its own variable name.
5765                        let mut new_vals = cur_vals.clone();
5766                        if !next_node_pat.var.is_empty() {
5767                            for &(col_id, raw) in &next_props {
5768                                let key = format!("{}.col_{col_id}", next_node_pat.var);
5769                                new_vals.insert(key, decode_raw_val(raw, &self.snapshot.store));
5770                            }
5771                        }
5772
5773                        next_frontier.push((next_slot, new_vals));
5774                    }
5775                }
5776
5777                frontier = next_frontier;
5778            }
5779
5780            // `frontier` now contains complete paths.  Project result rows.
5781            for (_final_slot, path_vals) in frontier {
5782                // Apply WHERE clause using the full accumulated binding map.
5783                if let Some(ref where_expr) = m.where_clause {
5784                    let mut eval_vals = path_vals.clone();
5785                    eval_vals.extend(self.dollar_params());
5786                    if !self.eval_where_graph(where_expr, &eval_vals) {
5787                        continue;
5788                    }
5789                }
5790
5791                // Project column values from the accumulated binding map.
5792                // Each column name is "var.prop" — look up "var.col_<id>" in the map.
5793                let row: Vec<Value> = column_names
5794                    .iter()
5795                    .map(|col_name| {
5796                        if let Some((var, prop)) = col_name.split_once('.') {
5797                            let key = format!("{var}.col_{}", col_id_of(prop));
5798                            path_vals.get(&key).cloned().unwrap_or(Value::Null)
5799                        } else {
5800                            Value::Null
5801                        }
5802                    })
5803                    .collect();
5804
5805                rows.push(row);
5806            }
5807        }
5808
5809        // DISTINCT
5810        if m.distinct {
5811            deduplicate_rows(&mut rows);
5812        }
5813
5814        // ORDER BY
5815        apply_order_by(&mut rows, m, column_names);
5816
5817        // SKIP
5818        if let Some(skip) = m.skip {
5819            let skip = (skip as usize).min(rows.len());
5820            rows.drain(0..skip);
5821        }
5822
5823        // LIMIT
5824        if let Some(lim) = m.limit {
5825            rows.truncate(lim as usize);
5826        }
5827
5828        tracing::debug!(
5829            rows = rows.len(),
5830            n_rels = n_rels,
5831            "n-hop traversal complete"
5832        );
5833        Ok(QueryResult {
5834            columns: column_names.to_vec(),
5835            rows,
5836        })
5837    }
5838
5839    // ── Variable-length path traversal: (a)-[:R*M..N]->(b) ──────────────────
5840
5841    /// Collect all neighbor slot-ids reachable from `src_slot` via the delta
5842    /// log and CSR adjacency.  src_label_id is used to filter delta records.
5843    ///
5844    /// SPA-185: reads across all rel types (used by variable-length path
5845    /// traversal which does not currently filter on rel_type).
5846    /// Return the labeled outgoing neighbors of `(src_slot, src_label_id)`.
5847    ///
5848    /// Each entry is `(dst_slot, dst_label_id)`.  The delta log encodes the full
5849    /// NodeId in `r.dst`, so label_id is recovered precisely.  For CSR-only
5850    /// destinations the label is looked up in the `node_label` hint map (built
5851    /// from the delta by the caller); if absent, `src_label_id` is used as a
5852    /// conservative fallback (correct for homogeneous graphs).
5853    fn get_node_neighbors_labeled(
5854        &self,
5855        src_slot: u64,
5856        src_label_id: u32,
5857        delta_all: &[sparrowdb_storage::edge_store::DeltaRecord],
5858        node_label: &std::collections::HashSet<(u64, u32)>,
5859        all_label_ids: &[u32],
5860        out: &mut std::collections::HashSet<(u64, u32)>,
5861    ) {
5862        out.clear();
5863
5864        // ── CSR neighbors (slot only; label recovered by scanning all label HWMs
5865        //    or falling back to src_label_id for homogeneous graphs) ────────────
5866        let csr_slots: Vec<u64> = self.csr_neighbors_all(src_slot);
5867
5868        // ── Delta neighbors (full NodeId available) ───────────────────────────
5869        // Insert delta neighbors first — their labels are authoritative.
5870        for r in delta_all.iter().filter(|r| {
5871            let r_src_label = (r.src.0 >> 32) as u32;
5872            let r_src_slot = r.src.0 & 0xFFFF_FFFF;
5873            r_src_label == src_label_id && r_src_slot == src_slot
5874        }) {
5875            let dst_slot = r.dst.0 & 0xFFFF_FFFF;
5876            let dst_label = (r.dst.0 >> 32) as u32;
5877            out.insert((dst_slot, dst_label));
5878        }
5879
5880        // For each CSR slot, determine label: prefer a delta-confirmed label,
5881        // else scan all known label ids to find one whose HWM covers that slot.
5882        // If no label confirms it, fall back to src_label_id.
5883        'csr: for dst_slot in csr_slots {
5884            // Check if delta already gave us a label for this slot.
5885            for &lid in all_label_ids {
5886                if out.contains(&(dst_slot, lid)) {
5887                    continue 'csr; // already recorded with correct label
5888                }
5889            }
5890            // Try to determine the dst label from the delta node_label registry.
5891            // node_label contains (slot, label_id) pairs seen anywhere in delta.
5892            let mut found = false;
5893            for &lid in all_label_ids {
5894                if node_label.contains(&(dst_slot, lid)) {
5895                    out.insert((dst_slot, lid));
5896                    found = true;
5897                    break;
5898                }
5899            }
5900            if !found {
5901                // No label info available — fallback to src_label_id (correct for
5902                // homogeneous graphs, gracefully wrong for unmapped CSR-only nodes
5903                // in heterogeneous graphs with no delta activity on those nodes).
5904                out.insert((dst_slot, src_label_id));
5905            }
5906        }
5907    }
5908
5909    /// DFS traversal for variable-length path patterns `(src)-[:R*min..max]->(dst)`.
5910    ///
5911    /// Returns a `Vec<(dst_slot, dst_label_id)>` with **one entry per simple path**
5912    /// that ends at `depth ∈ [min_hops, max_hops]`.  The same destination node can
5913    /// appear multiple times when it is reachable via distinct simple paths
5914    /// (enumerative semantics, as required by OpenCypher).
5915    ///
5916    /// A simple path never visits the same node twice.  "Visited" is tracked per
5917    /// path using a stack that is pushed on entry and popped on backtrack — the
5918    /// classic DFS-with-backtracking pattern.
5919    ///
5920    /// Safety cap: `max_hops` is clamped to 10 to bound worst-case traversal.
5921    /// Result cap: at most `PATH_RESULT_CAP` entries are returned; a warning is
5922    /// printed to stderr if the cap is hit.
5923    ///
5924    /// Replaces the former global-visited BFS (existential semantics) that was
5925    /// correct for `shortestPath` but wrong for enumerative MATCH traversal:
5926    ///   - Diamond A→B→D, A→C→D: old BFS returned D once; DFS returns D twice.
5927    ///   - Zero-hop (`min_hops == 0`): source node still returned as-is.
5928    ///   - Self-loop A→A: correctly excluded (A is already in the path visited set).
5929    #[allow(clippy::too_many_arguments)]
5930    fn execute_variable_hops(
5931        &self,
5932        src_slot: u64,
5933        src_label_id: u32,
5934        min_hops: u32,
5935        max_hops: u32,
5936        delta_all: &[sparrowdb_storage::edge_store::DeltaRecord],
5937        node_label: &std::collections::HashSet<(u64, u32)>,
5938        all_label_ids: &[u32],
5939        neighbors_buf: &mut std::collections::HashSet<(u64, u32)>,
5940        use_reachability: bool,
5941        result_limit: usize,
5942    ) -> Vec<(u64, u32)> {
5943        const SAFETY_CAP: u32 = 10;
5944        let max_hops = max_hops.min(SAFETY_CAP);
5945
5946        let mut results: Vec<(u64, u32)> = Vec::new();
5947
5948        // Zero-hop match: source node itself is the only result.
5949        if min_hops == 0 {
5950            results.push((src_slot, src_label_id));
5951            if max_hops == 0 {
5952                return results;
5953            }
5954        }
5955
5956        if use_reachability {
5957            // ── Reachability BFS (existential fast-path, issue #165) ──────────────────
5958            //
5959            // Global visited set: each node is enqueued at most once.
5960            // O(V + E) — correct when RETURN DISTINCT is present and no path
5961            // variable is bound, so per-path enumeration is not needed.
5962            //
5963            // Early-exit: when `result_limit` is set (LIMIT clause with no ORDER BY /
5964            // SKIP), stop expanding the frontier once we have collected enough results.
5965            // Safe because DISTINCT + LIMIT with no ORDER BY has no defined ordering
5966            // — BFS order is as valid as any other.  (Issue #199.)
5967            let mut global_visited: std::collections::HashSet<(u64, u32)> =
5968                std::collections::HashSet::new();
5969            global_visited.insert((src_slot, src_label_id));
5970
5971            let mut frontier: std::collections::VecDeque<(u64, u32, u32)> =
5972                std::collections::VecDeque::new();
5973            frontier.push_back((src_slot, src_label_id, 0));
5974
5975            'bfs: while let Some((cur_slot, cur_label, depth)) = frontier.pop_front() {
5976                if depth >= max_hops {
5977                    continue;
5978                }
5979                self.get_node_neighbors_labeled(
5980                    cur_slot,
5981                    cur_label,
5982                    delta_all,
5983                    node_label,
5984                    all_label_ids,
5985                    neighbors_buf,
5986                );
5987                for (nb_slot, nb_label) in neighbors_buf.iter().copied().collect::<Vec<_>>() {
5988                    if global_visited.insert((nb_slot, nb_label)) {
5989                        let nb_depth = depth + 1;
5990                        if nb_depth >= min_hops {
5991                            results.push((nb_slot, nb_label));
5992                            // Early-exit: stop the moment we have enough results.
5993                            // Only safe when result_limit reflects a LIMIT with no ORDER BY.
5994                            if results.len() >= result_limit {
5995                                break 'bfs;
5996                            }
5997                        }
5998                        frontier.push_back((nb_slot, nb_label, nb_depth));
5999                    }
6000                }
6001            }
6002        } else {
6003            // ── Enumerative DFS (full path semantics) ─────────────────────────────────
6004            //
6005            // Hard cap: min of the caller's result_limit and PATH_RESULT_CAP.
6006            // Prevents unbounded memory growth on highly-connected graphs.
6007            const PATH_RESULT_CAP: usize = 100_000;
6008            let effective_cap = result_limit.min(PATH_RESULT_CAP);
6009
6010            // Each stack frame is `(node_slot, node_label_id, depth, neighbors)`.
6011            // The `neighbors` vec holds all outgoing neighbors of `node`; we consume
6012            // them one by one with `pop()`.  When the vec is empty we backtrack by
6013            // popping the frame and removing the node from `path_visited`.
6014            //
6015            // `path_visited` tracks nodes on the *current path* only (not globally),
6016            // so nodes that appear in two separate paths (e.g. diamond D) are each
6017            // visited once per path, yielding one result entry per path.
6018            type Frame = (u64, u32, u32, Vec<(u64, u32)>);
6019
6020            // Per-path visited set — (slot, label_id) to handle heterogeneous graphs.
6021            let mut path_visited: std::collections::HashSet<(u64, u32)> =
6022                std::collections::HashSet::new();
6023            path_visited.insert((src_slot, src_label_id));
6024
6025            // Build neighbors of source.
6026            self.get_node_neighbors_labeled(
6027                src_slot,
6028                src_label_id,
6029                delta_all,
6030                node_label,
6031                all_label_ids,
6032                neighbors_buf,
6033            );
6034            let src_nbrs: Vec<(u64, u32)> = neighbors_buf.iter().copied().collect();
6035
6036            // Push the source frame at depth 1 (the neighbors are the hop-1 candidates).
6037            let mut stack: Vec<Frame> = vec![(src_slot, src_label_id, 1, src_nbrs)];
6038
6039            while let Some(frame) = stack.last_mut() {
6040                let (_, _, depth, ref mut nbrs) = *frame;
6041
6042                match nbrs.pop() {
6043                    None => {
6044                        // All neighbors exhausted — backtrack.
6045                        let (popped_slot, popped_label, popped_depth, _) = stack.pop().unwrap();
6046                        // Remove this node from path_visited only if it was added when we
6047                        // entered it (depth > 1; the source is seeded before the loop).
6048                        if popped_depth > 1 {
6049                            path_visited.remove(&(popped_slot, popped_label));
6050                        }
6051                    }
6052                    Some((nb_slot, nb_label)) => {
6053                        // Skip nodes already on the current path (simple path constraint).
6054                        if path_visited.contains(&(nb_slot, nb_label)) {
6055                            continue;
6056                        }
6057
6058                        // Emit if depth is within the result window.
6059                        if depth >= min_hops {
6060                            results.push((nb_slot, nb_label));
6061                            if results.len() >= effective_cap {
6062                                if effective_cap >= PATH_RESULT_CAP {
6063                                    eprintln!(
6064                                        "sparrowdb: variable-length path result cap \
6065                                         ({PATH_RESULT_CAP}) hit; truncating results.  \
6066                                         Consider RETURN DISTINCT or a tighter *M..N bound."
6067                                    );
6068                                }
6069                                return results;
6070                            }
6071                        }
6072
6073                        // Recurse deeper if max_hops not yet reached.
6074                        if depth < max_hops {
6075                            path_visited.insert((nb_slot, nb_label));
6076                            self.get_node_neighbors_labeled(
6077                                nb_slot,
6078                                nb_label,
6079                                delta_all,
6080                                node_label,
6081                                all_label_ids,
6082                                neighbors_buf,
6083                            );
6084                            let next_nbrs: Vec<(u64, u32)> =
6085                                neighbors_buf.iter().copied().collect();
6086                            stack.push((nb_slot, nb_label, depth + 1, next_nbrs));
6087                        }
6088                    }
6089                }
6090            }
6091        }
6092
6093        results
6094    }
6095
6096    /// Compatibility shim used by callers that do not need per-node label tracking.
6097    fn get_node_neighbors_by_slot(
6098        &self,
6099        src_slot: u64,
6100        src_label_id: u32,
6101        delta_all: &[sparrowdb_storage::edge_store::DeltaRecord],
6102    ) -> Vec<u64> {
6103        let csr_neighbors: Vec<u64> = self.csr_neighbors_all(src_slot);
6104        let delta_neighbors: Vec<u64> = delta_all
6105            .iter()
6106            .filter(|r| {
6107                let r_src_label = (r.src.0 >> 32) as u32;
6108                let r_src_slot = r.src.0 & 0xFFFF_FFFF;
6109                r_src_label == src_label_id && r_src_slot == src_slot
6110            })
6111            .map(|r| r.dst.0 & 0xFFFF_FFFF)
6112            .collect();
6113        let mut all: std::collections::HashSet<u64> = csr_neighbors.into_iter().collect();
6114        all.extend(delta_neighbors);
6115        all.into_iter().collect()
6116    }
6117
6118    /// Execute a variable-length path query: `MATCH (a:L1)-[:R*M..N]->(b:L2) RETURN …`.
6119    fn execute_variable_length(
6120        &self,
6121        m: &MatchStatement,
6122        column_names: &[String],
6123    ) -> Result<QueryResult> {
6124        let pat = &m.pattern[0];
6125        let src_node_pat = &pat.nodes[0];
6126        let dst_node_pat = &pat.nodes[1];
6127        let rel_pat = &pat.rels[0];
6128
6129        if rel_pat.dir != sparrowdb_cypher::ast::EdgeDir::Outgoing {
6130            return Err(sparrowdb_common::Error::Unimplemented);
6131        }
6132
6133        let min_hops = rel_pat.min_hops.unwrap_or(1);
6134        let max_hops = rel_pat.max_hops.unwrap_or(10); // unbounded → cap at 10
6135
6136        let src_label = src_node_pat.labels.first().cloned().unwrap_or_default();
6137        let dst_label = dst_node_pat.labels.first().cloned().unwrap_or_default();
6138
6139        let src_label_id = self
6140            .snapshot
6141            .catalog
6142            .get_label(&src_label)?
6143            .ok_or(sparrowdb_common::Error::NotFound)? as u32;
6144        // dst_label_id is None when the destination pattern has no label constraint.
6145        let dst_label_id: Option<u32> = if dst_label.is_empty() {
6146            None
6147        } else {
6148            Some(
6149                self.snapshot
6150                    .catalog
6151                    .get_label(&dst_label)?
6152                    .ok_or(sparrowdb_common::Error::NotFound)? as u32,
6153            )
6154        };
6155
6156        let hwm_src = self.snapshot.store.hwm_for_label(src_label_id)?;
6157
6158        let col_ids_src = collect_col_ids_for_var(&src_node_pat.var, column_names, src_label_id);
6159        let col_ids_dst =
6160            collect_col_ids_for_var(&dst_node_pat.var, column_names, dst_label_id.unwrap_or(0));
6161
6162        // Build dst read set: projection columns + dst inline-prop filter columns +
6163        // WHERE-clause columns on the dst variable.  Mirrors the 1-hop code (SPA-224).
6164        let dst_all_col_ids: Vec<u32> = {
6165            let mut v = col_ids_dst.clone();
6166            for p in &dst_node_pat.props {
6167                let col_id = prop_name_to_col_id(&p.key);
6168                if !v.contains(&col_id) {
6169                    v.push(col_id);
6170                }
6171            }
6172            if let Some(ref where_expr) = m.where_clause {
6173                collect_col_ids_from_expr(where_expr, &mut v);
6174            }
6175            v
6176        };
6177
6178        let mut rows: Vec<Vec<Value>> = Vec::new();
6179        // NOTE: No deduplication by (src, dst) here.  With DFS-with-backtracking
6180        // the traversal returns one entry per *simple path*, so the same destination
6181        // can appear multiple times when reachable via distinct paths (enumerative
6182        // semantics required by OpenCypher).  The old global-visited BFS never
6183        // produced duplicates and needed this guard; the DFS replacement does not.
6184
6185        // Precompute label-id → name map once so that the hot path inside
6186        // `for dst_slot in dst_nodes` does not call `list_labels()` per node.
6187        let labels_by_id: std::collections::HashMap<u16, String> = self
6188            .snapshot
6189            .catalog
6190            .list_labels()
6191            .unwrap_or_default()
6192            .into_iter()
6193            .collect();
6194
6195        // SPA-275: hoist delta read and node_label map out of the per-source loop.
6196        // Previously execute_variable_hops rebuilt these on every call — O(sources)
6197        // delta reads and O(sources × delta_records) HashMap insertions per query.
6198        // Now we build them once and pass references into the BFS.
6199        let delta_all = self.read_delta_all();
6200        let mut node_label: std::collections::HashSet<(u64, u32)> =
6201            std::collections::HashSet::new();
6202        for r in &delta_all {
6203            let src_s = r.src.0 & 0xFFFF_FFFF;
6204            let src_l = (r.src.0 >> 32) as u32;
6205            node_label.insert((src_s, src_l));
6206            let dst_s = r.dst.0 & 0xFFFF_FFFF;
6207            let dst_l = (r.dst.0 >> 32) as u32;
6208            node_label.insert((dst_s, dst_l));
6209        }
6210        let mut all_label_ids: Vec<u32> = node_label.iter().map(|&(_, l)| l).collect();
6211        all_label_ids.sort_unstable();
6212        all_label_ids.dedup();
6213
6214        // Reusable neighbors buffer: allocated once, cleared between frontier nodes.
6215        let mut neighbors_buf: std::collections::HashSet<(u64, u32)> =
6216            std::collections::HashSet::new();
6217
6218        // Compute effective result limit: when no ORDER BY and no SKIP are present,
6219        // we can stop collecting rows once we reach LIMIT (early exit).
6220        // With ORDER BY or SKIP we must collect all rows before sorting/skipping.
6221        let has_order_by = !m.order_by.is_empty();
6222        let has_skip = m.skip.is_some();
6223        let row_limit: usize = if has_order_by || has_skip {
6224            usize::MAX
6225        } else {
6226            m.limit.map(|l| l as usize).unwrap_or(usize::MAX)
6227        };
6228
6229        for src_slot in 0..hwm_src {
6230            // SPA-254: check per-query deadline at every slot boundary.
6231            self.check_deadline()?;
6232
6233            // Early exit: already have enough rows for the LIMIT.
6234            if rows.len() >= row_limit {
6235                break;
6236            }
6237
6238            let src_node = NodeId(((src_label_id as u64) << 32) | src_slot);
6239
6240            // Fetch source props (for filter + projection).
6241            let src_all_col_ids: Vec<u32> = {
6242                let mut v = col_ids_src.clone();
6243                for p in &src_node_pat.props {
6244                    let col_id = prop_name_to_col_id(&p.key);
6245                    if !v.contains(&col_id) {
6246                        v.push(col_id);
6247                    }
6248                }
6249                if let Some(ref where_expr) = m.where_clause {
6250                    collect_col_ids_from_expr(where_expr, &mut v);
6251                }
6252                v
6253            };
6254            let src_props = read_node_props(&self.snapshot.store, src_node, &src_all_col_ids)?;
6255
6256            if !self.matches_prop_filter(&src_props, &src_node_pat.props) {
6257                continue;
6258            }
6259
6260            // BFS to find all reachable (slot, label_id) pairs within [min_hops, max_hops].
6261            // delta_all, node_label, all_label_ids, and neighbors_buf are hoisted out of
6262            // this loop (SPA-275) and reused across all source nodes.
6263            // Use reachability BFS when RETURN DISTINCT is present and no path variable
6264            // is bound (issue #165). Otherwise use enumerative DFS for full path semantics.
6265            let use_reachability = m.distinct && rel_pat.var.is_empty();
6266            // Pass remaining row budget into the BFS/DFS so it can stop early.
6267            let remaining = row_limit.saturating_sub(rows.len());
6268            let dst_nodes = self.execute_variable_hops(
6269                src_slot,
6270                src_label_id,
6271                min_hops,
6272                max_hops,
6273                &delta_all,
6274                &node_label,
6275                &all_label_ids,
6276                &mut neighbors_buf,
6277                use_reachability,
6278                remaining,
6279            );
6280
6281            for (dst_slot, actual_label_id) in dst_nodes {
6282                // When the destination pattern specifies a label, only include nodes
6283                // whose actual label (recovered from the delta) matches.
6284                if let Some(required_label) = dst_label_id {
6285                    if actual_label_id != required_label {
6286                        continue;
6287                    }
6288                }
6289
6290                // Use the actual label_id to construct the NodeId so that
6291                // heterogeneous graph nodes are addressed correctly.
6292                let resolved_dst_label_id = dst_label_id.unwrap_or(actual_label_id);
6293
6294                let dst_node = NodeId(((resolved_dst_label_id as u64) << 32) | dst_slot);
6295                // SPA-224: read dst props using the full column set (projection +
6296                // inline filter + WHERE), not just the projection set.  Without the
6297                // filter columns the inline prop check below always fails silently
6298                // when the dst variable is not referenced in RETURN.
6299                let dst_props = read_node_props(&self.snapshot.store, dst_node, &dst_all_col_ids)?;
6300
6301                if !self.matches_prop_filter(&dst_props, &dst_node_pat.props) {
6302                    continue;
6303                }
6304
6305                // Resolve the actual label name for this destination node so that
6306                // labels(x) and label metadata work even when the pattern is unlabeled.
6307                // Use the precomputed map to avoid calling list_labels() per node.
6308                let resolved_dst_label_name: String = if !dst_label.is_empty() {
6309                    dst_label.clone()
6310                } else {
6311                    labels_by_id
6312                        .get(&(actual_label_id as u16))
6313                        .cloned()
6314                        .unwrap_or_default()
6315                };
6316
6317                // Apply WHERE clause.
6318                if let Some(ref where_expr) = m.where_clause {
6319                    let mut row_vals = build_row_vals(
6320                        &src_props,
6321                        &src_node_pat.var,
6322                        &col_ids_src,
6323                        &self.snapshot.store,
6324                    );
6325                    row_vals.extend(build_row_vals(
6326                        &dst_props,
6327                        &dst_node_pat.var,
6328                        &col_ids_dst,
6329                        &self.snapshot.store,
6330                    ));
6331                    // Inject relationship metadata so type(r) works in WHERE.
6332                    if !rel_pat.var.is_empty() {
6333                        row_vals.insert(
6334                            format!("{}.__type__", rel_pat.var),
6335                            Value::String(rel_pat.rel_type.clone()),
6336                        );
6337                    }
6338                    // Inject node label metadata so labels(n) works in WHERE.
6339                    if !src_node_pat.var.is_empty() && !src_label.is_empty() {
6340                        row_vals.insert(
6341                            format!("{}.__labels__", src_node_pat.var),
6342                            Value::List(vec![Value::String(src_label.clone())]),
6343                        );
6344                    }
6345                    // Use resolved_dst_label_name so labels(x) works even for unlabeled
6346                    // destination patterns (dst_label is empty but actual_label_id is known).
6347                    if !dst_node_pat.var.is_empty() && !resolved_dst_label_name.is_empty() {
6348                        row_vals.insert(
6349                            format!("{}.__labels__", dst_node_pat.var),
6350                            Value::List(vec![Value::String(resolved_dst_label_name.clone())]),
6351                        );
6352                    }
6353                    row_vals.extend(self.dollar_params());
6354                    if !self.eval_where_graph(where_expr, &row_vals) {
6355                        continue;
6356                    }
6357                }
6358
6359                let rel_var_type = if !rel_pat.var.is_empty() {
6360                    Some((rel_pat.var.as_str(), rel_pat.rel_type.as_str()))
6361                } else {
6362                    None
6363                };
6364                let src_label_meta = if !src_node_pat.var.is_empty() && !src_label.is_empty() {
6365                    Some((src_node_pat.var.as_str(), src_label.as_str()))
6366                } else {
6367                    None
6368                };
6369                let dst_label_meta =
6370                    if !dst_node_pat.var.is_empty() && !resolved_dst_label_name.is_empty() {
6371                        Some((dst_node_pat.var.as_str(), resolved_dst_label_name.as_str()))
6372                    } else {
6373                        None
6374                    };
6375                let row = project_hop_row(
6376                    &src_props,
6377                    &dst_props,
6378                    column_names,
6379                    &src_node_pat.var,
6380                    &dst_node_pat.var,
6381                    rel_var_type,
6382                    src_label_meta,
6383                    dst_label_meta,
6384                    &self.snapshot.store,
6385                    None, // edge props not available in OPTIONAL MATCH path
6386                );
6387                rows.push(row);
6388            }
6389        }
6390
6391        // DISTINCT
6392        if m.distinct {
6393            deduplicate_rows(&mut rows);
6394        }
6395
6396        // ORDER BY
6397        apply_order_by(&mut rows, m, column_names);
6398
6399        // SKIP
6400        if let Some(skip) = m.skip {
6401            let skip = (skip as usize).min(rows.len());
6402            rows.drain(0..skip);
6403        }
6404
6405        // LIMIT
6406        if let Some(lim) = m.limit {
6407            rows.truncate(lim as usize);
6408        }
6409
6410        tracing::debug!(
6411            rows = rows.len(),
6412            min_hops,
6413            max_hops,
6414            "variable-length traversal complete"
6415        );
6416        Ok(QueryResult {
6417            columns: column_names.to_vec(),
6418            rows,
6419        })
6420    }
6421
6422    // ── Property filter helpers ───────────────────────────────────────────────
6423
6424    fn matches_prop_filter(
6425        &self,
6426        props: &[(u32, u64)],
6427        filters: &[sparrowdb_cypher::ast::PropEntry],
6428    ) -> bool {
6429        matches_prop_filter_static(props, filters, &self.dollar_params(), &self.snapshot.store)
6430    }
6431
6432    /// Build a map of runtime parameters keyed with a `$` prefix,
6433    /// suitable for passing to `eval_expr` / `eval_where`.
6434    ///
6435    /// For example, `params["name"] = Value::String("Alice")` becomes
6436    /// `{"$name": Value::String("Alice")}` in the returned map.
6437    fn dollar_params(&self) -> HashMap<String, Value> {
6438        self.params
6439            .iter()
6440            .map(|(k, v)| (format!("${k}"), v.clone()))
6441            .collect()
6442    }
6443
6444    // ── Graph-aware expression evaluation (SPA-136, SPA-137, SPA-138) ────────
6445
6446    /// Evaluate an expression that may require graph access (EXISTS, ShortestPath).
6447    fn eval_expr_graph(&self, expr: &Expr, vals: &HashMap<String, Value>) -> Value {
6448        match expr {
6449            Expr::ExistsSubquery(ep) => Value::Bool(self.eval_exists_subquery(ep, vals)),
6450            Expr::ShortestPath(sp) => self.eval_shortest_path_expr(sp, vals),
6451            Expr::CaseWhen {
6452                branches,
6453                else_expr,
6454            } => {
6455                for (cond, then_val) in branches {
6456                    if let Value::Bool(true) = self.eval_expr_graph(cond, vals) {
6457                        return self.eval_expr_graph(then_val, vals);
6458                    }
6459                }
6460                else_expr
6461                    .as_ref()
6462                    .map(|e| self.eval_expr_graph(e, vals))
6463                    .unwrap_or(Value::Null)
6464            }
6465            Expr::And(l, r) => {
6466                match (self.eval_expr_graph(l, vals), self.eval_expr_graph(r, vals)) {
6467                    (Value::Bool(a), Value::Bool(b)) => Value::Bool(a && b),
6468                    _ => Value::Null,
6469                }
6470            }
6471            Expr::Or(l, r) => {
6472                match (self.eval_expr_graph(l, vals), self.eval_expr_graph(r, vals)) {
6473                    (Value::Bool(a), Value::Bool(b)) => Value::Bool(a || b),
6474                    _ => Value::Null,
6475                }
6476            }
6477            Expr::Not(inner) => match self.eval_expr_graph(inner, vals) {
6478                Value::Bool(b) => Value::Bool(!b),
6479                _ => Value::Null,
6480            },
6481            // SPA-134: PropAccess where the variable resolves to a NodeRef (e.g. `WITH n AS person
6482            // RETURN person.name`).  Fetch the property from the node store directly.
6483            Expr::PropAccess { var, prop } => {
6484                // Try normal key first (col_N or direct "var.prop" entry).
6485                let normal = eval_expr(expr, vals);
6486                if !matches!(normal, Value::Null) {
6487                    return normal;
6488                }
6489                // Fallback: if the variable is a NodeRef, read the property from the store.
6490                if let Some(Value::NodeRef(node_id)) = vals
6491                    .get(var.as_str())
6492                    .or_else(|| vals.get(&format!("{var}.__node_id__")))
6493                {
6494                    let col_id = prop_name_to_col_id(prop);
6495                    if let Ok(props) = self.snapshot.store.get_node_raw(*node_id, &[col_id]) {
6496                        if let Some(&(_, raw)) = props.iter().find(|(c, _)| *c == col_id) {
6497                            return decode_raw_val(raw, &self.snapshot.store);
6498                        }
6499                    }
6500                }
6501                Value::Null
6502            }
6503            _ => eval_expr(expr, vals),
6504        }
6505    }
6506
6507    /// Graph-aware WHERE evaluation — falls back to eval_where for pure expressions.
6508    fn eval_where_graph(&self, expr: &Expr, vals: &HashMap<String, Value>) -> bool {
6509        match self.eval_expr_graph(expr, vals) {
6510            Value::Bool(b) => b,
6511            _ => eval_where(expr, vals),
6512        }
6513    }
6514
6515    /// Evaluate `EXISTS { (n)-[:REL]->(:DstLabel) }` — SPA-137.
6516    fn eval_exists_subquery(
6517        &self,
6518        ep: &sparrowdb_cypher::ast::ExistsPattern,
6519        vals: &HashMap<String, Value>,
6520    ) -> bool {
6521        let path = &ep.path;
6522        if path.nodes.len() < 2 || path.rels.is_empty() {
6523            return false;
6524        }
6525        let src_pat = &path.nodes[0];
6526        let dst_pat = &path.nodes[1];
6527        let rel_pat = &path.rels[0];
6528
6529        let src_node_id = match self.resolve_node_id_from_var(&src_pat.var, vals) {
6530            Some(id) => id,
6531            None => return false,
6532        };
6533        let src_slot = src_node_id.0 & 0xFFFF_FFFF;
6534        let src_label_id = (src_node_id.0 >> 32) as u32;
6535
6536        let dst_label = dst_pat.labels.first().map(String::as_str).unwrap_or("");
6537        let dst_label_id_opt: Option<u32> = if dst_label.is_empty() {
6538            None
6539        } else {
6540            self.snapshot
6541                .catalog
6542                .get_label(dst_label)
6543                .ok()
6544                .flatten()
6545                .map(|id| id as u32)
6546        };
6547
6548        let rel_lookup = if let Some(dst_lid) = dst_label_id_opt {
6549            self.resolve_rel_table_id(src_label_id, dst_lid, &rel_pat.rel_type)
6550        } else {
6551            RelTableLookup::All
6552        };
6553
6554        let csr_nb: Vec<u64> = match rel_lookup {
6555            RelTableLookup::Found(rtid) => self.csr_neighbors(rtid, src_slot),
6556            RelTableLookup::NotFound => return false,
6557            RelTableLookup::All => self.csr_neighbors_all(src_slot),
6558        };
6559        let delta_nb: Vec<u64> = self
6560            .read_delta_all()
6561            .into_iter()
6562            .filter(|r| {
6563                let r_src_label = (r.src.0 >> 32) as u32;
6564                let r_src_slot = r.src.0 & 0xFFFF_FFFF;
6565                if r_src_label != src_label_id || r_src_slot != src_slot {
6566                    return false;
6567                }
6568                // When a destination label is known, only keep edges that point
6569                // to nodes of that label — slots are label-relative so mixing
6570                // labels causes false positive matches.
6571                if let Some(dst_lid) = dst_label_id_opt {
6572                    let r_dst_label = (r.dst.0 >> 32) as u32;
6573                    r_dst_label == dst_lid
6574                } else {
6575                    true
6576                }
6577            })
6578            .map(|r| r.dst.0 & 0xFFFF_FFFF)
6579            .collect();
6580
6581        let all_nb: std::collections::HashSet<u64> = csr_nb.into_iter().chain(delta_nb).collect();
6582
6583        for dst_slot in all_nb {
6584            if let Some(did) = dst_label_id_opt {
6585                let probe_id = NodeId(((did as u64) << 32) | dst_slot);
6586                if self.snapshot.store.get_node_raw(probe_id, &[]).is_err() {
6587                    continue;
6588                }
6589                if !dst_pat.props.is_empty() {
6590                    let col_ids: Vec<u32> = dst_pat
6591                        .props
6592                        .iter()
6593                        .map(|p| prop_name_to_col_id(&p.key))
6594                        .collect();
6595                    match self.snapshot.store.get_node_raw(probe_id, &col_ids) {
6596                        Ok(props) => {
6597                            let params = self.dollar_params();
6598                            if !matches_prop_filter_static(
6599                                &props,
6600                                &dst_pat.props,
6601                                &params,
6602                                &self.snapshot.store,
6603                            ) {
6604                                continue;
6605                            }
6606                        }
6607                        Err(_) => continue,
6608                    }
6609                }
6610            }
6611            return true;
6612        }
6613        false
6614    }
6615
6616    /// Resolve a NodeId from `vals` for a variable name.
6617    fn resolve_node_id_from_var(&self, var: &str, vals: &HashMap<String, Value>) -> Option<NodeId> {
6618        let id_key = format!("{var}.__node_id__");
6619        if let Some(Value::NodeRef(nid)) = vals.get(&id_key) {
6620            return Some(*nid);
6621        }
6622        if let Some(Value::NodeRef(nid)) = vals.get(var) {
6623            return Some(*nid);
6624        }
6625        None
6626    }
6627
6628    /// Evaluate `shortestPath((src)-[:REL*]->(dst))` — SPA-136.
6629    fn eval_shortest_path_expr(
6630        &self,
6631        sp: &sparrowdb_cypher::ast::ShortestPathExpr,
6632        vals: &HashMap<String, Value>,
6633    ) -> Value {
6634        // Resolve src: if the variable is already bound as a NodeRef, extract
6635        // label_id and slot from the NodeId directly (high 32 bits = label_id,
6636        // low 32 bits = slot). This handles the case where shortestPath((a)-...)
6637        // refers to a variable bound in the outer MATCH without repeating its label.
6638        let (src_label_id, src_slot) =
6639            if let Some(nid) = self.resolve_node_id_from_var(&sp.src_var, vals) {
6640                let label_id = (nid.0 >> 32) as u32;
6641                let slot = nid.0 & 0xFFFF_FFFF;
6642                (label_id, slot)
6643            } else {
6644                // Fall back to label lookup + property scan.
6645                let label_id = match self.snapshot.catalog.get_label(&sp.src_label) {
6646                    Ok(Some(id)) => id as u32,
6647                    _ => return Value::Null,
6648                };
6649                match self.find_node_by_props(label_id, &sp.src_props) {
6650                    Some(slot) => (label_id, slot),
6651                    None => return Value::Null,
6652                }
6653            };
6654
6655        let dst_slot = if let Some(nid) = self.resolve_node_id_from_var(&sp.dst_var, vals) {
6656            nid.0 & 0xFFFF_FFFF
6657        } else {
6658            let dst_label_id = match self.snapshot.catalog.get_label(&sp.dst_label) {
6659                Ok(Some(id)) => id as u32,
6660                _ => return Value::Null,
6661            };
6662            match self.find_node_by_props(dst_label_id, &sp.dst_props) {
6663                Some(slot) => slot,
6664                None => return Value::Null,
6665            }
6666        };
6667
6668        match self.bfs_shortest_path(src_slot, src_label_id, dst_slot, 10) {
6669            Some(hops) => Value::Int64(hops as i64),
6670            None => Value::Null,
6671        }
6672    }
6673
6674    /// Scan a label for the first node matching all property filters.
6675    fn find_node_by_props(
6676        &self,
6677        label_id: u32,
6678        props: &[sparrowdb_cypher::ast::PropEntry],
6679    ) -> Option<u64> {
6680        if props.is_empty() {
6681            return None;
6682        }
6683        let hwm = self.snapshot.store.hwm_for_label(label_id).ok()?;
6684        let col_ids: Vec<u32> = props.iter().map(|p| prop_name_to_col_id(&p.key)).collect();
6685        let params = self.dollar_params();
6686        for slot in 0..hwm {
6687            let node_id = NodeId(((label_id as u64) << 32) | slot);
6688            if let Ok(raw_props) = self.snapshot.store.get_node_raw(node_id, &col_ids) {
6689                if matches_prop_filter_static(&raw_props, props, &params, &self.snapshot.store) {
6690                    return Some(slot);
6691                }
6692            }
6693        }
6694        None
6695    }
6696
6697    /// BFS from `src_slot` to `dst_slot`, returning the hop count or None.
6698    ///
6699    /// `src_label_id` is used to look up edges in the WAL delta for every hop.
6700    /// When all nodes in the shortest path share the same label (the typical
6701    /// single-label homogeneous graph), this is correct.  Heterogeneous graphs
6702    /// with intermediate nodes of a different label will still find paths via
6703    /// the CSR (`csr_neighbors_all`), which is label-agnostic; only in-flight
6704    /// WAL edges from intermediate nodes of a different label may be missed.
6705    fn bfs_shortest_path(
6706        &self,
6707        src_slot: u64,
6708        src_label_id: u32,
6709        dst_slot: u64,
6710        max_hops: u32,
6711    ) -> Option<u32> {
6712        if src_slot == dst_slot {
6713            return Some(0);
6714        }
6715        // Hoist delta read out of the BFS loop to avoid repeated I/O.
6716        let delta_all = self.read_delta_all();
6717        let mut visited: std::collections::HashSet<u64> = std::collections::HashSet::new();
6718        visited.insert(src_slot);
6719        let mut frontier: Vec<u64> = vec![src_slot];
6720
6721        for depth in 1..=max_hops {
6722            let mut next_frontier: Vec<u64> = Vec::new();
6723            for &node_slot in &frontier {
6724                let neighbors =
6725                    self.get_node_neighbors_by_slot(node_slot, src_label_id, &delta_all);
6726                for nb in neighbors {
6727                    if nb == dst_slot {
6728                        return Some(depth);
6729                    }
6730                    if visited.insert(nb) {
6731                        next_frontier.push(nb);
6732                    }
6733                }
6734            }
6735            if next_frontier.is_empty() {
6736                break;
6737            }
6738            frontier = next_frontier;
6739        }
6740        None
6741    }
6742
6743    /// Engine-aware aggregate_rows: evaluates graph-dependent RETURN expressions
6744    /// (ShortestPath, EXISTS) via self before delegating to the standalone helper.
6745    fn aggregate_rows_graph(
6746        &self,
6747        rows: &[HashMap<String, Value>],
6748        return_items: &[ReturnItem],
6749    ) -> Vec<Vec<Value>> {
6750        // Check if any return item needs graph access.
6751        let needs_graph = return_items.iter().any(|item| expr_needs_graph(&item.expr));
6752        if !needs_graph {
6753            return aggregate_rows(rows, return_items);
6754        }
6755        // For graph-dependent items, project each row using eval_expr_graph.
6756        rows.iter()
6757            .map(|row_vals| {
6758                return_items
6759                    .iter()
6760                    .map(|item| self.eval_expr_graph(&item.expr, row_vals))
6761                    .collect()
6762            })
6763            .collect()
6764    }
6765}
6766
6767// ── Free-standing prop-filter helper (usable without &self) ───────────────────
6768
6769fn matches_prop_filter_static(
6770    props: &[(u32, u64)],
6771    filters: &[sparrowdb_cypher::ast::PropEntry],
6772    params: &HashMap<String, Value>,
6773    store: &NodeStore,
6774) -> bool {
6775    for f in filters {
6776        let col_id = prop_name_to_col_id(&f.key);
6777        let stored_val = props.iter().find(|(c, _)| *c == col_id).map(|(_, v)| *v);
6778
6779        // Evaluate the filter expression (supports literals, function calls, and
6780        // runtime parameters via `$name` — params are keyed as `"$name"` in the map).
6781        let filter_val = eval_expr(&f.value, params);
6782        let matches = match filter_val {
6783            Value::Int64(n) => {
6784                // Int64 values are stored with TAG_INT64 (0x00) in the top byte.
6785                // Use StoreValue::to_u64() for canonical encoding (SPA-169).
6786                stored_val == Some(StoreValue::Int64(n).to_u64())
6787            }
6788            Value::Bool(b) => {
6789                // Booleans are stored as Int64(1) for true, Int64(0) for false
6790                // (see value_to_store_value / literal_to_store_value).
6791                let expected = StoreValue::Int64(if b { 1 } else { 0 }).to_u64();
6792                stored_val == Some(expected)
6793            }
6794            Value::String(s) => {
6795                // Use store.raw_str_matches to handle both inline (≤7 bytes) and
6796                // overflow (>7 bytes) string encodings (SPA-212).
6797                stored_val.is_some_and(|raw| store.raw_str_matches(raw, &s))
6798            }
6799            Value::Float64(f) => {
6800                // Float values are stored via TAG_FLOAT in the overflow heap (SPA-267).
6801                // Decode the raw stored u64 back to a Value::Float and compare.
6802                stored_val.is_some_and(|raw| {
6803                    matches!(store.decode_raw_value(raw), StoreValue::Float(stored_f) if stored_f == f)
6804                })
6805            }
6806            Value::Null => true, // null filter passes (param-like behaviour)
6807            _ => false,
6808        };
6809        if !matches {
6810            return false;
6811        }
6812    }
6813    true
6814}
6815
6816// ── Helpers ───────────────────────────────────────────────────────────────────
6817
6818/// Evaluate an UNWIND list expression to a concrete `Vec<Value>`.
6819///
6820/// Supports:
6821/// - `Expr::List([...])` — list literal
6822/// - `Expr::Literal(Param(name))` — looks up `name` in `params`; expects `Value::List`
6823/// - `Expr::FnCall { name: "range", args }` — integer range expansion
6824fn eval_list_expr(expr: &Expr, params: &HashMap<String, Value>) -> Result<Vec<Value>> {
6825    match expr {
6826        Expr::List(elems) => {
6827            let mut values = Vec::with_capacity(elems.len());
6828            for elem in elems {
6829                values.push(eval_scalar_expr(elem));
6830            }
6831            Ok(values)
6832        }
6833        Expr::Literal(Literal::Param(name)) => {
6834            // Look up the parameter in the runtime params map.
6835            match params.get(name) {
6836                Some(Value::List(items)) => Ok(items.clone()),
6837                Some(other) => {
6838                    // Non-list value: wrap as a single-element list so the
6839                    // caller can still iterate (matches Neo4j behaviour).
6840                    Ok(vec![other.clone()])
6841                }
6842                None => {
6843                    // Parameter not supplied — produce an empty list (no rows).
6844                    Ok(vec![])
6845                }
6846            }
6847        }
6848        Expr::FnCall { name, args } => {
6849            // Expand function calls that produce lists.
6850            // Currently only `range(start, end[, step])` is supported here.
6851            let name_lc = name.to_lowercase();
6852            if name_lc == "range" {
6853                let empty_vals: std::collections::HashMap<String, Value> =
6854                    std::collections::HashMap::new();
6855                let evaluated: Vec<Value> =
6856                    args.iter().map(|a| eval_expr(a, &empty_vals)).collect();
6857                // range(start, end[, step]) → Vec<Int64>
6858                let start = match evaluated.first() {
6859                    Some(Value::Int64(n)) => *n,
6860                    _ => {
6861                        return Err(sparrowdb_common::Error::InvalidArgument(
6862                            "range() expects integer arguments".into(),
6863                        ))
6864                    }
6865                };
6866                let end = match evaluated.get(1) {
6867                    Some(Value::Int64(n)) => *n,
6868                    _ => {
6869                        return Err(sparrowdb_common::Error::InvalidArgument(
6870                            "range() expects at least 2 integer arguments".into(),
6871                        ))
6872                    }
6873                };
6874                let step: i64 = match evaluated.get(2) {
6875                    Some(Value::Int64(n)) => *n,
6876                    None => 1,
6877                    _ => 1,
6878                };
6879                if step == 0 {
6880                    return Err(sparrowdb_common::Error::InvalidArgument(
6881                        "range(): step must not be zero".into(),
6882                    ));
6883                }
6884                let mut values = Vec::new();
6885                if step > 0 {
6886                    let mut i = start;
6887                    while i <= end {
6888                        values.push(Value::Int64(i));
6889                        i += step;
6890                    }
6891                } else {
6892                    let mut i = start;
6893                    while i >= end {
6894                        values.push(Value::Int64(i));
6895                        i += step;
6896                    }
6897                }
6898                Ok(values)
6899            } else {
6900                // Other function calls are not list-producing.
6901                Err(sparrowdb_common::Error::InvalidArgument(format!(
6902                    "UNWIND: function '{name}' does not return a list"
6903                )))
6904            }
6905        }
6906        other => Err(sparrowdb_common::Error::InvalidArgument(format!(
6907            "UNWIND expression is not a list: {:?}",
6908            other
6909        ))),
6910    }
6911}
6912
6913/// Evaluate a scalar expression to a `Value` (no row context needed).
6914fn eval_scalar_expr(expr: &Expr) -> Value {
6915    match expr {
6916        Expr::Literal(lit) => match lit {
6917            Literal::Int(n) => Value::Int64(*n),
6918            Literal::Float(f) => Value::Float64(*f),
6919            Literal::Bool(b) => Value::Bool(*b),
6920            Literal::String(s) => Value::String(s.clone()),
6921            Literal::Null => Value::Null,
6922            Literal::Param(_) => Value::Null,
6923        },
6924        _ => Value::Null,
6925    }
6926}
6927
6928fn extract_return_column_names(items: &[ReturnItem]) -> Vec<String> {
6929    items
6930        .iter()
6931        .map(|item| match &item.alias {
6932            Some(alias) => alias.clone(),
6933            None => match &item.expr {
6934                Expr::PropAccess { var, prop } => format!("{var}.{prop}"),
6935                Expr::Var(v) => v.clone(),
6936                Expr::CountStar => "count(*)".to_string(),
6937                Expr::FnCall { name, args } => {
6938                    let arg_str = args
6939                        .first()
6940                        .map(|a| match a {
6941                            Expr::PropAccess { var, prop } => format!("{var}.{prop}"),
6942                            Expr::Var(v) => v.clone(),
6943                            _ => "*".to_string(),
6944                        })
6945                        .unwrap_or_else(|| "*".to_string());
6946                    format!("{}({})", name.to_lowercase(), arg_str)
6947                }
6948                _ => "?".to_string(),
6949            },
6950        })
6951        .collect()
6952}
6953
6954/// Collect all column IDs referenced by property accesses in an expression,
6955/// scoped to a specific variable name.
6956///
6957/// Only `PropAccess` nodes whose `var` field matches `target_var` contribute
6958/// column IDs, so callers can separate src-side from fof-side columns without
6959/// accidentally fetching unrelated properties from the wrong node.
6960fn collect_col_ids_from_expr_for_var(expr: &Expr, target_var: &str, out: &mut Vec<u32>) {
6961    match expr {
6962        Expr::PropAccess { var, prop } => {
6963            if var == target_var {
6964                let col_id = prop_name_to_col_id(prop);
6965                if !out.contains(&col_id) {
6966                    out.push(col_id);
6967                }
6968            }
6969        }
6970        Expr::BinOp { left, right, .. } => {
6971            collect_col_ids_from_expr_for_var(left, target_var, out);
6972            collect_col_ids_from_expr_for_var(right, target_var, out);
6973        }
6974        Expr::And(l, r) | Expr::Or(l, r) => {
6975            collect_col_ids_from_expr_for_var(l, target_var, out);
6976            collect_col_ids_from_expr_for_var(r, target_var, out);
6977        }
6978        Expr::Not(inner) | Expr::IsNull(inner) | Expr::IsNotNull(inner) => {
6979            collect_col_ids_from_expr_for_var(inner, target_var, out);
6980        }
6981        Expr::InList { expr, list, .. } => {
6982            collect_col_ids_from_expr_for_var(expr, target_var, out);
6983            for item in list {
6984                collect_col_ids_from_expr_for_var(item, target_var, out);
6985            }
6986        }
6987        Expr::FnCall { args, .. } | Expr::List(args) => {
6988            for arg in args {
6989                collect_col_ids_from_expr_for_var(arg, target_var, out);
6990            }
6991        }
6992        Expr::ListPredicate {
6993            list_expr,
6994            predicate,
6995            ..
6996        } => {
6997            collect_col_ids_from_expr_for_var(list_expr, target_var, out);
6998            collect_col_ids_from_expr_for_var(predicate, target_var, out);
6999        }
7000        // SPA-138: CASE WHEN branches may reference property accesses.
7001        Expr::CaseWhen {
7002            branches,
7003            else_expr,
7004        } => {
7005            for (cond, then_val) in branches {
7006                collect_col_ids_from_expr_for_var(cond, target_var, out);
7007                collect_col_ids_from_expr_for_var(then_val, target_var, out);
7008            }
7009            if let Some(e) = else_expr {
7010                collect_col_ids_from_expr_for_var(e, target_var, out);
7011            }
7012        }
7013        _ => {}
7014    }
7015}
7016
7017/// Collect all column IDs referenced by property accesses in an expression.
7018///
7019/// Used to ensure that every column needed by a WHERE clause is read from
7020/// disk before predicate evaluation, even when it is not in the RETURN list.
7021fn collect_col_ids_from_expr(expr: &Expr, out: &mut Vec<u32>) {
7022    match expr {
7023        Expr::PropAccess { prop, .. } => {
7024            let col_id = prop_name_to_col_id(prop);
7025            if !out.contains(&col_id) {
7026                out.push(col_id);
7027            }
7028        }
7029        Expr::BinOp { left, right, .. } => {
7030            collect_col_ids_from_expr(left, out);
7031            collect_col_ids_from_expr(right, out);
7032        }
7033        Expr::And(l, r) | Expr::Or(l, r) => {
7034            collect_col_ids_from_expr(l, out);
7035            collect_col_ids_from_expr(r, out);
7036        }
7037        Expr::Not(inner) => collect_col_ids_from_expr(inner, out),
7038        Expr::InList { expr, list, .. } => {
7039            collect_col_ids_from_expr(expr, out);
7040            for item in list {
7041                collect_col_ids_from_expr(item, out);
7042            }
7043        }
7044        // FnCall arguments (e.g. collect(p.name)) may reference properties.
7045        Expr::FnCall { args, .. } => {
7046            for arg in args {
7047                collect_col_ids_from_expr(arg, out);
7048            }
7049        }
7050        Expr::ListPredicate {
7051            list_expr,
7052            predicate,
7053            ..
7054        } => {
7055            collect_col_ids_from_expr(list_expr, out);
7056            collect_col_ids_from_expr(predicate, out);
7057        }
7058        // Inline list literal: recurse into each element so property references are loaded.
7059        Expr::List(items) => {
7060            for item in items {
7061                collect_col_ids_from_expr(item, out);
7062            }
7063        }
7064        Expr::IsNull(inner) | Expr::IsNotNull(inner) => {
7065            collect_col_ids_from_expr(inner, out);
7066        }
7067        // SPA-138: CASE WHEN branches may reference property accesses.
7068        Expr::CaseWhen {
7069            branches,
7070            else_expr,
7071        } => {
7072            for (cond, then_val) in branches {
7073                collect_col_ids_from_expr(cond, out);
7074                collect_col_ids_from_expr(then_val, out);
7075            }
7076            if let Some(e) = else_expr {
7077                collect_col_ids_from_expr(e, out);
7078            }
7079        }
7080        _ => {}
7081    }
7082}
7083
7084/// Convert an AST `Literal` to the `StoreValue` used by the node store.
7085///
7086/// Integers are stored as `Int64`; strings are stored as `Bytes` (up to 8 bytes
7087/// inline, matching the storage layer's encoding in `Value::to_u64`).
7088#[allow(dead_code)]
7089fn literal_to_store_value(lit: &Literal) -> StoreValue {
7090    match lit {
7091        Literal::Int(n) => StoreValue::Int64(*n),
7092        Literal::String(s) => StoreValue::Bytes(s.as_bytes().to_vec()),
7093        Literal::Float(f) => StoreValue::Float(*f),
7094        Literal::Bool(b) => StoreValue::Int64(if *b { 1 } else { 0 }),
7095        Literal::Null | Literal::Param(_) => StoreValue::Int64(0),
7096    }
7097}
7098
7099/// Convert an evaluated `Value` to the `StoreValue` used by the node store.
7100///
7101/// Used when a node property value is an arbitrary expression (e.g.
7102/// `datetime()`), rather than a bare literal.
7103fn value_to_store_value(val: Value) -> StoreValue {
7104    match val {
7105        Value::Int64(n) => StoreValue::Int64(n),
7106        Value::Float64(f) => StoreValue::Float(f),
7107        Value::Bool(b) => StoreValue::Int64(if b { 1 } else { 0 }),
7108        Value::String(s) => StoreValue::Bytes(s.into_bytes()),
7109        Value::Null => StoreValue::Int64(0),
7110        Value::NodeRef(id) => StoreValue::Int64(id.0 as i64),
7111        Value::EdgeRef(id) => StoreValue::Int64(id.0 as i64),
7112        Value::List(_) => StoreValue::Int64(0),
7113        Value::Map(_) => StoreValue::Int64(0),
7114    }
7115}
7116
7117/// Encode a string literal using the type-tagged storage encoding (SPA-169).
7118///
7119/// Returns the `u64` that `StoreValue::Bytes(s.as_bytes()).to_u64()` produces
7120/// with the new tagged encoding, allowing prop-filter and WHERE-clause
7121/// comparisons against stored raw column values.
7122fn string_to_raw_u64(s: &str) -> u64 {
7123    StoreValue::Bytes(s.as_bytes().to_vec()).to_u64()
7124}
7125
7126/// SPA-249: attempt an O(log n) index lookup for a node pattern's prop filters.
7127///
7128/// Returns `Some(slots)` when *all* of the following hold:
7129/// 1. There is exactly one inline prop filter in `props`.
7130/// 2. The filter value is a `Literal::Int` or a short `Literal::String` (≤ 7 bytes,
7131///    i.e., it can be represented inline without a heap pointer).
7132/// 3. The `(label_id, col_id)` pair is present in the index.
7133///
7134/// In all other cases (multiple filters, overflow string, param literal, no
7135/// index entry) returns `None` so the caller falls back to a full O(n) scan.
7136fn try_index_lookup_for_props(
7137    props: &[sparrowdb_cypher::ast::PropEntry],
7138    label_id: u32,
7139    prop_index: &sparrowdb_storage::property_index::PropertyIndex,
7140) -> Option<Vec<u32>> {
7141    // Only handle the single-equality-filter case.
7142    if props.len() != 1 {
7143        return None;
7144    }
7145    let filter = &props[0];
7146
7147    // Encode the filter literal as a raw u64 (the same encoding used on disk).
7148    let raw_value: u64 = match &filter.value {
7149        Expr::Literal(Literal::Int(n)) => StoreValue::Int64(*n).to_u64(),
7150        Expr::Literal(Literal::String(s)) if s.len() <= 7 => {
7151            StoreValue::Bytes(s.as_bytes().to_vec()).to_u64()
7152        }
7153        // Overflow strings (> 7 bytes) carry a heap pointer; not indexable.
7154        // Params and other expression types also fall back to full scan.
7155        _ => return None,
7156    };
7157
7158    let col_id = prop_name_to_col_id(&filter.key);
7159    if !prop_index.is_indexed(label_id, col_id) {
7160        return None;
7161    }
7162    Some(prop_index.lookup(label_id, col_id, raw_value).to_vec())
7163}
7164
7165/// SPA-251: Try to use the text index for a simple CONTAINS or STARTS WITH
7166/// predicate in the WHERE clause.
7167///
7168/// Returns `Some(slots)` when:
7169/// 1. The WHERE expression is a single `BinOp` with `Contains` or `StartsWith`.
7170/// 2. The left operand is a `PropAccess { var, prop }` where `var` matches
7171///    the node variable name (`node_var`).
7172/// 3. The right operand is a `Literal::String`.
7173/// 4. The `(label_id, col_id)` pair is present in the text index.
7174///
7175/// Returns `None` for compound predicates, non-string literals, or when the
7176/// column has not been indexed — the caller falls back to a full O(n) scan.
7177fn try_text_index_lookup(
7178    expr: &Expr,
7179    node_var: &str,
7180    label_id: u32,
7181    text_index: &TextIndex,
7182) -> Option<Vec<u32>> {
7183    let (left, op, right) = match expr {
7184        Expr::BinOp { left, op, right }
7185            if matches!(op, BinOpKind::Contains | BinOpKind::StartsWith) =>
7186        {
7187            (left.as_ref(), op, right.as_ref())
7188        }
7189        _ => return None,
7190    };
7191
7192    // Left must be a property access on the node variable.
7193    let prop_name = match left {
7194        Expr::PropAccess { var, prop } if var.as_str() == node_var => prop.as_str(),
7195        _ => return None,
7196    };
7197
7198    // Right must be a string literal.
7199    let pattern = match right {
7200        Expr::Literal(Literal::String(s)) => s.as_str(),
7201        _ => return None,
7202    };
7203
7204    let col_id = prop_name_to_col_id(prop_name);
7205    if !text_index.is_indexed(label_id, col_id) {
7206        return None;
7207    }
7208
7209    let slots = match op {
7210        BinOpKind::Contains => text_index.lookup_contains(label_id, col_id, pattern),
7211        BinOpKind::StartsWith => text_index.lookup_starts_with(label_id, col_id, pattern),
7212        _ => return None,
7213    };
7214
7215    Some(slots)
7216}
7217
7218/// SPA-274 (lazy text index): Extract the property name referenced in a
7219/// WHERE-clause CONTAINS or STARTS WITH predicate (`n.prop CONTAINS 'str'` or
7220/// `n.prop STARTS WITH 'str'`) so the caller can pre-build the lazy text index
7221/// for that `(label_id, col_id)` pair.
7222///
7223/// Returns an empty vec if the expression is not a simple text predicate on
7224/// the given node variable.
7225fn where_clause_text_prop_names<'a>(expr: &'a Expr, node_var: &str) -> Vec<&'a str> {
7226    let left = match expr {
7227        Expr::BinOp {
7228            left,
7229            op: BinOpKind::Contains | BinOpKind::StartsWith,
7230            right: _,
7231        } => left.as_ref(),
7232        _ => return vec![],
7233    };
7234    if let Expr::PropAccess { var, prop } = left {
7235        if var.as_str() == node_var {
7236            return vec![prop.as_str()];
7237        }
7238    }
7239    vec![]
7240}
7241
7242/// SPA-249 (lazy build): Extract all property names referenced in a WHERE-clause
7243/// equality predicate (`n.prop = literal` or `literal = n.prop`) so the caller
7244/// can pre-build the lazy index for those `(label_id, col_id)` pairs.
7245///
7246/// Returns an empty vec if the expression does not match the pattern.
7247fn where_clause_eq_prop_names<'a>(expr: &'a Expr, node_var: &str) -> Vec<&'a str> {
7248    let (left, right) = match expr {
7249        Expr::BinOp {
7250            left,
7251            op: BinOpKind::Eq,
7252            right,
7253        } => (left.as_ref(), right.as_ref()),
7254        _ => return vec![],
7255    };
7256    if let Expr::PropAccess { var, prop } = left {
7257        if var.as_str() == node_var {
7258            return vec![prop.as_str()];
7259        }
7260    }
7261    if let Expr::PropAccess { var, prop } = right {
7262        if var.as_str() == node_var {
7263            return vec![prop.as_str()];
7264        }
7265    }
7266    vec![]
7267}
7268
7269/// SPA-249 (lazy build): Extract all property names referenced in a WHERE-clause
7270/// range predicate (`n.prop > literal`, etc., or compound AND) so the caller
7271/// can pre-build the lazy index for those `(label_id, col_id)` pairs.
7272///
7273/// Returns an empty vec if the expression does not match the pattern.
7274fn where_clause_range_prop_names<'a>(expr: &'a Expr, node_var: &str) -> Vec<&'a str> {
7275    let is_range_op = |op: &BinOpKind| {
7276        matches!(
7277            op,
7278            BinOpKind::Gt | BinOpKind::Ge | BinOpKind::Lt | BinOpKind::Le
7279        )
7280    };
7281
7282    // Simple range: `n.prop OP literal` or `literal OP n.prop`.
7283    if let Expr::BinOp { left, op, right } = expr {
7284        if is_range_op(op) {
7285            if let Expr::PropAccess { var, prop } = left.as_ref() {
7286                if var.as_str() == node_var {
7287                    return vec![prop.as_str()];
7288                }
7289            }
7290            if let Expr::PropAccess { var, prop } = right.as_ref() {
7291                if var.as_str() == node_var {
7292                    return vec![prop.as_str()];
7293                }
7294            }
7295            return vec![];
7296        }
7297    }
7298
7299    // Compound AND: `lhs AND rhs` — collect from both sides.
7300    if let Expr::BinOp {
7301        left,
7302        op: BinOpKind::And,
7303        right,
7304    } = expr
7305    {
7306        let mut names: Vec<&'a str> = where_clause_range_prop_names(left, node_var);
7307        names.extend(where_clause_range_prop_names(right, node_var));
7308        return names;
7309    }
7310
7311    vec![]
7312}
7313
7314/// SPA-249 Phase 1b: Try to use the property equality index for a WHERE-clause
7315/// equality predicate of the form `n.prop = <literal>`.
7316///
7317/// Returns `Some(slots)` when:
7318/// 1. The WHERE expression is a `BinOp` with `Eq`, one side being
7319///    `PropAccess { var, prop }` where `var` == `node_var` and the other side
7320///    being an inline-encodable `Literal` (Int or String ≤ 7 bytes).
7321/// 2. The `(label_id, col_id)` pair is present in the index.
7322///
7323/// Returns `None` in all other cases so the caller falls back to a full scan.
7324fn try_where_eq_index_lookup(
7325    expr: &Expr,
7326    node_var: &str,
7327    label_id: u32,
7328    prop_index: &sparrowdb_storage::property_index::PropertyIndex,
7329) -> Option<Vec<u32>> {
7330    let (left, op, right) = match expr {
7331        Expr::BinOp { left, op, right } if matches!(op, BinOpKind::Eq) => {
7332            (left.as_ref(), op, right.as_ref())
7333        }
7334        _ => return None,
7335    };
7336    let _ = op;
7337
7338    // Accept both `n.prop = literal` and `literal = n.prop`.
7339    let (prop_name, lit) = if let Expr::PropAccess { var, prop } = left {
7340        if var.as_str() == node_var {
7341            (prop.as_str(), right)
7342        } else {
7343            return None;
7344        }
7345    } else if let Expr::PropAccess { var, prop } = right {
7346        if var.as_str() == node_var {
7347            (prop.as_str(), left)
7348        } else {
7349            return None;
7350        }
7351    } else {
7352        return None;
7353    };
7354
7355    let raw_value: u64 = match lit {
7356        Expr::Literal(Literal::Int(n)) => StoreValue::Int64(*n).to_u64(),
7357        Expr::Literal(Literal::String(s)) if s.len() <= 7 => {
7358            StoreValue::Bytes(s.as_bytes().to_vec()).to_u64()
7359        }
7360        _ => return None,
7361    };
7362
7363    let col_id = prop_name_to_col_id(prop_name);
7364    if !prop_index.is_indexed(label_id, col_id) {
7365        return None;
7366    }
7367    Some(prop_index.lookup(label_id, col_id, raw_value).to_vec())
7368}
7369
7370/// SPA-249 Phase 2: Try to use the property range index for WHERE-clause range
7371/// predicates (`>`, `>=`, `<`, `<=`) and compound AND range predicates.
7372///
7373/// Handles:
7374/// - Single bound: `n.age > 30`, `n.age >= 18`, `n.age < 100`, `n.age <= 65`.
7375/// - Compound AND with same prop and both bounds:
7376///   `n.age >= 18 AND n.age <= 65`.
7377///
7378/// Returns `Some(slots)` when a range can be resolved via the index.
7379/// Returns `None` to fall back to full scan.
7380fn try_where_range_index_lookup(
7381    expr: &Expr,
7382    node_var: &str,
7383    label_id: u32,
7384    prop_index: &sparrowdb_storage::property_index::PropertyIndex,
7385) -> Option<Vec<u32>> {
7386    use sparrowdb_storage::property_index::sort_key;
7387
7388    /// Encode an integer literal to raw u64 (same as node_store).
7389    fn encode_int(n: i64) -> u64 {
7390        StoreValue::Int64(n).to_u64()
7391    }
7392
7393    /// Extract a single (prop_name, lo, hi) range from a simple comparison.
7394    /// Returns None if not a recognised range pattern.
7395    #[allow(clippy::type_complexity)]
7396    fn extract_single_bound<'a>(
7397        expr: &'a Expr,
7398        node_var: &'a str,
7399    ) -> Option<(&'a str, Option<(u64, bool)>, Option<(u64, bool)>)> {
7400        let (left, op, right) = match expr {
7401            Expr::BinOp { left, op, right }
7402                if matches!(
7403                    op,
7404                    BinOpKind::Gt | BinOpKind::Ge | BinOpKind::Lt | BinOpKind::Le
7405                ) =>
7406            {
7407                (left.as_ref(), op, right.as_ref())
7408            }
7409            _ => return None,
7410        };
7411
7412        // `n.prop OP literal`
7413        if let (Expr::PropAccess { var, prop }, Expr::Literal(Literal::Int(n))) = (left, right) {
7414            if var.as_str() != node_var {
7415                return None;
7416            }
7417            let sk = sort_key(encode_int(*n));
7418            let prop_name = prop.as_str();
7419            return match op {
7420                BinOpKind::Gt => Some((prop_name, Some((sk, false)), None)),
7421                BinOpKind::Ge => Some((prop_name, Some((sk, true)), None)),
7422                BinOpKind::Lt => Some((prop_name, None, Some((sk, false)))),
7423                BinOpKind::Le => Some((prop_name, None, Some((sk, true)))),
7424                _ => None,
7425            };
7426        }
7427
7428        // `literal OP n.prop` — flip the operator direction.
7429        if let (Expr::Literal(Literal::Int(n)), Expr::PropAccess { var, prop }) = (left, right) {
7430            if var.as_str() != node_var {
7431                return None;
7432            }
7433            let sk = sort_key(encode_int(*n));
7434            let prop_name = prop.as_str();
7435            // `literal > n.prop` ↔ `n.prop < literal`
7436            return match op {
7437                BinOpKind::Gt => Some((prop_name, None, Some((sk, false)))),
7438                BinOpKind::Ge => Some((prop_name, None, Some((sk, true)))),
7439                BinOpKind::Lt => Some((prop_name, Some((sk, false)), None)),
7440                BinOpKind::Le => Some((prop_name, Some((sk, true)), None)),
7441                _ => None,
7442            };
7443        }
7444
7445        None
7446    }
7447
7448    // Try compound AND: `lhs AND rhs` where both sides are range predicates on
7449    // the same property.
7450    if let Expr::BinOp {
7451        left,
7452        op: BinOpKind::And,
7453        right,
7454    } = expr
7455    {
7456        if let (Some((lp, llo, lhi)), Some((rp, rlo, rhi))) = (
7457            extract_single_bound(left, node_var),
7458            extract_single_bound(right, node_var),
7459        ) {
7460            if lp == rp {
7461                let col_id = prop_name_to_col_id(lp);
7462                if !prop_index.is_indexed(label_id, col_id) {
7463                    return None;
7464                }
7465                // Merge the two half-open bounds: pick the most restrictive
7466                // (largest lower bound, smallest upper bound).  Plain `.or()`
7467                // is order-dependent and would silently accept a looser bound
7468                // when both sides specify the same direction (e.g. `age > 10
7469                // AND age > 20` must use `> 20`, not `> 10`).
7470                let lo: Option<(u64, bool)> = match (llo, rlo) {
7471                    (Some(a), Some(b)) => Some(std::cmp::max(a, b)),
7472                    (Some(a), None) | (None, Some(a)) => Some(a),
7473                    (None, None) => None,
7474                };
7475                let hi: Option<(u64, bool)> = match (lhi, rhi) {
7476                    (Some(a), Some(b)) => Some(std::cmp::min(a, b)),
7477                    (Some(a), None) | (None, Some(a)) => Some(a),
7478                    (None, None) => None,
7479                };
7480                // Validate: we need at least one bound.
7481                if lo.is_none() && hi.is_none() {
7482                    return None;
7483                }
7484                return Some(prop_index.lookup_range(label_id, col_id, lo, hi));
7485            }
7486        }
7487    }
7488
7489    // Try single bound.
7490    if let Some((prop_name, lo, hi)) = extract_single_bound(expr, node_var) {
7491        let col_id = prop_name_to_col_id(prop_name);
7492        if !prop_index.is_indexed(label_id, col_id) {
7493            return None;
7494        }
7495        return Some(prop_index.lookup_range(label_id, col_id, lo, hi));
7496    }
7497
7498    None
7499}
7500
/// Map a property name to a col_id via the canonical FNV-1a hash.
///
/// This is a thin wrapper: the returned value is exactly `col_id_of(name)`.
///
/// All property names — including those that start with `col_` (e.g. `col_id`,
/// `col_name`, `col_0`) — are hashed with [`col_id_of`] so that the col_id
/// computed here always agrees with what the storage layer wrote to disk
/// (SPA-160).  The Cypher write path (`create_node_named`,
/// `execute_create_standalone`) consistently uses `col_id_of`, so the read
/// path must too.
///
/// ## SPA-165 bug fix
///
/// The previous implementation special-cased names matching `col_N`:
/// - If the suffix parsed as a `u32` the numeric value was returned directly.
/// - If it did not parse, `unwrap_or(0)` silently mapped to column 0.
///
/// Both behaviours were wrong for user-defined property names.  A name like
/// `col_id` resolved to column 0 (the tombstone sentinel), and even `col_0`
/// was inconsistent because `create_node_named` writes it at `col_id_of("col_0")`
/// while the old read path returned column 0.  The fix removes the `col_`
/// prefix shorthand entirely; every name goes through `col_id_of`.
fn prop_name_to_col_id(name: &str) -> u32 {
    // No special cases: every name is hashed the same way (see SPA-165 above).
    col_id_of(name)
}
7524
7525fn collect_col_ids_from_columns(column_names: &[String]) -> Vec<u32> {
7526    let mut ids = Vec::new();
7527    for name in column_names {
7528        // name could be "var.col_N" or "col_N"
7529        let prop = name.split('.').next_back().unwrap_or(name.as_str());
7530        let col_id = prop_name_to_col_id(prop);
7531        if !ids.contains(&col_id) {
7532            ids.push(col_id);
7533        }
7534    }
7535    ids
7536}
7537
7538/// Collect the set of column IDs referenced by `var` in `column_names`.
7539///
7540/// `_label_id` is accepted to keep call sites consistent and is reserved for
7541/// future use (e.g. per-label schema lookups). It is intentionally unused in
7542/// the current implementation which derives column IDs purely from column names.
7543fn collect_col_ids_for_var(var: &str, column_names: &[String], _label_id: u32) -> Vec<u32> {
7544    let mut ids = Vec::new();
7545    for name in column_names {
7546        // name is either "var.col_N" or "col_N"
7547        if let Some((v, prop)) = name.split_once('.') {
7548            if v == var {
7549                let col_id = prop_name_to_col_id(prop);
7550                if !ids.contains(&col_id) {
7551                    ids.push(col_id);
7552                }
7553            }
7554        } else {
7555            // No dot — could be this var's column
7556            let col_id = prop_name_to_col_id(name.as_str());
7557            if !ids.contains(&col_id) {
7558                ids.push(col_id);
7559            }
7560        }
7561    }
7562    if ids.is_empty() {
7563        // Default: read col_0
7564        ids.push(0);
7565    }
7566    ids
7567}
7568
7569/// Read node properties using the nullable store path (SPA-197).
7570///
7571/// Calls `get_node_raw_nullable` so that columns that were never written for
7572/// this node are returned as `None` (absent) rather than `0u64`.  The result
7573/// is a `Vec<(col_id, raw_u64)>` containing only the columns that have a real
7574/// stored value; callers that iterate over `col_ids` but don't find a column
7575/// in the result will receive `Value::Null` (e.g. via `project_row`).
7576///
7577/// This is the correct read path for any code that eventually projects
7578/// property values into query results.  Use `get_node_raw` only for
7579/// tombstone checks (col 0 == u64::MAX) where the raw sentinel is meaningful.
7580fn read_node_props(
7581    store: &NodeStore,
7582    node_id: NodeId,
7583    col_ids: &[u32],
7584) -> sparrowdb_common::Result<Vec<(u32, u64)>> {
7585    if col_ids.is_empty() {
7586        return Ok(vec![]);
7587    }
7588    let nullable = store.get_node_raw_nullable(node_id, col_ids)?;
7589    Ok(nullable
7590        .into_iter()
7591        .filter_map(|(col_id, opt): (u32, Option<u64>)| opt.map(|v| (col_id, v)))
7592        .collect())
7593}
7594
7595/// Decode a raw `u64` column value (as returned by `get_node_raw`) into the
7596/// execution-layer `Value` type.
7597///
7598/// Uses `NodeStore::decode_raw_value` to honour the type tag embedded in the
7599/// top byte (SPA-169/SPA-212), reading from the overflow string heap when
7600/// necessary, then maps `StoreValue::Bytes` → `Value::String`.
7601fn decode_raw_val(raw: u64, store: &NodeStore) -> Value {
7602    match store.decode_raw_value(raw) {
7603        StoreValue::Int64(n) => Value::Int64(n),
7604        StoreValue::Bytes(b) => Value::String(String::from_utf8_lossy(&b).into_owned()),
7605        StoreValue::Float(f) => Value::Float64(f),
7606    }
7607}
7608
7609fn build_row_vals(
7610    props: &[(u32, u64)],
7611    var_name: &str,
7612    _col_ids: &[u32],
7613    store: &NodeStore,
7614) -> HashMap<String, Value> {
7615    let mut map = HashMap::new();
7616    for &(col_id, raw) in props {
7617        let key = format!("{var_name}.col_{col_id}");
7618        map.insert(key, decode_raw_val(raw, store));
7619    }
7620    map
7621}
7622
7623// ── Reserved label/type protection (SPA-208) ──────────────────────────────────
7624
/// Reports whether `label` lives in the reserved `__SO_` namespace.
///
/// Labels beginning with `__SO_` are set aside for internal SparrowDB system
/// objects. The check is a case-sensitive prefix match.
#[inline]
fn is_reserved_label(label: &str) -> bool {
    const RESERVED_PREFIX: &str = "__SO_";
    label.strip_prefix(RESERVED_PREFIX).is_some()
}
7632
7633/// Compare two `Value`s for equality, handling the mixed `Int64`/`String` case.
7634///
7635/// Properties are stored as raw `u64` and read back as `Value::Int64` by
7636/// `build_row_vals`, while a WHERE string literal evaluates to `Value::String`.
7637/// When one side is `Int64` and the other is `String`, encode the string using
7638/// the same inline-bytes encoding the storage layer uses and compare numerically
7639/// (SPA-161).
7640fn values_equal(a: &Value, b: &Value) -> bool {
7641    match (a, b) {
7642        // Normal same-type comparisons.
7643        (Value::Int64(x), Value::Int64(y)) => x == y,
7644        // SPA-212: overflow string storage ensures values are never truncated,
7645        // so a plain equality check is now correct and sufficient.  The former
7646        // 7-byte inline-encoding fallback (SPA-169) has been removed because it
7647        // caused two distinct strings sharing the same 7-byte prefix to compare
7648        // equal (e.g. "TypeScript" == "TypeScripx").
7649        (Value::String(x), Value::String(y)) => x == y,
7650        (Value::Bool(x), Value::Bool(y)) => x == y,
7651        (Value::Float64(x), Value::Float64(y)) => x == y,
7652        // Mixed: stored raw-int vs string literal — kept for backwards
7653        // compatibility; should not be triggered after SPA-169 since string
7654        // props are now decoded to Value::String by decode_raw_val.
7655        (Value::Int64(raw), Value::String(s)) => *raw as u64 == string_to_raw_u64(s),
7656        (Value::String(s), Value::Int64(raw)) => string_to_raw_u64(s) == *raw as u64,
7657        // Null is only equal to null.
7658        (Value::Null, Value::Null) => true,
7659        _ => false,
7660    }
7661}
7662
7663fn eval_where(expr: &Expr, vals: &HashMap<String, Value>) -> bool {
7664    match expr {
7665        Expr::BinOp { left, op, right } => {
7666            let lv = eval_expr(left, vals);
7667            let rv = eval_expr(right, vals);
7668            match op {
7669                BinOpKind::Eq => values_equal(&lv, &rv),
7670                BinOpKind::Neq => !values_equal(&lv, &rv),
7671                BinOpKind::Contains => lv.contains(&rv),
7672                BinOpKind::StartsWith => {
7673                    matches!((&lv, &rv), (Value::String(l), Value::String(r)) if l.starts_with(r.as_str()))
7674                }
7675                BinOpKind::EndsWith => {
7676                    matches!((&lv, &rv), (Value::String(l), Value::String(r)) if l.ends_with(r.as_str()))
7677                }
7678                BinOpKind::Lt => match (&lv, &rv) {
7679                    (Value::Int64(a), Value::Int64(b)) => a < b,
7680                    _ => false,
7681                },
7682                BinOpKind::Le => match (&lv, &rv) {
7683                    (Value::Int64(a), Value::Int64(b)) => a <= b,
7684                    _ => false,
7685                },
7686                BinOpKind::Gt => match (&lv, &rv) {
7687                    (Value::Int64(a), Value::Int64(b)) => a > b,
7688                    _ => false,
7689                },
7690                BinOpKind::Ge => match (&lv, &rv) {
7691                    (Value::Int64(a), Value::Int64(b)) => a >= b,
7692                    _ => false,
7693                },
7694                _ => false,
7695            }
7696        }
7697        Expr::And(l, r) => eval_where(l, vals) && eval_where(r, vals),
7698        Expr::Or(l, r) => eval_where(l, vals) || eval_where(r, vals),
7699        Expr::Not(inner) => !eval_where(inner, vals),
7700        Expr::Literal(Literal::Bool(b)) => *b,
7701        Expr::Literal(_) => false,
7702        Expr::InList {
7703            expr,
7704            list,
7705            negated,
7706        } => {
7707            let lv = eval_expr(expr, vals);
7708            let matched = list
7709                .iter()
7710                .any(|item| values_equal(&lv, &eval_expr(item, vals)));
7711            if *negated {
7712                !matched
7713            } else {
7714                matched
7715            }
7716        }
7717        Expr::ListPredicate { .. } => {
7718            // Delegate to eval_expr which handles ListPredicate and returns Value::Bool.
7719            match eval_expr(expr, vals) {
7720                Value::Bool(b) => b,
7721                _ => false,
7722            }
7723        }
7724        Expr::IsNull(inner) => matches!(eval_expr(inner, vals), Value::Null),
7725        Expr::IsNotNull(inner) => !matches!(eval_expr(inner, vals), Value::Null),
7726        // CASE WHEN — evaluate via eval_expr.
7727        Expr::CaseWhen { .. } => matches!(eval_expr(expr, vals), Value::Bool(true)),
7728        // EXISTS subquery and ShortestPath require graph access.
7729        // Engine::eval_where_graph handles them; standalone eval_where returns false.
7730        Expr::ExistsSubquery(_) | Expr::ShortestPath(_) | Expr::NotExists(_) | Expr::CountStar => {
7731            false
7732        }
7733        _ => false, // unsupported expression — reject row rather than silently pass
7734    }
7735}
7736
7737fn eval_expr(expr: &Expr, vals: &HashMap<String, Value>) -> Value {
7738    match expr {
7739        Expr::PropAccess { var, prop } => {
7740            // First try the direct name key (e.g. "n.name").
7741            let key = format!("{var}.{prop}");
7742            if let Some(v) = vals.get(&key) {
7743                return v.clone();
7744            }
7745            // Fall back to the hashed col_id key (e.g. "n.col_12345").
7746            // build_row_vals stores values under this form because the storage
7747            // layer does not carry property names — only numeric col IDs.
7748            let col_id = prop_name_to_col_id(prop);
7749            let fallback_key = format!("{var}.col_{col_id}");
7750            vals.get(&fallback_key).cloned().unwrap_or(Value::Null)
7751        }
7752        Expr::Var(v) => vals.get(v.as_str()).cloned().unwrap_or(Value::Null),
7753        Expr::Literal(lit) => match lit {
7754            Literal::Int(n) => Value::Int64(*n),
7755            Literal::Float(f) => Value::Float64(*f),
7756            Literal::Bool(b) => Value::Bool(*b),
7757            Literal::String(s) => Value::String(s.clone()),
7758            Literal::Param(p) => {
7759                // Runtime parameters are stored in `vals` with a `$` prefix key
7760                // (inserted by the engine before evaluation via `inject_params`).
7761                vals.get(&format!("${p}")).cloned().unwrap_or(Value::Null)
7762            }
7763            Literal::Null => Value::Null,
7764        },
7765        Expr::FnCall { name, args } => {
7766            // Special-case metadata functions that need direct row-map access.
7767            // type(r) and labels(n) look up pre-inserted metadata keys rather
7768            // than dispatching through the function library with evaluated args.
7769            let name_lc = name.to_lowercase();
7770            if name_lc == "type" {
7771                if let Some(Expr::Var(var_name)) = args.first() {
7772                    let meta_key = format!("{}.__type__", var_name);
7773                    return vals.get(&meta_key).cloned().unwrap_or(Value::Null);
7774                }
7775            }
7776            if name_lc == "labels" {
7777                if let Some(Expr::Var(var_name)) = args.first() {
7778                    let meta_key = format!("{}.__labels__", var_name);
7779                    return vals.get(&meta_key).cloned().unwrap_or(Value::Null);
7780                }
7781            }
7782            // SPA-213: id(n) must look up the NodeRef even when var n holds a Map.
7783            // Check __node_id__ first so it works with both NodeRef and Map values.
7784            if name_lc == "id" {
7785                if let Some(Expr::Var(var_name)) = args.first() {
7786                    // Prefer the explicit __node_id__ entry (present whenever eval path is used).
7787                    let id_key = format!("{}.__node_id__", var_name);
7788                    if let Some(Value::NodeRef(nid)) = vals.get(&id_key) {
7789                        return Value::Int64(nid.0 as i64);
7790                    }
7791                    // Fallback: var itself may be a NodeRef (old code path).
7792                    if let Some(Value::NodeRef(nid)) = vals.get(var_name.as_str()) {
7793                        return Value::Int64(nid.0 as i64);
7794                    }
7795                    return Value::Null;
7796                }
7797            }
7798            // Evaluate each argument recursively, then dispatch to the function library.
7799            let evaluated: Vec<Value> = args.iter().map(|a| eval_expr(a, vals)).collect();
7800            crate::functions::dispatch_function(name, evaluated).unwrap_or(Value::Null)
7801        }
7802        Expr::BinOp { left, op, right } => {
7803            // Evaluate binary operations for use in RETURN expressions.
7804            let lv = eval_expr(left, vals);
7805            let rv = eval_expr(right, vals);
7806            match op {
7807                BinOpKind::Eq => Value::Bool(lv == rv),
7808                BinOpKind::Neq => Value::Bool(lv != rv),
7809                BinOpKind::Lt => match (&lv, &rv) {
7810                    (Value::Int64(a), Value::Int64(b)) => Value::Bool(a < b),
7811                    (Value::Float64(a), Value::Float64(b)) => Value::Bool(a < b),
7812                    _ => Value::Null,
7813                },
7814                BinOpKind::Le => match (&lv, &rv) {
7815                    (Value::Int64(a), Value::Int64(b)) => Value::Bool(a <= b),
7816                    (Value::Float64(a), Value::Float64(b)) => Value::Bool(a <= b),
7817                    _ => Value::Null,
7818                },
7819                BinOpKind::Gt => match (&lv, &rv) {
7820                    (Value::Int64(a), Value::Int64(b)) => Value::Bool(a > b),
7821                    (Value::Float64(a), Value::Float64(b)) => Value::Bool(a > b),
7822                    _ => Value::Null,
7823                },
7824                BinOpKind::Ge => match (&lv, &rv) {
7825                    (Value::Int64(a), Value::Int64(b)) => Value::Bool(a >= b),
7826                    (Value::Float64(a), Value::Float64(b)) => Value::Bool(a >= b),
7827                    _ => Value::Null,
7828                },
7829                BinOpKind::Contains => match (&lv, &rv) {
7830                    (Value::String(l), Value::String(r)) => Value::Bool(l.contains(r.as_str())),
7831                    _ => Value::Null,
7832                },
7833                BinOpKind::StartsWith => match (&lv, &rv) {
7834                    (Value::String(l), Value::String(r)) => Value::Bool(l.starts_with(r.as_str())),
7835                    _ => Value::Null,
7836                },
7837                BinOpKind::EndsWith => match (&lv, &rv) {
7838                    (Value::String(l), Value::String(r)) => Value::Bool(l.ends_with(r.as_str())),
7839                    _ => Value::Null,
7840                },
7841                BinOpKind::And => match (&lv, &rv) {
7842                    (Value::Bool(a), Value::Bool(b)) => Value::Bool(*a && *b),
7843                    _ => Value::Null,
7844                },
7845                BinOpKind::Or => match (&lv, &rv) {
7846                    (Value::Bool(a), Value::Bool(b)) => Value::Bool(*a || *b),
7847                    _ => Value::Null,
7848                },
7849                BinOpKind::Add => match (&lv, &rv) {
7850                    (Value::Int64(a), Value::Int64(b)) => Value::Int64(a + b),
7851                    (Value::Float64(a), Value::Float64(b)) => Value::Float64(a + b),
7852                    (Value::Int64(a), Value::Float64(b)) => Value::Float64(*a as f64 + b),
7853                    (Value::Float64(a), Value::Int64(b)) => Value::Float64(a + *b as f64),
7854                    (Value::String(a), Value::String(b)) => Value::String(format!("{a}{b}")),
7855                    _ => Value::Null,
7856                },
7857                BinOpKind::Sub => match (&lv, &rv) {
7858                    (Value::Int64(a), Value::Int64(b)) => Value::Int64(a - b),
7859                    (Value::Float64(a), Value::Float64(b)) => Value::Float64(a - b),
7860                    (Value::Int64(a), Value::Float64(b)) => Value::Float64(*a as f64 - b),
7861                    (Value::Float64(a), Value::Int64(b)) => Value::Float64(a - *b as f64),
7862                    _ => Value::Null,
7863                },
7864                BinOpKind::Mul => match (&lv, &rv) {
7865                    (Value::Int64(a), Value::Int64(b)) => Value::Int64(a * b),
7866                    (Value::Float64(a), Value::Float64(b)) => Value::Float64(a * b),
7867                    (Value::Int64(a), Value::Float64(b)) => Value::Float64(*a as f64 * b),
7868                    (Value::Float64(a), Value::Int64(b)) => Value::Float64(a * *b as f64),
7869                    _ => Value::Null,
7870                },
7871                BinOpKind::Div => match (&lv, &rv) {
7872                    (Value::Int64(a), Value::Int64(b)) => {
7873                        if *b == 0 {
7874                            Value::Null
7875                        } else {
7876                            Value::Int64(a / b)
7877                        }
7878                    }
7879                    (Value::Float64(a), Value::Float64(b)) => Value::Float64(a / b),
7880                    (Value::Int64(a), Value::Float64(b)) => Value::Float64(*a as f64 / b),
7881                    (Value::Float64(a), Value::Int64(b)) => Value::Float64(a / *b as f64),
7882                    _ => Value::Null,
7883                },
7884                BinOpKind::Mod => match (&lv, &rv) {
7885                    (Value::Int64(a), Value::Int64(b)) => {
7886                        if *b == 0 {
7887                            Value::Null
7888                        } else {
7889                            Value::Int64(a % b)
7890                        }
7891                    }
7892                    _ => Value::Null,
7893                },
7894            }
7895        }
7896        Expr::Not(inner) => match eval_expr(inner, vals) {
7897            Value::Bool(b) => Value::Bool(!b),
7898            _ => Value::Null,
7899        },
7900        Expr::And(l, r) => match (eval_expr(l, vals), eval_expr(r, vals)) {
7901            (Value::Bool(a), Value::Bool(b)) => Value::Bool(a && b),
7902            _ => Value::Null,
7903        },
7904        Expr::Or(l, r) => match (eval_expr(l, vals), eval_expr(r, vals)) {
7905            (Value::Bool(a), Value::Bool(b)) => Value::Bool(a || b),
7906            _ => Value::Null,
7907        },
7908        Expr::InList {
7909            expr,
7910            list,
7911            negated,
7912        } => {
7913            let lv = eval_expr(expr, vals);
7914            let matched = list
7915                .iter()
7916                .any(|item| values_equal(&lv, &eval_expr(item, vals)));
7917            Value::Bool(if *negated { !matched } else { matched })
7918        }
7919        Expr::List(items) => {
7920            let evaluated: Vec<Value> = items.iter().map(|e| eval_expr(e, vals)).collect();
7921            Value::List(evaluated)
7922        }
7923        Expr::ListPredicate {
7924            kind,
7925            variable,
7926            list_expr,
7927            predicate,
7928        } => {
7929            let list_val = eval_expr(list_expr, vals);
7930            let items = match list_val {
7931                Value::List(v) => v,
7932                _ => return Value::Null,
7933            };
7934            let mut satisfied_count = 0usize;
7935            // Clone vals once and reuse the same scope map each iteration,
7936            // updating only the loop variable binding to avoid O(n * |scope|) clones.
7937            let mut scope = vals.clone();
7938            for item in &items {
7939                scope.insert(variable.clone(), item.clone());
7940                let result = eval_expr(predicate, &scope);
7941                if result == Value::Bool(true) {
7942                    satisfied_count += 1;
7943                }
7944            }
7945            let result = match kind {
7946                ListPredicateKind::Any => satisfied_count > 0,
7947                ListPredicateKind::All => satisfied_count == items.len(),
7948                ListPredicateKind::None => satisfied_count == 0,
7949                ListPredicateKind::Single => satisfied_count == 1,
7950            };
7951            Value::Bool(result)
7952        }
7953        Expr::IsNull(inner) => Value::Bool(matches!(eval_expr(inner, vals), Value::Null)),
7954        Expr::IsNotNull(inner) => Value::Bool(!matches!(eval_expr(inner, vals), Value::Null)),
7955        // CASE WHEN cond THEN val ... [ELSE val] END (SPA-138).
7956        Expr::CaseWhen {
7957            branches,
7958            else_expr,
7959        } => {
7960            for (cond, then_val) in branches {
7961                if let Value::Bool(true) = eval_expr(cond, vals) {
7962                    return eval_expr(then_val, vals);
7963                }
7964            }
7965            else_expr
7966                .as_ref()
7967                .map(|e| eval_expr(e, vals))
7968                .unwrap_or(Value::Null)
7969        }
7970        // Graph-dependent expressions — return Null without engine context.
7971        Expr::ExistsSubquery(_) | Expr::ShortestPath(_) | Expr::NotExists(_) | Expr::CountStar => {
7972            Value::Null
7973        }
7974    }
7975}
7976
7977fn project_row(
7978    props: &[(u32, u64)],
7979    column_names: &[String],
7980    _col_ids: &[u32],
7981    // Variable name for the scanned node (e.g. "n"), used for labels(n) columns.
7982    var_name: &str,
7983    // Primary label for the scanned node, used for labels(n) columns.
7984    node_label: &str,
7985    store: &NodeStore,
7986) -> Vec<Value> {
7987    column_names
7988        .iter()
7989        .map(|col_name| {
7990            // Handle labels(var) column.
7991            if let Some(inner) = col_name
7992                .strip_prefix("labels(")
7993                .and_then(|s| s.strip_suffix(')'))
7994            {
7995                if inner == var_name && !node_label.is_empty() {
7996                    return Value::List(vec![Value::String(node_label.to_string())]);
7997                }
7998                return Value::Null;
7999            }
8000            let prop = col_name.split('.').next_back().unwrap_or(col_name.as_str());
8001            let col_id = prop_name_to_col_id(prop);
8002            props
8003                .iter()
8004                .find(|(c, _)| *c == col_id)
8005                .map(|(_, v)| decode_raw_val(*v, store))
8006                .unwrap_or(Value::Null)
8007        })
8008        .collect()
8009}
8010
8011#[allow(clippy::too_many_arguments)]
8012fn project_hop_row(
8013    src_props: &[(u32, u64)],
8014    dst_props: &[(u32, u64)],
8015    column_names: &[String],
8016    src_var: &str,
8017    _dst_var: &str,
8018    // Optional (rel_var, rel_type) for resolving `type(rel_var)` columns.
8019    rel_var_type: Option<(&str, &str)>,
8020    // Optional (src_var, src_label) for resolving `labels(src_var)` columns.
8021    src_label_meta: Option<(&str, &str)>,
8022    // Optional (dst_var, dst_label) for resolving `labels(dst_var)` columns.
8023    dst_label_meta: Option<(&str, &str)>,
8024    store: &NodeStore,
8025    // Edge properties for the matched relationship variable (SPA-178).
8026    // Keyed by rel_var name; the slice contains (col_id, raw_u64) pairs.
8027    edge_props: Option<(&str, &[(u32, u64)])>,
8028) -> Vec<Value> {
8029    column_names
8030        .iter()
8031        .map(|col_name| {
8032            // Handle metadata function calls: type(r) → "type(r)" column name.
8033            if let Some(inner) = col_name
8034                .strip_prefix("type(")
8035                .and_then(|s| s.strip_suffix(')'))
8036            {
8037                // inner is the variable name, e.g. "r"
8038                if let Some((rel_var, rel_type)) = rel_var_type {
8039                    if inner == rel_var {
8040                        return Value::String(rel_type.to_string());
8041                    }
8042                }
8043                return Value::Null;
8044            }
8045            // Handle labels(n) → "labels(n)" column name.
8046            if let Some(inner) = col_name
8047                .strip_prefix("labels(")
8048                .and_then(|s| s.strip_suffix(')'))
8049            {
8050                if let Some((meta_var, label)) = src_label_meta {
8051                    if inner == meta_var {
8052                        return Value::List(vec![Value::String(label.to_string())]);
8053                    }
8054                }
8055                if let Some((meta_var, label)) = dst_label_meta {
8056                    if inner == meta_var {
8057                        return Value::List(vec![Value::String(label.to_string())]);
8058                    }
8059                }
8060                return Value::Null;
8061            }
8062            if let Some((v, prop)) = col_name.split_once('.') {
8063                let col_id = prop_name_to_col_id(prop);
8064                // Check if this is a relationship variable property access (SPA-178).
8065                if let Some((evar, eprops)) = edge_props {
8066                    if v == evar {
8067                        return eprops
8068                            .iter()
8069                            .find(|(c, _)| *c == col_id)
8070                            .map(|(_, val)| decode_raw_val(*val, store))
8071                            .unwrap_or(Value::Null);
8072                    }
8073                }
8074                let props = if v == src_var { src_props } else { dst_props };
8075                props
8076                    .iter()
8077                    .find(|(c, _)| *c == col_id)
8078                    .map(|(_, val)| decode_raw_val(*val, store))
8079                    .unwrap_or(Value::Null)
8080            } else {
8081                Value::Null
8082            }
8083        })
8084        .collect()
8085}
8086
8087/// Project a single 2-hop result row.
8088///
8089/// For each return column of the form `var.prop`, looks up the property value
8090/// from `src_props` when `var == src_var`, and from `fof_props` otherwise.
8091/// This ensures that `RETURN a.name, c.name` correctly reads the source and
8092/// destination node properties independently (SPA-252).
8093fn project_fof_row(
8094    src_props: &[(u32, u64)],
8095    fof_props: &[(u32, u64)],
8096    column_names: &[String],
8097    src_var: &str,
8098    store: &NodeStore,
8099) -> Vec<Value> {
8100    column_names
8101        .iter()
8102        .map(|col_name| {
8103            if let Some((var, prop)) = col_name.split_once('.') {
8104                let col_id = prop_name_to_col_id(prop);
8105                let props = if !src_var.is_empty() && var == src_var {
8106                    src_props
8107                } else {
8108                    fof_props
8109                };
8110                props
8111                    .iter()
8112                    .find(|(c, _)| *c == col_id)
8113                    .map(|(_, v)| decode_raw_val(*v, store))
8114                    .unwrap_or(Value::Null)
8115            } else {
8116                Value::Null
8117            }
8118        })
8119        .collect()
8120}
8121
8122/// SPA-201: Three-variable row projection for the incoming second-hop pattern
8123/// `(a)-[:R]->(m)<-[:R]-(b) RETURN m.name`.
8124///
8125/// Resolves column references to src (a), mid (m), or fof (b) props based on
8126/// variable name matching.  Any unrecognised variable falls back to fof_props.
8127fn project_three_var_row(
8128    src_props: &[(u32, u64)],
8129    mid_props: &[(u32, u64)],
8130    fof_props: &[(u32, u64)],
8131    column_names: &[String],
8132    src_var: &str,
8133    mid_var: &str,
8134    store: &NodeStore,
8135) -> Vec<Value> {
8136    column_names
8137        .iter()
8138        .map(|col_name| {
8139            if let Some((var, prop)) = col_name.split_once('.') {
8140                let col_id = prop_name_to_col_id(prop);
8141                let props: &[(u32, u64)] = if !src_var.is_empty() && var == src_var {
8142                    src_props
8143                } else if !mid_var.is_empty() && var == mid_var {
8144                    mid_props
8145                } else {
8146                    fof_props
8147                };
8148                props
8149                    .iter()
8150                    .find(|(c, _)| *c == col_id)
8151                    .map(|(_, v)| decode_raw_val(*v, store))
8152                    .unwrap_or(Value::Null)
8153            } else {
8154                Value::Null
8155            }
8156        })
8157        .collect()
8158}
8159
8160fn deduplicate_rows(rows: &mut Vec<Vec<Value>>) {
8161    // Deduplicate using structural row equality to avoid false collisions from
8162    // string-key approaches (e.g. ["a|", "b"] vs ["a", "|b"] would hash equal).
8163    let mut unique: Vec<Vec<Value>> = Vec::with_capacity(rows.len());
8164    for row in rows.drain(..) {
8165        if !unique.iter().any(|existing| existing == &row) {
8166            unique.push(row);
8167        }
8168    }
8169    *rows = unique;
8170}
8171
8172/// Maximum rows to sort in-memory before spilling to disk (SPA-100).
8173fn sort_spill_threshold() -> usize {
8174    std::env::var("SPARROWDB_SORT_SPILL_ROWS")
8175        .ok()
8176        .and_then(|v| v.parse().ok())
8177        .unwrap_or(crate::sort_spill::DEFAULT_ROW_THRESHOLD)
8178}
8179
8180/// Build a sort key from a single row and the ORDER BY spec.
8181fn make_sort_key(
8182    row: &[Value],
8183    order_by: &[(Expr, SortDir)],
8184    column_names: &[String],
8185) -> Vec<crate::sort_spill::SortKeyVal> {
8186    use crate::sort_spill::{OrdValue, SortKeyVal};
8187    order_by
8188        .iter()
8189        .map(|(expr, dir)| {
8190            let col_idx = match expr {
8191                Expr::PropAccess { var, prop } => {
8192                    let key = format!("{var}.{prop}");
8193                    column_names.iter().position(|c| c == &key)
8194                }
8195                Expr::Var(v) => column_names.iter().position(|c| c == v.as_str()),
8196                _ => None,
8197            };
8198            let val = col_idx
8199                .and_then(|i| row.get(i))
8200                .map(OrdValue::from_value)
8201                .unwrap_or(OrdValue::Null);
8202            match dir {
8203                SortDir::Asc => SortKeyVal::Asc(val),
8204                SortDir::Desc => SortKeyVal::Desc(std::cmp::Reverse(val)),
8205            }
8206        })
8207        .collect()
8208}
8209
8210fn apply_order_by(rows: &mut Vec<Vec<Value>>, m: &MatchStatement, column_names: &[String]) {
8211    if m.order_by.is_empty() {
8212        return;
8213    }
8214
8215    let threshold = sort_spill_threshold();
8216
8217    if rows.len() <= threshold {
8218        rows.sort_by(|a, b| {
8219            for (expr, dir) in &m.order_by {
8220                let col_idx = match expr {
8221                    Expr::PropAccess { var, prop } => {
8222                        let key = format!("{var}.{prop}");
8223                        column_names.iter().position(|c| c == &key)
8224                    }
8225                    Expr::Var(v) => column_names.iter().position(|c| c == v.as_str()),
8226                    _ => None,
8227                };
8228                if let Some(idx) = col_idx {
8229                    if idx < a.len() && idx < b.len() {
8230                        let cmp = compare_values(&a[idx], &b[idx]);
8231                        let cmp = if *dir == SortDir::Desc {
8232                            cmp.reverse()
8233                        } else {
8234                            cmp
8235                        };
8236                        if cmp != std::cmp::Ordering::Equal {
8237                            return cmp;
8238                        }
8239                    }
8240                }
8241            }
8242            std::cmp::Ordering::Equal
8243        });
8244    } else {
8245        use crate::sort_spill::{SortableRow, SpillingSorter};
8246        let mut sorter: SpillingSorter<SortableRow> = SpillingSorter::new();
8247        for row in rows.drain(..) {
8248            let key = make_sort_key(&row, &m.order_by, column_names);
8249            if sorter.push(SortableRow { key, data: row }).is_err() {
8250                return;
8251            }
8252        }
8253        if let Ok(iter) = sorter.finish() {
8254            *rows = iter.map(|sr| sr.data).collect::<Vec<_>>();
8255        }
8256    }
8257}
8258
8259fn compare_values(a: &Value, b: &Value) -> std::cmp::Ordering {
8260    match (a, b) {
8261        (Value::Int64(x), Value::Int64(y)) => x.cmp(y),
8262        (Value::Float64(x), Value::Float64(y)) => {
8263            x.partial_cmp(y).unwrap_or(std::cmp::Ordering::Equal)
8264        }
8265        (Value::String(x), Value::String(y)) => x.cmp(y),
8266        _ => std::cmp::Ordering::Equal,
8267    }
8268}
8269
8270// ── aggregation (COUNT/SUM/AVG/MIN/MAX/collect) ───────────────────────────────
8271
8272/// Returns `true` if `expr` is any aggregate call.
8273fn is_aggregate_expr(expr: &Expr) -> bool {
8274    match expr {
8275        Expr::CountStar => true,
8276        Expr::FnCall { name, .. } => matches!(
8277            name.to_lowercase().as_str(),
8278            "count" | "sum" | "avg" | "min" | "max" | "collect"
8279        ),
8280        // ANY/ALL/NONE/SINGLE(x IN collect(...) WHERE pred) is an aggregate.
8281        Expr::ListPredicate { list_expr, .. } => expr_has_collect(list_expr),
8282        _ => false,
8283    }
8284}
8285
8286/// Returns `true` if the expression contains a `collect()` call (directly or nested).
8287fn expr_has_collect(expr: &Expr) -> bool {
8288    match expr {
8289        Expr::FnCall { name, .. } => name.to_lowercase() == "collect",
8290        Expr::ListPredicate { list_expr, .. } => expr_has_collect(list_expr),
8291        _ => false,
8292    }
8293}
8294
8295/// Extract the `collect()` argument from an expression that contains `collect()`.
8296///
8297/// Handles two forms:
8298/// - Direct: `collect(expr)` → evaluates `expr` against `row_vals`
8299/// - Nested: `ANY(x IN collect(expr) WHERE pred)` → evaluates `expr` against `row_vals`
8300fn extract_collect_arg(expr: &Expr, row_vals: &HashMap<String, Value>) -> Value {
8301    match expr {
8302        Expr::FnCall { args, .. } if !args.is_empty() => eval_expr(&args[0], row_vals),
8303        Expr::ListPredicate { list_expr, .. } => extract_collect_arg(list_expr, row_vals),
8304        _ => Value::Null,
8305    }
8306}
8307
8308/// Evaluate an aggregate expression given the already-accumulated list.
8309///
8310/// For a bare `collect(...)`, returns the list itself.
8311/// For `ANY/ALL/NONE/SINGLE(x IN collect(...) WHERE pred)`, substitutes the
8312/// accumulated list and evaluates the predicate.
8313fn evaluate_aggregate_expr(
8314    expr: &Expr,
8315    accumulated_list: &Value,
8316    outer_vals: &HashMap<String, Value>,
8317) -> Value {
8318    match expr {
8319        Expr::FnCall { name, .. } if name.to_lowercase() == "collect" => accumulated_list.clone(),
8320        Expr::ListPredicate {
8321            kind,
8322            variable,
8323            predicate,
8324            ..
8325        } => {
8326            let items = match accumulated_list {
8327                Value::List(v) => v,
8328                _ => return Value::Null,
8329            };
8330            let mut satisfied_count = 0usize;
8331            for item in items {
8332                let mut scope = outer_vals.clone();
8333                scope.insert(variable.clone(), item.clone());
8334                let result = eval_expr(predicate, &scope);
8335                if result == Value::Bool(true) {
8336                    satisfied_count += 1;
8337                }
8338            }
8339            let result = match kind {
8340                ListPredicateKind::Any => satisfied_count > 0,
8341                ListPredicateKind::All => satisfied_count == items.len(),
8342                ListPredicateKind::None => satisfied_count == 0,
8343                ListPredicateKind::Single => satisfied_count == 1,
8344            };
8345            Value::Bool(result)
8346        }
8347        _ => Value::Null,
8348    }
8349}
8350
8351/// Returns `true` if any RETURN item is an aggregate expression.
8352fn has_aggregate_in_return(items: &[ReturnItem]) -> bool {
8353    items.iter().any(|item| is_aggregate_expr(&item.expr))
8354}
8355
8356/// Returns `true` if any RETURN item requires a `NodeRef` / `EdgeRef` value to
8357/// be present in the row map in order to evaluate correctly.
8358///
8359/// This covers:
8360/// - `id(var)` — a scalar function that receives the whole node reference.
8361/// - Bare `var` — projecting a node variable as a property map (SPA-213).
8362///
8363/// When this returns `true`, the scan must use the eval path (which inserts
8364/// `Value::Map` / `Value::NodeRef` under the variable key) instead of the fast
8365/// `project_row` path (which only stores individual property columns).
8366fn needs_node_ref_in_return(items: &[ReturnItem]) -> bool {
8367    items.iter().any(|item| {
8368        matches!(&item.expr, Expr::FnCall { name, .. } if name.to_lowercase() == "id")
8369            || matches!(&item.expr, Expr::Var(_))
8370            || expr_needs_graph(&item.expr)
8371            || expr_needs_eval_path(&item.expr)
8372    })
8373}
8374
8375/// Returns `true` when the expression contains a scalar `FnCall` that cannot
8376/// be resolved by the fast `project_row` column-name lookup.
8377///
8378/// `project_row` maps column names like `"n.name"` directly to stored property
8379/// values.  Any function call such as `coalesce(n.missing, n.name)`,
8380/// `toUpper(n.name)`, or `size(n.name)` produces a column name like
8381/// `"coalesce(n.missing, n.name)"` which has no matching stored property.
8382/// Those expressions must be evaluated via `eval_expr` on the full row map.
8383///
8384/// Aggregate functions (`count`, `sum`, etc.) are already handled via the
8385/// `use_agg` flag; we exclude them here to avoid double-counting.
8386fn expr_needs_eval_path(expr: &Expr) -> bool {
8387    match expr {
8388        Expr::FnCall { name, args } => {
8389            let name_lc = name.to_lowercase();
8390            // Aggregates are handled separately by use_agg.
8391            if matches!(
8392                name_lc.as_str(),
8393                "count" | "sum" | "avg" | "min" | "max" | "collect"
8394            ) {
8395                return false;
8396            }
8397            // Any other FnCall (coalesce, toUpper, size, labels, type, id, etc.)
8398            // needs the eval path.  We include id/labels/type here even though
8399            // they are special-cased in eval_expr, because the fast project_row
8400            // path cannot handle them at all.
8401            let _ = args; // args not needed for this check
8402            true
8403        }
8404        // Recurse into compound expressions that may contain FnCalls.
8405        Expr::BinOp { left, right, .. } => {
8406            expr_needs_eval_path(left) || expr_needs_eval_path(right)
8407        }
8408        Expr::And(l, r) | Expr::Or(l, r) => expr_needs_eval_path(l) || expr_needs_eval_path(r),
8409        Expr::Not(inner) | Expr::IsNull(inner) | Expr::IsNotNull(inner) => {
8410            expr_needs_eval_path(inner)
8411        }
8412        _ => false,
8413    }
8414}
8415
8416/// Collect the variable names that appear as bare `Expr::Var` in a RETURN clause (SPA-213).
8417///
8418/// These variables must be projected as a `Value::Map` containing all node properties
8419/// rather than returning `Value::Null` or a raw `NodeRef`.
8420fn bare_var_names_in_return(items: &[ReturnItem]) -> Vec<String> {
8421    items
8422        .iter()
8423        .filter_map(|item| {
8424            if let Expr::Var(v) = &item.expr {
8425                Some(v.clone())
8426            } else {
8427                None
8428            }
8429        })
8430        .collect()
8431}
8432
8433/// Build a `Value::Map` from a raw property slice.
8434///
8435/// Keys are `"col_{col_id}"` strings; values are decoded via [`decode_raw_val`].
8436/// This is used to project a bare node variable (SPA-213).
8437fn build_node_map(props: &[(u32, u64)], store: &NodeStore) -> Value {
8438    let entries: Vec<(String, Value)> = props
8439        .iter()
8440        .map(|&(col_id, raw)| (format!("col_{col_id}"), decode_raw_val(raw, store)))
8441        .collect();
8442    Value::Map(entries)
8443}
8444
/// How a single RETURN item participates in aggregation.
#[derive(Debug, Clone, PartialEq)]
enum AggKind {
    /// Non-aggregate — the item's value is part of the grouping key.
    Key,
    /// `COUNT(*)`.
    CountStar,
    /// `count(expr)`.
    Count,
    /// `sum(expr)`.
    Sum,
    /// `avg(expr)`.
    Avg,
    /// `min(expr)`.
    Min,
    /// `max(expr)`.
    Max,
    /// `collect(expr)`, or a list predicate evaluated over `collect(...)`.
    Collect,
}
8458
8459fn agg_kind(expr: &Expr) -> AggKind {
8460    match expr {
8461        Expr::CountStar => AggKind::CountStar,
8462        Expr::FnCall { name, .. } => match name.to_lowercase().as_str() {
8463            "count" => AggKind::Count,
8464            "sum" => AggKind::Sum,
8465            "avg" => AggKind::Avg,
8466            "min" => AggKind::Min,
8467            "max" => AggKind::Max,
8468            "collect" => AggKind::Collect,
8469            _ => AggKind::Key,
8470        },
8471        // ANY/ALL/NONE/SINGLE(x IN collect(...) WHERE pred) treated as Collect-kind aggregate.
8472        Expr::ListPredicate { list_expr, .. } if expr_has_collect(list_expr) => AggKind::Collect,
8473        _ => AggKind::Key,
8474    }
8475}
8476
8477/// Aggregate a set of flat `HashMap<String, Value>` rows by evaluating RETURN
8478/// items that contain aggregate calls (COUNT(*), COUNT, SUM, AVG, MIN, MAX, collect).
8479///
8480/// Non-aggregate RETURN items become the group key.  Returns one output
8481/// `Vec<Value>` per unique key in the same column order as `return_items`.
8482/// Returns `true` if the expression contains a `CASE WHEN`, `shortestPath`,
8483/// or `EXISTS` sub-expression that requires the graph-aware eval path
8484/// (rather than the fast `project_row` column lookup).
8485fn expr_needs_graph(expr: &Expr) -> bool {
8486    match expr {
8487        Expr::ShortestPath(_) | Expr::ExistsSubquery(_) | Expr::CaseWhen { .. } => true,
8488        Expr::And(l, r) | Expr::Or(l, r) => expr_needs_graph(l) || expr_needs_graph(r),
8489        Expr::Not(inner) | Expr::IsNull(inner) | Expr::IsNotNull(inner) => expr_needs_graph(inner),
8490        Expr::BinOp { left, right, .. } => expr_needs_graph(left) || expr_needs_graph(right),
8491        _ => false,
8492    }
8493}
8494
8495fn aggregate_rows(rows: &[HashMap<String, Value>], return_items: &[ReturnItem]) -> Vec<Vec<Value>> {
8496    // Classify each return item.
8497    let kinds: Vec<AggKind> = return_items
8498        .iter()
8499        .map(|item| agg_kind(&item.expr))
8500        .collect();
8501
8502    let key_indices: Vec<usize> = kinds
8503        .iter()
8504        .enumerate()
8505        .filter(|(_, k)| **k == AggKind::Key)
8506        .map(|(i, _)| i)
8507        .collect();
8508
8509    let agg_indices: Vec<usize> = kinds
8510        .iter()
8511        .enumerate()
8512        .filter(|(_, k)| **k != AggKind::Key)
8513        .map(|(i, _)| i)
8514        .collect();
8515
8516    // No aggregate items — fall through to plain projection.
8517    if agg_indices.is_empty() {
8518        return rows
8519            .iter()
8520            .map(|row_vals| {
8521                return_items
8522                    .iter()
8523                    .map(|item| eval_expr(&item.expr, row_vals))
8524                    .collect()
8525            })
8526            .collect();
8527    }
8528
8529    // Build groups preserving insertion order.
8530    let mut group_keys: Vec<Vec<Value>> = Vec::new();
8531    // [group_idx][agg_col_pos] → accumulated raw values
8532    let mut group_accum: Vec<Vec<Vec<Value>>> = Vec::new();
8533
8534    for row_vals in rows {
8535        let key: Vec<Value> = key_indices
8536            .iter()
8537            .map(|&i| eval_expr(&return_items[i].expr, row_vals))
8538            .collect();
8539
8540        let group_idx = if let Some(pos) = group_keys.iter().position(|k| k == &key) {
8541            pos
8542        } else {
8543            group_keys.push(key);
8544            group_accum.push(vec![vec![]; agg_indices.len()]);
8545            group_keys.len() - 1
8546        };
8547
8548        for (ai, &ri) in agg_indices.iter().enumerate() {
8549            match &kinds[ri] {
8550                AggKind::CountStar => {
8551                    // Sentinel: count the number of sentinels after grouping.
8552                    group_accum[group_idx][ai].push(Value::Int64(1));
8553                }
8554                AggKind::Count | AggKind::Sum | AggKind::Avg | AggKind::Min | AggKind::Max => {
8555                    let arg_val = match &return_items[ri].expr {
8556                        Expr::FnCall { args, .. } if !args.is_empty() => {
8557                            eval_expr(&args[0], row_vals)
8558                        }
8559                        _ => Value::Null,
8560                    };
8561                    // All aggregates ignore NULLs (standard Cypher semantics).
8562                    if !matches!(arg_val, Value::Null) {
8563                        group_accum[group_idx][ai].push(arg_val);
8564                    }
8565                }
8566                AggKind::Collect => {
8567                    // For collect() or ListPredicate(x IN collect(...) WHERE ...), extract the
8568                    // collect() argument (handles both direct and nested forms).
8569                    let arg_val = extract_collect_arg(&return_items[ri].expr, row_vals);
8570                    // Standard Cypher: collect() ignores nulls.
8571                    if !matches!(arg_val, Value::Null) {
8572                        group_accum[group_idx][ai].push(arg_val);
8573                    }
8574                }
8575                AggKind::Key => unreachable!(),
8576            }
8577        }
8578    }
8579
8580    // No grouping keys and no rows → one result row of zero/empty aggregates.
8581    if group_keys.is_empty() && key_indices.is_empty() {
8582        let empty_vals: HashMap<String, Value> = HashMap::new();
8583        let row: Vec<Value> = return_items
8584            .iter()
8585            .zip(kinds.iter())
8586            .map(|(item, k)| match k {
8587                AggKind::CountStar | AggKind::Count | AggKind::Sum => Value::Int64(0),
8588                AggKind::Avg | AggKind::Min | AggKind::Max => Value::Null,
8589                AggKind::Collect => {
8590                    evaluate_aggregate_expr(&item.expr, &Value::List(vec![]), &empty_vals)
8591                }
8592                AggKind::Key => Value::Null,
8593            })
8594            .collect();
8595        return vec![row];
8596    }
8597
8598    // There are grouping keys but no rows → no output rows.
8599    if group_keys.is_empty() {
8600        return vec![];
8601    }
8602
8603    // Finalize and assemble output rows — one per group.
8604    let mut out: Vec<Vec<Value>> = Vec::with_capacity(group_keys.len());
8605    for (gi, key_vals) in group_keys.into_iter().enumerate() {
8606        let mut output_row: Vec<Value> = Vec::with_capacity(return_items.len());
8607        let mut ki = 0usize;
8608        let mut ai = 0usize;
8609        // Build outer scope from key columns for ListPredicate predicate evaluation.
8610        let outer_vals: HashMap<String, Value> = key_indices
8611            .iter()
8612            .enumerate()
8613            .map(|(pos, &i)| {
8614                let name = return_items[i]
8615                    .alias
8616                    .clone()
8617                    .unwrap_or_else(|| format!("_k{i}"));
8618                (name, key_vals[pos].clone())
8619            })
8620            .collect();
8621        for col_idx in 0..return_items.len() {
8622            if kinds[col_idx] == AggKind::Key {
8623                output_row.push(key_vals[ki].clone());
8624                ki += 1;
8625            } else {
8626                let accumulated = Value::List(group_accum[gi][ai].clone());
8627                let result = if kinds[col_idx] == AggKind::Collect {
8628                    evaluate_aggregate_expr(&return_items[col_idx].expr, &accumulated, &outer_vals)
8629                } else {
8630                    finalize_aggregate(&kinds[col_idx], &group_accum[gi][ai])
8631                };
8632                output_row.push(result);
8633                ai += 1;
8634            }
8635        }
8636        out.push(output_row);
8637    }
8638    out
8639}
8640
8641/// Reduce accumulated values for a single aggregate column into a final `Value`.
8642fn finalize_aggregate(kind: &AggKind, vals: &[Value]) -> Value {
8643    match kind {
8644        AggKind::CountStar | AggKind::Count => Value::Int64(vals.len() as i64),
8645        AggKind::Sum => {
8646            let mut sum_i: i64 = 0;
8647            let mut sum_f: f64 = 0.0;
8648            let mut is_float = false;
8649            for v in vals {
8650                match v {
8651                    Value::Int64(n) => sum_i += n,
8652                    Value::Float64(f) => {
8653                        is_float = true;
8654                        sum_f += f;
8655                    }
8656                    _ => {}
8657                }
8658            }
8659            if is_float {
8660                Value::Float64(sum_f + sum_i as f64)
8661            } else {
8662                Value::Int64(sum_i)
8663            }
8664        }
8665        AggKind::Avg => {
8666            if vals.is_empty() {
8667                return Value::Null;
8668            }
8669            let mut sum: f64 = 0.0;
8670            let mut count: i64 = 0;
8671            for v in vals {
8672                match v {
8673                    Value::Int64(n) => {
8674                        sum += *n as f64;
8675                        count += 1;
8676                    }
8677                    Value::Float64(f) => {
8678                        sum += f;
8679                        count += 1;
8680                    }
8681                    _ => {}
8682                }
8683            }
8684            if count == 0 {
8685                Value::Null
8686            } else {
8687                Value::Float64(sum / count as f64)
8688            }
8689        }
8690        AggKind::Min => vals
8691            .iter()
8692            .fold(None::<Value>, |acc, v| match (acc, v) {
8693                (None, v) => Some(v.clone()),
8694                (Some(Value::Int64(a)), Value::Int64(b)) => Some(Value::Int64(a.min(*b))),
8695                (Some(Value::Float64(a)), Value::Float64(b)) => Some(Value::Float64(a.min(*b))),
8696                (Some(Value::String(a)), Value::String(b)) => {
8697                    Some(Value::String(if a <= *b { a } else { b.clone() }))
8698                }
8699                (Some(a), _) => Some(a),
8700            })
8701            .unwrap_or(Value::Null),
8702        AggKind::Max => vals
8703            .iter()
8704            .fold(None::<Value>, |acc, v| match (acc, v) {
8705                (None, v) => Some(v.clone()),
8706                (Some(Value::Int64(a)), Value::Int64(b)) => Some(Value::Int64(a.max(*b))),
8707                (Some(Value::Float64(a)), Value::Float64(b)) => Some(Value::Float64(a.max(*b))),
8708                (Some(Value::String(a)), Value::String(b)) => {
8709                    Some(Value::String(if a >= *b { a } else { b.clone() }))
8710                }
8711                (Some(a), _) => Some(a),
8712            })
8713            .unwrap_or(Value::Null),
8714        AggKind::Collect => Value::List(vals.to_vec()),
8715        AggKind::Key => Value::Null,
8716    }
8717}
8718
8719// ── Storage-size helpers (SPA-171) ────────────────────────────────────────────
8720
/// Total size in bytes of everything under `dir`, recursing into
/// sub-directories.
///
/// This is a best-effort figure: a directory that cannot be read contributes
/// `0`, and entries whose type or metadata cannot be read are skipped.
///
/// Symbolic links are never followed.  A link contributes only its own
/// metadata length, so a link that points back into the tree cannot cause
/// repeated counting or runaway recursion.
fn dir_size_bytes(dir: &std::path::Path) -> u64 {
    let Ok(entries) = std::fs::read_dir(dir) else {
        return 0;
    };
    entries
        .flatten()
        .map(|entry| match entry.file_type() {
            // `DirEntry::file_type` reports a symlink as a symlink, so only
            // real directories are descended into.
            Ok(kind) if kind.is_dir() => dir_size_bytes(&entry.path()),
            // `DirEntry::metadata` likewise does not traverse symlinks.
            _ => entry.metadata().map(|meta| meta.len()).unwrap_or(0),
        })
        .sum()
}
8736
8737// ── CALL helpers ─────────────────────────────────────────────────────────────
8738
8739/// Evaluate an expression to a string value for use as a procedure argument.
8740///
8741/// Supports `Literal::String(s)` only for v1.  Parameter binding would require
8742/// a runtime `params` map that is not yet threaded through the CALL path.
8743fn eval_expr_to_string(expr: &Expr) -> Result<String> {
8744    match expr {
8745        Expr::Literal(Literal::String(s)) => Ok(s.clone()),
8746        Expr::Literal(Literal::Param(p)) => Err(sparrowdb_common::Error::InvalidArgument(format!(
8747            "parameter ${p} requires runtime binding; pass a literal string instead"
8748        ))),
8749        other => Err(sparrowdb_common::Error::InvalidArgument(format!(
8750            "procedure argument must be a string literal, got: {other:?}"
8751        ))),
8752    }
8753}
8754
8755/// Derive a display column name from a return expression (used when no AS alias
8756/// is provided).
8757fn expr_to_col_name(expr: &Expr) -> String {
8758    match expr {
8759        Expr::PropAccess { var, prop } => format!("{var}.{prop}"),
8760        Expr::Var(v) => v.clone(),
8761        _ => "value".to_owned(),
8762    }
8763}
8764
8765/// Evaluate a RETURN expression against a CALL row environment.
8766///
8767/// The environment maps YIELD column names → values (e.g. `"node"` →
8768/// `Value::NodeRef`).  For `PropAccess` on a NodeRef the property is looked up
8769/// from the node store.
8770fn eval_call_expr(expr: &Expr, env: &HashMap<String, Value>, store: &NodeStore) -> Value {
8771    match expr {
8772        Expr::Var(v) => env.get(v.as_str()).cloned().unwrap_or(Value::Null),
8773        Expr::PropAccess { var, prop } => match env.get(var.as_str()) {
8774            Some(Value::NodeRef(node_id)) => {
8775                let col_id = prop_name_to_col_id(prop);
8776                read_node_props(store, *node_id, &[col_id])
8777                    .ok()
8778                    .and_then(|pairs| pairs.into_iter().find(|(c, _)| *c == col_id))
8779                    .map(|(_, raw)| decode_raw_val(raw, store))
8780                    .unwrap_or(Value::Null)
8781            }
8782            Some(other) => other.clone(),
8783            None => Value::Null,
8784        },
8785        Expr::Literal(lit) => match lit {
8786            Literal::Int(n) => Value::Int64(*n),
8787            Literal::Float(f) => Value::Float64(*f),
8788            Literal::Bool(b) => Value::Bool(*b),
8789            Literal::String(s) => Value::String(s.clone()),
8790            _ => Value::Null,
8791        },
8792        _ => Value::Null,
8793    }
8794}
sparrowdb_execution/engine.rs

sparrowdb_execution/
engine.rs