sqry_db/planner/
execute.rs

1//! Plan executor — walks a [`QueryPlan`] tree and produces a sorted, deduplicated
2//! `Vec<NodeId>`.
3//!
4//! # Pipeline position
5//!
6//! ```text
7//!   text syntax / builder API
8//!         │
9//!         ▼
10//!     [ir]                                         (DB09)
11//!         │
12//!         ▼
13//!   [compile]  ── QueryBuilder materialises the IR (DB10)
14//!         │
15//!         ▼
16//!     [fuse]   ── merge shared NodeScan prefixes   (DB11)
17//!         │
18//!         ▼
19//!   [execute]  ← THIS MODULE                       (DB12)
20//!         │
21//!         ▼
22//!     Vec<NodeId>
23//! ```
24//!
25//! # Public surface
26//!
27//! - [`execute_plan`] — evaluate a single [`QueryPlan`] against a [`QueryDb`].
28//! - [`execute_batch`] — evaluate a [`FusedPlanBatch`] and scatter per-tail
29//!   results back into submission order.
30//! - [`PlanExecutor`] — lower-level entry point when the caller wants to
31//!   evaluate multiple plans while sharing the batch-local shared-node cache.
32//!
33//! # Semantics
34//!
35//! - All output node-sets are **sorted and deduplicated** by
36//!   [`NodeId`]'s `(index, generation)` ordering. This makes results stable
37//!   across runs and makes [`SetOperation`] implementations trivially correct.
38//! - `NodeScan` uses the pre-built [`AuxiliaryIndices::by_kind`] slice when a
39//!   [`NodeKind`] filter is present; otherwise it falls back to iterating the
40//!   arena.
41//! - `EdgeTraversal` performs a bounded BFS up to `max_depth` hops. `max_depth
42//!   == 0` short-circuits to the empty set (the builder rejects this at
43//!   construction time; the guard is here for defence-in-depth when callers
44//!   hand-roll [`QueryPlan`] values). Seed nodes are **excluded** from the
45//!   output — the traversal yields only nodes reached by at least one hop.
46//! - `Filter` applies a [`Predicate`] per input node. Boolean combinators
47//!   short-circuit.
48//! - `SetOp::Union` concatenates and deduplicates. `Intersect` and
49//!   `Difference` evaluate the right-hand side, build a [`HashSet`] of it, and
50//!   stream the left-hand side against the set.
51//! - `Chain` threads each step's output into the next step's input. The first
52//!   step must be context-free (IR contract — validated by [`QueryBuilder`]).
53//!
54//! # Shared-node caching
55//!
56//! [`PlanExecutor`] memoises every promoted shared subtree and subquery
57//! [`PlanNode`] it evaluates by structural equality. This mirrors the
58//! fuser's deduplication contract: identical executable subtrees share a
59//! single evaluation. The cache is per-executor; callers that want to share
60//! across plan submissions should use [`execute_batch`] or manually drive
61//! [`PlanExecutor`].
62//!
63//! # Dependency tracking
64//!
65//! When called from inside a [`DependencyRecorderGuard`] scope, the executor
66//! records every [`FileId`] it reads via [`record_file_dep`]. Outside a guard
67//! (standalone `execute_plan` call), the recorder is a no-op — the thread-local
68//! simply accumulates into a vector that is never drained.
69//!
70//! # Design references
71//!
72//! - Spec: `docs/superpowers/specs/2026-04-12-derived-analysis-db-query-planner-design.md` (§3 — Execution)
73//! - DAG: `docs/superpowers/plans/2026-04-12-phase3-4-combined-implementation-dag.toml` (unit DB12)
74//!
75//! [`AuxiliaryIndices::by_kind`]: sqry_core::graph::unified::storage::indices::AuxiliaryIndices::by_kind
76//! [`DependencyRecorderGuard`]: crate::dependency::DependencyRecorderGuard
77//! [`QueryBuilder`]: super::compile::QueryBuilder
78
79use std::collections::{HashMap, HashSet, VecDeque};
80use std::sync::Arc;
81
82use globset::GlobBuilder;
83
84use sqry_core::graph::unified::bind::scope::arena::ScopeKind;
85use sqry_core::graph::unified::concurrent::GraphSnapshot;
86use sqry_core::graph::unified::edge::kind::{EdgeKind, TypeOfContext};
87use sqry_core::graph::unified::edge::store::StoreEdgeRef;
88use sqry_core::graph::unified::materialize::display_entry_qualified_name;
89use sqry_core::graph::unified::node::id::NodeId;
90use sqry_core::graph::unified::node::kind::NodeKind;
91use sqry_core::graph::unified::storage::arena::NodeEntry;
92use sqry_core::schema::Visibility;
93
94use super::fuse::{FusedPlanBatch, FusionTail};
95use super::ir::{
96    Direction, MatchMode, PathPattern, PlanNode, Predicate, PredicateValue, QueryPlan,
97    SetOperation, StringPattern,
98};
99use crate::QueryDb;
100use crate::dependency::record_file_dep;
101use crate::queries::relation::{RelationKey, RelationKind, relation_matches_node_via_set};
102use crate::queries::{
103    CalleesQuery, CallersQuery, ExportsQuery, ImplementsQuery, ImportsQuery, ReferencesQuery,
104};
105
106// ============================================================================
107// Public entry points
108// ============================================================================
109
110/// Evaluates a single [`QueryPlan`] and returns the sorted, deduplicated set
111/// of matching [`NodeId`]s.
112///
113/// A fresh [`PlanExecutor`] is allocated for this call; subquery results
114/// cached during evaluation are discarded on return. For evaluating multiple
115/// plans that may share subqueries, prefer [`execute_batch`] or construct a
116/// [`PlanExecutor`] directly.
117#[must_use]
118pub fn execute_plan(plan: &QueryPlan, db: &QueryDb) -> Vec<NodeId> {
119    let mut executor = PlanExecutor::new(db);
120    executor.run(&plan.root, None).as_ref().clone()
121}
122
123/// Evaluates a [`FusedPlanBatch`] produced by [`super::fuse::fuse_plans`].
124///
125/// Each fusion group's shared prefix is evaluated exactly once and routed
126/// into every tail, avoiding the redundant-scan cost that motivated DB11.
127/// Subqueries detected during fusion are evaluated *first* (priming the
128/// shared-node cache) so that top-level tail predicates referencing them hit
129/// the cache instead of re-evaluating.
130///
131/// The returned `Vec<Vec<NodeId>>` is indexed by the original submission
132/// order — `out[i]` corresponds to `plans[i]` in the input batch given to
133/// [`super::fuse::fuse_plans`].
134#[must_use]
135pub fn execute_batch(batch: &FusedPlanBatch, db: &QueryDb) -> Vec<Vec<NodeId>> {
136    let mut executor = PlanExecutor::new(db);
137    executor.prime_subqueries(batch);
138    executor.prime_shared_nodes(batch);
139
140    let total = batch.total_plans();
141    let mut out: Vec<Vec<NodeId>> = vec![Vec::new(); total];
142
143    for group in batch.groups() {
144        let prefix_result = executor.run(group.prefix(), None);
145        for fused in group.tails() {
146            let tail_result: Arc<Vec<NodeId>> = match &fused.tail {
147                FusionTail::Identity => Arc::clone(&prefix_result),
148                FusionTail::ChainContinuation { remaining_steps } => executor
149                    .run_chain_continuation(
150                        group.prefix(),
151                        Arc::clone(&prefix_result),
152                        remaining_steps,
153                    ),
154            };
155            // Defence-in-depth: preserve idempotence even for malformed batches
156            // with duplicated original_index values.
157            if let Some(slot) = out.get_mut(fused.original_index) {
158                slot.clone_from(tail_result.as_ref());
159            }
160        }
161    }
162
163    out
164}
165
166// ============================================================================
167// PlanExecutor
168// ============================================================================
169
170/// Stateful executor for walking [`PlanNode`] trees.
171///
172/// Holds a reference to the [`QueryDb`] (reserved for DB14+ when relation
173/// predicate evaluation is routed through [`QueryDb::get`]), a strong [`Arc`]
174/// to the [`GraphSnapshot`] taken at construction time (so subsequent
175/// snapshot swaps do not race with evaluation), and a shared-node result
176/// cache keyed on the structural [`PlanNode`] of each evaluated subtree.
177pub struct PlanExecutor<'db> {
178    /// Host database — reserved for DB14+ when relation predicate evaluation
179    /// is routed through [`QueryDb::get`].
180    #[allow(dead_code)]
181    db: &'db QueryDb,
182    /// Snapshot view for this evaluation session. Taking the snapshot once at
183    /// construction means every step in a multi-step plan sees a consistent
184    /// view, even if another thread swaps `db.snapshot()` mid-run.
185    snapshot: Arc<GraphSnapshot>,
186    /// Cache of shared [`PlanNode`] → result set.
187    ///
188    /// Keyed on structural equality of the full sub-plan (matching the fuser's
189    /// dedup contract), so two predicates carrying the same subquery share a
190    /// single evaluation.
191    shared_node_cache: HashMap<PlanNode, Arc<Vec<NodeId>>>,
192}
193
194impl<'db> PlanExecutor<'db> {
195    /// Constructs a new executor bound to the database's current snapshot.
196    #[must_use]
197    pub fn new(db: &'db QueryDb) -> Self {
198        Self {
199            db,
200            snapshot: db.snapshot_arc(),
201            shared_node_cache: HashMap::new(),
202        }
203    }
204
205    /// Evaluates a [`QueryPlan`] with this executor's cache in scope.
206    #[must_use]
207    pub fn execute(&mut self, plan: &QueryPlan) -> Vec<NodeId> {
208        self.run(&plan.root, None).as_ref().clone()
209    }
210
211    /// Pre-populates the shared-node cache from a fused batch's promoted
212    /// shared subtrees.
213    fn prime_shared_nodes(&mut self, batch: &FusedPlanBatch) {
214        for shared_node in batch.shared_nodes() {
215            let result = self.run(shared_node.canonical_plan(), None);
216            self.shared_node_cache
217                .insert(shared_node.canonical_plan().clone(), result);
218        }
219    }
220
221    /// Pre-populates the shared-node cache from a fused batch's
222    /// [`FusedPlanBatch::subquery_batch`] so that top-level tail execution
223    /// finds every subquery already cached.
224    fn prime_subqueries(&mut self, batch: &FusedPlanBatch) {
225        let Some(sub_batch) = batch.subquery_batch() else {
226            return;
227        };
228        // Recurse — nested subqueries are primed before their parents.
229        self.prime_subqueries(sub_batch);
230        self.prime_shared_nodes(sub_batch);
231        for group in sub_batch.groups() {
232            let prefix_result = self.run(group.prefix(), None);
233            for fused in group.tails() {
234                let plan = fused.reconstruct(group.prefix());
235                let result: Arc<Vec<NodeId>> = match &fused.tail {
236                    FusionTail::Identity => Arc::clone(&prefix_result),
237                    FusionTail::ChainContinuation { remaining_steps } => self
238                        .run_chain_continuation(
239                            group.prefix(),
240                            Arc::clone(&prefix_result),
241                            remaining_steps,
242                        ),
243                };
244                self.shared_node_cache.insert(plan.root, result);
245            }
246        }
247    }
248
249    /// Core dispatch. `input` is the carry set from an enclosing
250    /// [`PlanNode::Chain`]; `None` means the node is being evaluated in a
251    /// context-free position (root, set-op operand, subquery).
252    fn run(&mut self, node: &PlanNode, input: Option<Arc<Vec<NodeId>>>) -> Arc<Vec<NodeId>> {
253        if input.is_none()
254            && let Some(existing) = self.shared_node_cache.get(node)
255        {
256            return Arc::clone(existing);
257        }
258
259        match node {
260            PlanNode::NodeScan {
261                kind,
262                visibility,
263                name_pattern,
264            } => self.run_scan(*kind, *visibility, name_pattern.as_ref()),
265            PlanNode::EdgeTraversal {
266                direction,
267                edge_kind,
268                max_depth,
269            } => {
270                let input = input.unwrap_or_else(|| Arc::new(Vec::new()));
271                self.run_traversal(input.as_ref(), *direction, edge_kind.as_ref(), *max_depth)
272            }
273            PlanNode::Filter { predicate } => {
274                let input = input.unwrap_or_else(|| Arc::new(Vec::new()));
275                self.run_filter(input.as_ref(), predicate)
276            }
277            PlanNode::SetOp { op, left, right } => self.run_setop(*op, left, right),
278            PlanNode::Chain { steps } => self.run_chain(steps),
279        }
280    }
281
282    fn run_chain(&mut self, steps: &[PlanNode]) -> Arc<Vec<NodeId>> {
283        if steps.is_empty() {
284            return Arc::new(Vec::new());
285        }
286        let prefix = steps[0].clone();
287        let current = self.run(&steps[0], None);
288        self.run_chain_continuation(&prefix, current, &steps[1..])
289    }
290
291    fn run_chain_continuation(
292        &mut self,
293        prefix: &PlanNode,
294        mut current: Arc<Vec<NodeId>>,
295        remaining: &[PlanNode],
296    ) -> Arc<Vec<NodeId>> {
297        let mut current_prefix = prefix.clone();
298        let mut remaining_steps = remaining;
299
300        while !remaining_steps.is_empty() {
301            if let Some((cached_result, consumed, combined_prefix)) =
302                self.lookup_shared_chain_prefix(&current_prefix, remaining_steps)
303            {
304                current = cached_result;
305                current_prefix = combined_prefix;
306                remaining_steps = &remaining_steps[consumed..];
307                continue;
308            }
309
310            current = self.run(&remaining_steps[0], Some(Arc::clone(&current)));
311            current_prefix = append_chain_prefix(&current_prefix, &remaining_steps[..1]);
312            remaining_steps = &remaining_steps[1..];
313        }
314        current
315    }
316
317    fn lookup_shared_chain_prefix(
318        &self,
319        prefix: &PlanNode,
320        remaining: &[PlanNode],
321    ) -> Option<(Arc<Vec<NodeId>>, usize, PlanNode)> {
322        for consumed in (1..=remaining.len()).rev() {
323            let combined_prefix = append_chain_prefix(prefix, &remaining[..consumed]);
324            if let Some(existing) = self.shared_node_cache.get(&combined_prefix) {
325                return Some((Arc::clone(existing), consumed, combined_prefix));
326            }
327        }
328        None
329    }
330
331    fn run_scan(
332        &self,
333        kind: Option<NodeKind>,
334        visibility: Option<Visibility>,
335        name_pattern: Option<&StringPattern>,
336    ) -> Arc<Vec<NodeId>> {
337        let compiled_name = name_pattern.and_then(CompiledStringPattern::compile);
338
339        let mut out: Vec<NodeId> = Vec::new();
340        match kind {
341            Some(k) => {
342                let ids = self.snapshot.indices().by_kind(k);
343                out.reserve(ids.len());
344                for &id in ids {
345                    if let Some(entry) = self.snapshot.nodes().get(id) {
346                        Self::record_entry_deps(entry);
347                        if self.scan_match(id, entry, visibility, compiled_name.as_ref()) {
348                            out.push(id);
349                        }
350                    }
351                }
352            }
353            None => {
354                for (id, entry) in self.snapshot.nodes().iter() {
355                    // Gate 0d iter-2 fix: skip unified losers from
356                    // full-snapshot NodeScan. See
357                    // `NodeEntry::is_unified_loser`.
358                    if entry.is_unified_loser() {
359                        continue;
360                    }
361                    Self::record_entry_deps(entry);
362                    if self.scan_match(id, entry, visibility, compiled_name.as_ref()) {
363                        out.push(id);
364                    }
365                }
366            }
367        }
368        dedup_sort(&mut out);
369        Arc::new(out)
370    }
371
372    /// Apply scan-time predicates to a single node.
373    ///
374    /// **`B1_ALIGN` contract.** When `compiled_name` is present, the name
375    /// match is checked against **both** `entry.name` and
376    /// `entry.qualified_name` (mirroring
377    /// [`sqry_core::graph::unified::concurrent::graph::GraphSnapshot::find_by_exact_name`]),
378    /// and synthetic placeholder nodes (Go-plugin
379    /// `<field:operand.field>` shadows, `<ident>@<offset>`
380    /// per-binding-site Variables, `NodeMetadata::Synthetic`-flagged
381    /// nodes) are excluded via
382    /// [`sqry_core::graph::unified::concurrent::graph::GraphSnapshot::is_node_synthetic`].
383    /// This keeps the planner's `name:` predicate set-equal with the
384    /// CLI `--exact` shorthand against any fixture.
385    ///
386    /// When no name pattern is present the synthetic filter is **not**
387    /// applied — `kind:function` and similar context-free scans
388    /// preserve their existing behaviour (synthetics remain visible to
389    /// kind-only scans because the suppression bit is bound to the
390    /// name-resolution surface, not the kind index).
391    fn scan_match(
392        &self,
393        node_id: NodeId,
394        entry: &NodeEntry,
395        visibility: Option<Visibility>,
396        compiled_name: Option<&CompiledStringPattern>,
397    ) -> bool {
398        if let Some(required) = visibility
399            && entry_visibility(&self.snapshot, entry) != Some(required)
400        {
401            return false;
402        }
403        if let Some(pattern) = compiled_name {
404            // Check `entry.name` first (most common hit), then
405            // `entry.qualified_name` plus its language-aware display alias so a step like
406            // `name:main.SelectorSource.NeedTags` matches Property
407            // nodes whose simple name is the bare field but whose
408            // qualified name carries the package + receiver prefix.
409            if !entry_name_or_display_matches(&self.snapshot, entry, pattern) {
410                return false;
411            }
412            // B1_ALIGN: synthetic placeholders are invisible to the
413            // `name:` surface so the planner predicate matches the CLI
414            // `--exact` shorthand byte-for-byte.
415            if self.snapshot.is_node_synthetic(node_id) {
416                return false;
417            }
418        }
419        true
420    }
421
422    fn run_traversal(
423        &self,
424        input: &[NodeId],
425        direction: Direction,
426        edge_kind: Option<&EdgeKind>,
427        max_depth: u32,
428    ) -> Arc<Vec<NodeId>> {
429        if max_depth == 0 || input.is_empty() {
430            return Arc::new(Vec::new());
431        }
432        let target_discriminant = edge_kind.map(std::mem::discriminant);
433
434        let mut visited: HashSet<NodeId> = input.iter().copied().collect();
435        let mut result: Vec<NodeId> = Vec::new();
436        let mut queue: VecDeque<(NodeId, u32)> = input.iter().map(|&id| (id, 0_u32)).collect();
437
438        while let Some((current, depth)) = queue.pop_front() {
439            if depth >= max_depth {
440                continue;
441            }
442            for edge in self.neighbours(current, direction) {
443                if let Some(disc) = target_discriminant
444                    && std::mem::discriminant(&edge.kind) != disc
445                {
446                    continue;
447                }
448                let next = match direction {
449                    Direction::Forward => edge.target,
450                    Direction::Reverse => edge.source,
451                    Direction::Both => {
452                        if edge.source == current {
453                            edge.target
454                        } else {
455                            edge.source
456                        }
457                    }
458                };
459                if visited.insert(next) {
460                    // Record the file owning the edge and the visited node.
461                    record_file_dep(edge.file);
462                    if let Some(entry) = self.snapshot.nodes().get(next) {
463                        record_file_dep(entry.file);
464                    }
465                    result.push(next);
466                    queue.push_back((next, depth + 1));
467                }
468            }
469        }
470        dedup_sort(&mut result);
471        Arc::new(result)
472    }
473
474    fn run_filter(&mut self, input: &[NodeId], predicate: &Predicate) -> Arc<Vec<NodeId>> {
475        let compiled = CompiledPredicate::compile(predicate);
476        let mut out: Vec<NodeId> = Vec::with_capacity(input.len());
477        for &node_id in input {
478            if self.check_predicate(node_id, &compiled) {
479                out.push(node_id);
480            }
481        }
482        dedup_sort(&mut out);
483        Arc::new(out)
484    }
485
486    fn run_setop(
487        &mut self,
488        op: SetOperation,
489        left: &PlanNode,
490        right: &PlanNode,
491    ) -> Arc<Vec<NodeId>> {
492        let left_result = self.run(left, None);
493        let right_result = self.run(right, None);
494        let l = left_result.as_ref();
495        let r = right_result.as_ref();
496
497        let mut out: Vec<NodeId> = match op {
498            SetOperation::Union => {
499                let mut v = Vec::with_capacity(l.len() + r.len());
500                v.extend_from_slice(l);
501                v.extend_from_slice(r);
502                v
503            }
504            SetOperation::Intersect => {
505                let rhs: HashSet<NodeId> = r.iter().copied().collect();
506                l.iter().copied().filter(|id| rhs.contains(id)).collect()
507            }
508            SetOperation::Difference => {
509                let rhs: HashSet<NodeId> = r.iter().copied().collect();
510                l.iter().copied().filter(|id| !rhs.contains(id)).collect()
511            }
512        };
513        dedup_sort(&mut out);
514        Arc::new(out)
515    }
516
517    // ------------------------------------------------------------------
518    // Predicate dispatch
519    // ------------------------------------------------------------------
520
521    fn check_predicate(&mut self, node_id: NodeId, predicate: &CompiledPredicate) -> bool {
522        let Some(entry) = self.snapshot.nodes().get(node_id) else {
523            return false;
524        };
525        Self::record_entry_deps(entry);
526
527        match predicate {
528            CompiledPredicate::HasCaller => {
529                self.has_kind(node_id, Direction::Reverse, &CALLS_PROBE)
530            }
531            CompiledPredicate::HasCallee => {
532                self.has_kind(node_id, Direction::Forward, &CALLS_PROBE)
533            }
534            CompiledPredicate::IsUnused => !self.has_any_inbound_use(node_id),
535
536            // DB14: relation predicates dispatch through
537            // `QueryDb::get::<...Query>` (or the shared subquery helper) so
538            // every caller of the planner benefits from language-aware name
539            // matching and cache reuse across plan submissions.
540            CompiledPredicate::Callers(value) => {
541                self.relation_matches_via_db::<CallersQuery>(node_id, RelationKind::Callers, value)
542            }
543            CompiledPredicate::Callees(value) => {
544                self.relation_matches_via_db::<CalleesQuery>(node_id, RelationKind::Callees, value)
545            }
546            CompiledPredicate::Imports(value) => {
547                self.relation_matches_via_db::<ImportsQuery>(node_id, RelationKind::Imports, value)
548            }
549            CompiledPredicate::Exports(value) => {
550                self.relation_matches_via_db::<ExportsQuery>(node_id, RelationKind::Exports, value)
551            }
552            CompiledPredicate::References(value) => self
553                .relation_matches_via_db::<ReferencesQuery>(
554                    node_id,
555                    RelationKind::References,
556                    value,
557                ),
558            CompiledPredicate::Implements(value) => self
559                .relation_matches_via_db::<ImplementsQuery>(
560                    node_id,
561                    RelationKind::Implements,
562                    value,
563                ),
564
565            CompiledPredicate::InFile(glob) => entry_in_file(&self.snapshot, entry, glob),
566            CompiledPredicate::InScope(kind) => entry_in_scope(&self.snapshot, node_id, *kind),
567            CompiledPredicate::MatchesName(pattern) => {
568                entry_name_matches(&self.snapshot, node_id, entry, pattern)
569            }
570            CompiledPredicate::Returns(type_name) => self.node_returns_type(node_id, type_name),
571
572            CompiledPredicate::And(list) => list
573                .iter()
574                .all(|inner| self.check_predicate(node_id, inner)),
575            CompiledPredicate::Or(list) => list
576                .iter()
577                .any(|inner| self.check_predicate(node_id, inner)),
578            CompiledPredicate::Not(inner) => !self.check_predicate(node_id, inner),
579        }
580    }
581
582    /// DB14 relation dispatch.
583    ///
584    /// * **Pattern / Regex values** — route through the [`DerivedQuery`] `Q`
585    ///   (`CallersQuery`, `CalleesQuery`, `ImportsQuery`, …). The query
586    ///   returns a sorted, deduplicated node set; we do a binary search for
587    ///   `node_id` to decide whether this row satisfies the predicate.
588    ///
589    /// * **Subquery values** — reuse the executor's [`Self::shared_node_cache`]
590    ///   to materialise the inner plan once, then delegate to
591    ///   [`relation_matches_node_via_set`] which walks the edge store with
592    ///   the same direction/role/edge-kind semantics as the cached path.
593    ///
594    /// Routing both paths through the shared set logic (plus cache) means
595    /// the planner's "name matches" semantics — segment matching, language
596    /// canonicalization, dynamic-language method-segment fallback — live in
597    /// exactly one place.
598    fn relation_matches_via_db<Q>(
599        &mut self,
600        node_id: NodeId,
601        relation: RelationKind,
602        value: &PredicateValue,
603    ) -> bool
604    where
605        Q: crate::query::DerivedQuery<Key = RelationKey, Value = Arc<Vec<NodeId>>>,
606    {
607        match value {
608            PredicateValue::Pattern(pat) => {
609                let key = RelationKey::Pattern(pat.clone());
610                let matches = self.db.get::<Q>(&key);
611                matches.as_ref().binary_search(&node_id).is_ok()
612            }
613            PredicateValue::Regex(re) => {
614                let key = RelationKey::Regex(re.clone());
615                let matches = self.db.get::<Q>(&key);
616                matches.as_ref().binary_search(&node_id).is_ok()
617            }
618            PredicateValue::Subquery(sub_plan) => {
619                let subquery_set = self.subquery_result(sub_plan);
620                let endpoints: HashSet<NodeId> = subquery_set.iter().copied().collect();
621                relation_matches_node_via_set(relation, node_id, &endpoints, &self.snapshot)
622            }
623        }
624    }
625
626    /// Execute a subquery plan, memoising the result on the executor.
627    fn subquery_result(&mut self, sub_plan: &PlanNode) -> Arc<Vec<NodeId>> {
628        if let Some(existing) = self.shared_node_cache.get(sub_plan) {
629            return Arc::clone(existing);
630        }
631        let result = self.run(sub_plan, None);
632        self.shared_node_cache
633            .insert(sub_plan.clone(), Arc::clone(&result));
634        result
635    }
636
637    // ------------------------------------------------------------------
638    // Edge helpers
639    // ------------------------------------------------------------------
640
641    fn neighbours(&self, node_id: NodeId, direction: Direction) -> Vec<StoreEdgeRef> {
642        match direction {
643            Direction::Forward => self.snapshot.edges().edges_from(node_id),
644            Direction::Reverse => self.snapshot.edges().edges_to(node_id),
645            Direction::Both => {
646                let mut out = self.snapshot.edges().edges_from(node_id);
647                out.extend(self.snapshot.edges().edges_to(node_id));
648                out
649            }
650        }
651    }
652
653    fn has_kind(&self, node_id: NodeId, direction: Direction, probe: &EdgeKind) -> bool {
654        let wanted = std::mem::discriminant(probe);
655        self.neighbours(node_id, direction)
656            .iter()
657            .any(|edge| std::mem::discriminant(&edge.kind) == wanted)
658    }
659
660    /// `IsUnused` heuristic — no inbound edges that represent *use* of this
661    /// node. DB14+ may refine this with entry-point classification; at DB12
662    /// we treat "no incoming `Calls` / `References` / `Imports` / `FfiCall` /
663    /// `GrpcCall` / `HttpRequest` / `WebAssemblyCall` / `Implements` /
664    /// `Inherits`" as unused.
665    fn has_any_inbound_use(&self, node_id: NodeId) -> bool {
666        for edge in self.snapshot.edges().edges_to(node_id) {
667            if matches!(
668                edge.kind,
669                EdgeKind::Calls { .. }
670                    | EdgeKind::References
671                    | EdgeKind::Imports { .. }
672                    | EdgeKind::FfiCall { .. }
673                    | EdgeKind::GrpcCall { .. }
674                    | EdgeKind::HttpRequest { .. }
675                    | EdgeKind::WebAssemblyCall
676                    | EdgeKind::Implements
677                    | EdgeKind::Inherits
678            ) {
679                return true;
680            }
681        }
682        false
683    }
684
685    /// Evaluates `returns:<TypeName>` for a single candidate node.
686    ///
687    /// Walks every outgoing edge from `node_id` and checks for the first
688    /// `EdgeKind::TypeOf { context: Some(TypeOfContext::Return), .. }` whose
689    /// target node's interned primary name equals `type_name` byte-exactly
690    /// (case-sensitive). Returns `false` if the candidate has no `Return`-
691    /// context type edges, or if every such edge targets a different name.
692    ///
693    /// This routes through `snapshot.edges().edges_from(...)` directly rather
694    /// than the relation-query `DerivedQuery` cache: `Predicate::Returns`
695    /// lands as a fresh predicate without sqry-db backing in this unit
696    /// (`B2_PLANNER`); a future unit can add a `ReturnsQuery` derived query
697    /// for cache reuse, but the dispatch surface stays edge-based here so
698    /// that the contract — `TypeOf { Return }` edges, not `NodeEntry.signature`
699    /// text — is enforced unconditionally.
700    fn node_returns_type(&self, node_id: NodeId, type_name: &str) -> bool {
701        for edge in self.snapshot.edges().edges_from(node_id) {
702            if !matches!(
703                edge.kind,
704                EdgeKind::TypeOf {
705                    context: Some(TypeOfContext::Return),
706                    ..
707                }
708            ) {
709                continue;
710            }
711            let Some(target_entry) = self.snapshot.nodes().get(edge.target) else {
712                continue;
713            };
714            // Record file dependency for the resolved target so the
715            // dependency recorder mirrors the data we read.
716            record_file_dep(target_entry.file);
717            if let Some(name) = self.snapshot.strings().resolve(target_entry.name)
718                && name.as_ref() == type_name
719            {
720                return true;
721            }
722        }
723        false
724    }
725
726    /// Records a file-level dependency for the given node entry.
727    ///
728    /// Associated function (no `self`) because the body only consults the
729    /// thread-local `DependencyRecorderGuard` — the executor's state is not
730    /// involved. Call sites still read as `Self::record_entry_deps(entry)`
731    /// so they remain symmetric with the rest of the executor's helpers.
732    fn record_entry_deps(entry: &NodeEntry) {
733        record_file_dep(entry.file);
734    }
735}
736
737// ============================================================================
738// Compiled predicate / pattern / value
739// ============================================================================
740
741/// Path / name patterns compile once per filter call (not per input node).
742///
743/// Relation-value patterns are no longer compiled here — DB14 moved relation
744/// predicate evaluation into the [`crate::queries::relation`] `DerivedQuery`
745/// impls, which own their own compilation and caching. Relation variants
746/// carry the raw [`PredicateValue`] so the planner can build a
747/// [`RelationKey`] for `db.get::<...>()` dispatch, or walk the executor's
748/// subquery cache when the value is a subquery.
749#[derive(Debug)]
750enum CompiledPredicate {
751    HasCaller,
752    HasCallee,
753    IsUnused,
754    Callers(PredicateValue),
755    Callees(PredicateValue),
756    Imports(PredicateValue),
757    Exports(PredicateValue),
758    References(PredicateValue),
759    Implements(PredicateValue),
760    InFile(CompiledPathPattern),
761    InScope(ScopeKind),
762    MatchesName(CompiledStringPattern),
763    /// `returns:<TypeName>`. Carries the byte-exact type-name needle that
764    /// the executor compares against the resolved name string of any node
765    /// targeted by a forward `TypeOf { context: Some(Return), .. }` edge.
766    /// See [`Predicate::Returns`] for full semantics.
767    Returns(String),
768    And(Vec<CompiledPredicate>),
769    Or(Vec<CompiledPredicate>),
770    Not(Box<CompiledPredicate>),
771}
772
773impl CompiledPredicate {
774    fn compile(predicate: &Predicate) -> Self {
775        match predicate {
776            Predicate::HasCaller => CompiledPredicate::HasCaller,
777            Predicate::HasCallee => CompiledPredicate::HasCallee,
778            Predicate::IsUnused => CompiledPredicate::IsUnused,
779            // Relation values keep their raw form — compilation is the
780            // DerivedQuery's job.
781            Predicate::Callers(v) => CompiledPredicate::Callers(v.clone()),
782            Predicate::Callees(v) => CompiledPredicate::Callees(v.clone()),
783            Predicate::Imports(v) => CompiledPredicate::Imports(v.clone()),
784            Predicate::Exports(v) => CompiledPredicate::Exports(v.clone()),
785            Predicate::References(v) => CompiledPredicate::References(v.clone()),
786            Predicate::Implements(v) => CompiledPredicate::Implements(v.clone()),
787            Predicate::InFile(path) => {
788                CompiledPredicate::InFile(CompiledPathPattern::compile(path))
789            }
790            Predicate::InScope(kind) => CompiledPredicate::InScope(*kind),
791            Predicate::MatchesName(pattern) => CompiledPredicate::MatchesName(
792                CompiledStringPattern::compile(pattern)
793                    .unwrap_or(CompiledStringPattern::REJECT_ALL),
794            ),
795            Predicate::Returns(type_name) => CompiledPredicate::Returns(type_name.clone()),
796            Predicate::And(list) => {
797                CompiledPredicate::And(list.iter().map(CompiledPredicate::compile).collect())
798            }
799            Predicate::Or(list) => {
800                CompiledPredicate::Or(list.iter().map(CompiledPredicate::compile).collect())
801            }
802            Predicate::Not(inner) => {
803                CompiledPredicate::Not(Box::new(CompiledPredicate::compile(inner)))
804            }
805        }
806    }
807}
808
809/// Compiled [`StringPattern`] — either a glob matcher or a raw-string checker.
810#[derive(Debug)]
811enum CompiledStringPattern {
812    Literal {
813        needle: String,
814        mode: MatchMode,
815        case_insensitive: bool,
816    },
817    Glob {
818        matcher: globset::GlobMatcher,
819    },
820    /// Fallback used when pattern compilation fails (malformed user input).
821    /// Matches nothing so evaluation cannot panic or produce false positives.
822    RejectAll,
823}
824
825impl CompiledStringPattern {
826    const REJECT_ALL: Self = CompiledStringPattern::RejectAll;
827
828    fn compile(pattern: &StringPattern) -> Option<Self> {
829        match pattern.mode {
830            MatchMode::Glob => {
831                let glob = GlobBuilder::new(&pattern.raw)
832                    .case_insensitive(pattern.case_insensitive)
833                    .literal_separator(false)
834                    .build()
835                    .ok()?;
836                Some(CompiledStringPattern::Glob {
837                    matcher: glob.compile_matcher(),
838                })
839            }
840            MatchMode::Exact | MatchMode::Prefix | MatchMode::Suffix | MatchMode::Contains => {
841                let needle = if pattern.case_insensitive {
842                    pattern.raw.to_lowercase()
843                } else {
844                    pattern.raw.clone()
845                };
846                Some(CompiledStringPattern::Literal {
847                    needle,
848                    mode: pattern.mode,
849                    case_insensitive: pattern.case_insensitive,
850                })
851            }
852        }
853    }
854
855    fn matches(&self, candidate: &str) -> bool {
856        match self {
857            CompiledStringPattern::Literal {
858                needle,
859                mode,
860                case_insensitive,
861            } => {
862                let haystack_owned: Option<String> = if *case_insensitive {
863                    Some(candidate.to_lowercase())
864                } else {
865                    None
866                };
867                let haystack = haystack_owned.as_deref().unwrap_or(candidate);
868                match mode {
869                    MatchMode::Exact => haystack == needle,
870                    MatchMode::Prefix => haystack.starts_with(needle.as_str()),
871                    MatchMode::Suffix => haystack.ends_with(needle.as_str()),
872                    MatchMode::Contains => haystack.contains(needle.as_str()),
873                    MatchMode::Glob => false, // unreachable — glob compiles to Self::Glob.
874                }
875            }
876            CompiledStringPattern::Glob { matcher } => matcher.is_match(candidate),
877            CompiledStringPattern::RejectAll => false,
878        }
879    }
880}
881
882#[derive(Debug)]
883enum CompiledPathPattern {
884    Glob(globset::GlobMatcher),
885    RejectAll,
886}
887
888impl CompiledPathPattern {
889    fn compile(path: &PathPattern) -> Self {
890        match globset::Glob::new(&path.glob) {
891            Ok(glob) => CompiledPathPattern::Glob(glob.compile_matcher()),
892            Err(_) => CompiledPathPattern::RejectAll,
893        }
894    }
895
896    fn matches(&self, candidate: &str) -> bool {
897        match self {
898            CompiledPathPattern::Glob(matcher) => matcher.is_match(candidate),
899            CompiledPathPattern::RejectAll => false,
900        }
901    }
902}
903
904// ============================================================================
905// Edge-kind probe constants
906// ============================================================================
907
908/// Probe value used to compute the `Calls` discriminant for existence checks.
909/// Carries zero metadata so it is interchangeable with any canonicalised
910/// edge-kind probe. Still needed by [`CompiledPredicate::HasCaller`] and
911/// [`CompiledPredicate::HasCallee`]; relation predicates route through
912/// [`crate::queries::relation`] since DB14.
913const CALLS_PROBE: EdgeKind = EdgeKind::Calls {
914    argument_count: 0,
915    is_async: false,
916};
917
918// ============================================================================
919// Free-standing helpers
920// ============================================================================
921
922fn dedup_sort(v: &mut Vec<NodeId>) {
923    v.sort_unstable_by_key(|id| (id.index(), id.generation()));
924    v.dedup();
925}
926
927fn append_chain_prefix(prefix: &PlanNode, appended_steps: &[PlanNode]) -> PlanNode {
928    if appended_steps.is_empty() {
929        return prefix.clone();
930    }
931
932    let mut steps = match prefix {
933        PlanNode::Chain { steps } => steps.clone(),
934        _ => vec![prefix.clone()],
935    };
936    steps.extend(appended_steps.iter().cloned());
937    PlanNode::Chain { steps }
938}
939
940fn entry_visibility(snapshot: &GraphSnapshot, entry: &NodeEntry) -> Option<Visibility> {
941    entry
942        .visibility
943        .and_then(|sid| snapshot.strings().resolve(sid))
944        .and_then(|s| Visibility::parse(s.as_ref()))
945}
946
947fn entry_in_file(snapshot: &GraphSnapshot, entry: &NodeEntry, glob: &CompiledPathPattern) -> bool {
948    let Some(path) = snapshot.files().resolve(entry.file) else {
949        return false;
950    };
951    let path_str = path.to_string_lossy();
952    glob.matches(path_str.as_ref())
953}
954
955fn entry_in_scope(snapshot: &GraphSnapshot, node_id: NodeId, kind: ScopeKind) -> bool {
956    let Some(entry) = snapshot.nodes().get(node_id) else {
957        return false;
958    };
959    for (_, scope) in snapshot.scope_arena().iter() {
960        if scope.kind != kind {
961            continue;
962        }
963        if scope.file != entry.file {
964            continue;
965        }
966        // Scope byte_span is `[start, end)` per ScopeArena docs.
967        if scope.byte_span.0 <= entry.start_byte && entry.end_byte <= scope.byte_span.1 {
968            return true;
969        }
970    }
971    false
972}
973
974/// Filter-time `name:` predicate.
975///
976/// **`B1_ALIGN` contract.** Mirrors
977/// [`sqry_core::graph::unified::concurrent::graph::GraphSnapshot::find_by_exact_name`]
978/// (and the `--exact` CLI shorthand): matches `entry.name` or
979/// `entry.qualified_name` or their user-facing display aliases, then
980/// suppresses synthetic placeholder nodes via
981/// [`sqry_core::graph::unified::concurrent::graph::GraphSnapshot::is_node_synthetic`]
982/// so the planner's `name:` predicate returns the same set as
983/// `--exact` against any fixture.
984fn entry_name_matches(
985    snapshot: &GraphSnapshot,
986    node_id: NodeId,
987    entry: &NodeEntry,
988    pattern: &CompiledStringPattern,
989) -> bool {
990    if !entry_name_or_display_matches(snapshot, entry, pattern) {
991        return false;
992    }
993    // B1_ALIGN: synthetic placeholder nodes are invisible to the
994    // `name:` surface (locked by C_SUPPRESS in
995    // `sqry-core/src/graph/unified/concurrent/graph.rs`).
996    !snapshot.is_node_synthetic(node_id)
997}
998
999fn entry_name_or_display_matches(
1000    snapshot: &GraphSnapshot,
1001    entry: &NodeEntry,
1002    pattern: &CompiledStringPattern,
1003) -> bool {
1004    let simple_name = snapshot.strings().resolve(entry.name);
1005    if simple_name
1006        .as_ref()
1007        .is_some_and(|name| pattern.matches(name.as_ref()))
1008    {
1009        return true;
1010    }
1011
1012    let Some(qualified_name) = entry
1013        .qualified_name
1014        .and_then(|sid| snapshot.strings().resolve(sid))
1015    else {
1016        return false;
1017    };
1018    if pattern.matches(qualified_name.as_ref()) {
1019        return true;
1020    }
1021
1022    let fallback_name = simple_name.as_deref().unwrap_or_default();
1023    let display_name =
1024        display_entry_qualified_name(entry, snapshot.strings(), snapshot.files(), fallback_name);
1025    display_name != qualified_name.as_ref() && pattern.matches(&display_name)
1026}
1027
1028// ============================================================================
1029// Inline smoke tests — unit-level coverage that doesn't require a full graph
1030// fixture lives here; end-to-end tests against a populated graph live in
1031// `sqry-db/tests/execute_plan_test.rs`.
1032// ============================================================================
1033
1034#[cfg(test)]
1035mod tests {
1036    use super::*;
1037    use crate::planner::ir::{MatchMode, StringPattern};
1038
1039    #[test]
1040    fn compiled_string_pattern_exact_case_sensitive() {
1041        let p = CompiledStringPattern::compile(&StringPattern::exact("Foo")).unwrap();
1042        assert!(p.matches("Foo"));
1043        assert!(!p.matches("foo"));
1044    }
1045
1046    #[test]
1047    fn compiled_string_pattern_exact_case_insensitive() {
1048        let p = CompiledStringPattern::compile(&StringPattern::exact("Foo").case_insensitive())
1049            .unwrap();
1050        assert!(p.matches("FOO"));
1051        assert!(p.matches("foo"));
1052        assert!(!p.matches("bar"));
1053    }
1054
1055    #[test]
1056    fn compiled_string_pattern_prefix_suffix_contains() {
1057        let pref = CompiledStringPattern::compile(&StringPattern::prefix("abc")).unwrap();
1058        assert!(pref.matches("abcdef"));
1059        assert!(!pref.matches("zabc"));
1060
1061        let suf = CompiledStringPattern::compile(&StringPattern::suffix("xyz")).unwrap();
1062        assert!(suf.matches("foo_xyz"));
1063        assert!(!suf.matches("xyz_foo"));
1064
1065        let cont = CompiledStringPattern::compile(&StringPattern::contains("mid")).unwrap();
1066        assert!(cont.matches("prefix_mid_suffix"));
1067        assert!(!cont.matches("no"));
1068    }
1069
1070    #[test]
1071    fn compiled_string_pattern_glob() {
1072        let p = CompiledStringPattern::compile(&StringPattern::glob("parse_*")).unwrap();
1073        assert!(p.matches("parse_expr"));
1074        assert!(p.matches("parse_"));
1075        assert!(!p.matches("lexer"));
1076    }
1077
1078    #[test]
1079    fn compiled_string_pattern_malformed_glob_rejects_all() {
1080        // An invalid glob (unterminated character class) compiles to RejectAll.
1081        let malformed = StringPattern {
1082            raw: "[abc".into(),
1083            mode: MatchMode::Glob,
1084            case_insensitive: false,
1085        };
1086        let p =
1087            CompiledStringPattern::compile(&malformed).unwrap_or(CompiledStringPattern::REJECT_ALL);
1088        assert!(!p.matches("abc"));
1089        assert!(!p.matches("[abc"));
1090    }
1091
1092    // `CompiledRegex` tests were removed in DB14 — regex compilation now
1093    // lives in the relation DerivedQuery impls. Coverage for the DB14
1094    // regex path lives in `sqry-db/tests/execute_plan_test.rs`
1095    // (`filter_callers_with_regex_matches_on_source_name`, etc.).
1096
1097    #[test]
1098    fn dedup_sort_sorts_and_dedupes_by_index_then_generation() {
1099        let mut v = vec![
1100            NodeId::new(3, 1),
1101            NodeId::new(1, 1),
1102            NodeId::new(3, 1),
1103            NodeId::new(2, 1),
1104            NodeId::new(1, 1),
1105        ];
1106        dedup_sort(&mut v);
1107        assert_eq!(
1108            v,
1109            vec![NodeId::new(1, 1), NodeId::new(2, 1), NodeId::new(3, 1)]
1110        );
1111    }
1112
1113    #[test]
1114    fn compiled_path_pattern_glob_and_reject() {
1115        let good = CompiledPathPattern::compile(&PathPattern::new("src/**/*.rs"));
1116        assert!(good.matches("src/graph/unified/mod.rs"));
1117        assert!(!good.matches("docs/README.md"));
1118
1119        // Invalid globs compile to RejectAll.
1120        let bad = CompiledPathPattern::compile(&PathPattern::new("[abc"));
1121        assert!(!bad.matches("abc"));
1122    }
1123}
sqry_db/planner/execute.rs

sqry_db/planner/
execute.rs