Skip to main content

sqry_core/graph/unified/bind/
shadow.rs

1//! Shadow derivation for the Phase 2 binding plane.
2//!
3//! A `ShadowEntry` names one Variable/Parameter node inside a function-scope
4//! chain, ordered by its byte offset. Multiple entries for the same
5//! `(scope, symbol)` pair form a shadow chain: the innermost definition
6//! whose byte offset is strictly less than a query offset is the effective
7//! binding at that offset.
8
9use std::collections::HashMap;
10
11use serde::{Deserialize, Serialize};
12
13use crate::graph::unified::build::phase4e_binding::BindingEdgeIndex;
14use crate::graph::unified::edge::kind::EdgeKind;
15use crate::graph::unified::mutation_target::GraphMutationTarget;
16use crate::graph::unified::node::id::NodeId;
17use crate::graph::unified::node::kind::NodeKind;
18use crate::graph::unified::string::id::StringId;
19
20use super::scope::{ScopeArena, ScopeId, ScopeKind};
21
22/// Dense handle into a `ShadowTable`.
23///
24/// The id is the position of the entry in the sorted `entries` slice, so
25/// `ShadowEntryId(k)` is valid as long as the table hasn't been replaced.
26/// `ShadowTable::get(id)` performs a single bounds-checked slice index.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
28pub struct ShadowEntryId(pub u32);
29
30/// One shadow-chain entry.
31///
32/// Records the function scope that owns the variable, the interned symbol
33/// name, the `NodeId` of the defining `Variable` or `Parameter` node, the
34/// byte offset of that definition, and a stable dense handle into the owning
35/// `ShadowTable`.
36///
37/// The `id` field is the position of this entry in the sorted entries slice
38/// after `derive_shadows` has run. It is assigned by the sort and must not
39/// be mutated by callers.
40#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
41pub struct ShadowEntry {
42    /// Stable dense handle for this entry within its owning `ShadowTable`.
43    /// Assigned after sorting; equals `ShadowEntryId(idx)` where `idx` is
44    /// the position of this entry in the sorted entries slice.
45    pub id: ShadowEntryId,
46    /// The function scope that contains this definition.
47    pub scope: ScopeId,
48    /// The interned symbol name of the variable or parameter.
49    pub symbol: StringId,
50    /// `NodeId` of the `Variable` or `Parameter` node that introduces the
51    /// binding at this offset.
52    pub node: NodeId,
53    /// Byte offset of the definition site within its source file.
54    ///
55    /// Entries within the same `(scope, symbol)` chain are sorted by this
56    /// field in ascending order; `effective_binding` performs a reverse-scan
57    /// to find the last entry whose offset is strictly less than the query
58    /// offset.
59    pub byte_offset: u32,
60}
61
62/// Content-addressed shadow lookup table.
63///
64/// Entries are sorted by `(scope, symbol, byte_offset)`. The `chains` index
65/// maps each `(ScopeId, StringId)` key to a half-open range `[start, end)`
66/// into `entries`. `effective_binding` does a backward linear scan through
67/// the relevant range to find the innermost definition before a query offset.
68///
69/// # Serialization
70///
71/// Only `entries` is persisted. The `chains` index is derived from `entries`
72/// on deserialization and cannot drift out of sync. This keeps the V9 snapshot
73/// footprint minimal and eliminates the dual-write hazard.
74#[derive(Debug, Clone, Default)]
75pub struct ShadowTable {
76    /// All entries, sorted by `(scope, symbol, byte_offset)`.
77    entries: Vec<ShadowEntry>,
78    /// Per-`(scope, symbol)` range index mapping a `(ScopeId, StringId)` key
79    /// to its `[start, end)` slice in `entries`. **Not** persisted.
80    chains: HashMap<(ScopeId, StringId), (u32, u32)>,
81}
82
83impl ShadowTable {
84    /// Creates an empty `ShadowTable`.
85    #[must_use]
86    pub fn new() -> Self {
87        Self::default()
88    }
89
90    /// Returns the total number of shadow entries.
91    #[must_use]
92    pub fn len(&self) -> usize {
93        self.entries.len()
94    }
95
96    /// Returns `true` if the table contains no entries.
97    #[must_use]
98    pub fn is_empty(&self) -> bool {
99        self.entries.is_empty()
100    }
101
102    /// Returns the full sorted entries slice.
103    ///
104    /// Useful for V9 persistence walkers, debug dumps, stats collection, and
105    /// any future consumer that needs to iterate the entire table.
106    #[must_use]
107    pub fn entries(&self) -> &[ShadowEntry] {
108        &self.entries
109    }
110
111    /// Looks up a shadow entry by its dense `ShadowEntryId`.
112    ///
113    /// Returns `None` if `id.0 >= self.len()`. The id is the position of the
114    /// entry in the sorted slice, assigned during `derive_shadows`. It is
115    /// stable for the lifetime of this table instance.
116    ///
117    /// Used by resolution steps that record a shadow entry by id and later
118    /// need to recover the full `ShadowEntry`.
119    #[must_use]
120    pub fn get(&self, id: ShadowEntryId) -> Option<&ShadowEntry> {
121        self.entries.get(id.0 as usize)
122    }
123
124    /// Returns all shadow entries declared in `scope`, sorted by
125    /// `(symbol, byte_offset)`.
126    ///
127    /// Returns an empty `Vec` when no entries belong to `scope`. The returned
128    /// entries span all symbols in the scope, not a single symbol chain.
129    #[must_use]
130    pub fn shadows_in(&self, scope: ScopeId) -> Vec<&ShadowEntry> {
131        self.entries.iter().filter(|e| e.scope == scope).collect()
132    }
133
134    /// Returns the innermost shadow entry for `(scope, symbol)` whose byte
135    /// offset is strictly less than `query_byte_offset`.
136    ///
137    /// Returns `None` when no such entry exists — i.e., the query position is
138    /// before the first definition of `symbol` in `scope`, or the `(scope,
139    /// symbol)` pair has no entries at all.
140    ///
141    /// # Shadow semantics
142    ///
143    /// This implements the Rust/JS/Python shadow semantic: a re-binding at
144    /// offset B is visible only to references at offsets > B. The last
145    /// definition strictly before the query offset wins.
146    #[must_use]
147    pub fn effective_binding(
148        &self,
149        scope: ScopeId,
150        symbol: StringId,
151        query_byte_offset: u32,
152    ) -> Option<NodeId> {
153        let (start, end) = self.chains.get(&(scope, symbol)).copied()?;
154        let slice = &self.entries[start as usize..end as usize];
155        // Reverse scan: the last entry whose byte_offset < query_byte_offset wins.
156        slice
157            .iter()
158            .rev()
159            .find(|e| e.byte_offset < query_byte_offset)
160            .map(|e| e.node)
161    }
162
163    /// Applies `keep` to every entry's `node`, dropping entries whose defining
164    /// `Variable`/`Parameter` node fails the predicate.
165    ///
166    /// After filtering, the `id` of each surviving entry is reassigned to its
167    /// new position in the sorted `entries` slice, and the `chains` range
168    /// index is rebuilt from scratch so `effective_binding` / `shadows_in`
169    /// stay consistent. The relative order of surviving entries is preserved,
170    /// so the table remains sorted by `(scope, symbol, byte_offset)`.
171    ///
172    /// This is the mutation entry point used by the Gate 0c `NodeIdBearing`
173    /// impl (A2 §K row K.A13). Callers that hold the table behind an `Arc`
174    /// must reach it through `Arc::make_mut` before invoking this method.
175    ///
176    /// Live in the default build: the consumer is `RebuildGraph::finalize()`
177    /// via the `retain_nodes` impl, reached from the ungated public
178    /// `build::incremental::incremental_rebuild` -> `finalize` path (the
179    /// `rebuild::coverage` unit tests exercise it too).
180    pub(crate) fn retain_by_node(&mut self, keep: &dyn Fn(NodeId) -> bool) {
181        self.entries.retain(|entry| keep(entry.node));
182        for (idx, entry) in self.entries.iter_mut().enumerate() {
183            entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
184        }
185        self.rebuild_index();
186    }
187
188    /// Rewrite every `StringId` stored on every entry through `remap`,
189    /// replacing any ID that appears as a key with its canonical value.
190    ///
191    /// Used by Gate 0c's finalize step 1 (`StringId` canonicalisation).
192    /// `symbol` participates in the `(scope, symbol, byte_offset)` sort
193    /// key AND in the `chains: HashMap<(ScopeId, StringId), _>` index, so
194    /// after rewrite the entries are re-sorted and the `chains` map is
195    /// rebuilt from scratch. A stable sort preserves the prior order
196    /// within each `(scope, symbol, byte_offset)` group (only relevant if
197    /// two entries collide on all three — legal after dedup but rare).
198    ///
199    /// Empty `remap` is a no-op. Callers that hold the table behind an
200    /// `Arc` must reach it through `Arc::make_mut` first.
201    ///
202    /// Live in the default build: the consumer is `RebuildGraph::finalize()`
203    /// step 1, reached from the ungated public
204    /// `build::incremental::incremental_rebuild` -> `finalize` path.
205    pub(crate) fn rewrite_string_ids_through_remap(&mut self, remap: &HashMap<StringId, StringId>) {
206        if remap.is_empty() {
207            return;
208        }
209        let mut changed = false;
210        for entry in &mut self.entries {
211            if let Some(&canon) = remap.get(&entry.symbol) {
212                entry.symbol = canon;
213                changed = true;
214            }
215        }
216        if !changed {
217            return;
218        }
219        self.entries.sort_by(|a, b| {
220            (a.scope, a.symbol, a.byte_offset).cmp(&(b.scope, b.symbol, b.byte_offset))
221        });
222        for (idx, entry) in self.entries.iter_mut().enumerate() {
223            entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
224        }
225        self.rebuild_index();
226    }
227
228    /// Rebuilds the `chains` range index from `self.entries`.
229    ///
230    /// Called after deserialization (where `chains` is not persisted) and
231    /// after constructing a `ShadowTable` from a raw `Vec<ShadowEntry>`. The
232    /// entries must already be sorted by `(scope, symbol, byte_offset)`.
233    fn rebuild_index(&mut self) {
234        debug_assert!(
235            self.entries
236                .windows(2)
237                .all(|w| { (w[0].scope, w[0].symbol) <= (w[1].scope, w[1].symbol) }),
238            "rebuild_index requires entries sorted by (scope, symbol) ascending"
239        );
240        self.chains.clear();
241        let mut cursor = 0u32;
242        while (cursor as usize) < self.entries.len() {
243            let key = (
244                self.entries[cursor as usize].scope,
245                self.entries[cursor as usize].symbol,
246            );
247            let start = cursor;
248            while (cursor as usize) < self.entries.len()
249                && (
250                    self.entries[cursor as usize].scope,
251                    self.entries[cursor as usize].symbol,
252                ) == key
253            {
254                cursor += 1;
255            }
256            self.chains.insert(key, (start, cursor));
257        }
258    }
259}
260
261// ---------------------------------------------------------------------------
262// Serde: persist only `entries`; rebuild `chains` on deserialize.
263// ---------------------------------------------------------------------------
264
265impl Serialize for ShadowTable {
266    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
267        // Serialize as a newtype wrapping just the entries Vec.
268        self.entries.serialize(serializer)
269    }
270}
271
272impl<'de> Deserialize<'de> for ShadowTable {
273    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
274        let entries = Vec::<ShadowEntry>::deserialize(deserializer)?;
275        let mut table = ShadowTable {
276            entries,
277            chains: HashMap::default(),
278        };
279        table.rebuild_index();
280        Ok(table)
281    }
282}
283
284/// Walks `NodeKind::Variable` and `NodeKind::Parameter` nodes whose enclosing
285/// scope is a `ScopeKind::Function`, groups them by `(scope, symbol)`, sorts
286/// each group by `byte_offset`, assigns stable `ShadowEntryId`s, and builds
287/// the `chains` range index.
288///
289/// Uses `ScopeArena::iter()` to build the node → scope map, avoiding the
290/// generation-1 hardcode. Only `Contains`, `Defines` edges feed into scope
291/// derivation; `derive_shadows` operates on the already-finalized scope arena
292/// and never walks cross-language edges.
293///
294/// Returns an empty `ShadowTable` when no Variable/Parameter nodes belong to
295/// any Function scope.
296///
297/// # Performance
298///
299/// The `edge_index` parameter replaces the per-function `edges_from()` calls
300/// that previously triggered O(E) delta scans, turning per-node edge lookup
301/// from O(E) to O(1).
302pub(crate) fn derive_shadows<G: GraphMutationTarget>(
303    graph: &G,
304    scopes: &ScopeArena,
305    edge_index: &BindingEdgeIndex,
306) -> ShadowTable {
307    // Build NodeId → ScopeId for every scope whose kind is Function.
308    // Reuse the same node_to_scope approach as derive_aliases but then filter
309    // to Function-kind scopes.
310    let node_to_function_scope: HashMap<NodeId, ScopeId> = scopes
311        .iter()
312        .filter(|(_, scope)| scope.kind == ScopeKind::Function)
313        .map(|(id, scope)| (scope.node, id))
314        .collect();
315
316    // For nodes that are contained within a function (i.e., their direct
317    // parent is the function node), we need to know which scope corresponds
318    // to which function node. We already have that in node_to_function_scope
319    // (function_node → scope_id). We need to find which function scope
320    // contains each Variable/Parameter.
321    //
322    // Strategy: for each function node find its `Defines`/`Contains` children
323    // that are Variable/Parameter nodes using the pre-built forward index.
324    // This is correct for single-level nesting (typical for let-bindings and
325    // parameters) and avoids the O(E) delta scan that edges_from() would
326    // trigger.
327
328    let mut raw: Vec<ShadowEntry> = Vec::new();
329
330    for (&func_node, &scope_id) in &node_to_function_scope {
331        // Look up pre-indexed outgoing Defines/Contains edges from the function node.
332        let Some(children) = edge_index.defines_contains_by_source.get(&func_node) else {
333            continue;
334        };
335
336        for (child, edge_kind) in children {
337            if !matches!(edge_kind, EdgeKind::Defines | EdgeKind::Contains) {
338                continue;
339            }
340            let Some(child_entry) = graph.nodes().get(*child) else {
341                continue;
342            };
343            if !matches!(child_entry.kind, NodeKind::Variable | NodeKind::Parameter) {
344                continue;
345            }
346            raw.push(ShadowEntry {
347                // Placeholder id; overwritten after sorting.
348                id: ShadowEntryId(0),
349                scope: scope_id,
350                symbol: child_entry.name,
351                node: *child,
352                byte_offset: child_entry.start_byte,
353            });
354        }
355    }
356
357    if raw.is_empty() {
358        log::debug!("derive_shadows: no Variable/Parameter nodes under Function scopes");
359        return ShadowTable::new();
360    }
361
362    // Sort by (scope, symbol, byte_offset) for stable chain partitioning.
363    raw.sort_by(|a, b| (a.scope, a.symbol, a.byte_offset).cmp(&(b.scope, b.symbol, b.byte_offset)));
364
365    // Assign stable ids: position in the sorted slice IS the ShadowEntryId.
366    for (idx, entry) in raw.iter_mut().enumerate() {
367        entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
368    }
369
370    // Build the per-(scope, symbol) range index via rebuild_index so the
371    // logic lives in exactly one place (deserialization also calls it).
372    let mut table = ShadowTable {
373        entries: raw,
374        chains: HashMap::default(),
375    };
376    table.rebuild_index();
377
378    log::debug!(
379        "derive_shadows: derived {} shadow entries across {} chains",
380        table.len(),
381        table.chains.len(),
382    );
383
384    table
385}
386
387#[cfg(test)]
388mod tests {
389    use super::*;
390    use crate::graph::unified::node::id::NodeId;
391    use crate::graph::unified::string::id::StringId;
392
393    fn make_scope(index: u32) -> ScopeId {
394        ScopeId::new(index, 1)
395    }
396
397    /// Build a `ShadowTable` directly from a pre-sorted entries slice (for
398    /// unit tests that don't want to go through `derive_shadows`).
399    fn table_from_sorted(mut entries: Vec<ShadowEntry>) -> ShadowTable {
400        for (idx, e) in entries.iter_mut().enumerate() {
401            e.id = ShadowEntryId(u32::try_from(idx).unwrap());
402        }
403        let mut table = ShadowTable {
404            entries,
405            chains: HashMap::default(),
406        };
407        table.rebuild_index();
408        table
409    }
410
411    fn make_entry(scope: ScopeId, symbol: StringId, node: NodeId, byte_offset: u32) -> ShadowEntry {
412        ShadowEntry {
413            id: ShadowEntryId(0), // patched by table_from_sorted
414            scope,
415            symbol,
416            node,
417            byte_offset,
418        }
419    }
420
421    #[test]
422    fn t12_effective_binding_innermost_before_offset() {
423        let scope = make_scope(0);
424        let symbol = StringId::new(1);
425        let entries = vec![
426            make_entry(scope, symbol, NodeId::new(1, 1), 10),
427            make_entry(scope, symbol, NodeId::new(2, 1), 30),
428        ];
429        let table = table_from_sorted(entries);
430
431        // Query at offset 40 — both definitions are before, innermost is node 2.
432        assert_eq!(
433            table.effective_binding(scope, symbol, 40),
434            Some(NodeId::new(2, 1)),
435            "innermost definition before offset 40 must be node 2 (at offset 30)"
436        );
437        // Query at offset 20 — only first definition is before.
438        assert_eq!(
439            table.effective_binding(scope, symbol, 20),
440            Some(NodeId::new(1, 1)),
441            "only node 1 (offset 10) is before offset 20"
442        );
443    }
444
445    #[test]
446    fn t13_effective_binding_before_first_returns_none() {
447        let scope = make_scope(0);
448        let symbol = StringId::new(1);
449        let entries = vec![make_entry(scope, symbol, NodeId::new(1, 1), 50)];
450        let table = table_from_sorted(entries);
451
452        // Query at offset 10 — before the first definition.
453        assert_eq!(
454            table.effective_binding(scope, symbol, 10),
455            None,
456            "query before first definition must return None"
457        );
458    }
459
460    #[test]
461    fn empty_table_behavior() {
462        let table = ShadowTable::new();
463        assert!(table.is_empty());
464        assert_eq!(table.len(), 0);
465        let scope = make_scope(0);
466        let symbol = StringId::new(1);
467        assert!(table.shadows_in(scope).is_empty());
468        assert_eq!(table.effective_binding(scope, symbol, 100), None);
469        assert!(table.get(ShadowEntryId(0)).is_none());
470    }
471
472    #[test]
473    fn get_by_shadow_entry_id_round_trips() {
474        let scope = make_scope(0);
475        let sym_a = StringId::new(1);
476        let sym_b = StringId::new(2);
477        let entries = vec![
478            make_entry(scope, sym_a, NodeId::new(1, 1), 10),
479            make_entry(scope, sym_b, NodeId::new(2, 1), 20),
480        ];
481        let table = table_from_sorted(entries);
482
483        let e0 = table.get(ShadowEntryId(0)).expect("id 0 must exist");
484        let e1 = table.get(ShadowEntryId(1)).expect("id 1 must exist");
485        assert_eq!(e0.id, ShadowEntryId(0));
486        assert_eq!(e0.symbol, sym_a);
487        assert_eq!(e1.id, ShadowEntryId(1));
488        assert_eq!(e1.symbol, sym_b);
489        assert!(table.get(ShadowEntryId(2)).is_none());
490    }
491
492    #[test]
493    fn entries_accessor_returns_full_slice() {
494        let scope = make_scope(0);
495        let symbol = StringId::new(1);
496        let entries = vec![
497            make_entry(scope, symbol, NodeId::new(1, 1), 5),
498            make_entry(scope, symbol, NodeId::new(2, 1), 15),
499            make_entry(scope, symbol, NodeId::new(3, 1), 25),
500        ];
501        let table = table_from_sorted(entries);
502        assert_eq!(table.entries().len(), 3);
503    }
504
505    #[test]
506    fn scope_partitioning_no_collision() {
507        // Two scopes each declaring the same symbol name must not collide.
508        let s_a = make_scope(0);
509        let s_b = make_scope(1);
510        let sym = StringId::new(42);
511
512        let entries = vec![
513            make_entry(s_a, sym, NodeId::new(10, 1), 5),
514            make_entry(s_a, sym, NodeId::new(11, 1), 20),
515            make_entry(s_b, sym, NodeId::new(20, 1), 5),
516            make_entry(s_b, sym, NodeId::new(21, 1), 30),
517        ];
518        let table = table_from_sorted(entries);
519
520        // s_a at offset 25 → should return node 11 (offset 20 < 25).
521        assert_eq!(
522            table.effective_binding(s_a, sym, 25),
523            Some(NodeId::new(11, 1)),
524            "s_a effective binding at 25 must be node 11"
525        );
526        // s_b at offset 25 → should return node 20 (offset 5 < 25, 30 ≥ 25).
527        assert_eq!(
528            table.effective_binding(s_b, sym, 25),
529            Some(NodeId::new(20, 1)),
530            "s_b effective binding at 25 must be node 20"
531        );
532        // s_a and s_b resolve to different nodes for the same symbol and offset
533        assert_ne!(
534            table.effective_binding(s_a, sym, 25),
535            table.effective_binding(s_b, sym, 25),
536            "same symbol in different scopes must resolve independently"
537        );
538    }
539
540    #[test]
541    fn postcard_round_trip_preserves_entries_and_rebuilds_chains() {
542        let s_a = make_scope(1);
543        let s_b = make_scope(2);
544        let sym_x = StringId::new(10);
545        let sym_y = StringId::new(11);
546
547        let entries = vec![
548            make_entry(s_a, sym_x, NodeId::new(1, 1), 10),
549            make_entry(s_a, sym_x, NodeId::new(2, 1), 30),
550            make_entry(s_b, sym_y, NodeId::new(3, 1), 5),
551        ];
552        let original = table_from_sorted(entries);
553
554        let bytes = postcard::to_allocvec(&original).expect("serialize");
555        let restored: ShadowTable = postcard::from_bytes(&bytes).expect("deserialize");
556
557        assert_eq!(
558            restored.entries().len(),
559            original.entries().len(),
560            "entry count must survive round-trip"
561        );
562        assert_eq!(
563            restored.entries(),
564            original.entries(),
565            "entries must be byte-equal after round-trip"
566        );
567
568        // Verify chains were rebuilt correctly.
569        assert_eq!(
570            restored.effective_binding(s_a, sym_x, 40),
571            Some(NodeId::new(2, 1)),
572            "effective_binding on restored table must find node 2 for s_a/sym_x at 40"
573        );
574        assert_eq!(
575            restored.effective_binding(s_a, sym_x, 20),
576            Some(NodeId::new(1, 1)),
577            "effective_binding on restored table must find node 1 for s_a/sym_x at 20"
578        );
579        assert_eq!(
580            restored.effective_binding(s_b, sym_y, 10),
581            Some(NodeId::new(3, 1)),
582            "effective_binding on restored table must find node 3 for s_b/sym_y at 10"
583        );
584        assert_eq!(
585            restored.effective_binding(s_b, sym_y, 3),
586            None,
587            "query before first def must return None after round-trip"
588        );
589    }
590}