// sqry_core/graph/unified/bind/shadow.rs

//! Shadow derivation for the Phase 2 binding plane.
//!
//! A `ShadowEntry` names one Variable/Parameter node inside a function-scope
//! chain, ordered by its byte offset. Multiple entries for the same
//! `(scope, symbol)` pair form a shadow chain: the innermost definition
//! whose byte offset is strictly less than a query offset is the effective
//! binding at that offset.

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::graph::unified::build::phase4e_binding::BindingEdgeIndex;
use crate::graph::unified::edge::kind::EdgeKind;
use crate::graph::unified::mutation_target::GraphMutationTarget;
use crate::graph::unified::node::id::NodeId;
use crate::graph::unified::node::kind::NodeKind;
use crate::graph::unified::string::id::StringId;

use super::scope::{ScopeArena, ScopeId, ScopeKind};

/// Dense handle into a `ShadowTable`.
///
/// The id is the position of the entry in the sorted `entries` slice, so
/// `ShadowEntryId(k)` is valid as long as the table hasn't been replaced.
/// `ShadowTable::get(id)` performs a single bounds-checked slice index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ShadowEntryId(pub u32);

/// One shadow-chain entry.
///
/// Records the function scope that owns the variable, the interned symbol
/// name, the `NodeId` of the defining `Variable` or `Parameter` node, the
/// byte offset of that definition, and a stable dense handle into the owning
/// `ShadowTable`.
///
/// The `id` field is the position of this entry in the sorted entries slice
/// after `derive_shadows` has run. It is assigned by the sort and must not
/// be mutated by callers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ShadowEntry {
    /// Stable dense handle for this entry within its owning `ShadowTable`.
    /// Assigned after sorting; equals `ShadowEntryId(idx)` where `idx` is
    /// the position of this entry in the sorted entries slice.
    pub id: ShadowEntryId,
    /// The function scope that contains this definition.
    pub scope: ScopeId,
    /// The interned symbol name of the variable or parameter.
    pub symbol: StringId,
    /// `NodeId` of the `Variable` or `Parameter` node that introduces the
    /// binding at this offset.
    pub node: NodeId,
    /// Byte offset of the definition site within its source file.
    ///
    /// Entries within the same `(scope, symbol)` chain are sorted by this
    /// field in ascending order; `effective_binding` returns the node of the
    /// last entry whose offset is strictly less than the query offset.
    pub byte_offset: u32,
}

/// Content-addressed shadow lookup table.
///
/// Entries are sorted by `(scope, symbol, byte_offset)`. The `chains` index
/// maps each `(ScopeId, StringId)` key to a half-open range `[start, end)`
/// into `entries`. `effective_binding` searches the relevant range for the
/// innermost definition before a query offset.
///
/// # Serialization
///
/// Only `entries` is persisted. The `chains` index is derived from `entries`
/// on deserialization and cannot drift out of sync. This keeps the V9 snapshot
/// footprint minimal and eliminates the dual-write hazard.
#[derive(Debug, Clone, Default)]
pub struct ShadowTable {
    /// All entries, sorted by `(scope, symbol, byte_offset)`.
    entries: Vec<ShadowEntry>,
    /// Per-`(scope, symbol)` range index mapping a `(ScopeId, StringId)` key
    /// to its `[start, end)` slice in `entries`. **Not** persisted.
    chains: HashMap<(ScopeId, StringId), (u32, u32)>,
}

83impl ShadowTable {
84    /// Creates an empty `ShadowTable`.
85    #[must_use]
86    pub fn new() -> Self {
87        Self::default()
88    }
89
90    /// Returns the total number of shadow entries.
91    #[must_use]
92    pub fn len(&self) -> usize {
93        self.entries.len()
94    }
95
96    /// Returns `true` if the table contains no entries.
97    #[must_use]
98    pub fn is_empty(&self) -> bool {
99        self.entries.is_empty()
100    }
101
102    /// Returns the full sorted entries slice.
103    ///
104    /// Useful for V9 persistence walkers, debug dumps, stats collection, and
105    /// any future consumer that needs to iterate the entire table.
106    #[must_use]
107    pub fn entries(&self) -> &[ShadowEntry] {
108        &self.entries
109    }
110
111    /// Looks up a shadow entry by its dense `ShadowEntryId`.
112    ///
113    /// Returns `None` if `id.0 >= self.len()`. The id is the position of the
114    /// entry in the sorted slice, assigned during `derive_shadows`. It is
115    /// stable for the lifetime of this table instance.
116    ///
117    /// Used by resolution steps that record a shadow entry by id and later
118    /// need to recover the full `ShadowEntry`.
119    #[must_use]
120    pub fn get(&self, id: ShadowEntryId) -> Option<&ShadowEntry> {
121        self.entries.get(id.0 as usize)
122    }
123
124    /// Returns all shadow entries declared in `scope`, sorted by
125    /// `(symbol, byte_offset)`.
126    ///
127    /// Returns an empty `Vec` when no entries belong to `scope`. The returned
128    /// entries span all symbols in the scope, not a single symbol chain.
129    #[must_use]
130    pub fn shadows_in(&self, scope: ScopeId) -> Vec<&ShadowEntry> {
131        self.entries.iter().filter(|e| e.scope == scope).collect()
132    }
133
134    /// Returns the innermost shadow entry for `(scope, symbol)` whose byte
135    /// offset is strictly less than `query_byte_offset`.
136    ///
137    /// Returns `None` when no such entry exists — i.e., the query position is
138    /// before the first definition of `symbol` in `scope`, or the `(scope,
139    /// symbol)` pair has no entries at all.
140    ///
141    /// # Shadow semantics
142    ///
143    /// This implements the Rust/JS/Python shadow semantic: a re-binding at
144    /// offset B is visible only to references at offsets > B. The last
145    /// definition strictly before the query offset wins.
146    #[must_use]
147    pub fn effective_binding(
148        &self,
149        scope: ScopeId,
150        symbol: StringId,
151        query_byte_offset: u32,
152    ) -> Option<NodeId> {
153        let (start, end) = self.chains.get(&(scope, symbol)).copied()?;
154        let slice = &self.entries[start as usize..end as usize];
155        // Reverse scan: the last entry whose byte_offset < query_byte_offset wins.
156        slice
157            .iter()
158            .rev()
159            .find(|e| e.byte_offset < query_byte_offset)
160            .map(|e| e.node)
161    }
162
163    /// Applies `keep` to every entry's `node`, dropping entries whose defining
164    /// `Variable`/`Parameter` node fails the predicate.
165    ///
166    /// After filtering, the `id` of each surviving entry is reassigned to its
167    /// new position in the sorted `entries` slice, and the `chains` range
168    /// index is rebuilt from scratch so `effective_binding` / `shadows_in`
169    /// stay consistent. The relative order of surviving entries is preserved,
170    /// so the table remains sorted by `(scope, symbol, byte_offset)`.
171    ///
172    /// This is the mutation entry point used by the Gate 0c `NodeIdBearing`
173    /// impl (A2 §K row K.A13). Callers that hold the table behind an `Arc`
174    /// must reach it through `Arc::make_mut` before invoking this method.
175    ///
176    /// `#[allow(dead_code)]` mirrors the NodeIdBearing trait itself: Gate 0b
177    /// lands the scaffolding and unit tests, Gate 0c adds the production
178    /// call site in `RebuildGraph::finalize()`.
179    #[allow(dead_code)]
180    pub(crate) fn retain_by_node(&mut self, keep: &dyn Fn(NodeId) -> bool) {
181        self.entries.retain(|entry| keep(entry.node));
182        for (idx, entry) in self.entries.iter_mut().enumerate() {
183            entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
184        }
185        self.rebuild_index();
186    }
187
188    /// Rewrite every `StringId` stored on every entry through `remap`,
189    /// replacing any ID that appears as a key with its canonical value.
190    ///
191    /// Used by Gate 0c's finalize step 1 (StringId canonicalisation).
192    /// `symbol` participates in the `(scope, symbol, byte_offset)` sort
193    /// key AND in the `chains: HashMap<(ScopeId, StringId), _>` index, so
194    /// after rewrite the entries are re-sorted and the `chains` map is
195    /// rebuilt from scratch. A stable sort preserves the prior order
196    /// within each `(scope, symbol, byte_offset)` group (only relevant if
197    /// two entries collide on all three — legal after dedup but rare).
198    ///
199    /// Empty `remap` is a no-op. Callers that hold the table behind an
200    /// `Arc` must reach it through `Arc::make_mut` first.
201    #[allow(dead_code)]
202    pub(crate) fn rewrite_string_ids_through_remap(&mut self, remap: &HashMap<StringId, StringId>) {
203        if remap.is_empty() {
204            return;
205        }
206        let mut changed = false;
207        for entry in &mut self.entries {
208            if let Some(&canon) = remap.get(&entry.symbol) {
209                entry.symbol = canon;
210                changed = true;
211            }
212        }
213        if !changed {
214            return;
215        }
216        self.entries.sort_by(|a, b| {
217            (a.scope, a.symbol, a.byte_offset).cmp(&(b.scope, b.symbol, b.byte_offset))
218        });
219        for (idx, entry) in self.entries.iter_mut().enumerate() {
220            entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
221        }
222        self.rebuild_index();
223    }
224
225    /// Rebuilds the `chains` range index from `self.entries`.
226    ///
227    /// Called after deserialization (where `chains` is not persisted) and
228    /// after constructing a `ShadowTable` from a raw `Vec<ShadowEntry>`. The
229    /// entries must already be sorted by `(scope, symbol, byte_offset)`.
230    fn rebuild_index(&mut self) {
231        debug_assert!(
232            self.entries
233                .windows(2)
234                .all(|w| { (w[0].scope, w[0].symbol) <= (w[1].scope, w[1].symbol) }),
235            "rebuild_index requires entries sorted by (scope, symbol) ascending"
236        );
237        self.chains.clear();
238        let mut cursor = 0u32;
239        while (cursor as usize) < self.entries.len() {
240            let key = (
241                self.entries[cursor as usize].scope,
242                self.entries[cursor as usize].symbol,
243            );
244            let start = cursor;
245            while (cursor as usize) < self.entries.len()
246                && (
247                    self.entries[cursor as usize].scope,
248                    self.entries[cursor as usize].symbol,
249                ) == key
250            {
251                cursor += 1;
252            }
253            self.chains.insert(key, (start, cursor));
254        }
255    }
256}
257
// ---------------------------------------------------------------------------
// Serde: persist only `entries`; rebuild `chains` on deserialize.
// ---------------------------------------------------------------------------

262impl Serialize for ShadowTable {
263    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
264        // Serialize as a newtype wrapping just the entries Vec.
265        self.entries.serialize(serializer)
266    }
267}
268
269impl<'de> Deserialize<'de> for ShadowTable {
270    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
271        let entries = Vec::<ShadowEntry>::deserialize(deserializer)?;
272        let mut table = ShadowTable {
273            entries,
274            chains: HashMap::default(),
275        };
276        table.rebuild_index();
277        Ok(table)
278    }
279}
280
281/// Walks `NodeKind::Variable` and `NodeKind::Parameter` nodes whose enclosing
282/// scope is a `ScopeKind::Function`, groups them by `(scope, symbol)`, sorts
283/// each group by `byte_offset`, assigns stable `ShadowEntryId`s, and builds
284/// the `chains` range index.
285///
286/// Uses `ScopeArena::iter()` to build the node → scope map, avoiding the
287/// generation-1 hardcode. Only `Contains`, `Defines` edges feed into scope
288/// derivation; `derive_shadows` operates on the already-finalized scope arena
289/// and never walks cross-language edges.
290///
291/// Returns an empty `ShadowTable` when no Variable/Parameter nodes belong to
292/// any Function scope.
293///
294/// # Performance
295///
296/// The `edge_index` parameter replaces the per-function `edges_from()` calls
297/// that previously triggered O(E) delta scans, turning per-node edge lookup
298/// from O(E) to O(1).
299pub(crate) fn derive_shadows<G: GraphMutationTarget>(
300    graph: &G,
301    scopes: &ScopeArena,
302    edge_index: &BindingEdgeIndex,
303) -> ShadowTable {
304    // Build NodeId → ScopeId for every scope whose kind is Function.
305    // Reuse the same node_to_scope approach as derive_aliases but then filter
306    // to Function-kind scopes.
307    let node_to_function_scope: HashMap<NodeId, ScopeId> = scopes
308        .iter()
309        .filter(|(_, scope)| scope.kind == ScopeKind::Function)
310        .map(|(id, scope)| (scope.node, id))
311        .collect();
312
313    // For nodes that are contained within a function (i.e., their direct
314    // parent is the function node), we need to know which scope corresponds
315    // to which function node. We already have that in node_to_function_scope
316    // (function_node → scope_id). We need to find which function scope
317    // contains each Variable/Parameter.
318    //
319    // Strategy: for each function node find its `Defines`/`Contains` children
320    // that are Variable/Parameter nodes using the pre-built forward index.
321    // This is correct for single-level nesting (typical for let-bindings and
322    // parameters) and avoids the O(E) delta scan that edges_from() would
323    // trigger.
324
325    let mut raw: Vec<ShadowEntry> = Vec::new();
326
327    for (&func_node, &scope_id) in &node_to_function_scope {
328        // Look up pre-indexed outgoing Defines/Contains edges from the function node.
329        let Some(children) = edge_index.defines_contains_by_source.get(&func_node) else {
330            continue;
331        };
332
333        for (child, edge_kind) in children {
334            if !matches!(edge_kind, EdgeKind::Defines | EdgeKind::Contains) {
335                continue;
336            }
337            let Some(child_entry) = graph.nodes().get(*child) else {
338                continue;
339            };
340            if !matches!(child_entry.kind, NodeKind::Variable | NodeKind::Parameter) {
341                continue;
342            }
343            raw.push(ShadowEntry {
344                // Placeholder id; overwritten after sorting.
345                id: ShadowEntryId(0),
346                scope: scope_id,
347                symbol: child_entry.name,
348                node: *child,
349                byte_offset: child_entry.start_byte,
350            });
351        }
352    }
353
354    if raw.is_empty() {
355        log::debug!("derive_shadows: no Variable/Parameter nodes under Function scopes");
356        return ShadowTable::new();
357    }
358
359    // Sort by (scope, symbol, byte_offset) for stable chain partitioning.
360    raw.sort_by(|a, b| (a.scope, a.symbol, a.byte_offset).cmp(&(b.scope, b.symbol, b.byte_offset)));
361
362    // Assign stable ids: position in the sorted slice IS the ShadowEntryId.
363    for (idx, entry) in raw.iter_mut().enumerate() {
364        entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
365    }
366
367    // Build the per-(scope, symbol) range index via rebuild_index so the
368    // logic lives in exactly one place (deserialization also calls it).
369    let mut table = ShadowTable {
370        entries: raw,
371        chains: HashMap::default(),
372    };
373    table.rebuild_index();
374
375    log::debug!(
376        "derive_shadows: derived {} shadow entries across {} chains",
377        table.len(),
378        table.chains.len(),
379    );
380
381    table
382}
383
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::unified::node::id::NodeId;
    use crate::graph::unified::string::id::StringId;

    // Scope handles in these tests all use generation 1.
    fn make_scope(index: u32) -> ScopeId {
        ScopeId::new(index, 1)
    }

    /// Build a `ShadowTable` directly from a pre-sorted entries slice (for
    /// unit tests that don't want to go through `derive_shadows`).
    ///
    /// Callers must pass entries already sorted by
    /// `(scope, symbol, byte_offset)`; ids are patched to slice positions
    /// before the chain index is rebuilt.
    fn table_from_sorted(mut entries: Vec<ShadowEntry>) -> ShadowTable {
        for (idx, e) in entries.iter_mut().enumerate() {
            e.id = ShadowEntryId(u32::try_from(idx).unwrap());
        }
        let mut table = ShadowTable {
            entries,
            chains: HashMap::default(),
        };
        table.rebuild_index();
        table
    }

    // Convenience constructor for a single entry; the id is a placeholder.
    fn make_entry(scope: ScopeId, symbol: StringId, node: NodeId, byte_offset: u32) -> ShadowEntry {
        ShadowEntry {
            id: ShadowEntryId(0), // patched by table_from_sorted
            scope,
            symbol,
            node,
            byte_offset,
        }
    }

    #[test]
    fn t12_effective_binding_innermost_before_offset() {
        let scope = make_scope(0);
        let symbol = StringId::new(1);
        let entries = vec![
            make_entry(scope, symbol, NodeId::new(1, 1), 10),
            make_entry(scope, symbol, NodeId::new(2, 1), 30),
        ];
        let table = table_from_sorted(entries);

        // Query at offset 40 — both definitions are before, innermost is node 2.
        assert_eq!(
            table.effective_binding(scope, symbol, 40),
            Some(NodeId::new(2, 1)),
            "innermost definition before offset 40 must be node 2 (at offset 30)"
        );
        // Query at offset 20 — only first definition is before.
        assert_eq!(
            table.effective_binding(scope, symbol, 20),
            Some(NodeId::new(1, 1)),
            "only node 1 (offset 10) is before offset 20"
        );
    }

    #[test]
    fn t13_effective_binding_before_first_returns_none() {
        let scope = make_scope(0);
        let symbol = StringId::new(1);
        let entries = vec![make_entry(scope, symbol, NodeId::new(1, 1), 50)];
        let table = table_from_sorted(entries);

        // Query at offset 10 — before the first definition.
        assert_eq!(
            table.effective_binding(scope, symbol, 10),
            None,
            "query before first definition must return None"
        );
    }

    #[test]
    fn empty_table_behavior() {
        // All accessors must behave sensibly on a freshly-constructed table.
        let table = ShadowTable::new();
        assert!(table.is_empty());
        assert_eq!(table.len(), 0);
        let scope = make_scope(0);
        let symbol = StringId::new(1);
        assert!(table.shadows_in(scope).is_empty());
        assert_eq!(table.effective_binding(scope, symbol, 100), None);
        assert!(table.get(ShadowEntryId(0)).is_none());
    }

    #[test]
    fn get_by_shadow_entry_id_round_trips() {
        let scope = make_scope(0);
        let sym_a = StringId::new(1);
        let sym_b = StringId::new(2);
        let entries = vec![
            make_entry(scope, sym_a, NodeId::new(1, 1), 10),
            make_entry(scope, sym_b, NodeId::new(2, 1), 20),
        ];
        let table = table_from_sorted(entries);

        // Ids are slice positions, so get(id) must return the entry whose
        // `id` field equals the queried id.
        let e0 = table.get(ShadowEntryId(0)).expect("id 0 must exist");
        let e1 = table.get(ShadowEntryId(1)).expect("id 1 must exist");
        assert_eq!(e0.id, ShadowEntryId(0));
        assert_eq!(e0.symbol, sym_a);
        assert_eq!(e1.id, ShadowEntryId(1));
        assert_eq!(e1.symbol, sym_b);
        assert!(table.get(ShadowEntryId(2)).is_none());
    }

    #[test]
    fn entries_accessor_returns_full_slice() {
        let scope = make_scope(0);
        let symbol = StringId::new(1);
        let entries = vec![
            make_entry(scope, symbol, NodeId::new(1, 1), 5),
            make_entry(scope, symbol, NodeId::new(2, 1), 15),
            make_entry(scope, symbol, NodeId::new(3, 1), 25),
        ];
        let table = table_from_sorted(entries);
        assert_eq!(table.entries().len(), 3);
    }

    #[test]
    fn scope_partitioning_no_collision() {
        // Two scopes each declaring the same symbol name must not collide.
        let s_a = make_scope(0);
        let s_b = make_scope(1);
        let sym = StringId::new(42);

        let entries = vec![
            make_entry(s_a, sym, NodeId::new(10, 1), 5),
            make_entry(s_a, sym, NodeId::new(11, 1), 20),
            make_entry(s_b, sym, NodeId::new(20, 1), 5),
            make_entry(s_b, sym, NodeId::new(21, 1), 30),
        ];
        let table = table_from_sorted(entries);

        // s_a at offset 25 → should return node 11 (offset 20 < 25).
        assert_eq!(
            table.effective_binding(s_a, sym, 25),
            Some(NodeId::new(11, 1)),
            "s_a effective binding at 25 must be node 11"
        );
        // s_b at offset 25 → should return node 20 (offset 5 < 25, 30 ≥ 25).
        assert_eq!(
            table.effective_binding(s_b, sym, 25),
            Some(NodeId::new(20, 1)),
            "s_b effective binding at 25 must be node 20"
        );
        // s_a and s_b resolve to different nodes for the same symbol and offset
        assert_ne!(
            table.effective_binding(s_a, sym, 25),
            table.effective_binding(s_b, sym, 25),
            "same symbol in different scopes must resolve independently"
        );
    }

    #[test]
    fn postcard_round_trip_preserves_entries_and_rebuilds_chains() {
        let s_a = make_scope(1);
        let s_b = make_scope(2);
        let sym_x = StringId::new(10);
        let sym_y = StringId::new(11);

        let entries = vec![
            make_entry(s_a, sym_x, NodeId::new(1, 1), 10),
            make_entry(s_a, sym_x, NodeId::new(2, 1), 30),
            make_entry(s_b, sym_y, NodeId::new(3, 1), 5),
        ];
        let original = table_from_sorted(entries);

        // Serialize → deserialize; only `entries` travels over the wire.
        let bytes = postcard::to_allocvec(&original).expect("serialize");
        let restored: ShadowTable = postcard::from_bytes(&bytes).expect("deserialize");

        assert_eq!(
            restored.entries().len(),
            original.entries().len(),
            "entry count must survive round-trip"
        );
        assert_eq!(
            restored.entries(),
            original.entries(),
            "entries must be byte-equal after round-trip"
        );

        // Verify chains were rebuilt correctly.
        assert_eq!(
            restored.effective_binding(s_a, sym_x, 40),
            Some(NodeId::new(2, 1)),
            "effective_binding on restored table must find node 2 for s_a/sym_x at 40"
        );
        assert_eq!(
            restored.effective_binding(s_a, sym_x, 20),
            Some(NodeId::new(1, 1)),
            "effective_binding on restored table must find node 1 for s_a/sym_x at 20"
        );
        assert_eq!(
            restored.effective_binding(s_b, sym_y, 10),
            Some(NodeId::new(3, 1)),
            "effective_binding on restored table must find node 3 for s_b/sym_y at 10"
        );
        assert_eq!(
            restored.effective_binding(s_b, sym_y, 3),
            None,
            "query before first def must return None after round-trip"
        );
    }
}