sqry_core/graph/unified/bind/shadow.rs
1//! Shadow derivation for the Phase 2 binding plane.
2//!
3//! A `ShadowEntry` names one Variable/Parameter node inside a function-scope
4//! chain, ordered by its byte offset. Multiple entries for the same
5//! `(scope, symbol)` pair form a shadow chain: the innermost definition
6//! whose byte offset is strictly less than a query offset is the effective
7//! binding at that offset.
8
9use std::collections::HashMap;
10
11use serde::{Deserialize, Serialize};
12
13use crate::graph::unified::build::phase4e_binding::BindingEdgeIndex;
14use crate::graph::unified::edge::kind::EdgeKind;
15use crate::graph::unified::mutation_target::GraphMutationTarget;
16use crate::graph::unified::node::id::NodeId;
17use crate::graph::unified::node::kind::NodeKind;
18use crate::graph::unified::string::id::StringId;
19
20use super::scope::{ScopeArena, ScopeId, ScopeKind};
21
/// Dense handle into a `ShadowTable`.
///
/// The id is the position of the entry in the sorted `entries` slice, so
/// `ShadowEntryId(k)` is valid as long as the table hasn't been replaced.
/// `ShadowTable::get(id)` performs a single bounds-checked slice index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ShadowEntryId(pub u32);
29
/// One shadow-chain entry.
///
/// Records the function scope that owns the variable, the interned symbol
/// name, the `NodeId` of the defining `Variable` or `Parameter` node, the
/// byte offset of that definition, and a stable dense handle into the owning
/// `ShadowTable`.
///
/// The `id` field is the position of this entry in the sorted entries slice
/// after `derive_shadows` has run. It is assigned by the sort and must not
/// be mutated by callers.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ShadowEntry {
    /// Stable dense handle for this entry within its owning `ShadowTable`.
    /// Assigned after sorting; equals `ShadowEntryId(idx)` where `idx` is
    /// the position of this entry in the sorted entries slice.
    pub id: ShadowEntryId,
    /// The function scope that contains this definition.
    pub scope: ScopeId,
    /// The interned symbol name of the variable or parameter.
    pub symbol: StringId,
    /// `NodeId` of the `Variable` or `Parameter` node that introduces the
    /// binding at this offset.
    pub node: NodeId,
    /// Byte offset of the definition site within its source file.
    ///
    /// Entries within the same `(scope, symbol)` chain are sorted by this
    /// field in ascending order; `effective_binding` exploits that order to
    /// find the last entry whose offset is strictly less than the query
    /// offset.
    pub byte_offset: u32,
}
61
/// Content-addressed shadow lookup table.
///
/// Entries are sorted by `(scope, symbol, byte_offset)`. The `chains` index
/// maps each `(ScopeId, StringId)` key to a half-open range `[start, end)`
/// into `entries`. `effective_binding` searches the relevant range to find
/// the innermost definition before a query offset.
///
/// # Serialization
///
/// Only `entries` is persisted. The `chains` index is derived from `entries`
/// on deserialization and cannot drift out of sync. This keeps the V9 snapshot
/// footprint minimal and eliminates the dual-write hazard.
#[derive(Debug, Clone, Default)]
pub struct ShadowTable {
    /// All entries, sorted by `(scope, symbol, byte_offset)`.
    entries: Vec<ShadowEntry>,
    /// Per-`(scope, symbol)` range index mapping a `(ScopeId, StringId)` key
    /// to its `[start, end)` slice in `entries`. **Not** persisted.
    chains: HashMap<(ScopeId, StringId), (u32, u32)>,
}
82
83impl ShadowTable {
84 /// Creates an empty `ShadowTable`.
85 #[must_use]
86 pub fn new() -> Self {
87 Self::default()
88 }
89
90 /// Returns the total number of shadow entries.
91 #[must_use]
92 pub fn len(&self) -> usize {
93 self.entries.len()
94 }
95
96 /// Returns `true` if the table contains no entries.
97 #[must_use]
98 pub fn is_empty(&self) -> bool {
99 self.entries.is_empty()
100 }
101
102 /// Returns the full sorted entries slice.
103 ///
104 /// Useful for V9 persistence walkers, debug dumps, stats collection, and
105 /// any future consumer that needs to iterate the entire table.
106 #[must_use]
107 pub fn entries(&self) -> &[ShadowEntry] {
108 &self.entries
109 }
110
111 /// Looks up a shadow entry by its dense `ShadowEntryId`.
112 ///
113 /// Returns `None` if `id.0 >= self.len()`. The id is the position of the
114 /// entry in the sorted slice, assigned during `derive_shadows`. It is
115 /// stable for the lifetime of this table instance.
116 ///
117 /// Used by resolution steps that record a shadow entry by id and later
118 /// need to recover the full `ShadowEntry`.
119 #[must_use]
120 pub fn get(&self, id: ShadowEntryId) -> Option<&ShadowEntry> {
121 self.entries.get(id.0 as usize)
122 }
123
124 /// Returns all shadow entries declared in `scope`, sorted by
125 /// `(symbol, byte_offset)`.
126 ///
127 /// Returns an empty `Vec` when no entries belong to `scope`. The returned
128 /// entries span all symbols in the scope, not a single symbol chain.
129 #[must_use]
130 pub fn shadows_in(&self, scope: ScopeId) -> Vec<&ShadowEntry> {
131 self.entries.iter().filter(|e| e.scope == scope).collect()
132 }
133
134 /// Returns the innermost shadow entry for `(scope, symbol)` whose byte
135 /// offset is strictly less than `query_byte_offset`.
136 ///
137 /// Returns `None` when no such entry exists — i.e., the query position is
138 /// before the first definition of `symbol` in `scope`, or the `(scope,
139 /// symbol)` pair has no entries at all.
140 ///
141 /// # Shadow semantics
142 ///
143 /// This implements the Rust/JS/Python shadow semantic: a re-binding at
144 /// offset B is visible only to references at offsets > B. The last
145 /// definition strictly before the query offset wins.
146 #[must_use]
147 pub fn effective_binding(
148 &self,
149 scope: ScopeId,
150 symbol: StringId,
151 query_byte_offset: u32,
152 ) -> Option<NodeId> {
153 let (start, end) = self.chains.get(&(scope, symbol)).copied()?;
154 let slice = &self.entries[start as usize..end as usize];
155 // Reverse scan: the last entry whose byte_offset < query_byte_offset wins.
156 slice
157 .iter()
158 .rev()
159 .find(|e| e.byte_offset < query_byte_offset)
160 .map(|e| e.node)
161 }
162
163 /// Applies `keep` to every entry's `node`, dropping entries whose defining
164 /// `Variable`/`Parameter` node fails the predicate.
165 ///
166 /// After filtering, the `id` of each surviving entry is reassigned to its
167 /// new position in the sorted `entries` slice, and the `chains` range
168 /// index is rebuilt from scratch so `effective_binding` / `shadows_in`
169 /// stay consistent. The relative order of surviving entries is preserved,
170 /// so the table remains sorted by `(scope, symbol, byte_offset)`.
171 ///
172 /// This is the mutation entry point used by the Gate 0c `NodeIdBearing`
173 /// impl (A2 §K row K.A13). Callers that hold the table behind an `Arc`
174 /// must reach it through `Arc::make_mut` before invoking this method.
175 ///
176 /// `#[allow(dead_code)]` mirrors the NodeIdBearing trait itself: Gate 0b
177 /// lands the scaffolding and unit tests, Gate 0c adds the production
178 /// call site in `RebuildGraph::finalize()`.
179 #[allow(dead_code)]
180 pub(crate) fn retain_by_node(&mut self, keep: &dyn Fn(NodeId) -> bool) {
181 self.entries.retain(|entry| keep(entry.node));
182 for (idx, entry) in self.entries.iter_mut().enumerate() {
183 entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
184 }
185 self.rebuild_index();
186 }
187
188 /// Rewrite every `StringId` stored on every entry through `remap`,
189 /// replacing any ID that appears as a key with its canonical value.
190 ///
191 /// Used by Gate 0c's finalize step 1 (StringId canonicalisation).
192 /// `symbol` participates in the `(scope, symbol, byte_offset)` sort
193 /// key AND in the `chains: HashMap<(ScopeId, StringId), _>` index, so
194 /// after rewrite the entries are re-sorted and the `chains` map is
195 /// rebuilt from scratch. A stable sort preserves the prior order
196 /// within each `(scope, symbol, byte_offset)` group (only relevant if
197 /// two entries collide on all three — legal after dedup but rare).
198 ///
199 /// Empty `remap` is a no-op. Callers that hold the table behind an
200 /// `Arc` must reach it through `Arc::make_mut` first.
201 #[allow(dead_code)]
202 pub(crate) fn rewrite_string_ids_through_remap(&mut self, remap: &HashMap<StringId, StringId>) {
203 if remap.is_empty() {
204 return;
205 }
206 let mut changed = false;
207 for entry in &mut self.entries {
208 if let Some(&canon) = remap.get(&entry.symbol) {
209 entry.symbol = canon;
210 changed = true;
211 }
212 }
213 if !changed {
214 return;
215 }
216 self.entries.sort_by(|a, b| {
217 (a.scope, a.symbol, a.byte_offset).cmp(&(b.scope, b.symbol, b.byte_offset))
218 });
219 for (idx, entry) in self.entries.iter_mut().enumerate() {
220 entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
221 }
222 self.rebuild_index();
223 }
224
225 /// Rebuilds the `chains` range index from `self.entries`.
226 ///
227 /// Called after deserialization (where `chains` is not persisted) and
228 /// after constructing a `ShadowTable` from a raw `Vec<ShadowEntry>`. The
229 /// entries must already be sorted by `(scope, symbol, byte_offset)`.
230 fn rebuild_index(&mut self) {
231 debug_assert!(
232 self.entries
233 .windows(2)
234 .all(|w| { (w[0].scope, w[0].symbol) <= (w[1].scope, w[1].symbol) }),
235 "rebuild_index requires entries sorted by (scope, symbol) ascending"
236 );
237 self.chains.clear();
238 let mut cursor = 0u32;
239 while (cursor as usize) < self.entries.len() {
240 let key = (
241 self.entries[cursor as usize].scope,
242 self.entries[cursor as usize].symbol,
243 );
244 let start = cursor;
245 while (cursor as usize) < self.entries.len()
246 && (
247 self.entries[cursor as usize].scope,
248 self.entries[cursor as usize].symbol,
249 ) == key
250 {
251 cursor += 1;
252 }
253 self.chains.insert(key, (start, cursor));
254 }
255 }
256}
257
258// ---------------------------------------------------------------------------
259// Serde: persist only `entries`; rebuild `chains` on deserialize.
260// ---------------------------------------------------------------------------
261
impl Serialize for ShadowTable {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        // Persist only the entries Vec (serialized as a bare sequence).
        // `chains` is derived state and is rebuilt on deserialization, so
        // writing it would only add snapshot bytes and a drift hazard.
        self.entries.serialize(serializer)
    }
}
268
269impl<'de> Deserialize<'de> for ShadowTable {
270 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
271 let entries = Vec::<ShadowEntry>::deserialize(deserializer)?;
272 let mut table = ShadowTable {
273 entries,
274 chains: HashMap::default(),
275 };
276 table.rebuild_index();
277 Ok(table)
278 }
279}
280
281/// Walks `NodeKind::Variable` and `NodeKind::Parameter` nodes whose enclosing
282/// scope is a `ScopeKind::Function`, groups them by `(scope, symbol)`, sorts
283/// each group by `byte_offset`, assigns stable `ShadowEntryId`s, and builds
284/// the `chains` range index.
285///
286/// Uses `ScopeArena::iter()` to build the node → scope map, avoiding the
287/// generation-1 hardcode. Only `Contains`, `Defines` edges feed into scope
288/// derivation; `derive_shadows` operates on the already-finalized scope arena
289/// and never walks cross-language edges.
290///
291/// Returns an empty `ShadowTable` when no Variable/Parameter nodes belong to
292/// any Function scope.
293///
294/// # Performance
295///
296/// The `edge_index` parameter replaces the per-function `edges_from()` calls
297/// that previously triggered O(E) delta scans, turning per-node edge lookup
298/// from O(E) to O(1).
299pub(crate) fn derive_shadows<G: GraphMutationTarget>(
300 graph: &G,
301 scopes: &ScopeArena,
302 edge_index: &BindingEdgeIndex,
303) -> ShadowTable {
304 // Build NodeId → ScopeId for every scope whose kind is Function.
305 // Reuse the same node_to_scope approach as derive_aliases but then filter
306 // to Function-kind scopes.
307 let node_to_function_scope: HashMap<NodeId, ScopeId> = scopes
308 .iter()
309 .filter(|(_, scope)| scope.kind == ScopeKind::Function)
310 .map(|(id, scope)| (scope.node, id))
311 .collect();
312
313 // For nodes that are contained within a function (i.e., their direct
314 // parent is the function node), we need to know which scope corresponds
315 // to which function node. We already have that in node_to_function_scope
316 // (function_node → scope_id). We need to find which function scope
317 // contains each Variable/Parameter.
318 //
319 // Strategy: for each function node find its `Defines`/`Contains` children
320 // that are Variable/Parameter nodes using the pre-built forward index.
321 // This is correct for single-level nesting (typical for let-bindings and
322 // parameters) and avoids the O(E) delta scan that edges_from() would
323 // trigger.
324
325 let mut raw: Vec<ShadowEntry> = Vec::new();
326
327 for (&func_node, &scope_id) in &node_to_function_scope {
328 // Look up pre-indexed outgoing Defines/Contains edges from the function node.
329 let Some(children) = edge_index.defines_contains_by_source.get(&func_node) else {
330 continue;
331 };
332
333 for (child, edge_kind) in children {
334 if !matches!(edge_kind, EdgeKind::Defines | EdgeKind::Contains) {
335 continue;
336 }
337 let Some(child_entry) = graph.nodes().get(*child) else {
338 continue;
339 };
340 if !matches!(child_entry.kind, NodeKind::Variable | NodeKind::Parameter) {
341 continue;
342 }
343 raw.push(ShadowEntry {
344 // Placeholder id; overwritten after sorting.
345 id: ShadowEntryId(0),
346 scope: scope_id,
347 symbol: child_entry.name,
348 node: *child,
349 byte_offset: child_entry.start_byte,
350 });
351 }
352 }
353
354 if raw.is_empty() {
355 log::debug!("derive_shadows: no Variable/Parameter nodes under Function scopes");
356 return ShadowTable::new();
357 }
358
359 // Sort by (scope, symbol, byte_offset) for stable chain partitioning.
360 raw.sort_by(|a, b| (a.scope, a.symbol, a.byte_offset).cmp(&(b.scope, b.symbol, b.byte_offset)));
361
362 // Assign stable ids: position in the sorted slice IS the ShadowEntryId.
363 for (idx, entry) in raw.iter_mut().enumerate() {
364 entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
365 }
366
367 // Build the per-(scope, symbol) range index via rebuild_index so the
368 // logic lives in exactly one place (deserialization also calls it).
369 let mut table = ShadowTable {
370 entries: raw,
371 chains: HashMap::default(),
372 };
373 table.rebuild_index();
374
375 log::debug!(
376 "derive_shadows: derived {} shadow entries across {} chains",
377 table.len(),
378 table.chains.len(),
379 );
380
381 table
382}
383
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::unified::node::id::NodeId;
    use crate::graph::unified::string::id::StringId;

    // All test scopes use generation 1; only the index varies.
    fn make_scope(index: u32) -> ScopeId {
        ScopeId::new(index, 1)
    }

    /// Build a `ShadowTable` directly from a pre-sorted entries slice (for
    /// unit tests that don't want to go through `derive_shadows`).
    fn table_from_sorted(mut entries: Vec<ShadowEntry>) -> ShadowTable {
        // Mirror derive_shadows: position in the sorted slice IS the id.
        for (idx, e) in entries.iter_mut().enumerate() {
            e.id = ShadowEntryId(u32::try_from(idx).unwrap());
        }
        let mut table = ShadowTable {
            entries,
            chains: HashMap::default(),
        };
        table.rebuild_index();
        table
    }

    // Convenience constructor; `id` is a placeholder until table_from_sorted
    // patches it.
    fn make_entry(scope: ScopeId, symbol: StringId, node: NodeId, byte_offset: u32) -> ShadowEntry {
        ShadowEntry {
            id: ShadowEntryId(0), // patched by table_from_sorted
            scope,
            symbol,
            node,
            byte_offset,
        }
    }

    #[test]
    fn t12_effective_binding_innermost_before_offset() {
        let scope = make_scope(0);
        let symbol = StringId::new(1);
        let entries = vec![
            make_entry(scope, symbol, NodeId::new(1, 1), 10),
            make_entry(scope, symbol, NodeId::new(2, 1), 30),
        ];
        let table = table_from_sorted(entries);

        // Query at offset 40 — both definitions are before, innermost is node 2.
        assert_eq!(
            table.effective_binding(scope, symbol, 40),
            Some(NodeId::new(2, 1)),
            "innermost definition before offset 40 must be node 2 (at offset 30)"
        );
        // Query at offset 20 — only first definition is before.
        assert_eq!(
            table.effective_binding(scope, symbol, 20),
            Some(NodeId::new(1, 1)),
            "only node 1 (offset 10) is before offset 20"
        );
    }

    #[test]
    fn t13_effective_binding_before_first_returns_none() {
        let scope = make_scope(0);
        let symbol = StringId::new(1);
        let entries = vec![make_entry(scope, symbol, NodeId::new(1, 1), 50)];
        let table = table_from_sorted(entries);

        // Query at offset 10 — before the first definition.
        assert_eq!(
            table.effective_binding(scope, symbol, 10),
            None,
            "query before first definition must return None"
        );
    }

    #[test]
    fn empty_table_behavior() {
        // Every accessor must be well-defined on an empty table.
        let table = ShadowTable::new();
        assert!(table.is_empty());
        assert_eq!(table.len(), 0);
        let scope = make_scope(0);
        let symbol = StringId::new(1);
        assert!(table.shadows_in(scope).is_empty());
        assert_eq!(table.effective_binding(scope, symbol, 100), None);
        assert!(table.get(ShadowEntryId(0)).is_none());
    }

    #[test]
    fn get_by_shadow_entry_id_round_trips() {
        let scope = make_scope(0);
        let sym_a = StringId::new(1);
        let sym_b = StringId::new(2);
        let entries = vec![
            make_entry(scope, sym_a, NodeId::new(1, 1), 10),
            make_entry(scope, sym_b, NodeId::new(2, 1), 20),
        ];
        let table = table_from_sorted(entries);

        // get(id) must return the entry whose `id` field equals the lookup key.
        let e0 = table.get(ShadowEntryId(0)).expect("id 0 must exist");
        let e1 = table.get(ShadowEntryId(1)).expect("id 1 must exist");
        assert_eq!(e0.id, ShadowEntryId(0));
        assert_eq!(e0.symbol, sym_a);
        assert_eq!(e1.id, ShadowEntryId(1));
        assert_eq!(e1.symbol, sym_b);
        assert!(table.get(ShadowEntryId(2)).is_none());
    }

    #[test]
    fn entries_accessor_returns_full_slice() {
        let scope = make_scope(0);
        let symbol = StringId::new(1);
        let entries = vec![
            make_entry(scope, symbol, NodeId::new(1, 1), 5),
            make_entry(scope, symbol, NodeId::new(2, 1), 15),
            make_entry(scope, symbol, NodeId::new(3, 1), 25),
        ];
        let table = table_from_sorted(entries);
        assert_eq!(table.entries().len(), 3);
    }

    #[test]
    fn scope_partitioning_no_collision() {
        // Two scopes each declaring the same symbol name must not collide.
        let s_a = make_scope(0);
        let s_b = make_scope(1);
        let sym = StringId::new(42);

        let entries = vec![
            make_entry(s_a, sym, NodeId::new(10, 1), 5),
            make_entry(s_a, sym, NodeId::new(11, 1), 20),
            make_entry(s_b, sym, NodeId::new(20, 1), 5),
            make_entry(s_b, sym, NodeId::new(21, 1), 30),
        ];
        let table = table_from_sorted(entries);

        // s_a at offset 25 → should return node 11 (offset 20 < 25).
        assert_eq!(
            table.effective_binding(s_a, sym, 25),
            Some(NodeId::new(11, 1)),
            "s_a effective binding at 25 must be node 11"
        );
        // s_b at offset 25 → should return node 20 (offset 5 < 25, 30 ≥ 25).
        assert_eq!(
            table.effective_binding(s_b, sym, 25),
            Some(NodeId::new(20, 1)),
            "s_b effective binding at 25 must be node 20"
        );
        // s_a and s_b resolve to different nodes for the same symbol and offset
        assert_ne!(
            table.effective_binding(s_a, sym, 25),
            table.effective_binding(s_b, sym, 25),
            "same symbol in different scopes must resolve independently"
        );
    }

    #[test]
    fn postcard_round_trip_preserves_entries_and_rebuilds_chains() {
        let s_a = make_scope(1);
        let s_b = make_scope(2);
        let sym_x = StringId::new(10);
        let sym_y = StringId::new(11);

        let entries = vec![
            make_entry(s_a, sym_x, NodeId::new(1, 1), 10),
            make_entry(s_a, sym_x, NodeId::new(2, 1), 30),
            make_entry(s_b, sym_y, NodeId::new(3, 1), 5),
        ];
        let original = table_from_sorted(entries);

        // Serialize → deserialize through postcard (the V9 snapshot codec).
        let bytes = postcard::to_allocvec(&original).expect("serialize");
        let restored: ShadowTable = postcard::from_bytes(&bytes).expect("deserialize");

        assert_eq!(
            restored.entries().len(),
            original.entries().len(),
            "entry count must survive round-trip"
        );
        assert_eq!(
            restored.entries(),
            original.entries(),
            "entries must be byte-equal after round-trip"
        );

        // Verify chains were rebuilt correctly.
        assert_eq!(
            restored.effective_binding(s_a, sym_x, 40),
            Some(NodeId::new(2, 1)),
            "effective_binding on restored table must find node 2 for s_a/sym_x at 40"
        );
        assert_eq!(
            restored.effective_binding(s_a, sym_x, 20),
            Some(NodeId::new(1, 1)),
            "effective_binding on restored table must find node 1 for s_a/sym_x at 20"
        );
        assert_eq!(
            restored.effective_binding(s_b, sym_y, 10),
            Some(NodeId::new(3, 1)),
            "effective_binding on restored table must find node 3 for s_b/sym_y at 10"
        );
        assert_eq!(
            restored.effective_binding(s_b, sym_y, 3),
            None,
            "query before first def must return None after round-trip"
        );
    }
}
587}