sqry_core/graph/unified/bind/shadow.rs
1//! Shadow derivation for the Phase 2 binding plane.
2//!
3//! A `ShadowEntry` names one Variable/Parameter node inside a function-scope
4//! chain, ordered by its byte offset. Multiple entries for the same
5//! `(scope, symbol)` pair form a shadow chain: the innermost definition
6//! whose byte offset is strictly less than a query offset is the effective
7//! binding at that offset.
8
9use std::collections::HashMap;
10
11use serde::{Deserialize, Serialize};
12
13use crate::graph::unified::build::phase4e_binding::BindingEdgeIndex;
14use crate::graph::unified::edge::kind::EdgeKind;
15use crate::graph::unified::mutation_target::GraphMutationTarget;
16use crate::graph::unified::node::id::NodeId;
17use crate::graph::unified::node::kind::NodeKind;
18use crate::graph::unified::string::id::StringId;
19
20use super::scope::{ScopeArena, ScopeId, ScopeKind};
21
22/// Dense handle into a `ShadowTable`.
23///
24/// The id is the position of the entry in the sorted `entries` slice, so
25/// `ShadowEntryId(k)` is valid as long as the table hasn't been replaced.
26/// `ShadowTable::get(id)` performs a single bounds-checked slice index.
27#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
28pub struct ShadowEntryId(pub u32);
29
30/// One shadow-chain entry.
31///
32/// Records the function scope that owns the variable, the interned symbol
33/// name, the `NodeId` of the defining `Variable` or `Parameter` node, the
34/// byte offset of that definition, and a stable dense handle into the owning
35/// `ShadowTable`.
36///
37/// The `id` field is the position of this entry in the sorted entries slice
38/// after `derive_shadows` has run. It is assigned by the sort and must not
39/// be mutated by callers.
40#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
41pub struct ShadowEntry {
42 /// Stable dense handle for this entry within its owning `ShadowTable`.
43 /// Assigned after sorting; equals `ShadowEntryId(idx)` where `idx` is
44 /// the position of this entry in the sorted entries slice.
45 pub id: ShadowEntryId,
46 /// The function scope that contains this definition.
47 pub scope: ScopeId,
48 /// The interned symbol name of the variable or parameter.
49 pub symbol: StringId,
50 /// `NodeId` of the `Variable` or `Parameter` node that introduces the
51 /// binding at this offset.
52 pub node: NodeId,
53 /// Byte offset of the definition site within its source file.
54 ///
55 /// Entries within the same `(scope, symbol)` chain are sorted by this
56 /// field in ascending order; `effective_binding` performs a reverse-scan
57 /// to find the last entry whose offset is strictly less than the query
58 /// offset.
59 pub byte_offset: u32,
60}
61
62/// Content-addressed shadow lookup table.
63///
64/// Entries are sorted by `(scope, symbol, byte_offset)`. The `chains` index
65/// maps each `(ScopeId, StringId)` key to a half-open range `[start, end)`
66/// into `entries`. `effective_binding` does a backward linear scan through
67/// the relevant range to find the innermost definition before a query offset.
68///
69/// # Serialization
70///
71/// Only `entries` is persisted. The `chains` index is derived from `entries`
72/// on deserialization and cannot drift out of sync. This keeps the V9 snapshot
73/// footprint minimal and eliminates the dual-write hazard.
74#[derive(Debug, Clone, Default)]
75pub struct ShadowTable {
76 /// All entries, sorted by `(scope, symbol, byte_offset)`.
77 entries: Vec<ShadowEntry>,
78 /// Per-`(scope, symbol)` range index mapping a `(ScopeId, StringId)` key
79 /// to its `[start, end)` slice in `entries`. **Not** persisted.
80 chains: HashMap<(ScopeId, StringId), (u32, u32)>,
81}
82
83impl ShadowTable {
84 /// Creates an empty `ShadowTable`.
85 #[must_use]
86 pub fn new() -> Self {
87 Self::default()
88 }
89
90 /// Returns the total number of shadow entries.
91 #[must_use]
92 pub fn len(&self) -> usize {
93 self.entries.len()
94 }
95
96 /// Returns `true` if the table contains no entries.
97 #[must_use]
98 pub fn is_empty(&self) -> bool {
99 self.entries.is_empty()
100 }
101
102 /// Returns the full sorted entries slice.
103 ///
104 /// Useful for V9 persistence walkers, debug dumps, stats collection, and
105 /// any future consumer that needs to iterate the entire table.
106 #[must_use]
107 pub fn entries(&self) -> &[ShadowEntry] {
108 &self.entries
109 }
110
111 /// Looks up a shadow entry by its dense `ShadowEntryId`.
112 ///
113 /// Returns `None` if `id.0 >= self.len()`. The id is the position of the
114 /// entry in the sorted slice, assigned during `derive_shadows`. It is
115 /// stable for the lifetime of this table instance.
116 ///
117 /// Used by resolution steps that record a shadow entry by id and later
118 /// need to recover the full `ShadowEntry`.
119 #[must_use]
120 pub fn get(&self, id: ShadowEntryId) -> Option<&ShadowEntry> {
121 self.entries.get(id.0 as usize)
122 }
123
124 /// Returns all shadow entries declared in `scope`, sorted by
125 /// `(symbol, byte_offset)`.
126 ///
127 /// Returns an empty `Vec` when no entries belong to `scope`. The returned
128 /// entries span all symbols in the scope, not a single symbol chain.
129 #[must_use]
130 pub fn shadows_in(&self, scope: ScopeId) -> Vec<&ShadowEntry> {
131 self.entries.iter().filter(|e| e.scope == scope).collect()
132 }
133
134 /// Returns the innermost shadow entry for `(scope, symbol)` whose byte
135 /// offset is strictly less than `query_byte_offset`.
136 ///
137 /// Returns `None` when no such entry exists — i.e., the query position is
138 /// before the first definition of `symbol` in `scope`, or the `(scope,
139 /// symbol)` pair has no entries at all.
140 ///
141 /// # Shadow semantics
142 ///
143 /// This implements the Rust/JS/Python shadow semantic: a re-binding at
144 /// offset B is visible only to references at offsets > B. The last
145 /// definition strictly before the query offset wins.
146 #[must_use]
147 pub fn effective_binding(
148 &self,
149 scope: ScopeId,
150 symbol: StringId,
151 query_byte_offset: u32,
152 ) -> Option<NodeId> {
153 let (start, end) = self.chains.get(&(scope, symbol)).copied()?;
154 let slice = &self.entries[start as usize..end as usize];
155 // Reverse scan: the last entry whose byte_offset < query_byte_offset wins.
156 slice
157 .iter()
158 .rev()
159 .find(|e| e.byte_offset < query_byte_offset)
160 .map(|e| e.node)
161 }
162
163 /// Applies `keep` to every entry's `node`, dropping entries whose defining
164 /// `Variable`/`Parameter` node fails the predicate.
165 ///
166 /// After filtering, the `id` of each surviving entry is reassigned to its
167 /// new position in the sorted `entries` slice, and the `chains` range
168 /// index is rebuilt from scratch so `effective_binding` / `shadows_in`
169 /// stay consistent. The relative order of surviving entries is preserved,
170 /// so the table remains sorted by `(scope, symbol, byte_offset)`.
171 ///
172 /// This is the mutation entry point used by the Gate 0c `NodeIdBearing`
173 /// impl (A2 §K row K.A13). Callers that hold the table behind an `Arc`
174 /// must reach it through `Arc::make_mut` before invoking this method.
175 ///
176 /// Live in the default build: the consumer is `RebuildGraph::finalize()`
177 /// via the `retain_nodes` impl, reached from the ungated public
178 /// `build::incremental::incremental_rebuild` -> `finalize` path (the
179 /// `rebuild::coverage` unit tests exercise it too).
180 pub(crate) fn retain_by_node(&mut self, keep: &dyn Fn(NodeId) -> bool) {
181 self.entries.retain(|entry| keep(entry.node));
182 for (idx, entry) in self.entries.iter_mut().enumerate() {
183 entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
184 }
185 self.rebuild_index();
186 }
187
188 /// Rewrite every `StringId` stored on every entry through `remap`,
189 /// replacing any ID that appears as a key with its canonical value.
190 ///
191 /// Used by Gate 0c's finalize step 1 (`StringId` canonicalisation).
192 /// `symbol` participates in the `(scope, symbol, byte_offset)` sort
193 /// key AND in the `chains: HashMap<(ScopeId, StringId), _>` index, so
194 /// after rewrite the entries are re-sorted and the `chains` map is
195 /// rebuilt from scratch. A stable sort preserves the prior order
196 /// within each `(scope, symbol, byte_offset)` group (only relevant if
197 /// two entries collide on all three — legal after dedup but rare).
198 ///
199 /// Empty `remap` is a no-op. Callers that hold the table behind an
200 /// `Arc` must reach it through `Arc::make_mut` first.
201 ///
202 /// Live in the default build: the consumer is `RebuildGraph::finalize()`
203 /// step 1, reached from the ungated public
204 /// `build::incremental::incremental_rebuild` -> `finalize` path.
205 pub(crate) fn rewrite_string_ids_through_remap(&mut self, remap: &HashMap<StringId, StringId>) {
206 if remap.is_empty() {
207 return;
208 }
209 let mut changed = false;
210 for entry in &mut self.entries {
211 if let Some(&canon) = remap.get(&entry.symbol) {
212 entry.symbol = canon;
213 changed = true;
214 }
215 }
216 if !changed {
217 return;
218 }
219 self.entries.sort_by(|a, b| {
220 (a.scope, a.symbol, a.byte_offset).cmp(&(b.scope, b.symbol, b.byte_offset))
221 });
222 for (idx, entry) in self.entries.iter_mut().enumerate() {
223 entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
224 }
225 self.rebuild_index();
226 }
227
228 /// Rebuilds the `chains` range index from `self.entries`.
229 ///
230 /// Called after deserialization (where `chains` is not persisted) and
231 /// after constructing a `ShadowTable` from a raw `Vec<ShadowEntry>`. The
232 /// entries must already be sorted by `(scope, symbol, byte_offset)`.
233 fn rebuild_index(&mut self) {
234 debug_assert!(
235 self.entries
236 .windows(2)
237 .all(|w| { (w[0].scope, w[0].symbol) <= (w[1].scope, w[1].symbol) }),
238 "rebuild_index requires entries sorted by (scope, symbol) ascending"
239 );
240 self.chains.clear();
241 let mut cursor = 0u32;
242 while (cursor as usize) < self.entries.len() {
243 let key = (
244 self.entries[cursor as usize].scope,
245 self.entries[cursor as usize].symbol,
246 );
247 let start = cursor;
248 while (cursor as usize) < self.entries.len()
249 && (
250 self.entries[cursor as usize].scope,
251 self.entries[cursor as usize].symbol,
252 ) == key
253 {
254 cursor += 1;
255 }
256 self.chains.insert(key, (start, cursor));
257 }
258 }
259}
260
261// ---------------------------------------------------------------------------
262// Serde: persist only `entries`; rebuild `chains` on deserialize.
263// ---------------------------------------------------------------------------
264
265impl Serialize for ShadowTable {
266 fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
267 // Serialize as a newtype wrapping just the entries Vec.
268 self.entries.serialize(serializer)
269 }
270}
271
272impl<'de> Deserialize<'de> for ShadowTable {
273 fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
274 let entries = Vec::<ShadowEntry>::deserialize(deserializer)?;
275 let mut table = ShadowTable {
276 entries,
277 chains: HashMap::default(),
278 };
279 table.rebuild_index();
280 Ok(table)
281 }
282}
283
284/// Walks `NodeKind::Variable` and `NodeKind::Parameter` nodes whose enclosing
285/// scope is a `ScopeKind::Function`, groups them by `(scope, symbol)`, sorts
286/// each group by `byte_offset`, assigns stable `ShadowEntryId`s, and builds
287/// the `chains` range index.
288///
289/// Uses `ScopeArena::iter()` to build the node → scope map, avoiding the
290/// generation-1 hardcode. Only `Contains`, `Defines` edges feed into scope
291/// derivation; `derive_shadows` operates on the already-finalized scope arena
292/// and never walks cross-language edges.
293///
294/// Returns an empty `ShadowTable` when no Variable/Parameter nodes belong to
295/// any Function scope.
296///
297/// # Performance
298///
299/// The `edge_index` parameter replaces the per-function `edges_from()` calls
300/// that previously triggered O(E) delta scans, turning per-node edge lookup
301/// from O(E) to O(1).
302pub(crate) fn derive_shadows<G: GraphMutationTarget>(
303 graph: &G,
304 scopes: &ScopeArena,
305 edge_index: &BindingEdgeIndex,
306) -> ShadowTable {
307 // Build NodeId → ScopeId for every scope whose kind is Function.
308 // Reuse the same node_to_scope approach as derive_aliases but then filter
309 // to Function-kind scopes.
310 let node_to_function_scope: HashMap<NodeId, ScopeId> = scopes
311 .iter()
312 .filter(|(_, scope)| scope.kind == ScopeKind::Function)
313 .map(|(id, scope)| (scope.node, id))
314 .collect();
315
316 // For nodes that are contained within a function (i.e., their direct
317 // parent is the function node), we need to know which scope corresponds
318 // to which function node. We already have that in node_to_function_scope
319 // (function_node → scope_id). We need to find which function scope
320 // contains each Variable/Parameter.
321 //
322 // Strategy: for each function node find its `Defines`/`Contains` children
323 // that are Variable/Parameter nodes using the pre-built forward index.
324 // This is correct for single-level nesting (typical for let-bindings and
325 // parameters) and avoids the O(E) delta scan that edges_from() would
326 // trigger.
327
328 let mut raw: Vec<ShadowEntry> = Vec::new();
329
330 for (&func_node, &scope_id) in &node_to_function_scope {
331 // Look up pre-indexed outgoing Defines/Contains edges from the function node.
332 let Some(children) = edge_index.defines_contains_by_source.get(&func_node) else {
333 continue;
334 };
335
336 for (child, edge_kind) in children {
337 if !matches!(edge_kind, EdgeKind::Defines | EdgeKind::Contains) {
338 continue;
339 }
340 let Some(child_entry) = graph.nodes().get(*child) else {
341 continue;
342 };
343 if !matches!(child_entry.kind, NodeKind::Variable | NodeKind::Parameter) {
344 continue;
345 }
346 raw.push(ShadowEntry {
347 // Placeholder id; overwritten after sorting.
348 id: ShadowEntryId(0),
349 scope: scope_id,
350 symbol: child_entry.name,
351 node: *child,
352 byte_offset: child_entry.start_byte,
353 });
354 }
355 }
356
357 if raw.is_empty() {
358 log::debug!("derive_shadows: no Variable/Parameter nodes under Function scopes");
359 return ShadowTable::new();
360 }
361
362 // Sort by (scope, symbol, byte_offset) for stable chain partitioning.
363 raw.sort_by(|a, b| (a.scope, a.symbol, a.byte_offset).cmp(&(b.scope, b.symbol, b.byte_offset)));
364
365 // Assign stable ids: position in the sorted slice IS the ShadowEntryId.
366 for (idx, entry) in raw.iter_mut().enumerate() {
367 entry.id = ShadowEntryId(u32::try_from(idx).expect("shadow count fits u32"));
368 }
369
370 // Build the per-(scope, symbol) range index via rebuild_index so the
371 // logic lives in exactly one place (deserialization also calls it).
372 let mut table = ShadowTable {
373 entries: raw,
374 chains: HashMap::default(),
375 };
376 table.rebuild_index();
377
378 log::debug!(
379 "derive_shadows: derived {} shadow entries across {} chains",
380 table.len(),
381 table.chains.len(),
382 );
383
384 table
385}
386
387#[cfg(test)]
388mod tests {
389 use super::*;
390 use crate::graph::unified::node::id::NodeId;
391 use crate::graph::unified::string::id::StringId;
392
393 fn make_scope(index: u32) -> ScopeId {
394 ScopeId::new(index, 1)
395 }
396
397 /// Build a `ShadowTable` directly from a pre-sorted entries slice (for
398 /// unit tests that don't want to go through `derive_shadows`).
399 fn table_from_sorted(mut entries: Vec<ShadowEntry>) -> ShadowTable {
400 for (idx, e) in entries.iter_mut().enumerate() {
401 e.id = ShadowEntryId(u32::try_from(idx).unwrap());
402 }
403 let mut table = ShadowTable {
404 entries,
405 chains: HashMap::default(),
406 };
407 table.rebuild_index();
408 table
409 }
410
411 fn make_entry(scope: ScopeId, symbol: StringId, node: NodeId, byte_offset: u32) -> ShadowEntry {
412 ShadowEntry {
413 id: ShadowEntryId(0), // patched by table_from_sorted
414 scope,
415 symbol,
416 node,
417 byte_offset,
418 }
419 }
420
421 #[test]
422 fn t12_effective_binding_innermost_before_offset() {
423 let scope = make_scope(0);
424 let symbol = StringId::new(1);
425 let entries = vec![
426 make_entry(scope, symbol, NodeId::new(1, 1), 10),
427 make_entry(scope, symbol, NodeId::new(2, 1), 30),
428 ];
429 let table = table_from_sorted(entries);
430
431 // Query at offset 40 — both definitions are before, innermost is node 2.
432 assert_eq!(
433 table.effective_binding(scope, symbol, 40),
434 Some(NodeId::new(2, 1)),
435 "innermost definition before offset 40 must be node 2 (at offset 30)"
436 );
437 // Query at offset 20 — only first definition is before.
438 assert_eq!(
439 table.effective_binding(scope, symbol, 20),
440 Some(NodeId::new(1, 1)),
441 "only node 1 (offset 10) is before offset 20"
442 );
443 }
444
445 #[test]
446 fn t13_effective_binding_before_first_returns_none() {
447 let scope = make_scope(0);
448 let symbol = StringId::new(1);
449 let entries = vec![make_entry(scope, symbol, NodeId::new(1, 1), 50)];
450 let table = table_from_sorted(entries);
451
452 // Query at offset 10 — before the first definition.
453 assert_eq!(
454 table.effective_binding(scope, symbol, 10),
455 None,
456 "query before first definition must return None"
457 );
458 }
459
460 #[test]
461 fn empty_table_behavior() {
462 let table = ShadowTable::new();
463 assert!(table.is_empty());
464 assert_eq!(table.len(), 0);
465 let scope = make_scope(0);
466 let symbol = StringId::new(1);
467 assert!(table.shadows_in(scope).is_empty());
468 assert_eq!(table.effective_binding(scope, symbol, 100), None);
469 assert!(table.get(ShadowEntryId(0)).is_none());
470 }
471
472 #[test]
473 fn get_by_shadow_entry_id_round_trips() {
474 let scope = make_scope(0);
475 let sym_a = StringId::new(1);
476 let sym_b = StringId::new(2);
477 let entries = vec![
478 make_entry(scope, sym_a, NodeId::new(1, 1), 10),
479 make_entry(scope, sym_b, NodeId::new(2, 1), 20),
480 ];
481 let table = table_from_sorted(entries);
482
483 let e0 = table.get(ShadowEntryId(0)).expect("id 0 must exist");
484 let e1 = table.get(ShadowEntryId(1)).expect("id 1 must exist");
485 assert_eq!(e0.id, ShadowEntryId(0));
486 assert_eq!(e0.symbol, sym_a);
487 assert_eq!(e1.id, ShadowEntryId(1));
488 assert_eq!(e1.symbol, sym_b);
489 assert!(table.get(ShadowEntryId(2)).is_none());
490 }
491
492 #[test]
493 fn entries_accessor_returns_full_slice() {
494 let scope = make_scope(0);
495 let symbol = StringId::new(1);
496 let entries = vec![
497 make_entry(scope, symbol, NodeId::new(1, 1), 5),
498 make_entry(scope, symbol, NodeId::new(2, 1), 15),
499 make_entry(scope, symbol, NodeId::new(3, 1), 25),
500 ];
501 let table = table_from_sorted(entries);
502 assert_eq!(table.entries().len(), 3);
503 }
504
505 #[test]
506 fn scope_partitioning_no_collision() {
507 // Two scopes each declaring the same symbol name must not collide.
508 let s_a = make_scope(0);
509 let s_b = make_scope(1);
510 let sym = StringId::new(42);
511
512 let entries = vec![
513 make_entry(s_a, sym, NodeId::new(10, 1), 5),
514 make_entry(s_a, sym, NodeId::new(11, 1), 20),
515 make_entry(s_b, sym, NodeId::new(20, 1), 5),
516 make_entry(s_b, sym, NodeId::new(21, 1), 30),
517 ];
518 let table = table_from_sorted(entries);
519
520 // s_a at offset 25 → should return node 11 (offset 20 < 25).
521 assert_eq!(
522 table.effective_binding(s_a, sym, 25),
523 Some(NodeId::new(11, 1)),
524 "s_a effective binding at 25 must be node 11"
525 );
526 // s_b at offset 25 → should return node 20 (offset 5 < 25, 30 ≥ 25).
527 assert_eq!(
528 table.effective_binding(s_b, sym, 25),
529 Some(NodeId::new(20, 1)),
530 "s_b effective binding at 25 must be node 20"
531 );
532 // s_a and s_b resolve to different nodes for the same symbol and offset
533 assert_ne!(
534 table.effective_binding(s_a, sym, 25),
535 table.effective_binding(s_b, sym, 25),
536 "same symbol in different scopes must resolve independently"
537 );
538 }
539
540 #[test]
541 fn postcard_round_trip_preserves_entries_and_rebuilds_chains() {
542 let s_a = make_scope(1);
543 let s_b = make_scope(2);
544 let sym_x = StringId::new(10);
545 let sym_y = StringId::new(11);
546
547 let entries = vec![
548 make_entry(s_a, sym_x, NodeId::new(1, 1), 10),
549 make_entry(s_a, sym_x, NodeId::new(2, 1), 30),
550 make_entry(s_b, sym_y, NodeId::new(3, 1), 5),
551 ];
552 let original = table_from_sorted(entries);
553
554 let bytes = postcard::to_allocvec(&original).expect("serialize");
555 let restored: ShadowTable = postcard::from_bytes(&bytes).expect("deserialize");
556
557 assert_eq!(
558 restored.entries().len(),
559 original.entries().len(),
560 "entry count must survive round-trip"
561 );
562 assert_eq!(
563 restored.entries(),
564 original.entries(),
565 "entries must be byte-equal after round-trip"
566 );
567
568 // Verify chains were rebuilt correctly.
569 assert_eq!(
570 restored.effective_binding(s_a, sym_x, 40),
571 Some(NodeId::new(2, 1)),
572 "effective_binding on restored table must find node 2 for s_a/sym_x at 40"
573 );
574 assert_eq!(
575 restored.effective_binding(s_a, sym_x, 20),
576 Some(NodeId::new(1, 1)),
577 "effective_binding on restored table must find node 1 for s_a/sym_x at 20"
578 );
579 assert_eq!(
580 restored.effective_binding(s_b, sym_y, 10),
581 Some(NodeId::new(3, 1)),
582 "effective_binding on restored table must find node 3 for s_b/sym_y at 10"
583 );
584 assert_eq!(
585 restored.effective_binding(s_b, sym_y, 3),
586 None,
587 "query before first def must return None after round-trip"
588 );
589 }
590}