Skip to main content

formualizer_eval/engine/
live_edges.rs

1//! Live-edge collection for statically-cyclic SCC evaluation (Stage 1 of the
2//! runtime-cycle-verdicts work; pre-work for RFC #112).
3//!
4//! When a statically-cyclic SCC is evaluated member-by-member (Stage 2), we
5//! must record which reads *actually occurred* targeting other SCC members
6//! ("live edges"). Untaken short-circuit branches (`IF`/`IFS`/`CHOOSE`/
7//! `SWITCH`, ...) never execute their reads, so they contribute no live edges
8//! for free. After a pass, Stage 2 classifies the live subgraph: acyclic means
9//! the cycle was phantom (values stand); cyclic means `#CIRC!` or iterative
10//! evaluation.
11//!
12//! Stage 1 ships only the collection machinery:
13//!
14//! * [`LiveEdgeCollector`] — a per-SCC set of member cells plus the live edges
15//!   observed so far.
16//! * [`RecordingContext`] — a delegating [`EvaluationContext`] wrapper around
17//!   `&Engine<R>` that records reads as they resolve and forwards everything
18//!   else verbatim.
19//!
20//! # Inertness (binding constraint)
21//!
22//! Nothing in this module is wired into any production evaluation path. The
23//! acyclic/hot evaluation path never constructs a `RecordingContext`; no
24//! `Engine` field, flag, or branch was added. The wrapper is only exercised by
25//! Stage-2 SCC tasks (future) and by tests, so its cost is strictly zero for
26//! ordinary recalculation.
27//!
28//! # Threading
29//!
30//! SCC members are evaluated **sequentially on a single thread**; the
31//! collector is never contended. Interior mutability is required because the
32//! resolver traits take `&self`, and the `Send + Sync` super-bounds on
33//! [`crate::traits::ReferenceResolver`] et al. rule out `RefCell`, so we use a
34//! `Mutex`. It is uncontended by construction (single-threaded SCC pass), so
35//! the lock is a fast path (uncontested futex acquire) and never blocks.
36//!
37//! # Coordinates
38//!
39//! The collector API uses the engine's internal convention: 0-based row and
40//! column indices, rectangles **inclusive** of both corners (matching
41//! [`RangeView::start_row`]/[`RangeView::end_row`] and `CellRef`'s `Coord`).
42//! Resolver-level call sites (1-based Excel coordinates) convert before
43//! recording.
44
45use std::sync::Mutex;
46
47use formualizer_common::{ExcelError, LiteralValue};
48use formualizer_parse::parser::{ReferenceType, TableReference};
49use rustc_hash::{FxHashMap, FxHashSet};
50
51use crate::engine::eval::Engine;
52use crate::engine::range_view::RangeView;
53use crate::reference::{CellRef, SheetId};
54use crate::traits::{
55    EvaluationContext, FunctionProvider, NamedRangeResolver, Range, RangeResolver, ReferenceInfo,
56    ReferenceResolver, Resolver, SourceResolver, Table, TableResolver,
57};
58
59/* ───────────────────────── LiveEdgeCollector ───────────────────────── */
60
61/// One SCC member cell in collector-internal form (0-based coordinates).
62#[derive(Clone, Copy, Debug, Eq, PartialEq)]
63struct MemberCell {
64    sheet_id: SheetId,
65    row: u32,
66    col: u32,
67}
68
69#[derive(Default)]
70struct CollectorState {
71    /// Index (into `members`) of the member currently being evaluated.
72    /// `None` until `set_current` is called; reads observed while `None` are
73    /// not attributable and are dropped.
74    current: Option<u32>,
75    /// Live edges as `(from_member_idx, to_member_idx)`. Self-edges `(i, i)`
76    /// are recorded (e.g. a member whose range argument includes itself).
77    edges: FxHashSet<(u32, u32)>,
78}
79
80/// Records which reads actually occurred targeting SCC members during a
81/// sequential member-by-member evaluation pass.
82///
83/// * Scalar reads are O(1) (hash lookup keyed by `(sheet, row, col)`).
84/// * Rectangle reads are recorded **once per resolved rect** and intersected
85///   with the membership in O(|SCC|) — never per cell of the rect.
86/// * Name reads (named-formula SCC members, spec §7.13) are O(1) lookups by
87///   the engine-folded name key.
88///
89/// Member indices are split: cell members occupy `0..cell_count`, name
90/// members occupy `cell_count..cell_count + name_count` (matching the spec
91/// §7.13 member ordering used by SCC tasks: cells first, then names).
92pub struct LiveEdgeCollector {
93    /// Iterable membership for rect intersection.
94    members: Vec<MemberCell>,
95    /// O(1) scalar lookup: (sheet_id, row0, col0) -> member index.
96    index: FxHashMap<(SheetId, u32, u32), u32>,
97    /// O(1) name lookup: engine-folded name key -> member index (indices
98    /// start after the cell members).
99    name_index: FxHashMap<String, u32>,
100    /// Total member count (cells + names); valid `set_current` range.
101    total_members: usize,
102    /// See module docs: uncontended Mutex forced by `Send + Sync` bounds on
103    /// the resolver traits; SCC passes are single-threaded.
104    state: Mutex<CollectorState>,
105}
106
107impl LiveEdgeCollector {
108    /// Build a collector for the given SCC membership. Member order defines
109    /// the indices used in recorded edges.
110    pub fn new(members: &[CellRef]) -> Self {
111        Self::new_with_names(members, &[])
112    }
113
114    /// Build a collector over cell members plus name-vertex members. Cell
115    /// members get indices `0..cells.len()`; name member `j` gets index
116    /// `cells.len() + j`. `names` must already be folded with the engine's
117    /// name-folding rule (see [`Engine::fold_name_key`]).
118    pub fn new_with_names(cells: &[CellRef], names: &[String]) -> Self {
119        let members: Vec<MemberCell> = cells
120            .iter()
121            .map(|c| MemberCell {
122                sheet_id: c.sheet_id,
123                row: c.coord.row(),
124                col: c.coord.col(),
125            })
126            .collect();
127        let mut index = FxHashMap::default();
128        index.reserve(members.len());
129        for (i, m) in members.iter().enumerate() {
130            index.insert((m.sheet_id, m.row, m.col), i as u32);
131        }
132        let mut name_index = FxHashMap::default();
133        name_index.reserve(names.len());
134        for (j, name) in names.iter().enumerate() {
135            name_index.insert(name.clone(), (members.len() + j) as u32);
136        }
137        let total_members = members.len() + names.len();
138        Self {
139            members,
140            index,
141            name_index,
142            total_members,
143            state: Mutex::new(CollectorState::default()),
144        }
145    }
146
147    pub fn member_count(&self) -> usize {
148        self.total_members
149    }
150
151    /// Set the member whose formula is about to be evaluated; subsequent
152    /// recorded reads are attributed to it.
153    pub fn set_current(&self, member_idx: u32) {
154        debug_assert!((member_idx as usize) < self.total_members);
155        self.state.lock().unwrap().current = Some(member_idx);
156    }
157
158    /// Stop attributing reads to any member (used between passes so that
159    /// out-of-band reads — snapshots, deltas — never record edges).
160    pub fn clear_current(&self) {
161        self.state.lock().unwrap().current = None;
162    }
163
164    /// Record a scalar read of `(sheet_id, row, col)` (0-based).
165    pub fn record_scalar(&self, sheet_id: SheetId, row: u32, col: u32) {
166        let Some(&to) = self.index.get(&(sheet_id, row, col)) else {
167            return;
168        };
169        let mut st = self.state.lock().unwrap();
170        if let Some(from) = st.current {
171            st.edges.insert((from, to));
172        }
173    }
174
175    /// Record a rectangle read (0-based, inclusive corners). Intersection is
176    /// O(|SCC|): each member is tested against the rect once; the rect is
177    /// never enumerated per cell.
178    pub fn record_rect(&self, sheet_id: SheetId, sr: u32, sc: u32, er: u32, ec: u32) {
179        let mut st = self.state.lock().unwrap();
180        let Some(from) = st.current else {
181            return;
182        };
183        for (i, m) in self.members.iter().enumerate() {
184            if m.sheet_id == sheet_id && m.row >= sr && m.row <= er && m.col >= sc && m.col <= ec {
185                st.edges.insert((from, i as u32));
186            }
187        }
188    }
189
190    /// Record a read of a named entity by folded name key (e.g. a formula
191    /// referencing a named-formula SCC member).
192    pub fn record_name(&self, folded_name: &str) {
193        let Some(&to) = self.name_index.get(folded_name) else {
194            return;
195        };
196        let mut st = self.state.lock().unwrap();
197        if let Some(from) = st.current {
198            st.edges.insert((from, to));
199        }
200    }
201
202    /// Drain the collected edges, leaving the collector empty (current member
203    /// attribution is preserved).
204    pub fn take_edges(&self) -> FxHashSet<(u32, u32)> {
205        std::mem::take(&mut self.state.lock().unwrap().edges)
206    }
207}
208
209/* ───────────────────────── RecordingContext ───────────────────────── */
210
211/// Delegating [`EvaluationContext`] that wraps `&Engine<R>` and records reads
212/// into a [`LiveEdgeCollector`].
213///
214/// Interception points (everything else is pure delegation):
215///
216/// * `EvaluationContext::resolve_cell_reference_value` — the interpreter's
217///   scalar read path (current-sheet aware).
218/// * `EvaluationContext::resolve_range_view` — the single choke point for
219///   range, named-range, table and dynamic (`INDIRECT`/`OFFSET`) reads. The
220///   engine resolves un/partially-bounded references to concrete used-region
221///   bounds, and the returned view carries the resolved sheet + rect, so we
222///   record exactly that rect once. Views materialised from owned rows (array
223///   literals, named literals/formulas) carry the synthetic `"__tmp"` sheet,
224///   which has no `SheetId`, so they are skipped automatically.
225/// * `ReferenceResolver::resolve_cell_reference` — sheet-qualified scalar
226///   reads (e.g. implicit intersection).
227/// * `RangeResolver::resolve_range_reference` — legacy boxed-range path; the
228///   rect is resolved via the engine's own `resolve_range_view` normalisation
229///   so unbounded references record their used-region bounds.
230///
231/// Not recordable at this layer (Stage 2 follow-ups, noted in tests):
232///
233/// * `NamedRangeResolver::resolve_named_range_reference` — values-only API
234///   with no sheet/region context. The engine-level named-range path flows
235///   through `resolve_range_view` (intercepted); only the external-resolver
236///   fallback is invisible.
237/// * `TableResolver::resolve_table_reference` — returns an opaque `Table`.
238///   Engine-registered tables flow through `resolve_range_view` (intercepted).
239pub struct RecordingContext<'a, R: EvaluationContext> {
240    engine: &'a Engine<R>,
241    collector: &'a LiveEdgeCollector,
242}
243
244impl<'a, R: EvaluationContext> RecordingContext<'a, R> {
245    pub fn new(engine: &'a Engine<R>, collector: &'a LiveEdgeCollector) -> Self {
246        Self { engine, collector }
247    }
248
249    /// Record a read of a named entity, folding the raw reference text with
250    /// the engine's name-folding rule so it matches collector name keys.
251    fn record_name(&self, raw_name: &str) {
252        let key = self.engine.graph.name_lookup_key(raw_name);
253        self.collector.record_name(&key);
254    }
255
256    /// Record a scalar read given Excel 1-based coordinates.
257    fn record_cell_1based(&self, sheet_name: &str, row: u32, col: u32) {
258        if row == 0 || col == 0 {
259            return;
260        }
261        if let Some(sid) = self.engine.sheet_id(sheet_name) {
262            self.collector.record_scalar(sid, row - 1, col - 1);
263        }
264    }
265
266    /// Record the resolved rect of a `RangeView`. View bounds are absolute,
267    /// 0-based and inclusive. Owned/temporary views (sheet `"__tmp"`) have no
268    /// registered `SheetId` and are skipped.
269    fn record_view(&self, view: &RangeView<'_>) {
270        if view.is_empty() {
271            return;
272        }
273        if let Some(sid) = self.engine.sheet_id(view.sheet_name()) {
274            self.collector.record_rect(
275                sid,
276                view.start_row() as u32,
277                view.start_col() as u32,
278                view.end_row() as u32,
279                view.end_col() as u32,
280            );
281        }
282    }
283}
284
285impl<'a, R: EvaluationContext> ReferenceResolver for RecordingContext<'a, R> {
286    fn resolve_cell_reference(
287        &self,
288        sheet: Option<&str>,
289        row: u32,
290        col: u32,
291    ) -> Result<LiteralValue, ExcelError> {
292        // Unqualified (`None`) references are rejected by the engine itself
293        // (no current-sheet context at this trait level), so there is nothing
294        // attributable to record in that case.
295        if let Some(sheet_name) = sheet {
296            self.record_cell_1based(sheet_name, row, col);
297        }
298        self.engine.resolve_cell_reference(sheet, row, col)
299    }
300}
301
302impl<'a, R: EvaluationContext> RangeResolver for RecordingContext<'a, R> {
303    fn resolve_range_reference(
304        &self,
305        sheet: Option<&str>,
306        sr: Option<u32>,
307        sc: Option<u32>,
308        er: Option<u32>,
309        ec: Option<u32>,
310    ) -> Result<Box<dyn Range>, ExcelError> {
311        // Resolve the rect through the engine's own bound normalisation
312        // (used-region for unbounded axes) rather than duplicating it here.
313        if let Some(sheet_name) = sheet {
314            let reference = ReferenceType::Range {
315                sheet: Some(sheet_name.to_string()),
316                start_row: sr,
317                start_col: sc,
318                end_row: er,
319                end_col: ec,
320                start_row_abs: true,
321                start_col_abs: true,
322                end_row_abs: true,
323                end_col_abs: true,
324            };
325            if let Ok(view) = self.engine.resolve_range_view(&reference, sheet_name) {
326                self.record_view(&view);
327            }
328        }
329        self.engine.resolve_range_reference(sheet, sr, sc, er, ec)
330    }
331}
332
333impl<'a, R: EvaluationContext> NamedRangeResolver for RecordingContext<'a, R> {
334    fn resolve_named_range_reference(
335        &self,
336        name: &str,
337    ) -> Result<Vec<Vec<LiteralValue>>, ExcelError> {
338        // Values-only API without sheet/region context; record the *name*
339        // member edge (if the name itself is an SCC member) — region-level
340        // reads flow through `resolve_range_view` instead.
341        self.record_name(name);
342        self.engine.resolve_named_range_reference(name)
343    }
344}
345
346impl<'a, R: EvaluationContext> TableResolver for RecordingContext<'a, R> {
347    fn resolve_table_reference(&self, tref: &TableReference) -> Result<Box<dyn Table>, ExcelError> {
348        // Opaque `Table` without region context; engine-registered tables are
349        // intercepted in `resolve_range_view` instead.
350        self.engine.resolve_table_reference(tref)
351    }
352}
353
354impl<'a, R: EvaluationContext> SourceResolver for RecordingContext<'a, R> {
355    fn source_scalar_version(&self, name: &str) -> Option<u64> {
356        self.engine.source_scalar_version(name)
357    }
358    fn resolve_source_scalar(&self, name: &str) -> Result<LiteralValue, ExcelError> {
359        self.engine.resolve_source_scalar(name)
360    }
361    fn source_table_version(&self, name: &str) -> Option<u64> {
362        self.engine.source_table_version(name)
363    }
364    fn resolve_source_table(&self, name: &str) -> Result<Box<dyn Table>, ExcelError> {
365        self.engine.resolve_source_table(name)
366    }
367}
368
369impl<'a, R: EvaluationContext> Resolver for RecordingContext<'a, R> {}
370
371impl<'a, R: EvaluationContext> FunctionProvider for RecordingContext<'a, R> {
372    fn get_function(
373        &self,
374        ns: &str,
375        name: &str,
376    ) -> Option<std::sync::Arc<dyn crate::traits::Function>> {
377        self.engine.get_function(ns, name)
378    }
379}
380
381impl<'a, R: EvaluationContext> EvaluationContext for RecordingContext<'a, R> {
382    /* ── intercept-and-record ── */
383
384    fn resolve_range_view<'c>(
385        &'c self,
386        reference: &ReferenceType,
387        current_sheet: &str,
388    ) -> Result<RangeView<'c>, ExcelError> {
389        // Named reads can target a name *vertex* that is itself an SCC member
390        // (a named formula, spec §7.13). Those resolve to owned-row views with
391        // no sheet rect, so they must be recorded by name here in addition to
392        // the rect recording below (which covers Cell/Range definitions).
393        if let ReferenceType::NamedRange(name) = reference {
394            self.record_name(name);
395        }
396        let view = self.engine.resolve_range_view(reference, current_sheet)?;
397        self.record_view(&view);
398        Ok(view)
399    }
400
401    fn resolve_cell_reference_value(
402        &self,
403        sheet: Option<&str>,
404        row: u32,
405        col: u32,
406        current_sheet: &str,
407    ) -> Result<LiteralValue, ExcelError> {
408        self.record_cell_1based(sheet.unwrap_or(current_sheet), row, col);
409        self.engine
410            .resolve_cell_reference_value(sheet, row, col, current_sheet)
411    }
412
413    /* ── pure delegation ── */
414
415    fn thread_pool(&self) -> Option<&std::sync::Arc<rayon::ThreadPool>> {
416        self.engine.thread_pool()
417    }
418    fn cancellation_token(&self) -> Option<std::sync::Arc<std::sync::atomic::AtomicBool>> {
419        self.engine.cancellation_token()
420    }
421    fn chunk_hint(&self) -> Option<usize> {
422        self.engine.chunk_hint()
423    }
424    fn locale(&self) -> crate::locale::Locale {
425        self.engine.locale()
426    }
427    fn workbook_sheet_count(&self) -> Option<usize> {
428        self.engine.workbook_sheet_count()
429    }
430    fn sheet_index_by_name(&self, sheet: &str) -> Option<usize> {
431        self.engine.sheet_index_by_name(sheet)
432    }
433    fn current_sheet_index(&self, current_sheet: &str) -> Option<usize> {
434        self.engine.current_sheet_index(current_sheet)
435    }
436    fn inspect_reference(
437        &self,
438        reference: &ReferenceType,
439        current_sheet: &str,
440    ) -> Result<Option<ReferenceInfo>, ExcelError> {
441        self.engine.inspect_reference(reference, current_sheet)
442    }
443    fn formula_text_at_cell(&self, cell: CellRef) -> Result<Option<String>, ExcelError> {
444        self.engine.formula_text_at_cell(cell)
445    }
446    fn clock(&self) -> &dyn crate::timezone::ClockProvider {
447        self.engine.clock()
448    }
449    fn timezone(&self) -> &crate::timezone::TimeZoneSpec {
450        self.engine.timezone()
451    }
452    fn volatile_level(&self) -> crate::traits::VolatileLevel {
453        self.engine.volatile_level()
454    }
455    fn workbook_seed(&self) -> u64 {
456        self.engine.workbook_seed()
457    }
458    fn recalc_epoch(&self) -> u64 {
459        self.engine.recalc_epoch()
460    }
461    fn used_rows_for_columns(
462        &self,
463        sheet: &str,
464        start_col: u32,
465        end_col: u32,
466    ) -> Option<(u32, u32)> {
467        self.engine.used_rows_for_columns(sheet, start_col, end_col)
468    }
469    fn used_cols_for_rows(&self, sheet: &str, start_row: u32, end_row: u32) -> Option<(u32, u32)> {
470        self.engine.used_cols_for_rows(sheet, start_row, end_row)
471    }
472    fn sheet_bounds(&self, sheet: &str) -> Option<(u32, u32)> {
473        self.engine.sheet_bounds(sheet)
474    }
475    fn data_snapshot_id(&self) -> u64 {
476        self.engine.data_snapshot_id()
477    }
478    fn backend_caps(&self) -> crate::traits::BackendCaps {
479        self.engine.backend_caps()
480    }
481    fn date_system(&self) -> crate::engine::DateSystem {
482        self.engine.date_system()
483    }
484    fn build_lookup_index(
485        &self,
486        view: &RangeView<'_>,
487        axis: crate::engine::lookup_index_cache::LookupAxis,
488    ) -> Option<std::sync::Arc<crate::engine::lookup_index_cache::LookupIndex>> {
489        self.engine.build_lookup_index(view, axis)
490    }
491    fn build_criteria_mask(
492        &self,
493        view: &RangeView<'_>,
494        col_in_view: usize,
495        pred: &crate::args::CriteriaPredicate,
496    ) -> Option<std::sync::Arc<arrow_array::BooleanArray>> {
497        self.engine.build_criteria_mask(view, col_in_view, pred)
498    }
499    fn build_row_visibility_mask(
500        &self,
501        view: &RangeView<'_>,
502        mode: crate::engine::row_visibility::VisibilityMaskMode,
503    ) -> Option<std::sync::Arc<arrow_array::BooleanArray>> {
504        self.engine.build_row_visibility_mask(view, mode)
505    }
506}