Skip to main content

formualizer_eval/engine/graph/
mod.rs

1use crate::SheetId;
2use crate::engine::TombstoneRegistry;
3use crate::engine::named_range::{NameScope, NamedDefinition, NamedRange};
4use crate::engine::sheet_registry::SheetRegistry;
5use formualizer_common::{ExcelError, ExcelErrorKind, LiteralValue};
6use formualizer_parse::parser::{ASTNode, ASTNodeType, ReferenceType};
7use rustc_hash::{FxHashMap, FxHashSet};
8
9#[cfg(debug_assertions)]
10use std::sync::atomic::{AtomicU64, Ordering};
11
12#[cfg(test)]
13#[derive(Debug, Default, Clone)]
14pub struct GraphInstrumentation {
15    pub edges_added: u64,
16    pub stripe_inserts: u64,
17    pub stripe_removes: u64,
18    pub dependents_scan_fallback_calls: u64,
19    pub dependents_scan_vertices_scanned: u64,
20}
21
22mod ast_utils;
23pub mod editor;
24mod formula_analysis;
25mod names;
26mod range_deps;
27mod sheets;
28pub mod snapshot;
29mod sources;
30mod tables;
31
32use super::arena::{AstNodeId, DataStore, ValueRef};
33use super::delta_edges::CsrMutableEdges;
34use super::sheet_index::SheetIndex;
35use super::vertex::{VertexId, VertexKind};
36use super::vertex_store::{FIRST_NORMAL_VERTEX, VertexStore};
37use crate::engine::topo::{
38    GraphAdapter,
39    pk::{DynamicTopo, PkConfig},
40};
41use crate::reference::{CellRef, Coord, SharedRangeRef, SharedRef, SharedSheetLocator};
42use formualizer_common::Coord as AbsCoord;
43// topo::pk wiring will be integrated behind config.use_dynamic_topo in a follow-up step
44
45#[inline]
46fn normalize_stored_literal(value: LiteralValue) -> LiteralValue {
47    match value {
48        // Public contract: store numerics as Number(f64).
49        LiteralValue::Int(i) => LiteralValue::Number(i as f64),
50        other => other,
51    }
52}
53
54pub use editor::change_log::{ChangeEvent, ChangeLog};
55
56// ChangeEvent is now imported from change_log module
57
58/// đź”® Scalability Hook: Dependency reference types for range compression
59#[derive(Debug, Clone, PartialEq, Eq, Hash)]
60pub enum DependencyRef {
61    /// A specific cell dependency
62    Cell(VertexId),
63    /// A dependency on a finite, rectangular range
64    Range {
65        sheet: String,
66        start_row: u32,
67        start_col: u32,
68        end_row: u32, // Inclusive
69        end_col: u32, // Inclusive
70    },
71    /// A whole column dependency (A:A) - future range compression
72    WholeColumn { sheet: String, col: u32 },
73    /// A whole row dependency (1:1) - future range compression  
74    WholeRow { sheet: String, row: u32 },
75}
76
77/// A key representing a coarse-grained section of a sheet
78#[derive(Debug, Clone, Hash, PartialEq, Eq)]
79pub struct StripeKey {
80    pub sheet_id: SheetId,
81    pub stripe_type: StripeType,
82    pub index: u32, // The index of the row, column, or block stripe
83}
84
85#[derive(Debug, Clone, Hash, PartialEq, Eq)]
86pub enum StripeType {
87    Row,
88    Column,
89    Block, // For dense, square-like ranges
90}
91
92/// Block stripe indexing mathematics
93const BLOCK_H: u32 = 256;
94const BLOCK_W: u32 = 256;
95
96pub fn block_index(row: u32, col: u32) -> u32 {
97    (row / BLOCK_H) << 16 | (col / BLOCK_W)
98}
99
100/// A summary of the results of a mutating operation on the graph.
101/// This serves as a "changelog" to the application layer.
102#[derive(Debug, Clone)]
103pub struct OperationSummary {
104    /// Vertices whose values have been directly or indirectly affected.
105    pub affected_vertices: Vec<VertexId>,
106    /// Placeholder cells that were newly created to satisfy dependencies.
107    pub created_placeholders: Vec<CellRef>,
108}
109
110/// SoA-based dependency graph implementation
111#[derive(Debug)]
112pub struct DependencyGraph {
113    // Core columnar storage
114    store: VertexStore,
115
116    // Edge storage with delta slab
117    edges: CsrMutableEdges,
118
119    // Arena-based value and formula storage
120    data_store: DataStore,
121    vertex_values: FxHashMap<VertexId, ValueRef>,
122    vertex_formulas: FxHashMap<VertexId, AstNodeId>,
123
124    /// Gate for storing grid-backed (cell/formula) LiteralValue payloads inside the dependency graph.
125    ///
126    /// When `false` (Arrow-canonical mode), the graph does not store values for cell/formula
127    /// vertices. Arrow (base + overlays) is the sole value store for sheet cells.
128    value_cache_enabled: bool,
129
130    /// Debug-only instrumentation: count attempts to read *cell/formula* graph values while
131    /// caching is disabled (canonical mode guard).
132    #[cfg(debug_assertions)]
133    graph_value_read_attempts: AtomicU64,
134
135    // Address mappings using fast hashing
136    cell_to_vertex: FxHashMap<CellRef, VertexId>,
137
138    // Scheduling state - using HashSet for O(1) operations
139    dirty_vertices: FxHashSet<VertexId>,
140    volatile_vertices: FxHashSet<VertexId>,
141
142    /// Vertices explicitly marked as #REF! by structural operations.
143    ///
144    /// In Arrow-truth mode, the dependency graph does not cache cell/formula values.
145    /// We still need a place to record deterministic #REF! invalidations for editor
146    /// operations and structural transforms.
147    ref_error_vertices: FxHashSet<VertexId>,
148
149    // NEW: Specialized managers for range dependencies (Hybrid Model)
150    /// Maps a formula vertex to the ranges it depends on.
151    formula_to_range_deps: FxHashMap<VertexId, Vec<SharedRangeRef<'static>>>,
152
153    /// Maps a stripe to formulas that depend on it via a compressed range.
154    /// CRITICAL: VertexIds are deduplicated within each stripe to avoid quadratic blow-ups.
155    stripe_to_dependents: FxHashMap<StripeKey, FxHashSet<VertexId>>,
156
157    // Sheet-level sparse indexes for O(log n + k) range queries
158    /// Maps sheet_id to its interval tree index for efficient row/column operations
159    sheet_indexes: FxHashMap<SheetId, SheetIndex>,
160
161    // Sheet name/ID mapping
162    sheet_reg: SheetRegistry,
163    default_sheet_id: SheetId,
164
165    // Named ranges support
166    /// Workbook-scoped named ranges
167    named_ranges: FxHashMap<String, NamedRange>,
168
169    /// Normalized-key lookup for workbook-scoped names.
170    ///
171    /// When `config.case_sensitive_names == false`, keys are ASCII-lowercased.
172    /// Values are the canonical (original-cased) name stored in `named_ranges`.
173    named_ranges_lookup: FxHashMap<String, String>,
174
175    /// Sheet-scoped named ranges  
176    sheet_named_ranges: FxHashMap<(SheetId, String), NamedRange>,
177
178    /// Normalized-key lookup for sheet-scoped names.
179    ///
180    /// Key is (SheetId, normalized_name_key). Value is the canonical (original-cased)
181    /// name stored in `sheet_named_ranges`.
182    sheet_named_ranges_lookup: FxHashMap<(SheetId, String), String>,
183
184    /// Reverse mapping: vertex -> names it uses (by vertex id)
185    vertex_to_names: FxHashMap<VertexId, Vec<VertexId>>,
186
187    /// Lookup for name vertex -> (scope, name) to avoid map scans
188    name_vertex_lookup: FxHashMap<VertexId, (NameScope, String)>,
189
190    /// Pending formula vertices referencing unresolved bare symbolic names.
191    ///
192    /// Keys are normalized through `name_lookup_key(...)` so workbook names and
193    /// source scalars can both wake the same waiting formulas when a symbol appears.
194    pending_name_links: FxHashMap<String, FxHashSet<(SheetId, VertexId)>>,
195
196    /// Reverse mapping used to clear stale pending-name registrations when a
197    /// formula is edited, overwritten with a value, or otherwise rebuilt.
198    vertex_to_pending_names: FxHashMap<VertexId, FxHashSet<String>>,
199
200    // Native workbook tables (ListObjects)
201    tables: FxHashMap<String, tables::TableEntry>,
202    /// Normalized-key lookup for tables.
203    tables_lookup: FxHashMap<String, String>,
204    table_vertex_lookup: FxHashMap<VertexId, String>,
205
206    // External sources (SourceVertex)
207    source_scalars: FxHashMap<String, sources::SourceScalarEntry>,
208    source_tables: FxHashMap<String, sources::SourceTableEntry>,
209    source_vertex_lookup: FxHashMap<VertexId, String>,
210
211    /// Monotonic counter to assign synthetic coordinates to name vertices
212    name_vertex_seq: u32,
213
214    /// Monotonic counter to assign synthetic coordinates to source vertices
215    source_vertex_seq: u32,
216
217    /// Mapping from cell vertices to named range vertices that depend on them
218    cell_to_name_dependents: FxHashMap<VertexId, FxHashSet<VertexId>>,
219    /// Cached list of cell dependencies per named range vertex (for teardown)
220    name_to_cell_dependencies: FxHashMap<VertexId, Vec<VertexId>>,
221
222    // Evaluation configuration
223    config: super::EvalConfig,
224
225    // Dynamic topology orderer (Pearce–Kelly) maintained alongside edges when enabled
226    pk_order: Option<DynamicTopo<VertexId>>,
227
228    // Spill registry: anchor -> cells, and reverse mapping for blockers
229    spill_anchor_to_cells: FxHashMap<VertexId, Vec<CellRef>>,
230    spill_cell_to_anchor: FxHashMap<CellRef, VertexId>,
231
232    // Hint: during initial bulk load, many cells are guaranteed new; allow skipping existence checks per-sheet
233    first_load_assume_new: bool,
234    ensure_touched_sheets: FxHashSet<SheetId>,
235
236    // handled deleted references, in case they are reintroduced.
237    pub tombstone_registry: TombstoneRegistry,
238
239    #[cfg(test)]
240    instr: std::sync::Mutex<GraphInstrumentation>,
241}
242
243impl Default for DependencyGraph {
244    fn default() -> Self {
245        Self::new()
246    }
247}
248
249impl DependencyGraph {
250    /// Expose range expansion limit for planners
251    pub fn range_expansion_limit(&self) -> usize {
252        self.config.range_expansion_limit
253    }
254
255    pub fn get_config(&self) -> &super::EvalConfig {
256        &self.config
257    }
258
259    #[inline]
260    pub(crate) fn value_cache_enabled(&self) -> bool {
261        self.value_cache_enabled
262    }
263
264    /// Debug-only: how many times `get_value`/`get_cell_value` were called while caching is disabled.
265    ///
266    /// In Arrow-canonical mode this should remain 0 for engine/interpreter reads.
267    #[cfg(test)]
268    pub fn debug_graph_value_read_attempts(&self) -> u64 {
269        #[cfg(debug_assertions)]
270        {
271            self.graph_value_read_attempts.load(Ordering::Relaxed)
272        }
273        #[cfg(not(debug_assertions))]
274        {
275            0
276        }
277    }
278
279    /// Build a dependency plan for a set of formulas on sheets
280    pub fn plan_dependencies<'a, I>(
281        &mut self,
282        items: I,
283        policy: &formualizer_parse::parser::CollectPolicy,
284        volatile: Option<&[bool]>,
285    ) -> Result<crate::engine::plan::DependencyPlan, formualizer_common::ExcelError>
286    where
287        I: IntoIterator<Item = (&'a str, u32, u32, &'a formualizer_parse::parser::ASTNode)>,
288    {
289        crate::engine::plan::build_dependency_plan(
290            &mut self.sheet_reg,
291            items.into_iter(),
292            policy,
293            volatile,
294        )
295    }
296
297    /// Ensure vertices exist for given coords; allocate missing in contiguous batches and add to edges/index.
298    /// Returns a list suitable for edges.add_vertices_batch.
299    pub fn ensure_vertices_batch(
300        &mut self,
301        coords: &[(SheetId, AbsCoord)],
302    ) -> Vec<(AbsCoord, u32)> {
303        use rustc_hash::FxHashMap;
304        let mut grouped: FxHashMap<SheetId, Vec<AbsCoord>> = FxHashMap::default();
305        for (sid, pc) in coords.iter().copied() {
306            let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
307            if self.cell_to_vertex.contains_key(&addr) {
308                continue;
309            }
310            grouped.entry(sid).or_default().push(pc);
311        }
312        let mut add_batch: Vec<(AbsCoord, u32)> = Vec::new();
313        for (sid, pcs) in grouped {
314            if pcs.is_empty() {
315                continue;
316            }
317            // Mark sheet as touched by ensure to disable fast-path new allocations for its values
318            self.ensure_touched_sheets.insert(sid);
319            let vids = self.store.allocate_contiguous(sid, &pcs, 0x00);
320            for (i, pc) in pcs.iter().enumerate() {
321                let vid = vids[i];
322                add_batch.push((*pc, vid.0));
323                let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
324                self.cell_to_vertex.insert(addr, vid);
325                // Respect sheet index mode: skip index updates in Lazy mode during bulk ensure
326                match self.config.sheet_index_mode {
327                    crate::engine::SheetIndexMode::Eager
328                    | crate::engine::SheetIndexMode::FastBatch => {
329                        self.sheet_index_mut(sid).add_vertex(*pc, vid);
330                    }
331                    crate::engine::SheetIndexMode::Lazy => {
332                        // defer index build
333                    }
334                }
335            }
336        }
337        if !add_batch.is_empty() {
338            self.edges.add_vertices_batch(&add_batch);
339        }
340        add_batch
341    }
342
343    /// Enable/disable the first-load fast path for value inserts.
344    pub fn set_first_load_assume_new(&mut self, enabled: bool) {
345        self.first_load_assume_new = enabled;
346    }
347
348    /// Reset the per-sheet ensure touch tracking.
349    pub fn reset_ensure_touched(&mut self) {
350        self.ensure_touched_sheets.clear();
351    }
352
353    /// Store ASTs in batch and return their arena ids
354    pub fn store_asts_batch<'a, I>(&mut self, asts: I) -> Vec<AstNodeId>
355    where
356        I: IntoIterator<Item = &'a formualizer_parse::parser::ASTNode>,
357    {
358        self.data_store.store_asts_batch(asts, &self.sheet_reg)
359    }
360
361    /// Lookup VertexId for a (SheetId, AbsCoord)
362    pub fn vid_for_sid_pc(&self, sid: SheetId, pc: AbsCoord) -> Option<VertexId> {
363        let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
364        self.cell_to_vertex.get(&addr).copied()
365    }
366
367    /// Helper to map a global cell index in a plan to a VertexId
368    pub fn vid_for_plan_idx(
369        &self,
370        plan: &crate::engine::plan::DependencyPlan,
371        idx: u32,
372    ) -> Option<VertexId> {
373        let (sid, pc) = plan.global_cells.get(idx as usize).copied()?;
374        self.vid_for_sid_pc(sid, pc)
375    }
376    /// Assign a formula to an existing vertex, removing prior edges and setting flags
377    pub fn assign_formula_vertex(
378        &mut self,
379        vid: VertexId,
380        ast_id: AstNodeId,
381        volatile: bool,
382        dynamic: bool,
383    ) {
384        if self.vertex_formulas.contains_key(&vid) {
385            self.remove_dependent_edges(vid);
386        }
387        self.store
388            .set_kind(vid, crate::engine::vertex::VertexKind::FormulaScalar);
389        self.vertex_values.remove(&vid);
390        self.vertex_formulas.insert(vid, ast_id);
391        self.mark_volatile(vid, volatile);
392        self.store.set_dynamic(vid, dynamic);
393
394        // schedule evaluation
395        self.mark_vertex_dirty(vid);
396    }
397
398    /// Public wrapper for adding edges without beginning a batch (caller manages batch)
399    pub fn add_edges_nobatch(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
400        self.add_dependent_edges_nobatch(dependent, dependencies);
401    }
402
403    /// Iterate all normal vertex ids
404    pub fn iter_vertex_ids(&self) -> impl Iterator<Item = VertexId> + '_ {
405        self.store.all_vertices()
406    }
407
408    /// Get current AbsCoord for a vertex
409    pub fn vertex_coord(&self, vid: VertexId) -> AbsCoord {
410        self.store.coord(vid)
411    }
412
413    /// Total number of allocated vertices (including deleted)
414    pub fn vertex_count(&self) -> usize {
415        self.store.len()
416    }
417
418    /// Replace CSR edges in one shot from adjacency and coords
419    pub fn build_edges_from_adjacency(
420        &mut self,
421        adjacency: Vec<(u32, Vec<u32>)>,
422        coords: Vec<AbsCoord>,
423        vertex_ids: Vec<u32>,
424    ) {
425        self.edges
426            .build_from_adjacency(adjacency, coords, vertex_ids);
427    }
428    /// Compute min/max used row among vertices within [start_col..=end_col] on a sheet.
429    pub fn used_row_bounds_for_columns(
430        &self,
431        sheet_id: SheetId,
432        start_col: u32,
433        end_col: u32,
434    ) -> Option<(u32, u32)> {
435        // Prefer sheet index when available
436        if let Some(index) = self.sheet_indexes.get(&sheet_id)
437            && !index.is_empty()
438        {
439            let mut min_r: Option<u32> = None;
440            let mut max_r: Option<u32> = None;
441            for vid in index.vertices_in_col_range(start_col, end_col) {
442                let r = self.store.coord(vid).row();
443                min_r = Some(min_r.map(|m| m.min(r)).unwrap_or(r));
444                max_r = Some(max_r.map(|m| m.max(r)).unwrap_or(r));
445            }
446            return match (min_r, max_r) {
447                (Some(a), Some(b)) => Some((a, b)),
448                _ => None,
449            };
450        }
451        // Fallback: scan cell map for bounds on the fly
452        let mut min_r: Option<u32> = None;
453        let mut max_r: Option<u32> = None;
454        for cref in self.cell_to_vertex.keys() {
455            if cref.sheet_id == sheet_id {
456                let c = cref.coord.col();
457                if c >= start_col && c <= end_col {
458                    let r = cref.coord.row();
459                    min_r = Some(min_r.map(|m| m.min(r)).unwrap_or(r));
460                    max_r = Some(max_r.map(|m| m.max(r)).unwrap_or(r));
461                }
462            }
463        }
464        match (min_r, max_r) {
465            (Some(a), Some(b)) => Some((a, b)),
466            _ => None,
467        }
468    }
469
470    /// Build (or rebuild) the sheet index for a given sheet if running in Lazy mode.
471    pub fn finalize_sheet_index(&mut self, sheet: &str) {
472        let Some(sheet_id) = self.sheet_reg.get_id(sheet) else {
473            return;
474        };
475        // If already present and non-empty, skip
476        if let Some(idx) = self.sheet_indexes.get(&sheet_id)
477            && !idx.is_empty()
478        {
479            return;
480        }
481        let mut idx = SheetIndex::new();
482        // Collect coords for this sheet
483        let mut batch: Vec<(AbsCoord, VertexId)> = Vec::with_capacity(self.cell_to_vertex.len());
484        for (cref, vid) in &self.cell_to_vertex {
485            if cref.sheet_id == sheet_id {
486                batch.push((AbsCoord::new(cref.coord.row(), cref.coord.col()), *vid));
487            }
488        }
489        // Use batch builder
490        idx.add_vertices_batch(&batch);
491        self.sheet_indexes.insert(sheet_id, idx);
492    }
493
494    pub fn set_sheet_index_mode(&mut self, mode: crate::engine::SheetIndexMode) {
495        self.config.sheet_index_mode = mode;
496    }
497
498    /// Compute min/max used column among vertices within [start_row..=end_row] on a sheet.
499    pub fn used_col_bounds_for_rows(
500        &self,
501        sheet_id: SheetId,
502        start_row: u32,
503        end_row: u32,
504    ) -> Option<(u32, u32)> {
505        if let Some(index) = self.sheet_indexes.get(&sheet_id)
506            && !index.is_empty()
507        {
508            let mut min_c: Option<u32> = None;
509            let mut max_c: Option<u32> = None;
510            for vid in index.vertices_in_row_range(start_row, end_row) {
511                let c = self.store.coord(vid).col();
512                min_c = Some(min_c.map(|m| m.min(c)).unwrap_or(c));
513                max_c = Some(max_c.map(|m| m.max(c)).unwrap_or(c));
514            }
515            return match (min_c, max_c) {
516                (Some(a), Some(b)) => Some((a, b)),
517                _ => None,
518            };
519        }
520        // Fallback: scan cell map on the fly
521        let mut min_c: Option<u32> = None;
522        let mut max_c: Option<u32> = None;
523        for cref in self.cell_to_vertex.keys() {
524            if cref.sheet_id == sheet_id {
525                let r = cref.coord.row();
526                if r >= start_row && r <= end_row {
527                    let c = cref.coord.col();
528                    min_c = Some(min_c.map(|m| m.min(c)).unwrap_or(c));
529                    max_c = Some(max_c.map(|m| m.max(c)).unwrap_or(c));
530                }
531            }
532        }
533        match (min_c, max_c) {
534            (Some(a), Some(b)) => Some((a, b)),
535            _ => None,
536        }
537    }
538
539    /// Returns true if the given sheet currently contains any formula vertices.
540    pub fn sheet_has_formulas(&self, sheet_id: SheetId) -> bool {
541        // Check vertex_formulas keys; they represent formula vertices
542        for &vid in self.vertex_formulas.keys() {
543            if self.store.sheet_id(vid) == sheet_id {
544                return true;
545            }
546        }
547        false
548    }
549    pub fn new() -> Self {
550        Self::new_with_config(super::EvalConfig::default())
551    }
552
553    pub fn new_with_config(config: super::EvalConfig) -> Self {
554        let mut sheet_reg = SheetRegistry::new();
555        let default_sheet_id = sheet_reg.id_for(&config.default_sheet_name);
556
557        let mut g = Self {
558            store: VertexStore::new(),
559            edges: CsrMutableEdges::new(),
560            data_store: DataStore::new(),
561            vertex_values: FxHashMap::default(),
562            vertex_formulas: FxHashMap::default(),
563            // Phase 1 (ticket 610): Arrow-truth is the only supported mode.
564            // The dependency graph does not cache cell/formula literal payloads.
565            value_cache_enabled: false,
566            #[cfg(debug_assertions)]
567            graph_value_read_attempts: AtomicU64::new(0),
568            cell_to_vertex: FxHashMap::default(),
569            dirty_vertices: FxHashSet::default(),
570            volatile_vertices: FxHashSet::default(),
571            ref_error_vertices: FxHashSet::default(),
572            formula_to_range_deps: FxHashMap::default(),
573            stripe_to_dependents: FxHashMap::default(),
574            sheet_indexes: FxHashMap::default(),
575            sheet_reg,
576            default_sheet_id,
577            named_ranges: FxHashMap::default(),
578            named_ranges_lookup: FxHashMap::default(),
579            sheet_named_ranges: FxHashMap::default(),
580            sheet_named_ranges_lookup: FxHashMap::default(),
581            vertex_to_names: FxHashMap::default(),
582            name_vertex_lookup: FxHashMap::default(),
583            pending_name_links: FxHashMap::default(),
584            vertex_to_pending_names: FxHashMap::default(),
585            tables: FxHashMap::default(),
586            tables_lookup: FxHashMap::default(),
587            table_vertex_lookup: FxHashMap::default(),
588            source_scalars: FxHashMap::default(),
589            source_tables: FxHashMap::default(),
590            source_vertex_lookup: FxHashMap::default(),
591            name_vertex_seq: 0,
592            source_vertex_seq: 0,
593            cell_to_name_dependents: FxHashMap::default(),
594            name_to_cell_dependencies: FxHashMap::default(),
595            config: config.clone(),
596            pk_order: None,
597            spill_anchor_to_cells: FxHashMap::default(),
598            spill_cell_to_anchor: FxHashMap::default(),
599            first_load_assume_new: false,
600            ensure_touched_sheets: FxHashSet::default(),
601            tombstone_registry: TombstoneRegistry::default(),
602            #[cfg(test)]
603            instr: std::sync::Mutex::new(GraphInstrumentation::default()),
604        };
605
606        if config.use_dynamic_topo {
607            // Seed with currently active vertices (likely empty at startup)
608            let nodes = g
609                .store
610                .all_vertices()
611                .filter(|&id| g.store.vertex_exists_active(id));
612            let mut pk = DynamicTopo::new(
613                nodes,
614                PkConfig {
615                    visit_budget: config.pk_visit_budget,
616                    compaction_interval_ops: config.pk_compaction_interval_ops,
617                },
618            );
619            // Build an initial order using current graph
620            let adapter = GraphAdapter { g: &g };
621            pk.rebuild_full(&adapter);
622            g.pk_order = Some(pk);
623        }
624
625        g
626    }
627
628    /// When dynamic topology is enabled, compute layers for a subset using PK ordering.
629    pub(crate) fn pk_layers_for(&self, subset: &[VertexId]) -> Option<Vec<crate::engine::Layer>> {
630        let pk = self.pk_order.as_ref()?;
631        let adapter = crate::engine::topo::GraphAdapter { g: self };
632        let layers = pk.layers_for(&adapter, subset, self.config.max_layer_width);
633        Some(
634            layers
635                .into_iter()
636                .map(|vs| crate::engine::Layer { vertices: vs })
637                .collect(),
638        )
639    }
640
641    #[inline]
642    pub(crate) fn dynamic_topo_enabled(&self) -> bool {
643        self.pk_order.is_some()
644    }
645
646    #[cfg(test)]
647    pub fn reset_instr(&mut self) {
648        if let Ok(mut g) = self.instr.lock() {
649            *g = GraphInstrumentation::default();
650        }
651    }
652
653    #[cfg(test)]
654    pub fn instr(&self) -> GraphInstrumentation {
655        self.instr.lock().map(|g| g.clone()).unwrap_or_default()
656    }
657
658    /// Begin batch operations - defer CSR rebuilds until end_batch() is called
659    pub fn begin_batch(&mut self) {
660        self.edges.begin_batch();
661    }
662
663    /// End batch operations and trigger CSR rebuild if needed
664    pub fn end_batch(&mut self) {
665        self.edges.end_batch();
666    }
667
668    pub fn default_sheet_id(&self) -> SheetId {
669        self.default_sheet_id
670    }
671
672    pub fn default_sheet_name(&self) -> &str {
673        self.sheet_reg.name(self.default_sheet_id)
674    }
675
676    pub fn set_default_sheet_by_name(&mut self, name: &str) {
677        self.default_sheet_id = self.sheet_id_mut(name);
678    }
679
680    pub fn set_default_sheet_by_id(&mut self, id: SheetId) {
681        self.default_sheet_id = id;
682    }
683
684    /// Returns the ID for a sheet name, creating one if it doesn't exist.
685    pub fn sheet_id_mut(&mut self, name: &str) -> SheetId {
686        self.sheet_reg.id_for(name)
687    }
688
689    pub fn sheet_id(&self, name: &str) -> Option<SheetId> {
690        self.sheet_reg.get_id(name)
691    }
692
693    /// Resolve a sheet name to an existing ID or return a #REF! error.
694    fn resolve_existing_sheet_id(&self, name: &str) -> Result<SheetId, ExcelError> {
695        self.sheet_id(name).ok_or_else(|| {
696            ExcelError::new(ExcelErrorKind::Ref).with_message(format!("Sheet not found: {name}"))
697        })
698    }
699
700    /// Returns the name of a sheet given its ID.
701    pub fn sheet_name(&self, id: SheetId) -> &str {
702        self.sheet_reg.name(id)
703    }
704
705    /// Access the sheet registry (read-only) for external bindings
706    pub fn sheet_reg(&self) -> &SheetRegistry {
707        &self.sheet_reg
708    }
709
710    pub(crate) fn data_store(&self) -> &DataStore {
711        &self.data_store
712    }
713
714    /// Converts a `CellRef` to a fully qualified A1-style string (e.g., "SheetName!A1").
715    pub fn to_a1(&self, cell_ref: CellRef) -> String {
716        format!("{}!{}", self.sheet_name(cell_ref.sheet_id), cell_ref.coord)
717    }
718
719    pub(crate) fn vertex_len(&self) -> usize {
720        self.store.len()
721    }
722
723    /// Get mutable access to a sheet's index, creating it if it doesn't exist
724    /// This is the primary way VertexEditor and internal operations access the index
725    pub fn sheet_index_mut(&mut self, sheet_id: SheetId) -> &mut SheetIndex {
726        self.sheet_indexes.entry(sheet_id).or_default()
727    }
728
729    /// Get immutable access to a sheet's index, returns None if not initialized
730    pub fn sheet_index(&self, sheet_id: SheetId) -> Option<&SheetIndex> {
731        self.sheet_indexes.get(&sheet_id)
732    }
733
734    /// Set a value in a cell, returns affected vertex IDs
735    pub fn set_cell_value(
736        &mut self,
737        sheet: &str,
738        row: u32,
739        col: u32,
740        value: LiteralValue,
741    ) -> Result<OperationSummary, ExcelError> {
742        let value = normalize_stored_literal(value);
743        let sheet_id = self.sheet_id_mut(sheet);
744        // External API is 1-based; store 0-based coords internally.
745        let coord = Coord::from_excel(row, col, true, true);
746        let addr = CellRef::new(sheet_id, coord);
747        let mut created_placeholders = Vec::new();
748
749        let vertex_id = if let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
750            // Check if it was a formula and remove dependencies
751            let is_formula = matches!(
752                self.store.kind(existing_id),
753                VertexKind::FormulaScalar | VertexKind::FormulaArray
754            );
755
756            if is_formula {
757                self.remove_dependent_edges(existing_id);
758                self.detach_vertex_from_names(existing_id);
759                self.clear_pending_name_references(existing_id);
760                self.vertex_formulas.remove(&existing_id);
761            }
762
763            // Update to value kind
764            self.store.set_kind(existing_id, VertexKind::Cell);
765            if self.value_cache_enabled {
766                let value_ref = self.data_store.store_value(value);
767                self.vertex_values.insert(existing_id, value_ref);
768            } else {
769                // Ensure no stale payload remains if cache is disabled.
770                self.vertex_values.remove(&existing_id);
771            }
772            existing_id
773        } else {
774            // Create new vertex
775            created_placeholders.push(addr);
776            let packed_coord = AbsCoord::from_excel(row, col);
777            let vertex_id = self.store.allocate(packed_coord, sheet_id, 0x01); // dirty flag
778
779            // Add vertex coordinate for CSR
780            self.edges.add_vertex(packed_coord, vertex_id.0);
781
782            // Add to sheet index for O(log n + k) range queries
783            self.sheet_index_mut(sheet_id)
784                .add_vertex(packed_coord, vertex_id);
785
786            self.store.set_kind(vertex_id, VertexKind::Cell);
787            if self.value_cache_enabled {
788                let value_ref = self.data_store.store_value(value);
789                self.vertex_values.insert(vertex_id, value_ref);
790            }
791            self.cell_to_vertex.insert(addr, vertex_id);
792            vertex_id
793        };
794
795        // Cell edits clear any structural #REF! marking for this vertex.
796        self.ref_error_vertices.remove(&vertex_id);
797
798        Ok(OperationSummary {
799            affected_vertices: self.mark_dirty(vertex_id),
800            created_placeholders,
801        })
802    }
803
804    /// Reserve capacity hints for upcoming bulk cell inserts (values only for now).
805    pub fn reserve_cells(&mut self, additional: usize) {
806        self.store.reserve(additional);
807        if self.value_cache_enabled {
808            self.vertex_values.reserve(additional);
809        }
810        self.cell_to_vertex.reserve(additional);
811        // sheet_indexes: cannot easily reserve per-sheet without distribution; skip.
812    }
813
814    /// Fast path for initial bulk load of value cells: avoids dirty propagation & dependency work.
815    pub fn set_cell_value_bulk_untracked(
816        &mut self,
817        sheet: &str,
818        row: u32,
819        col: u32,
820        value: LiteralValue,
821    ) {
822        let value = normalize_stored_literal(value);
823        let sheet_id = self.sheet_id_mut(sheet);
824        let coord = Coord::from_excel(row, col, true, true);
825        let addr = CellRef::new(sheet_id, coord);
826        if let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
827            // Overwrite existing value vertex only (ignore formulas in bulk path)
828            if matches!(
829                self.store.kind(existing_id),
830                VertexKind::FormulaScalar | VertexKind::FormulaArray
831            ) {
832                self.remove_dependent_edges(existing_id);
833                self.detach_vertex_from_names(existing_id);
834                self.clear_pending_name_references(existing_id);
835                self.vertex_formulas.remove(&existing_id);
836            }
837            if self.value_cache_enabled {
838                let value_ref = self.data_store.store_value(value);
839                self.vertex_values.insert(existing_id, value_ref);
840            } else {
841                self.vertex_values.remove(&existing_id);
842            }
843            self.store.set_kind(existing_id, VertexKind::Cell);
844            self.ref_error_vertices.remove(&existing_id);
845            return;
846        }
847        let packed_coord = AbsCoord::from_excel(row, col);
848        let vertex_id = self.store.allocate(packed_coord, sheet_id, 0x00); // not dirty
849        self.edges.add_vertex(packed_coord, vertex_id.0);
850        self.sheet_index_mut(sheet_id)
851            .add_vertex(packed_coord, vertex_id);
852        self.store.set_kind(vertex_id, VertexKind::Cell);
853        self.ref_error_vertices.remove(&vertex_id);
854        if self.value_cache_enabled {
855            let value_ref = self.data_store.store_value(value);
856            self.vertex_values.insert(vertex_id, value_ref);
857        }
858        self.cell_to_vertex.insert(addr, vertex_id);
859    }
860
861    /// Bulk insert a collection of plain value cells (no formulas) more efficiently.
862    pub fn bulk_insert_values<I>(&mut self, sheet: &str, cells: I)
863    where
864        I: IntoIterator<Item = (u32, u32, LiteralValue)>,
865    {
866        use crate::instant::FzInstant as Instant;
867        let t0 = Instant::now();
868        // Collect first to know size
869        let collected: Vec<(u32, u32, LiteralValue)> = cells.into_iter().collect();
870        if collected.is_empty() {
871            return;
872        }
873        let sheet_id = self.sheet_id_mut(sheet);
874        self.reserve_cells(collected.len());
875        let t_reserve = Instant::now();
876        let mut new_vertices: Vec<(AbsCoord, u32)> = Vec::with_capacity(collected.len());
877        let mut index_items: Vec<(AbsCoord, VertexId)> = Vec::with_capacity(collected.len());
878        // For new allocations, accumulate values and assign after a single batch store
879        let mut new_value_coords: Vec<(AbsCoord, VertexId)> = Vec::with_capacity(collected.len());
880        let mut new_value_literals: Vec<LiteralValue> = Vec::with_capacity(collected.len());
881        // Detect fast path: during initial ingest, caller may guarantee most cells are new.
882        let assume_new = self.first_load_assume_new
883            && self
884                .sheet_id(sheet)
885                .map(|sid| !self.ensure_touched_sheets.contains(&sid))
886                .unwrap_or(false);
887
888        for (row, col, value) in collected {
889            let value = normalize_stored_literal(value);
890            let coord = Coord::from_excel(row, col, true, true);
891            let addr = CellRef::new(sheet_id, coord);
892            if !assume_new && let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
893                if matches!(
894                    self.store.kind(existing_id),
895                    VertexKind::FormulaScalar | VertexKind::FormulaArray
896                ) {
897                    self.remove_dependent_edges(existing_id);
898                    self.detach_vertex_from_names(existing_id);
899                    self.clear_pending_name_references(existing_id);
900                    self.vertex_formulas.remove(&existing_id);
901                }
902                if self.value_cache_enabled {
903                    let value_ref = self.data_store.store_value(value);
904                    self.vertex_values.insert(existing_id, value_ref);
905                } else {
906                    self.vertex_values.remove(&existing_id);
907                }
908                self.store.set_kind(existing_id, VertexKind::Cell);
909                continue;
910            }
911            let packed = AbsCoord::from_excel(row, col);
912            let vertex_id = self.store.allocate(packed, sheet_id, 0x00);
913            self.store.set_kind(vertex_id, VertexKind::Cell);
914            // Defer value arena storage to a single batch
915            new_value_coords.push((packed, vertex_id));
916            new_value_literals.push(value);
917            self.cell_to_vertex.insert(addr, vertex_id);
918            new_vertices.push((packed, vertex_id.0));
919            index_items.push((packed, vertex_id));
920        }
921        // Perform a single batch store for newly allocated values
922        if self.value_cache_enabled && !new_value_literals.is_empty() {
923            let vrefs = self.data_store.store_values_batch(new_value_literals);
924            debug_assert_eq!(vrefs.len(), new_value_coords.len());
925            for (i, (_pc, vid)) in new_value_coords.iter().enumerate() {
926                self.vertex_values.insert(*vid, vrefs[i]);
927            }
928        }
929        let t_after_alloc = Instant::now();
930        if !new_vertices.is_empty() {
931            let t_edges_start = Instant::now();
932            self.edges.add_vertices_batch(&new_vertices);
933            let t_edges_done = Instant::now();
934
935            match self.config.sheet_index_mode {
936                crate::engine::SheetIndexMode::Eager => {
937                    self.sheet_index_mut(sheet_id)
938                        .add_vertices_batch(&index_items);
939                }
940                crate::engine::SheetIndexMode::Lazy => {
941                    // Skip building index now; will be built on-demand
942                }
943                crate::engine::SheetIndexMode::FastBatch => {
944                    // FastBatch for now delegates to same batch insert (future: build from sorted arrays)
945                    self.sheet_index_mut(sheet_id)
946                        .add_vertices_batch(&index_items);
947                }
948            }
949            let t_index_done = Instant::now();
950        }
951    }
952
953    /// Set a formula in a cell, returns affected vertex IDs
954    pub fn set_cell_formula(
955        &mut self,
956        sheet: &str,
957        row: u32,
958        col: u32,
959        ast: ASTNode,
960    ) -> Result<OperationSummary, ExcelError> {
961        let volatile = self.is_ast_volatile(&ast);
962        self.set_cell_formula_with_volatility(sheet, row, col, ast, volatile)
963    }
964
965    /// Set a formula in a cell with a known volatility flag (context-scoped detection upstream)
966    pub fn set_cell_formula_with_volatility(
967        &mut self,
968        sheet: &str,
969        row: u32,
970        col: u32,
971        ast: ASTNode,
972        volatile: bool,
973    ) -> Result<OperationSummary, ExcelError> {
974        let dbg = std::env::var("FZ_DEBUG_LOAD")
975            .ok()
976            .is_some_and(|v| v != "0");
977        let dep_ms_thresh: u128 = std::env::var("FZ_DEBUG_DEP_MS")
978            .ok()
979            .and_then(|s| s.parse().ok())
980            .unwrap_or(0);
981        let sample_n: usize = std::env::var("FZ_DEBUG_SAMPLE_N")
982            .ok()
983            .and_then(|s| s.parse().ok())
984            .unwrap_or(0);
985        let t0 = if dbg {
986            Some(crate::instant::FzInstant::now())
987        } else {
988            None
989        };
990        let sheet_id = self.sheet_id_mut(sheet);
991        let coord = Coord::from_excel(row, col, true, true);
992        let addr = CellRef::new(sheet_id, coord);
993
994        // Rewrite context-dependent structured references (e.g., this-row selectors) into
995        // concrete cell/range references for this formula cell.
996        let mut ast = ast;
997        self.rewrite_structured_references_for_cell(&mut ast, addr)?;
998
999        // Extract dependencies from AST, creating placeholders if needed
1000        let t_dep0 = if dbg {
1001            Some(crate::instant::FzInstant::now())
1002        } else {
1003            None
1004        };
1005        let (
1006            new_dependencies,
1007            new_range_dependencies,
1008            mut created_placeholders,
1009            named_dependencies,
1010            unresolved_names,
1011        ) = self.extract_dependencies_with_pending_names(&ast, sheet_id)?;
1012        if let (true, Some(t)) = (dbg, t_dep0) {
1013            let elapsed = t.elapsed().as_millis();
1014            // Only log if over threshold or sampled
1015            let do_log = (dep_ms_thresh > 0 && elapsed >= dep_ms_thresh)
1016                || (sample_n > 0 && (row as usize).is_multiple_of(sample_n));
1017            if dep_ms_thresh == 0 && sample_n == 0 {
1018                // default: very light sampling every 1000 rows
1019                if row.is_multiple_of(1000) {
1020                    eprintln!(
1021                        "[fz][dep] {}!{} extracted: deps={}, ranges={}, placeholders={}, names={} in {} ms",
1022                        self.sheet_name(sheet_id),
1023                        crate::reference::Coord::from_excel(row, col, true, true),
1024                        new_dependencies.len(),
1025                        new_range_dependencies.len(),
1026                        created_placeholders.len(),
1027                        named_dependencies.len(),
1028                        elapsed
1029                    );
1030                }
1031            } else if do_log {
1032                eprintln!(
1033                    "[fz][dep] {}!{} extracted: deps={}, ranges={}, placeholders={}, names={} in {} ms",
1034                    self.sheet_name(sheet_id),
1035                    crate::reference::Coord::from_excel(row, col, true, true),
1036                    new_dependencies.len(),
1037                    new_range_dependencies.len(),
1038                    created_placeholders.len(),
1039                    named_dependencies.len(),
1040                    elapsed
1041                );
1042            }
1043        }
1044
1045        // Check for self-reference (immediate cycle detection)
1046        let addr_vertex_id = self.get_or_create_vertex(&addr, &mut created_placeholders);
1047
1048        // Editing a formula clears any prior structural #REF! marking for this vertex.
1049        self.ref_error_vertices.remove(&addr_vertex_id);
1050
1051        if new_dependencies.contains(&addr_vertex_id) {
1052            return Err(ExcelError::new(ExcelErrorKind::Circ)
1053                .with_message("Self-reference detected".to_string()));
1054        }
1055
1056        for &name_vertex in &named_dependencies {
1057            let mut visited = FxHashSet::default();
1058            if self.name_depends_on_vertex(name_vertex, addr_vertex_id, &mut visited) {
1059                return Err(ExcelError::new(ExcelErrorKind::Circ)
1060                    .with_message("Circular reference through named range".to_string()));
1061            }
1062        }
1063
1064        // Remove old dependencies first
1065        self.remove_dependent_edges(addr_vertex_id);
1066        self.detach_vertex_from_names(addr_vertex_id);
1067        self.clear_pending_name_references(addr_vertex_id);
1068
1069        // Update vertex properties
1070        self.store
1071            .set_kind(addr_vertex_id, VertexKind::FormulaScalar);
1072        let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
1073        self.vertex_formulas.insert(addr_vertex_id, ast_id);
1074        self.store.set_dirty(addr_vertex_id, true);
1075
1076        // Clear any cached value since this is now a formula
1077        self.vertex_values.remove(&addr_vertex_id);
1078
1079        self.mark_volatile(addr_vertex_id, volatile);
1080        let dynamic = self.is_ast_dynamic(&ast);
1081        self.store.set_dynamic(addr_vertex_id, dynamic);
1082
1083        if !named_dependencies.is_empty() {
1084            self.attach_vertex_to_names(addr_vertex_id, &named_dependencies);
1085        }
1086        for unresolved_name in &unresolved_names {
1087            self.record_pending_name_reference(sheet_id, unresolved_name, addr_vertex_id);
1088        }
1089
1090        if let (true, Some(t)) = (dbg, t0) {
1091            let elapsed = t.elapsed().as_millis();
1092            let log_set = dep_ms_thresh > 0 && elapsed >= dep_ms_thresh;
1093            if log_set {
1094                eprintln!(
1095                    "[fz][set] {}!{} total {} ms",
1096                    self.sheet_name(sheet_id),
1097                    crate::reference::Coord::from_excel(row, col, true, true),
1098                    elapsed
1099                );
1100            }
1101        }
1102
1103        // Add new dependency edges
1104        self.add_dependent_edges(addr_vertex_id, &new_dependencies);
1105        self.add_range_dependent_edges(addr_vertex_id, &new_range_dependencies, sheet_id);
1106
1107        Ok(OperationSummary {
1108            affected_vertices: self.mark_dirty(addr_vertex_id),
1109            created_placeholders,
1110        })
1111    }
1112
1113    pub(crate) fn rewrite_structured_references_for_cell(
1114        &self,
1115        ast: &mut ASTNode,
1116        cell: CellRef,
1117    ) -> Result<(), ExcelError> {
1118        self.rewrite_structured_references_node(ast, cell)
1119    }
1120
1121    fn rewrite_structured_references_node(
1122        &self,
1123        node: &mut ASTNode,
1124        cell: CellRef,
1125    ) -> Result<(), ExcelError> {
1126        match &mut node.node_type {
1127            ASTNodeType::Reference { reference, .. } => {
1128                self.rewrite_structured_reference(reference, cell)
1129            }
1130            ASTNodeType::UnaryOp { expr, .. } => {
1131                self.rewrite_structured_references_node(expr, cell)
1132            }
1133            ASTNodeType::BinaryOp { left, right, .. } => {
1134                self.rewrite_structured_references_node(left, cell)?;
1135                self.rewrite_structured_references_node(right, cell)
1136            }
1137            ASTNodeType::Function { args, .. } => {
1138                for a in args.iter_mut() {
1139                    self.rewrite_structured_references_node(a, cell)?;
1140                }
1141                Ok(())
1142            }
1143            ASTNodeType::Array(rows) => {
1144                for r in rows.iter_mut() {
1145                    for item in r.iter_mut() {
1146                        self.rewrite_structured_references_node(item, cell)?;
1147                    }
1148                }
1149                Ok(())
1150            }
1151            ASTNodeType::Literal(_) => Ok(()),
1152        }
1153    }
1154
1155    fn rewrite_structured_reference(
1156        &self,
1157        reference: &mut ReferenceType,
1158        cell: CellRef,
1159    ) -> Result<(), ExcelError> {
1160        use formualizer_parse::parser::{SpecialItem, TableSpecifier};
1161
1162        let ReferenceType::Table(tref) = reference else {
1163            return Ok(());
1164        };
1165
1166        // This-row shorthand: parsed as an unnamed table reference with a Combination specifier.
1167        if !tref.name.is_empty() {
1168            return Ok(());
1169        }
1170
1171        let col_name = match &tref.specifier {
1172            Some(TableSpecifier::Combination(parts)) => {
1173                let mut saw_this_row = false;
1174                let mut col: Option<&str> = None;
1175                for p in parts {
1176                    match p.as_ref() {
1177                        TableSpecifier::SpecialItem(SpecialItem::ThisRow) => {
1178                            saw_this_row = true;
1179                        }
1180                        TableSpecifier::Column(c) => {
1181                            if col.is_some() {
1182                                return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1183                                    "This-row structured reference with multiple columns is not supported"
1184                                        .to_string(),
1185                                ));
1186                            }
1187                            col = Some(c.as_str());
1188                        }
1189                        other => {
1190                            return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1191                                format!(
1192                                    "Unsupported this-row structured reference component: {other}"
1193                                ),
1194                            ));
1195                        }
1196                    }
1197                }
1198                if !saw_this_row {
1199                    return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1200                        "Unnamed structured reference requires a this-row selector".to_string(),
1201                    ));
1202                }
1203                col.ok_or_else(|| {
1204                    ExcelError::new(ExcelErrorKind::NImpl).with_message(
1205                        "This-row structured reference missing column selector".to_string(),
1206                    )
1207                })?
1208            }
1209            _ => {
1210                return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1211                    "Unnamed structured reference form is not supported".to_string(),
1212                ));
1213            }
1214        };
1215
1216        let Some(table) = self.find_table_containing_cell(cell) else {
1217            return Err(ExcelError::new(ExcelErrorKind::Name)
1218                .with_message("This-row structured reference used outside a table".to_string()));
1219        };
1220
1221        let row0 = cell.coord.row();
1222        let col0 = cell.coord.col();
1223        let sr0 = table.range.start.coord.row();
1224        let sc0 = table.range.start.coord.col();
1225        let er0 = table.range.end.coord.row();
1226        let ec0 = table.range.end.coord.col();
1227
1228        if row0 < sr0 || row0 > er0 || col0 < sc0 || col0 > ec0 {
1229            return Err(ExcelError::new(ExcelErrorKind::Name)
1230                .with_message("This-row structured reference used outside a table".to_string()));
1231        }
1232
1233        if table.header_row && row0 == sr0 {
1234            return Err(ExcelError::new(ExcelErrorKind::Ref).with_message(
1235                "This-row structured references are not valid in the table header row".to_string(),
1236            ));
1237        }
1238
1239        let data_start = if table.header_row { sr0 + 1 } else { sr0 };
1240        if row0 < data_start {
1241            return Err(ExcelError::new(ExcelErrorKind::Ref).with_message(
1242                "This-row structured references require a data/totals row context".to_string(),
1243            ));
1244        }
1245
1246        let Some(idx) = table.col_index(col_name) else {
1247            return Err(ExcelError::new(ExcelErrorKind::Ref).with_message(format!(
1248                "Unknown table column in this-row reference: {col_name}"
1249            )));
1250        };
1251        let target_col0 = sc0 + (idx as u32);
1252        let target_row = row0 + 1;
1253        let target_col = target_col0 + 1;
1254
1255        *reference = ReferenceType::Cell {
1256            sheet: None,
1257            row: target_row,
1258            col: target_col,
1259            row_abs: true,
1260            col_abs: true,
1261        };
1262
1263        Ok(())
1264    }
1265
1266    fn find_table_containing_cell(&self, cell: CellRef) -> Option<&tables::TableEntry> {
1267        let row0 = cell.coord.row();
1268        let col0 = cell.coord.col();
1269
1270        let mut best: Option<&tables::TableEntry> = None;
1271        let mut best_area: u64 = u64::MAX;
1272        let mut best_name: &str = "";
1273
1274        for t in self.tables.values() {
1275            if t.sheet_id() != cell.sheet_id {
1276                continue;
1277            }
1278            let sr0 = t.range.start.coord.row();
1279            let sc0 = t.range.start.coord.col();
1280            let er0 = t.range.end.coord.row();
1281            let ec0 = t.range.end.coord.col();
1282            if row0 < sr0 || row0 > er0 || col0 < sc0 || col0 > ec0 {
1283                continue;
1284            }
1285
1286            let h = (er0 - sr0 + 1) as u64;
1287            let w = (ec0 - sc0 + 1) as u64;
1288            let area = h.saturating_mul(w);
1289            let name = t.name.as_str();
1290            let better = match best {
1291                None => true,
1292                Some(_) => area < best_area || (area == best_area && name < best_name),
1293            };
1294            if better {
1295                best = Some(t);
1296                best_area = area;
1297                best_name = name;
1298            }
1299        }
1300
1301        best
1302    }
1303
1304    pub fn set_cell_value_ref(
1305        &mut self,
1306        cell: formualizer_common::SheetCellRef<'_>,
1307        value: LiteralValue,
1308    ) -> Result<OperationSummary, ExcelError> {
1309        let owned = cell.into_owned();
1310        let sheet_id = match owned.sheet {
1311            formualizer_common::SheetLocator::Id(id) => id,
1312            formualizer_common::SheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
1313            formualizer_common::SheetLocator::Current => self.default_sheet_id,
1314        };
1315        let sheet_name = self.sheet_name(sheet_id).to_string();
1316        self.set_cell_value(
1317            &sheet_name,
1318            owned.coord.row() + 1,
1319            owned.coord.col() + 1,
1320            value,
1321        )
1322    }
1323
1324    pub fn set_cell_formula_ref(
1325        &mut self,
1326        cell: formualizer_common::SheetCellRef<'_>,
1327        ast: ASTNode,
1328    ) -> Result<OperationSummary, ExcelError> {
1329        let owned = cell.into_owned();
1330        let sheet_id = match owned.sheet {
1331            formualizer_common::SheetLocator::Id(id) => id,
1332            formualizer_common::SheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
1333            formualizer_common::SheetLocator::Current => self.default_sheet_id,
1334        };
1335        let sheet_name = self.sheet_name(sheet_id).to_string();
1336        self.set_cell_formula(
1337            &sheet_name,
1338            owned.coord.row() + 1,
1339            owned.coord.col() + 1,
1340            ast,
1341        )
1342    }
1343
1344    pub fn get_cell_value_ref(
1345        &self,
1346        cell: formualizer_common::SheetCellRef<'_>,
1347    ) -> Option<LiteralValue> {
1348        let owned = cell.into_owned();
1349        let sheet_id = match owned.sheet {
1350            formualizer_common::SheetLocator::Id(id) => id,
1351            formualizer_common::SheetLocator::Name(name) => self.sheet_id(name.as_ref())?,
1352            formualizer_common::SheetLocator::Current => self.default_sheet_id,
1353        };
1354        let sheet_name = self.sheet_name(sheet_id);
1355        self.get_cell_value(sheet_name, owned.coord.row() + 1, owned.coord.col() + 1)
1356    }
1357
1358    /// Get current value from a cell
1359    pub fn get_cell_value(&self, sheet: &str, row: u32, col: u32) -> Option<LiteralValue> {
1360        if !self.value_cache_enabled {
1361            #[cfg(debug_assertions)]
1362            {
1363                self.graph_value_read_attempts
1364                    .fetch_add(1, Ordering::Relaxed);
1365            }
1366            return None;
1367        }
1368        let sheet_id = self.sheet_reg.get_id(sheet)?;
1369        let coord = Coord::from_excel(row, col, true, true);
1370        let addr = CellRef::new(sheet_id, coord);
1371
1372        self.cell_to_vertex.get(&addr).and_then(|&vertex_id| {
1373            // Check values hashmap (stores both cell values and formula results)
1374            self.vertex_values
1375                .get(&vertex_id)
1376                .map(|&value_ref| self.data_store.retrieve_value(value_ref))
1377        })
1378    }
1379
1380    /// Mark vertex dirty and propagate to dependents
1381    fn mark_dirty(&mut self, vertex_id: VertexId) -> Vec<VertexId> {
1382        let mut affected = FxHashSet::default();
1383        let mut to_visit = Vec::new();
1384        let mut visited_for_propagation = FxHashSet::default();
1385
1386        // Only mark the source vertex as dirty if it's a formula
1387        // Value cells don't get marked dirty themselves but are still affected
1388        let is_formula = matches!(
1389            self.store.kind(vertex_id),
1390            VertexKind::FormulaScalar
1391                | VertexKind::FormulaArray
1392                | VertexKind::NamedScalar
1393                | VertexKind::NamedArray
1394        );
1395
1396        if is_formula {
1397            to_visit.push(vertex_id);
1398        } else {
1399            // Value cells are affected (for tracking) but not marked dirty
1400            affected.insert(vertex_id);
1401        }
1402
1403        // Initial propagation from direct and range dependents
1404        {
1405            // Get dependents (vertices that depend on this vertex)
1406            if let Some(dependents) = self.dependents_slice(vertex_id) {
1407                to_visit.extend(dependents.iter().copied());
1408            } else {
1409                let dependents = self.get_dependents(vertex_id);
1410                to_visit.extend(dependents);
1411            }
1412
1413            if let Some(name_set) = self.cell_to_name_dependents.get(&vertex_id) {
1414                for &name_vertex in name_set {
1415                    to_visit.push(name_vertex);
1416                }
1417            }
1418
1419            // Check range dependencies
1420            let view = self.store.view(vertex_id);
1421            let row = view.row();
1422            let col = view.col();
1423            let dirty_sheet_id = view.sheet_id();
1424
1425            // New stripe-based dependents lookup
1426            let mut potential_dependents = FxHashSet::default();
1427
1428            // 1. Column stripe lookup
1429            let column_key = StripeKey {
1430                sheet_id: dirty_sheet_id,
1431                stripe_type: StripeType::Column,
1432                index: col,
1433            };
1434            if let Some(dependents) = self.stripe_to_dependents.get(&column_key) {
1435                potential_dependents.extend(dependents);
1436            }
1437
1438            // 2. Row stripe lookup
1439            let row_key = StripeKey {
1440                sheet_id: dirty_sheet_id,
1441                stripe_type: StripeType::Row,
1442                index: row,
1443            };
1444            if let Some(dependents) = self.stripe_to_dependents.get(&row_key) {
1445                potential_dependents.extend(dependents);
1446            }
1447
1448            // 3. Block stripe lookup
1449            if self.config.enable_block_stripes {
1450                let block_key = StripeKey {
1451                    sheet_id: dirty_sheet_id,
1452                    stripe_type: StripeType::Block,
1453                    index: block_index(row, col),
1454                };
1455                if let Some(dependents) = self.stripe_to_dependents.get(&block_key) {
1456                    potential_dependents.extend(dependents);
1457                }
1458            }
1459
1460            // Precision check: ensure the dirtied cell is actually within the formula's range
1461            for &dep_id in &potential_dependents {
1462                if let Some(ranges) = self.formula_to_range_deps.get(&dep_id) {
1463                    for range in ranges {
1464                        let range_sheet_id = match range.sheet {
1465                            SharedSheetLocator::Id(id) => id,
1466                            _ => dirty_sheet_id,
1467                        };
1468                        if range_sheet_id != dirty_sheet_id {
1469                            continue;
1470                        }
1471                        let sr0 = range.start_row.map(|b| b.index).unwrap_or(0);
1472                        let er0 = range.end_row.map(|b| b.index).unwrap_or(u32::MAX);
1473                        let sc0 = range.start_col.map(|b| b.index).unwrap_or(0);
1474                        let ec0 = range.end_col.map(|b| b.index).unwrap_or(u32::MAX);
1475                        if row >= sr0 && row <= er0 && col >= sc0 && col <= ec0 {
1476                            to_visit.push(dep_id);
1477                            break;
1478                        }
1479                    }
1480                }
1481            }
1482        }
1483
1484        while let Some(id) = to_visit.pop() {
1485            if !visited_for_propagation.insert(id) {
1486                continue; // Already processed
1487            }
1488            affected.insert(id);
1489
1490            // Mark vertex as dirty
1491            self.store.set_dirty(id, true);
1492
1493            // Add direct dependents to visit list
1494            if let Some(dependents) = self.dependents_slice(id) {
1495                to_visit.extend(dependents.iter().copied());
1496            } else {
1497                let dependents = self.get_dependents(id);
1498                to_visit.extend(dependents);
1499            }
1500        }
1501
1502        // Add to dirty set
1503        self.dirty_vertices.extend(&affected);
1504
1505        // Return as Vec for compatibility
1506        affected.into_iter().collect()
1507    }
1508
1509    /// Get all vertices that need evaluation
1510    pub fn get_evaluation_vertices(&self) -> Vec<VertexId> {
1511        let mut combined = FxHashSet::default();
1512        combined.extend(&self.dirty_vertices);
1513        combined.extend(&self.volatile_vertices);
1514
1515        let mut result: Vec<VertexId> = combined
1516            .into_iter()
1517            .filter(|&id| {
1518                // Only include formula vertices
1519                matches!(
1520                    self.store.kind(id),
1521                    VertexKind::FormulaScalar
1522                        | VertexKind::FormulaArray
1523                        | VertexKind::NamedScalar
1524                        | VertexKind::NamedArray
1525                )
1526            })
1527            .collect();
1528        result.sort_unstable();
1529        result
1530    }
1531
1532    /// Clear dirty flags after successful evaluation
1533    pub fn clear_dirty_flags(&mut self, vertices: &[VertexId]) {
1534        for &vertex_id in vertices {
1535            self.store.set_dirty(vertex_id, false);
1536            self.dirty_vertices.remove(&vertex_id);
1537        }
1538    }
1539
1540    /// đź”® Scalability Hook: Clear volatile vertices after evaluation cycle
1541    pub fn clear_volatile_flags(&mut self) {
1542        self.volatile_vertices.clear();
1543    }
1544
1545    /// Re-marks all volatile vertices as dirty for the next evaluation cycle.
1546    pub(crate) fn redirty_volatiles(&mut self) {
1547        let volatile_ids: Vec<VertexId> = self.volatile_vertices.iter().copied().collect();
1548        for id in volatile_ids {
1549            self.mark_dirty(id);
1550        }
1551    }
1552
1553    fn get_or_create_vertex(
1554        &mut self,
1555        addr: &CellRef,
1556        created_placeholders: &mut Vec<CellRef>,
1557    ) -> VertexId {
1558        if let Some(&vertex_id) = self.cell_to_vertex.get(addr) {
1559            return vertex_id;
1560        }
1561
1562        created_placeholders.push(*addr);
1563        let packed_coord = AbsCoord::new(addr.coord.row(), addr.coord.col());
1564        let vertex_id = self.store.allocate(packed_coord, addr.sheet_id, 0x00);
1565
1566        // Add vertex coordinate for CSR
1567        self.edges.add_vertex(packed_coord, vertex_id.0);
1568
1569        // Add to sheet index for O(log n + k) range queries
1570        self.sheet_index_mut(addr.sheet_id)
1571            .add_vertex(packed_coord, vertex_id);
1572
1573        self.store.set_kind(vertex_id, VertexKind::Empty);
1574        self.cell_to_vertex.insert(*addr, vertex_id);
1575        vertex_id
1576    }
1577
1578    fn add_dependent_edges(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
1579        // Batch to avoid repeated CSR rebuilds and keep reverse edges current
1580        self.edges.begin_batch();
1581
1582        // If PK enabled, update order using a short-lived adapter without holding &mut self
1583        // Track dependencies that should be skipped if rejecting cycle-creating edges
1584        let mut skip_deps: rustc_hash::FxHashSet<VertexId> = rustc_hash::FxHashSet::default();
1585        if self.pk_order.is_some()
1586            && let Some(mut pk) = self.pk_order.take()
1587        {
1588            pk.ensure_nodes(std::iter::once(dependent));
1589            pk.ensure_nodes(dependencies.iter().copied());
1590            {
1591                let adapter = GraphAdapter { g: self };
1592                for &dep_id in dependencies {
1593                    match pk.try_add_edge(&adapter, dep_id, dependent) {
1594                        Ok(_) => {}
1595                        Err(_cycle) => {
1596                            if self.config.pk_reject_cycle_edges {
1597                                skip_deps.insert(dep_id);
1598                            } else {
1599                                pk.rebuild_full(&adapter);
1600                            }
1601                        }
1602                    }
1603                }
1604            } // drop adapter
1605            self.pk_order = Some(pk);
1606        }
1607
1608        // Now mutate engine edges; if rejecting cycles, re-check and skip those that would create cycles
1609        for &dep_id in dependencies {
1610            if self.config.pk_reject_cycle_edges && skip_deps.contains(&dep_id) {
1611                continue;
1612            }
1613            self.edges.add_edge(dependent, dep_id);
1614            #[cfg(test)]
1615            {
1616                if let Ok(mut g) = self.instr.lock() {
1617                    g.edges_added += 1;
1618                }
1619            }
1620        }
1621
1622        self.edges.end_batch();
1623    }
1624
1625    /// Like add_dependent_edges, but assumes caller is managing edges.begin_batch/end_batch
1626    fn add_dependent_edges_nobatch(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
1627        // If PK enabled, update order using a short-lived adapter without holding &mut self
1628        let mut skip_deps: rustc_hash::FxHashSet<VertexId> = rustc_hash::FxHashSet::default();
1629        if self.pk_order.is_some()
1630            && let Some(mut pk) = self.pk_order.take()
1631        {
1632            pk.ensure_nodes(std::iter::once(dependent));
1633            pk.ensure_nodes(dependencies.iter().copied());
1634            {
1635                let adapter = GraphAdapter { g: self };
1636                for &dep_id in dependencies {
1637                    match pk.try_add_edge(&adapter, dep_id, dependent) {
1638                        Ok(_) => {}
1639                        Err(_cycle) => {
1640                            if self.config.pk_reject_cycle_edges {
1641                                skip_deps.insert(dep_id);
1642                            } else {
1643                                pk.rebuild_full(&adapter);
1644                            }
1645                        }
1646                    }
1647                }
1648            }
1649            self.pk_order = Some(pk);
1650        }
1651
1652        for &dep_id in dependencies {
1653            if self.config.pk_reject_cycle_edges && skip_deps.contains(&dep_id) {
1654                continue;
1655            }
1656            self.edges.add_edge(dependent, dep_id);
1657            #[cfg(test)]
1658            {
1659                if let Ok(mut g) = self.instr.lock() {
1660                    g.edges_added += 1;
1661                }
1662            }
1663        }
1664    }
1665
1666    /// Bulk set formulas on a sheet using a single dependency plan and batched edge updates.
1667    pub fn bulk_set_formulas<I>(&mut self, sheet: &str, items: I) -> Result<usize, ExcelError>
1668    where
1669        I: IntoIterator<Item = (u32, u32, ASTNode)>,
1670    {
1671        let collected: Vec<(u32, u32, ASTNode)> = items.into_iter().collect();
1672        if collected.is_empty() {
1673            return Ok(0);
1674        }
1675        let vol_flags: Vec<bool> = collected
1676            .iter()
1677            .map(|(_, _, ast)| self.is_ast_volatile(ast))
1678            .collect();
1679        self.bulk_set_formulas_with_volatility(sheet, collected, vol_flags)
1680    }
1681
1682    pub fn bulk_set_formulas_with_volatility(
1683        &mut self,
1684        sheet: &str,
1685        collected: Vec<(u32, u32, ASTNode)>,
1686        vol_flags: Vec<bool>,
1687    ) -> Result<usize, ExcelError> {
1688        use formualizer_parse::parser::CollectPolicy;
1689        let sheet_id = self.sheet_id_mut(sheet);
1690
1691        if collected.is_empty() {
1692            return Ok(0);
1693        }
1694
1695        // 1) Build plan across all formulas (read-only, no graph mutation)
1696        let tiny_refs = collected.iter().map(|(r, c, ast)| (sheet, *r, *c, ast));
1697        let policy = CollectPolicy {
1698            expand_small_ranges: true,
1699            range_expansion_limit: self.config.range_expansion_limit,
1700            include_names: true,
1701        };
1702        let plan = crate::engine::plan::build_dependency_plan(
1703            &mut self.sheet_reg,
1704            tiny_refs,
1705            &policy,
1706            Some(&vol_flags),
1707        )?;
1708
1709        // 2) Ensure/create target vertices and referenced cells (placeholders) once
1710        let mut created_placeholders: Vec<CellRef> = Vec::new();
1711
1712        // Targets
1713        let mut target_vids: Vec<VertexId> = Vec::with_capacity(plan.formula_targets.len());
1714        for (sid, pc) in &plan.formula_targets {
1715            let addr = CellRef::new(*sid, Coord::new(pc.row(), pc.col(), true, true));
1716            let vid = if let Some(&existing) = self.cell_to_vertex.get(&addr) {
1717                existing
1718            } else {
1719                self.get_or_create_vertex(&addr, &mut created_placeholders)
1720            };
1721            target_vids.push(vid);
1722        }
1723
1724        // Global referenced cells
1725        let mut dep_vids: Vec<VertexId> = Vec::with_capacity(plan.global_cells.len());
1726        for (sid, pc) in &plan.global_cells {
1727            let addr = CellRef::new(*sid, Coord::new(pc.row(), pc.col(), true, true));
1728            let vid = if let Some(&existing) = self.cell_to_vertex.get(&addr) {
1729                existing
1730            } else {
1731                self.get_or_create_vertex(&addr, &mut created_placeholders)
1732            };
1733            dep_vids.push(vid);
1734        }
1735
1736        // 3) Store ASTs in batch and update kinds/flags/value map
1737        let ast_ids = self
1738            .data_store
1739            .store_asts_batch(collected.iter().map(|(_, _, ast)| ast), &self.sheet_reg);
1740        for (i, &tvid) in target_vids.iter().enumerate() {
1741            // If this cell already had a formula, remove its edges once here
1742            if self.vertex_formulas.contains_key(&tvid) {
1743                self.remove_dependent_edges(tvid);
1744            }
1745            self.store.set_kind(tvid, VertexKind::FormulaScalar);
1746            self.store.set_dirty(tvid, true);
1747            self.vertex_values.remove(&tvid);
1748            self.vertex_formulas.insert(tvid, ast_ids[i]);
1749            self.mark_volatile(tvid, vol_flags.get(i).copied().unwrap_or(false));
1750
1751            let dynamic = self.is_ast_dynamic(&collected[i].2);
1752            self.store.set_dynamic(tvid, dynamic);
1753        }
1754
1755        // 4) Add edges in one batch
1756        self.edges.begin_batch();
1757        for (i, tvid) in target_vids.iter().copied().enumerate() {
1758            let mut deps: Vec<VertexId> = Vec::new();
1759
1760            // Map per-formula indices into dep_vids
1761            if let Some(indices) = plan.per_formula_cells.get(i) {
1762                deps.reserve(indices.len());
1763                for &idx in indices {
1764                    if let Some(vid) = dep_vids.get(idx as usize) {
1765                        deps.push(*vid);
1766                    }
1767                }
1768            }
1769
1770            if let Some(names) = plan.per_formula_names.get(i)
1771                && !names.is_empty()
1772            {
1773                let mut name_vertices = Vec::new();
1774                let formula_sheet = plan
1775                    .formula_targets
1776                    .get(i)
1777                    .map(|(sid, _)| *sid)
1778                    .unwrap_or(sheet_id);
1779                for name in names {
1780                    if let Some(named) = self.resolve_name_entry(name, formula_sheet) {
1781                        deps.push(named.vertex);
1782                        name_vertices.push(named.vertex);
1783                    } else if let Some(source) = self.resolve_source_scalar_entry(name) {
1784                        deps.push(source.vertex);
1785                    } else {
1786                        self.record_pending_name_reference(formula_sheet, name, tvid);
1787                    }
1788                }
1789                if !name_vertices.is_empty() {
1790                    self.attach_vertex_to_names(tvid, &name_vertices);
1791                }
1792            }
1793
1794            if let Some(tables) = plan.per_formula_tables.get(i)
1795                && !tables.is_empty()
1796            {
1797                for table_name in tables {
1798                    if let Some(table) = self.resolve_table_entry(table_name) {
1799                        deps.push(table.vertex);
1800                    } else if let Some(source) = self.resolve_source_table_entry(table_name) {
1801                        deps.push(source.vertex);
1802                    }
1803                }
1804            }
1805
1806            if !deps.is_empty() {
1807                self.add_dependent_edges_nobatch(tvid, &deps);
1808            }
1809
1810            // Range deps from plan are already compact RangeKeys; register directly.
1811            if let Some(rks) = plan.per_formula_ranges.get(i) {
1812                self.add_range_deps_from_keys(tvid, rks, sheet_id);
1813            }
1814        }
1815        self.edges.end_batch();
1816
1817        Ok(collected.len())
1818    }
1819
1820    /// Public (crate) helper to add a single dependency edge (dependent -> dependency) used for restoration/undo.
1821    pub fn add_dependency_edge(&mut self, dependent: VertexId, dependency: VertexId) {
1822        if dependent == dependency {
1823            return;
1824        }
1825        // If PK enabled attempt to add maintaining ordering; fallback to rebuild if cycle
1826        if self.pk_order.is_some()
1827            && let Some(mut pk) = self.pk_order.take()
1828        {
1829            pk.ensure_nodes(std::iter::once(dependent));
1830            pk.ensure_nodes(std::iter::once(dependency));
1831            let adapter = GraphAdapter { g: self };
1832            if pk.try_add_edge(&adapter, dependency, dependent).is_err() {
1833                // Cycle: rebuild full (conservative)
1834                pk.rebuild_full(&adapter);
1835            }
1836            self.pk_order = Some(pk);
1837        }
1838        self.edges.add_edge(dependent, dependency);
1839        self.store.set_dirty(dependent, true);
1840        self.dirty_vertices.insert(dependent);
1841    }
1842
1843    fn remove_dependent_edges(&mut self, vertex: VertexId) {
1844        // Remove all outgoing edges from this vertex (its dependencies)
1845        let dependencies = self.edges.out_edges(vertex);
1846
1847        self.edges.begin_batch();
1848        if self.pk_order.is_some()
1849            && let Some(mut pk) = self.pk_order.take()
1850        {
1851            for dep in &dependencies {
1852                pk.remove_edge(*dep, vertex);
1853            }
1854            self.pk_order = Some(pk);
1855        }
1856        for dep in dependencies {
1857            self.edges.remove_edge(vertex, dep);
1858        }
1859        self.edges.end_batch();
1860
1861        // Remove range dependencies and clean up stripes
1862        if let Some(old_ranges) = self.formula_to_range_deps.remove(&vertex) {
1863            let old_sheet_id = self.store.sheet_id(vertex);
1864
1865            for range in &old_ranges {
1866                let sheet_id = match range.sheet {
1867                    SharedSheetLocator::Id(id) => id,
1868                    _ => old_sheet_id,
1869                };
1870                let s_row = range.start_row.map(|b| b.index);
1871                let e_row = range.end_row.map(|b| b.index);
1872                let s_col = range.start_col.map(|b| b.index);
1873                let e_col = range.end_col.map(|b| b.index);
1874
1875                let mut keys_to_clean = FxHashSet::default();
1876
1877                let col_stripes = (s_row.is_none() && e_row.is_none())
1878                    || (s_col.is_some() && e_col.is_some() && (s_row.is_none() || e_row.is_none()));
1879                let row_stripes = (s_col.is_none() && e_col.is_none())
1880                    || (s_row.is_some() && e_row.is_some() && (s_col.is_none() || e_col.is_none()));
1881
1882                if col_stripes && !row_stripes {
1883                    let sc = s_col.unwrap_or(0);
1884                    let ec = e_col.unwrap_or(sc);
1885                    for col in sc..=ec {
1886                        keys_to_clean.insert(StripeKey {
1887                            sheet_id,
1888                            stripe_type: StripeType::Column,
1889                            index: col,
1890                        });
1891                    }
1892                } else if row_stripes && !col_stripes {
1893                    let sr = s_row.unwrap_or(0);
1894                    let er = e_row.unwrap_or(sr);
1895                    for row in sr..=er {
1896                        keys_to_clean.insert(StripeKey {
1897                            sheet_id,
1898                            stripe_type: StripeType::Row,
1899                            index: row,
1900                        });
1901                    }
1902                } else {
1903                    let start_row = s_row.unwrap_or(0);
1904                    let start_col = s_col.unwrap_or(0);
1905                    let end_row = e_row.unwrap_or(start_row);
1906                    let end_col = e_col.unwrap_or(start_col);
1907
1908                    let height = end_row.saturating_sub(start_row) + 1;
1909                    let width = end_col.saturating_sub(start_col) + 1;
1910
1911                    if self.config.enable_block_stripes && height > 1 && width > 1 {
1912                        let start_block_row = start_row / BLOCK_H;
1913                        let end_block_row = end_row / BLOCK_H;
1914                        let start_block_col = start_col / BLOCK_W;
1915                        let end_block_col = end_col / BLOCK_W;
1916
1917                        for block_row in start_block_row..=end_block_row {
1918                            for block_col in start_block_col..=end_block_col {
1919                                keys_to_clean.insert(StripeKey {
1920                                    sheet_id,
1921                                    stripe_type: StripeType::Block,
1922                                    index: block_index(block_row * BLOCK_H, block_col * BLOCK_W),
1923                                });
1924                            }
1925                        }
1926                    } else if height > width {
1927                        for col in start_col..=end_col {
1928                            keys_to_clean.insert(StripeKey {
1929                                sheet_id,
1930                                stripe_type: StripeType::Column,
1931                                index: col,
1932                            });
1933                        }
1934                    } else {
1935                        for row in start_row..=end_row {
1936                            keys_to_clean.insert(StripeKey {
1937                                sheet_id,
1938                                stripe_type: StripeType::Row,
1939                                index: row,
1940                            });
1941                        }
1942                    }
1943                }
1944
1945                for key in keys_to_clean {
1946                    if let Some(dependents) = self.stripe_to_dependents.get_mut(&key) {
1947                        dependents.remove(&vertex);
1948                        if dependents.is_empty() {
1949                            self.stripe_to_dependents.remove(&key);
1950                            #[cfg(test)]
1951                            {
1952                                if let Ok(mut g) = self.instr.lock() {
1953                                    g.stripe_removes += 1;
1954                                }
1955                            }
1956                        }
1957                    }
1958                }
1959            }
1960        }
1961    }
1962
1963    // Removed: vertices() and get_vertex() methods - no longer needed with SoA
1964    // The old AoS Vertex struct has been eliminated in favor of direct
1965    // access to columnar data through the VertexStore
1966
1967    /// Updates the cached value of a formula vertex.
1968    pub(crate) fn update_vertex_value(&mut self, vertex_id: VertexId, value: LiteralValue) {
1969        if !self.value_cache_enabled {
1970            // Canonical mode: cell/formula vertices must not store values in the graph.
1971            match self.store.kind(vertex_id) {
1972                VertexKind::Cell
1973                | VertexKind::FormulaScalar
1974                | VertexKind::FormulaArray
1975                | VertexKind::Empty => {
1976                    self.vertex_values.remove(&vertex_id);
1977                    return;
1978                }
1979                _ => {
1980                    // Allow non-cell vertices to cache values (e.g. named-range formulas).
1981                }
1982            }
1983        }
1984        let value_ref = self.data_store.store_value(normalize_stored_literal(value));
1985        self.vertex_values.insert(vertex_id, value_ref);
1986    }
1987
1988    /// Plan a spill region for an anchor; returns #SPILL! if blocked
1989    pub fn plan_spill_region(
1990        &self,
1991        anchor: VertexId,
1992        target_cells: &[CellRef],
1993    ) -> Result<(), ExcelError> {
1994        self.plan_spill_region_allowing_formula_overwrite(anchor, target_cells, None)
1995    }
1996
1997    /// Plan a spill region, optionally allowing specific formula vertices to be overwritten.
1998    ///
1999    /// This is used by parallel evaluation to allow spill anchors to take precedence over
2000    /// other formula vertices that are being evaluated in the same layer.
2001    pub(crate) fn plan_spill_region_allowing_formula_overwrite(
2002        &self,
2003        anchor: VertexId,
2004        target_cells: &[CellRef],
2005        overwritable_formulas: Option<&rustc_hash::FxHashSet<VertexId>>,
2006    ) -> Result<(), ExcelError> {
2007        use formualizer_common::{ExcelErrorExtra, ExcelErrorKind};
2008        // Compute expected spill shape from the target rectangle for better diagnostics
2009        let (expected_rows, expected_cols) = if target_cells.is_empty() {
2010            (0u32, 0u32)
2011        } else {
2012            let mut min_r = u32::MAX;
2013            let mut max_r = 0u32;
2014            let mut min_c = u32::MAX;
2015            let mut max_c = 0u32;
2016            for cell in target_cells {
2017                let r = cell.coord.row();
2018                let c = cell.coord.col();
2019                if r < min_r {
2020                    min_r = r;
2021                }
2022                if r > max_r {
2023                    max_r = r;
2024                }
2025                if c < min_c {
2026                    min_c = c;
2027                }
2028                if c > max_c {
2029                    max_c = c;
2030                }
2031            }
2032            (
2033                max_r.saturating_sub(min_r).saturating_add(1),
2034                max_c.saturating_sub(min_c).saturating_add(1),
2035            )
2036        };
2037        // Allow overlapping with previously owned spill cells by this anchor
2038        for cell in target_cells {
2039            // If cell is already owned by this anchor's previous spill, it's allowed.
2040            let owned_by_anchor = match self.spill_cell_to_anchor.get(cell) {
2041                Some(&existing_anchor) if existing_anchor == anchor => true,
2042                Some(_other) => {
2043                    return Err(ExcelError::new(ExcelErrorKind::Spill)
2044                        .with_message("BlockedBySpill")
2045                        .with_extra(ExcelErrorExtra::Spill {
2046                            expected_rows,
2047                            expected_cols,
2048                        }));
2049                }
2050                None => false,
2051            };
2052
2053            if owned_by_anchor {
2054                continue;
2055            }
2056
2057            // If cell is occupied by another formula anchor, block unless explicitly allowed.
2058            if let Some(&vid) = self.cell_to_vertex.get(cell)
2059                && vid != anchor
2060            {
2061                // Prevent clobbering formulas (array or scalar) in the target area
2062                match self.store.kind(vid) {
2063                    VertexKind::FormulaScalar | VertexKind::FormulaArray => {
2064                        if let Some(allow) = overwritable_formulas
2065                            && allow.contains(&vid)
2066                        {
2067                            continue;
2068                        }
2069                        return Err(ExcelError::new(ExcelErrorKind::Spill)
2070                            .with_message("BlockedByFormula")
2071                            .with_extra(ExcelErrorExtra::Spill {
2072                                expected_rows,
2073                                expected_cols,
2074                            }));
2075                    }
2076                    _ => {
2077                        // If a non-empty value exists (and not this anchor), block
2078                        if let Some(vref) = self.vertex_values.get(&vid) {
2079                            let v = self.data_store.retrieve_value(*vref);
2080                            if !matches!(v, LiteralValue::Empty) {
2081                                return Err(ExcelError::new(ExcelErrorKind::Spill)
2082                                    .with_message("BlockedByValue")
2083                                    .with_extra(ExcelErrorExtra::Spill {
2084                                        expected_rows,
2085                                        expected_cols,
2086                                    }));
2087                            }
2088                        }
2089                    }
2090                }
2091            }
2092        }
2093        Ok(())
2094    }
2095
2096    // Note: non-atomic commit_spill_region has been removed. All callers must use
2097    // commit_spill_region_atomic_with_fault for atomicity and rollback on failure.
2098
2099    /// Commit a spill atomically with an internal shadow buffer and optional fault injection.
2100    /// If a fault is injected partway through, all changes are rolled back to the pre-commit state.
2101    /// This does not change behavior under normal operation; it's primarily for Phase 3 guarantees and tests.
2102    pub fn commit_spill_region_atomic_with_fault(
2103        &mut self,
2104        anchor: VertexId,
2105        target_cells: Vec<CellRef>,
2106        values: Vec<Vec<LiteralValue>>,
2107        fault_after_ops: Option<usize>,
2108    ) -> Result<(), ExcelError> {
2109        use rustc_hash::FxHashSet;
2110
2111        // Anchor cell coordinates (0-based) for special-casing writes.
2112        // We must never overwrite the anchor via set_cell_value(), because that would
2113        // strip the formula and break incremental recalculation.
2114        let anchor_cell = self
2115            .get_cell_ref(anchor)
2116            .expect("anchor cell ref for spill commit");
2117        let anchor_sheet_name = self.sheet_name(anchor_cell.sheet_id).to_string();
2118        let anchor_row = anchor_cell.coord.row();
2119        let anchor_col = anchor_cell.coord.col();
2120
2121        // Capture previous owned cells for this anchor
2122        let prev_cells = self
2123            .spill_anchor_to_cells
2124            .get(&anchor)
2125            .cloned()
2126            .unwrap_or_default();
2127        let new_set: FxHashSet<CellRef> = target_cells.iter().copied().collect();
2128        let prev_set: FxHashSet<CellRef> = prev_cells.iter().copied().collect();
2129
2130        // Compose operation list: clears first (prev - new), then writes for new rectangle
2131        #[derive(Clone)]
2132        struct Op {
2133            sheet: String,
2134            row: u32,
2135            col: u32,
2136            new_value: LiteralValue,
2137        }
2138        let mut ops: Vec<Op> = Vec::new();
2139
2140        // Clears for cells no longer used
2141        for cell in prev_cells.iter() {
2142            if !new_set.contains(cell) {
2143                let sheet = self.sheet_name(cell.sheet_id).to_string();
2144                ops.push(Op {
2145                    sheet,
2146                    row: cell.coord.row(),
2147                    col: cell.coord.col(),
2148                    new_value: LiteralValue::Empty,
2149                });
2150            }
2151        }
2152
2153        // Writes for new values (row-major to match target rectangle)
2154        if !target_cells.is_empty() {
2155            let first = target_cells.first().copied().unwrap();
2156            let row0 = first.coord.row();
2157            let col0 = first.coord.col();
2158            let sheet = self.sheet_name(first.sheet_id).to_string();
2159            for (r_off, row_vals) in values.iter().enumerate() {
2160                for (c_off, v) in row_vals.iter().enumerate() {
2161                    ops.push(Op {
2162                        sheet: sheet.clone(),
2163                        row: row0 + r_off as u32,
2164                        col: col0 + c_off as u32,
2165                        new_value: v.clone(),
2166                    });
2167                }
2168            }
2169        }
2170
2171        // Shadow buffer of old values for rollback
2172        #[derive(Clone)]
2173        struct OldVal {
2174            present: bool,
2175            value: LiteralValue,
2176        }
2177        let mut old_values: Vec<((String, u32, u32), OldVal)> = Vec::with_capacity(ops.len());
2178
2179        // Capture old values before applying
2180        for op in &ops {
2181            // op.row/op.col are internal 0-based; get_cell_value is a public 1-based API.
2182            let old = self
2183                .get_cell_value(&op.sheet, op.row + 1, op.col + 1)
2184                .unwrap_or(LiteralValue::Empty);
2185            let present = true; // unified model: we always treat as present
2186            old_values.push((
2187                (op.sheet.clone(), op.row, op.col),
2188                OldVal {
2189                    present,
2190                    value: old,
2191                },
2192            ));
2193        }
2194
2195        // Apply with optional injected fault
2196        for (applied, op) in ops.iter().enumerate() {
2197            if let Some(n) = fault_after_ops
2198                && applied == n
2199            {
2200                for idx in (0..applied).rev() {
2201                    let ((ref sheet, row, col), ref old) = old_values[idx];
2202                    if sheet == &anchor_sheet_name && row == anchor_row && col == anchor_col {
2203                        self.update_vertex_value(anchor, old.value.clone());
2204                    } else {
2205                        let _ = self.set_cell_value(sheet, row + 1, col + 1, old.value.clone());
2206                    }
2207                }
2208                return Err(ExcelError::new(ExcelErrorKind::Error)
2209                    .with_message("Injected persistence fault during spill commit"));
2210            }
2211            if op.sheet == anchor_sheet_name && op.row == anchor_row && op.col == anchor_col {
2212                self.update_vertex_value(anchor, op.new_value.clone());
2213            } else {
2214                let _ =
2215                    self.set_cell_value(&op.sheet, op.row + 1, op.col + 1, op.new_value.clone());
2216            }
2217        }
2218
2219        // Update spill ownership maps only on success
2220        // Clear previous ownership not reused
2221        for cell in prev_cells.iter() {
2222            if !new_set.contains(cell) {
2223                self.spill_cell_to_anchor.remove(cell);
2224            }
2225        }
2226        // Mark ownership for new rectangle using the declared target cells only
2227        for cell in &target_cells {
2228            self.spill_cell_to_anchor.insert(*cell, anchor);
2229        }
2230        self.spill_anchor_to_cells.insert(anchor, target_cells);
2231        Ok(())
2232    }
2233
2234    pub(crate) fn spill_cells_for_anchor(&self, anchor: VertexId) -> Option<&[CellRef]> {
2235        self.spill_anchor_to_cells
2236            .get(&anchor)
2237            .map(|v| v.as_slice())
2238    }
2239
2240    pub(crate) fn spill_registry_has_anchor(&self, anchor: VertexId) -> bool {
2241        self.spill_anchor_to_cells.contains_key(&anchor)
2242    }
2243
2244    pub(crate) fn spill_registry_anchor_for_cell(&self, cell: CellRef) -> Option<VertexId> {
2245        self.spill_cell_to_anchor.get(&cell).copied()
2246    }
2247
2248    pub(crate) fn spill_registry_counts(&self) -> (usize, usize) {
2249        (
2250            self.spill_anchor_to_cells.len(),
2251            self.spill_cell_to_anchor.len(),
2252        )
2253    }
2254
2255    /// Clear an existing spill region for an anchor (set cells to Empty and forget ownership)
2256    pub fn clear_spill_region(&mut self, anchor: VertexId) {
2257        let _ = self.clear_spill_region_bulk(anchor);
2258    }
2259
2260    /// Bulk clear an existing spill region for an anchor.
2261    ///
2262    /// This avoids calling `set_cell_value()` per spill child (which can trigger O(N*V)
2263    /// dependent scans when `edges.delta_size() > 0`). Instead, it clears values directly and
2264    /// performs a single dirty propagation over the affected spill children.
2265    ///
2266    /// Returns the previously registered spill cells (including the anchor cell) for callers that
2267    /// want to mirror/record deltas.
2268    pub fn clear_spill_region_bulk(&mut self, anchor: VertexId) -> Vec<CellRef> {
2269        let anchor_cell = self.get_cell_ref(anchor);
2270        let Some(cells) = self.spill_anchor_to_cells.remove(&anchor) else {
2271            return Vec::new();
2272        };
2273
2274        // Remove ownership for all cells first.
2275        for cell in cells.iter() {
2276            self.spill_cell_to_anchor.remove(cell);
2277        }
2278
2279        // Prepare a single arena value ref for Empty (only when caching is enabled).
2280        let empty_ref = if self.value_cache_enabled {
2281            Some(self.data_store.store_value(LiteralValue::Empty))
2282        } else {
2283            None
2284        };
2285
2286        // Clear all spill children (excluding the anchor cell).
2287        let mut changed_vertices: Vec<VertexId> = Vec::new();
2288        for cell in cells.iter().copied() {
2289            let is_anchor = anchor_cell.map(|a| a == cell).unwrap_or(false);
2290            if is_anchor {
2291                continue;
2292            }
2293            let Some(&vid) = self.cell_to_vertex.get(&cell) else {
2294                continue;
2295            };
2296            // Ensure this vertex is a plain value cell.
2297            if self.vertex_formulas.remove(&vid).is_some() {
2298                // Be conservative: remove outgoing edges if this was a formula vertex.
2299                // This should be rare for spill children under normal policies.
2300                self.remove_dependent_edges(vid);
2301            }
2302            self.store.set_kind(vid, VertexKind::Cell);
2303            if let Some(er) = empty_ref {
2304                self.vertex_values.insert(vid, er);
2305            } else {
2306                self.vertex_values.remove(&vid);
2307            }
2308            self.store.set_dirty(vid, false);
2309            self.dirty_vertices.remove(&vid);
2310            changed_vertices.push(vid);
2311        }
2312
2313        // Single dirty propagation for all changed spill children.
2314        if !changed_vertices.is_empty() {
2315            self.mark_dirty_many_value_cells(&changed_vertices);
2316        }
2317
2318        cells
2319    }
2320
2321    fn mark_dirty_many_value_cells(&mut self, vertex_ids: &[VertexId]) -> Vec<VertexId> {
2322        if vertex_ids.is_empty() {
2323            return Vec::new();
2324        }
2325
2326        // Ensure reverse edges are usable (delta.in_edges is intentionally not delta-aware).
2327        if self.edges.delta_size() > 0 {
2328            self.edges.rebuild();
2329        }
2330
2331        let mut affected: FxHashSet<VertexId> = FxHashSet::default();
2332        let mut to_visit: Vec<VertexId> = Vec::new();
2333        let mut visited_for_propagation: FxHashSet<VertexId> = FxHashSet::default();
2334
2335        // Value sources are affected but not marked dirty themselves.
2336        for &src in vertex_ids {
2337            affected.insert(src);
2338        }
2339
2340        // Collect initial direct dependents and name dependents.
2341        for &src in vertex_ids {
2342            to_visit.extend(self.edges.in_edges(src));
2343            if let Some(name_set) = self.cell_to_name_dependents.get(&src) {
2344                for &name_vertex in name_set {
2345                    to_visit.push(name_vertex);
2346                }
2347            }
2348        }
2349
2350        // Collect range dependents in bulk using spill rect bounds per sheet.
2351        let mut bounds_by_sheet: FxHashMap<SheetId, (u32, u32, u32, u32)> = FxHashMap::default();
2352        for &src in vertex_ids {
2353            let view = self.store.view(src);
2354            let sid = view.sheet_id();
2355            let r = view.row();
2356            let c = view.col();
2357            bounds_by_sheet
2358                .entry(sid)
2359                .and_modify(|b| {
2360                    b.0 = b.0.min(r);
2361                    b.1 = b.1.max(r);
2362                    b.2 = b.2.min(c);
2363                    b.3 = b.3.max(c);
2364                })
2365                .or_insert((r, r, c, c));
2366        }
2367
2368        for (sid, (sr, er, sc, ec)) in bounds_by_sheet {
2369            to_visit.extend(self.collect_range_dependents_for_rect(sid, sr, sc, er, ec));
2370        }
2371
2372        while let Some(id) = to_visit.pop() {
2373            if !visited_for_propagation.insert(id) {
2374                continue;
2375            }
2376            affected.insert(id);
2377            self.store.set_dirty(id, true);
2378            to_visit.extend(self.edges.in_edges(id));
2379        }
2380
2381        self.dirty_vertices.extend(&affected);
2382        affected.into_iter().collect()
2383    }
2384
2385    fn collect_range_dependents_for_rect(
2386        &self,
2387        sheet_id: SheetId,
2388        start_row: u32,
2389        start_col: u32,
2390        end_row: u32,
2391        end_col: u32,
2392    ) -> Vec<VertexId> {
2393        if self.stripe_to_dependents.is_empty() {
2394            return Vec::new();
2395        }
2396        let mut candidates: FxHashSet<VertexId> = FxHashSet::default();
2397
2398        for col in start_col..=end_col {
2399            let key = StripeKey {
2400                sheet_id,
2401                stripe_type: StripeType::Column,
2402                index: col,
2403            };
2404            if let Some(deps) = self.stripe_to_dependents.get(&key) {
2405                candidates.extend(deps);
2406            }
2407        }
2408        for row in start_row..=end_row {
2409            let key = StripeKey {
2410                sheet_id,
2411                stripe_type: StripeType::Row,
2412                index: row,
2413            };
2414            if let Some(deps) = self.stripe_to_dependents.get(&key) {
2415                candidates.extend(deps);
2416            }
2417        }
2418        if self.config.enable_block_stripes {
2419            let br0 = start_row / BLOCK_H;
2420            let br1 = end_row / BLOCK_H;
2421            let bc0 = start_col / BLOCK_W;
2422            let bc1 = end_col / BLOCK_W;
2423            for br in br0..=br1 {
2424                for bc in bc0..=bc1 {
2425                    let key = StripeKey {
2426                        sheet_id,
2427                        stripe_type: StripeType::Block,
2428                        index: block_index(br * BLOCK_H, bc * BLOCK_W),
2429                    };
2430                    if let Some(deps) = self.stripe_to_dependents.get(&key) {
2431                        candidates.extend(deps);
2432                    }
2433                }
2434            }
2435        }
2436
2437        // Precision check: the dirty rect must overlap at least one of the formula's registered ranges.
2438        let mut out: Vec<VertexId> = Vec::new();
2439        for dep_id in candidates {
2440            let Some(ranges) = self.formula_to_range_deps.get(&dep_id) else {
2441                continue;
2442            };
2443            let mut hit = false;
2444            for range in ranges {
2445                let range_sheet_id = match range.sheet {
2446                    SharedSheetLocator::Id(id) => id,
2447                    _ => sheet_id,
2448                };
2449                if range_sheet_id != sheet_id {
2450                    continue;
2451                }
2452                let sr0 = range.start_row.map(|b| b.index).unwrap_or(0);
2453                let er0 = range.end_row.map(|b| b.index).unwrap_or(u32::MAX);
2454                let sc0 = range.start_col.map(|b| b.index).unwrap_or(0);
2455                let ec0 = range.end_col.map(|b| b.index).unwrap_or(u32::MAX);
2456                let overlap =
2457                    sr0 <= end_row && er0 >= start_row && sc0 <= end_col && ec0 >= start_col;
2458                if overlap {
2459                    hit = true;
2460                    break;
2461                }
2462            }
2463            if hit {
2464                out.push(dep_id);
2465            }
2466        }
2467        out
2468    }
2469
2470    /// Check if a vertex exists
2471    pub(crate) fn vertex_exists(&self, vertex_id: VertexId) -> bool {
2472        if vertex_id.0 < FIRST_NORMAL_VERTEX {
2473            return false;
2474        }
2475        let index = (vertex_id.0 - FIRST_NORMAL_VERTEX) as usize;
2476        index < self.store.len()
2477    }
2478
2479    /// Get the kind of a vertex
2480    pub(crate) fn get_vertex_kind(&self, vertex_id: VertexId) -> VertexKind {
2481        self.store.kind(vertex_id)
2482    }
2483
2484    /// Get the sheet ID of a vertex
2485    pub(crate) fn get_vertex_sheet_id(&self, vertex_id: VertexId) -> SheetId {
2486        self.store.sheet_id(vertex_id)
2487    }
2488
2489    pub fn get_formula_id(&self, vertex_id: VertexId) -> Option<AstNodeId> {
2490        self.vertex_formulas.get(&vertex_id).copied()
2491    }
2492
2493    pub fn get_formula_id_and_volatile(&self, vertex_id: VertexId) -> Option<(AstNodeId, bool)> {
2494        let ast_id = self.get_formula_id(vertex_id)?;
2495        Some((ast_id, self.is_volatile(vertex_id)))
2496    }
2497
2498    pub fn get_formula_node(&self, vertex_id: VertexId) -> Option<&super::arena::AstNodeData> {
2499        let ast_id = self.get_formula_id(vertex_id)?;
2500        self.data_store.get_node(ast_id)
2501    }
2502
2503    pub fn get_formula_node_and_volatile(
2504        &self,
2505        vertex_id: VertexId,
2506    ) -> Option<(&super::arena::AstNodeData, bool)> {
2507        let (ast_id, vol) = self.get_formula_id_and_volatile(vertex_id)?;
2508        let node = self.data_store.get_node(ast_id)?;
2509        Some((node, vol))
2510    }
2511
2512    /// Get the formula AST for a vertex.
2513    ///
2514    /// Not used in hot paths; reconstructs from arena.
2515    pub fn get_formula(&self, vertex_id: VertexId) -> Option<ASTNode> {
2516        let ast_id = self.get_formula_id(vertex_id)?;
2517        self.data_store.retrieve_ast(ast_id, &self.sheet_reg)
2518    }
2519
2520    /// Get the value stored for a vertex
2521    pub fn get_value(&self, vertex_id: VertexId) -> Option<LiteralValue> {
2522        if !self.value_cache_enabled {
2523            // In canonical mode, cell/formula values must not be read from the graph.
2524            // Non-cell vertices (e.g. named ranges, external sources) may still use graph storage.
2525            match self.store.kind(vertex_id) {
2526                VertexKind::Cell
2527                | VertexKind::FormulaScalar
2528                | VertexKind::FormulaArray
2529                | VertexKind::Empty => {
2530                    #[cfg(debug_assertions)]
2531                    {
2532                        self.graph_value_read_attempts
2533                            .fetch_add(1, Ordering::Relaxed);
2534                    }
2535                    return None;
2536                }
2537                _ => {
2538                    // Allow non-cell vertices to use vertex_values.
2539                }
2540            }
2541        }
2542        self.vertex_values
2543            .get(&vertex_id)
2544            .map(|&value_ref| self.data_store.retrieve_value(value_ref))
2545    }
2546
2547    /// Get the cell reference for a vertex
2548    pub(crate) fn get_cell_ref(&self, vertex_id: VertexId) -> Option<CellRef> {
2549        let packed_coord = self.store.coord(vertex_id);
2550        let sheet_id = self.store.sheet_id(vertex_id);
2551        let coord = Coord::new(packed_coord.row(), packed_coord.col(), true, true);
2552        Some(CellRef::new(sheet_id, coord))
2553    }
2554
2555    /// Create a cell reference (helper for internal use)
2556    pub(crate) fn make_cell_ref_internal(&self, sheet_id: SheetId, row: u32, col: u32) -> CellRef {
2557        let coord = Coord::new(row, col, true, true);
2558        CellRef::new(sheet_id, coord)
2559    }
2560
2561    /// Create a cell reference from sheet name and Excel 1-based coordinates.
2562    pub fn make_cell_ref(&self, sheet_name: &str, row: u32, col: u32) -> CellRef {
2563        let sheet_id = self.sheet_reg.get_id(sheet_name).unwrap_or(0);
2564        let coord = Coord::from_excel(row, col, true, true);
2565        CellRef::new(sheet_id, coord)
2566    }
2567
2568    /// Check if a vertex is dirty
2569    pub(crate) fn is_dirty(&self, vertex_id: VertexId) -> bool {
2570        self.store.is_dirty(vertex_id)
2571    }
2572
2573    /// Check if a vertex is volatile
2574    pub(crate) fn is_volatile(&self, vertex_id: VertexId) -> bool {
2575        self.store.is_volatile(vertex_id)
2576    }
2577
2578    pub(crate) fn is_dynamic(&self, vertex_id: VertexId) -> bool {
2579        self.store.is_dynamic(vertex_id)
2580    }
2581
2582    /// Get vertex ID for a cell address
2583    pub fn get_vertex_id_for_address(&self, addr: &CellRef) -> Option<&VertexId> {
2584        self.cell_to_vertex.get(addr)
2585    }
2586
2587    #[cfg(test)]
2588    pub fn cell_to_vertex(&self) -> &FxHashMap<CellRef, VertexId> {
2589        &self.cell_to_vertex
2590    }
2591
2592    /// Borrow dependencies of a vertex when no pending edge delta exists.
2593    ///
2594    /// This enables zero-allocation traversal in hot scheduler paths.
2595    #[inline]
2596    pub(crate) fn dependencies_slice(&self, vertex_id: VertexId) -> Option<&[VertexId]> {
2597        self.edges.out_edges_ref(vertex_id)
2598    }
2599
2600    /// Get the dependencies of a vertex (for scheduler)
2601    pub(crate) fn get_dependencies(&self, vertex_id: VertexId) -> Vec<VertexId> {
2602        self.edges.out_edges(vertex_id)
2603    }
2604
2605    /// Check if a vertex has a self-loop
2606    pub(crate) fn has_self_loop(&self, vertex_id: VertexId) -> bool {
2607        if let Some(deps) = self.dependencies_slice(vertex_id) {
2608            deps.contains(&vertex_id)
2609        } else {
2610            self.edges.out_edges(vertex_id).contains(&vertex_id)
2611        }
2612    }
2613
2614    /// Borrow dependents of a vertex when no pending edge delta exists.
2615    ///
2616    /// This enables zero-allocation traversal in hot scheduler paths.
2617    #[inline]
2618    pub(crate) fn dependents_slice(&self, vertex_id: VertexId) -> Option<&[VertexId]> {
2619        self.edges.in_edges_ref(vertex_id)
2620    }
2621
2622    /// Get dependents of a vertex (vertices that depend on this vertex)
2623    /// Uses reverse edges for O(1) lookup when available
2624    pub(crate) fn get_dependents(&self, vertex_id: VertexId) -> Vec<VertexId> {
2625        // If there are pending changes in delta, we need to scan
2626        // Otherwise we can use the fast reverse edges
2627        if self.edges.delta_size() > 0 {
2628            #[cfg(test)]
2629            {
2630                // This scan is intentionally tracked for perf regression tests.
2631                // It is expected to be rare in normal operation.
2632                if let Ok(mut g) = self.instr.lock() {
2633                    g.dependents_scan_fallback_calls += 1;
2634                    g.dependents_scan_vertices_scanned += self.cell_to_vertex.len() as u64;
2635                }
2636            }
2637            // Fall back to scanning when delta has changes
2638            let mut dependents = Vec::new();
2639            for (&_addr, &vid) in &self.cell_to_vertex {
2640                let out_edges = self.edges.out_edges(vid);
2641                if out_edges.contains(&vertex_id) {
2642                    dependents.push(vid);
2643                }
2644            }
2645            for named in self.named_ranges.values() {
2646                let vid = named.vertex;
2647                let out_edges = self.edges.out_edges(vid);
2648                if out_edges.contains(&vertex_id) {
2649                    dependents.push(vid);
2650                }
2651            }
2652            for named in self.sheet_named_ranges.values() {
2653                let vid = named.vertex;
2654                let out_edges = self.edges.out_edges(vid);
2655                if out_edges.contains(&vertex_id) {
2656                    dependents.push(vid);
2657                }
2658            }
2659            dependents
2660        } else {
2661            // Fast path: use reverse edges from CSR
2662            self.edges.in_edges(vertex_id).to_vec()
2663        }
2664    }
2665
2666    // Internal helper methods for Milestone 0.4
2667
2668    /// Internal: Create a snapshot of vertex state for rollback
2669    #[doc(hidden)]
2670    pub fn snapshot_vertex(&self, id: VertexId) -> crate::engine::VertexSnapshot {
2671        let coord = self.store.coord(id);
2672        let sheet_id = self.store.sheet_id(id);
2673        let kind = self.store.kind(id);
2674        let flags = self.store.flags(id);
2675
2676        // Get value and formula references
2677        let value_ref = self.vertex_values.get(&id).copied();
2678        let formula_ref = self.vertex_formulas.get(&id).copied();
2679
2680        // Get outgoing edges (dependencies)
2681        let out_edges = self.get_dependencies(id);
2682
2683        crate::engine::VertexSnapshot {
2684            coord,
2685            sheet_id,
2686            kind,
2687            flags,
2688            value_ref,
2689            formula_ref,
2690            out_edges,
2691        }
2692    }
2693
2694    /// Internal: Remove all edges for a vertex
2695    #[doc(hidden)]
2696    pub fn remove_all_edges(&mut self, id: VertexId) {
2697        // Enter batch mode to avoid intermediate rebuilds
2698        self.edges.begin_batch();
2699
2700        // Remove outgoing edges (this vertex's dependencies)
2701        self.remove_dependent_edges(id);
2702
2703        // Force rebuild to get accurate dependents list
2704        // This is necessary because get_dependents uses CSR reverse edges
2705        self.edges.rebuild();
2706
2707        // Remove incoming edges (vertices that depend on this vertex)
2708        let dependents = self.get_dependents(id);
2709        if self.pk_order.is_some()
2710            && let Some(mut pk) = self.pk_order.take()
2711        {
2712            for dependent in &dependents {
2713                pk.remove_edge(id, *dependent);
2714            }
2715            self.pk_order = Some(pk);
2716        }
2717        for dependent in dependents {
2718            self.edges.remove_edge(dependent, id);
2719        }
2720
2721        // Exit batch mode and rebuild once with all changes
2722        self.edges.end_batch();
2723    }
2724
2725    /// Internal: Mark vertex as having #REF! error
2726    #[doc(hidden)]
2727    pub fn mark_as_ref_error(&mut self, id: VertexId) {
2728        if !self.value_cache_enabled {
2729            match self.store.kind(id) {
2730                VertexKind::Cell
2731                | VertexKind::FormulaScalar
2732                | VertexKind::FormulaArray
2733                | VertexKind::Empty => {
2734                    self.ref_error_vertices.insert(id);
2735                    // Canonical-only: graph does not cache cell/formula values.
2736                    // Ensure the dependent subgraph is dirtied so evaluation updates Arrow truth.
2737                    self.vertex_values.remove(&id);
2738                    let _ = self.mark_dirty(id);
2739                    return;
2740                }
2741                _ => {
2742                    // Allow non-cell vertices to use cached values.
2743                }
2744            }
2745        }
2746        let error = LiteralValue::Error(ExcelError::new(ExcelErrorKind::Ref));
2747        let value_ref = self.data_store.store_value(error);
2748        self.vertex_values.insert(id, value_ref);
2749        let _ = self.mark_dirty(id);
2750    }
2751
2752    /// Check if a vertex has a #REF! error
2753    pub fn is_ref_error(&self, id: VertexId) -> bool {
2754        if !self.value_cache_enabled {
2755            match self.store.kind(id) {
2756                VertexKind::Cell
2757                | VertexKind::FormulaScalar
2758                | VertexKind::FormulaArray
2759                | VertexKind::Empty => {
2760                    return self.ref_error_vertices.contains(&id);
2761                }
2762                _ => {
2763                    // Non-cell vertices may still have cached values.
2764                }
2765            }
2766        }
2767        if let Some(value_ref) = self.vertex_values.get(&id) {
2768            let value = self.data_store.retrieve_value(*value_ref);
2769            if let LiteralValue::Error(err) = value {
2770                return err.kind == ExcelErrorKind::Ref;
2771            }
2772        }
2773        false
2774    }
2775
2776    /// Internal: Mark all direct dependents as dirty
2777    #[doc(hidden)]
2778    pub fn mark_dependents_dirty(&mut self, id: VertexId) {
2779        let dependents = self.get_dependents(id);
2780        for dep_id in dependents {
2781            self.store.set_dirty(dep_id, true);
2782            self.dirty_vertices.insert(dep_id);
2783        }
2784    }
2785
2786    /// Internal: Mark a vertex as volatile
2787    #[doc(hidden)]
2788    pub fn mark_volatile(&mut self, id: VertexId, volatile: bool) {
2789        self.store.set_volatile(id, volatile);
2790        if volatile {
2791            self.volatile_vertices.insert(id);
2792        } else {
2793            self.volatile_vertices.remove(&id);
2794        }
2795    }
2796
2797    /// Update vertex coordinate
2798    #[doc(hidden)]
2799    pub fn set_coord(&mut self, id: VertexId, coord: AbsCoord) {
2800        self.store.set_coord(id, coord);
2801    }
2802
2803    /// Update edge cache coordinate
2804    #[doc(hidden)]
2805    pub fn update_edge_coord(&mut self, id: VertexId, coord: AbsCoord) {
2806        self.edges.update_coord(id, coord);
2807    }
2808
2809    /// Mark vertex as deleted (tombstone)
2810    #[doc(hidden)]
2811    pub fn mark_deleted(&mut self, id: VertexId, deleted: bool) {
2812        self.store.mark_deleted(id, deleted);
2813    }
2814
2815    /// Set vertex kind
2816    #[doc(hidden)]
2817    pub fn set_kind(&mut self, id: VertexId, kind: VertexKind) {
2818        self.store.set_kind(id, kind);
2819    }
2820
2821    /// Set vertex dirty flag
2822    #[doc(hidden)]
2823    pub fn set_dirty(&mut self, id: VertexId, dirty: bool) {
2824        self.store.set_dirty(id, dirty);
2825        if dirty {
2826            self.dirty_vertices.insert(id);
2827        } else {
2828            self.dirty_vertices.remove(&id);
2829        }
2830    }
2831
2832    /// Get vertex kind (for testing)
2833    #[cfg(test)]
2834    pub(crate) fn get_kind(&self, id: VertexId) -> VertexKind {
2835        self.store.kind(id)
2836    }
2837
2838    /// Get vertex flags (for testing)
2839    #[cfg(test)]
2840    pub(crate) fn get_flags(&self, id: VertexId) -> u8 {
2841        self.store.flags(id)
2842    }
2843
2844    /// Check if vertex is deleted (for testing)
2845    #[cfg(test)]
2846    pub(crate) fn is_deleted(&self, id: VertexId) -> bool {
2847        self.store.is_deleted(id)
2848    }
2849
2850    /// Force edge rebuild (internal use)
2851    #[doc(hidden)]
2852    pub fn rebuild_edges(&mut self) {
2853        self.edges.rebuild();
2854    }
2855
2856    /// Get delta size (internal use)
2857    #[doc(hidden)]
2858    pub fn edges_delta_size(&self) -> usize {
2859        self.edges.delta_size()
2860    }
2861
2862    /// Get vertex ID for specific cell address
2863    pub fn get_vertex_for_cell(&self, addr: &CellRef) -> Option<VertexId> {
2864        self.cell_to_vertex.get(addr).copied()
2865    }
2866
2867    /// Get coord for a vertex (public for VertexEditor)
2868    pub fn get_coord(&self, id: VertexId) -> AbsCoord {
2869        self.store.coord(id)
2870    }
2871
2872    /// Get sheet_id for a vertex (public for VertexEditor)
2873    pub fn get_sheet_id(&self, id: VertexId) -> SheetId {
2874        self.store.sheet_id(id)
2875    }
2876
2877    /// Get all vertices in a sheet
2878    pub fn vertices_in_sheet(&self, sheet_id: SheetId) -> impl Iterator<Item = VertexId> + '_ {
2879        self.store
2880            .all_vertices()
2881            .filter(move |&id| self.vertex_exists(id) && self.store.sheet_id(id) == sheet_id)
2882    }
2883
2884    /// Does a vertex have a formula associated
2885    pub fn vertex_has_formula(&self, id: VertexId) -> bool {
2886        self.vertex_formulas.contains_key(&id)
2887    }
2888
2889    /// Get all vertices with formulas
2890    pub fn vertices_with_formulas(&self) -> impl Iterator<Item = VertexId> + '_ {
2891        self.vertex_formulas.keys().copied()
2892    }
2893
2894    /// Update a vertex's formula
2895    pub fn update_vertex_formula(&mut self, id: VertexId, ast: ASTNode) -> Result<(), ExcelError> {
2896        // Get the sheet_id for this vertex
2897        let sheet_id = self.store.sheet_id(id);
2898
2899        // If the adjusted AST contains special #REF markers (from structural edits),
2900        // treat this as a REF error on the vertex instead of attempting to resolve.
2901        // This prevents failures when reference_adjuster injected placeholder refs.
2902        let has_ref_marker = ast.get_dependencies().into_iter().any(|r| {
2903            matches!(
2904                r,
2905                ReferenceType::Cell { sheet: Some(s), .. }
2906                    | ReferenceType::Range { sheet: Some(s), .. } if s == "#REF"
2907            )
2908        });
2909        if has_ref_marker {
2910            // Store the adjusted AST for round-tripping/display, but set value state to #REF!
2911            let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
2912            self.vertex_formulas.insert(id, ast_id);
2913            self.mark_as_ref_error(id);
2914            self.store.set_kind(id, VertexKind::FormulaScalar);
2915            return Ok(());
2916        }
2917
2918        // Extract dependencies from AST
2919        let (new_dependencies, new_range_dependencies, _, named_dependencies) =
2920            self.extract_dependencies(&ast, sheet_id)?;
2921
2922        // Remove old dependencies first
2923        self.remove_dependent_edges(id);
2924        self.detach_vertex_from_names(id);
2925
2926        // Store the new formula
2927        let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
2928        self.vertex_formulas.insert(id, ast_id);
2929
2930        // Add new dependency edges
2931        self.add_dependent_edges(id, &new_dependencies);
2932        self.add_range_dependent_edges(id, &new_range_dependencies, sheet_id);
2933
2934        if !named_dependencies.is_empty() {
2935            self.attach_vertex_to_names(id, &named_dependencies);
2936        }
2937
2938        // Mark as formula vertex
2939        self.store.set_kind(id, VertexKind::FormulaScalar);
2940
2941        Ok(())
2942    }
2943
2944    /// Mark a vertex as dirty without propagation (for VertexEditor)
2945    pub fn mark_vertex_dirty(&mut self, vertex_id: VertexId) {
2946        self.store.set_dirty(vertex_id, true);
2947        self.dirty_vertices.insert(vertex_id);
2948    }
2949
2950    /// Update cell mapping for a vertex (for VertexEditor)
2951    pub fn update_cell_mapping(
2952        &mut self,
2953        id: VertexId,
2954        old_addr: Option<CellRef>,
2955        new_addr: CellRef,
2956    ) {
2957        // Remove old mapping if it exists
2958        if let Some(old) = old_addr {
2959            self.cell_to_vertex.remove(&old);
2960        }
2961        // Add new mapping
2962        self.cell_to_vertex.insert(new_addr, id);
2963    }
2964
2965    /// Remove cell mapping (for VertexEditor)
2966    pub fn remove_cell_mapping(&mut self, addr: &CellRef) {
2967        self.cell_to_vertex.remove(addr);
2968    }
2969
2970    /// Get the cell reference for a vertex
2971    pub fn get_cell_ref_for_vertex(&self, id: VertexId) -> Option<CellRef> {
2972        let coord = self.store.coord(id);
2973        let sheet_id = self.store.sheet_id(id);
2974        // Find the cell reference in the mapping
2975        let cell_ref = CellRef::new(sheet_id, Coord::new(coord.row(), coord.col(), true, true));
2976        // Verify it actually maps to this vertex
2977        if self.cell_to_vertex.get(&cell_ref) == Some(&id) {
2978            Some(cell_ref)
2979        } else {
2980            None
2981        }
2982    }
2983
2984    /// Rebuild dependency edges/range links for an existing formula vertex after AST changes.
2985    ///
2986    /// This intentionally reuses the same extraction and edge wiring machinery as
2987    /// `set_cell_formula[_with_volatility]` to preserve edge orientation, placeholder
2988    /// behavior, and name/range dependency semantics.
2989    pub(crate) fn rebuild_formula_dependencies(&mut self, vertex_id: VertexId, ast: &ASTNode) {
2990        let sheet_id = self.store.sheet_id(vertex_id);
2991
2992        // Remove old dependency, name, and pending-name links first.
2993        self.remove_dependent_edges(vertex_id);
2994        self.detach_vertex_from_names(vertex_id);
2995        self.clear_pending_name_references(vertex_id);
2996
2997        let (
2998            new_dependencies,
2999            new_range_dependencies,
3000            _created_placeholders,
3001            named_dependencies,
3002            unresolved_names,
3003        ) = match self.extract_dependencies_with_pending_names(ast, sheet_id) {
3004            Ok(v) => v,
3005            Err(_) => {
3006                self.mark_as_ref_error(vertex_id);
3007                return;
3008            }
3009        };
3010
3011        // Self-reference / name-cycle safety parity with set_cell_formula.
3012        if new_dependencies.contains(&vertex_id) {
3013            self.mark_as_ref_error(vertex_id);
3014            return;
3015        }
3016
3017        for &name_vertex in &named_dependencies {
3018            let mut visited = FxHashSet::default();
3019            if self.name_depends_on_vertex(name_vertex, vertex_id, &mut visited) {
3020                self.mark_as_ref_error(vertex_id);
3021                return;
3022            }
3023        }
3024
3025        // Formula is now recoverable again.
3026        self.ref_error_vertices.remove(&vertex_id);
3027        self.vertex_values.remove(&vertex_id);
3028
3029        if !named_dependencies.is_empty() {
3030            self.attach_vertex_to_names(vertex_id, &named_dependencies);
3031        }
3032        for unresolved_name in &unresolved_names {
3033            self.record_pending_name_reference(sheet_id, unresolved_name, vertex_id);
3034        }
3035
3036        self.add_dependent_edges(vertex_id, &new_dependencies);
3037        self.add_range_dependent_edges(vertex_id, &new_range_dependencies, sheet_id);
3038        let _ = self.mark_dirty(vertex_id);
3039    }
3040}
3041
3042// ========== Sheet Management Operations ==========