Skip to main content

formualizer_eval/engine/graph/
mod.rs

1use crate::SheetId;
2use crate::engine::TombstoneRegistry;
3use crate::engine::named_range::{NameScope, NamedDefinition, NamedRange};
4use crate::engine::sheet_registry::SheetRegistry;
5use crate::formula_plane::authority::FormulaAuthority;
6use formualizer_common::{
7    CoordBuildHasher, ExcelError, ExcelErrorKind, LiteralValue, PackedSheetCell,
8};
9use formualizer_parse::parser::{ASTNode, ASTNodeType, ReferenceType};
10use rustc_hash::{FxHashMap, FxHashSet};
11
12#[cfg(debug_assertions)]
13use std::sync::atomic::{AtomicU64, Ordering};
14
15#[cfg(test)]
16#[derive(Debug, Default, Clone)]
17pub struct GraphInstrumentation {
18    pub edges_added: u64,
19    pub stripe_inserts: u64,
20    pub stripe_removes: u64,
21    pub dependents_scan_fallback_calls: u64,
22    pub dependents_scan_vertices_scanned: u64,
23}
24
25mod ast_utils;
26pub mod editor;
27mod formula_analysis;
28mod names;
29mod range_deps;
30mod sheets;
31pub mod snapshot;
32mod sources;
33mod tables;
34
35use super::arena::{AstNodeId, DataStore, ValueRef};
36use super::delta_edges::CsrMutableEdges;
37use super::ingest_pipeline::{DependencyPlanRow, FormulaAstInput};
38use super::sheet_index::SheetIndex;
39use super::vertex::{VertexId, VertexKind};
40use super::vertex_store::{FIRST_NORMAL_VERTEX, VertexStore};
41use crate::engine::topo::{
42    GraphAdapter,
43    pk::{DynamicTopo, PkConfig},
44};
45use crate::reference::{CellRef, Coord, SharedRangeRef, SharedRef, SharedSheetLocator};
46use formualizer_common::Coord as AbsCoord;
47// topo::pk wiring will be integrated behind config.use_dynamic_topo in a follow-up step
48
49struct RegistryFunctionProvider;
50
51impl crate::traits::FunctionProvider for RegistryFunctionProvider {
52    fn get_function(
53        &self,
54        ns: &str,
55        name: &str,
56    ) -> Option<std::sync::Arc<dyn crate::function::Function>> {
57        crate::function_registry::get(ns, name)
58    }
59}
60
61#[inline]
62fn normalize_stored_literal(value: LiteralValue) -> LiteralValue {
63    match value {
64        // Public contract: store numerics as Number(f64).
65        LiteralValue::Int(i) => LiteralValue::Number(i as f64),
66        other => other,
67    }
68}
69
70pub use editor::change_log::{ChangeEvent, ChangeLog};
71
72// ChangeEvent is now imported from change_log module
73
74/// 🔮 Scalability Hook: Dependency reference types for range compression
75#[derive(Debug, Clone, PartialEq, Eq, Hash)]
76pub enum DependencyRef {
77    /// A specific cell dependency
78    Cell(VertexId),
79    /// A dependency on a finite, rectangular range
80    Range {
81        sheet: String,
82        start_row: u32,
83        start_col: u32,
84        end_row: u32, // Inclusive
85        end_col: u32, // Inclusive
86    },
87    /// A whole column dependency (A:A) - future range compression
88    WholeColumn { sheet: String, col: u32 },
89    /// A whole row dependency (1:1) - future range compression  
90    WholeRow { sheet: String, row: u32 },
91}
92
93/// A key representing a coarse-grained section of a sheet
94#[derive(Debug, Clone, Hash, PartialEq, Eq)]
95pub struct StripeKey {
96    pub sheet_id: SheetId,
97    pub stripe_type: StripeType,
98    pub index: u32, // The index of the row, column, or block stripe
99}
100
101#[derive(Debug, Clone, Hash, PartialEq, Eq)]
102pub enum StripeType {
103    Row,
104    Column,
105    Block, // For dense, square-like ranges
106}
107
108/// Block stripe indexing mathematics
109const BLOCK_H: u32 = 256;
110const BLOCK_W: u32 = 256;
111
112pub fn block_index(row: u32, col: u32) -> u32 {
113    (row / BLOCK_H) << 16 | (col / BLOCK_W)
114}
115
116/// A summary of the results of a mutating operation on the graph.
117/// This serves as a "changelog" to the application layer.
118#[derive(Debug, Clone)]
119pub struct OperationSummary {
120    /// Vertices whose values have been directly or indirectly affected.
121    pub affected_vertices: Vec<VertexId>,
122    /// Placeholder cells that were newly created to satisfy dependencies.
123    pub created_placeholders: Vec<CellRef>,
124}
125
126/// Read-only dependency graph counters used by benchmark/instrumentation tooling.
127///
128/// These counters are deliberately observational: collecting them must not mutate graph state or
129/// alter formula evaluation semantics.
130#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
131pub struct GraphBaselineStats {
132    pub graph_vertex_count: usize,
133    pub graph_formula_vertex_count: usize,
134    pub graph_edge_count: usize,
135    pub dirty_vertex_count: usize,
136    pub evaluation_vertex_count: usize,
137    pub formula_ast_root_count: usize,
138    pub formula_ast_node_count: usize,
139}
140
141/// SoA-based dependency graph implementation
142#[derive(Debug)]
143pub struct DependencyGraph {
144    // Core columnar storage
145    store: VertexStore,
146
147    // Edge storage with delta slab
148    edges: CsrMutableEdges,
149
150    // Arena-based value and formula storage
151    data_store: DataStore,
152    vertex_values: FxHashMap<VertexId, ValueRef>,
153    vertex_formulas: FxHashMap<VertexId, AstNodeId>,
154
155    /// Gate for storing grid-backed (cell/formula) LiteralValue payloads inside the dependency graph.
156    ///
157    /// When `false` (Arrow-canonical mode), the graph does not store values for cell/formula
158    /// vertices. Arrow (base + overlays) is the sole value store for sheet cells.
159    value_cache_enabled: bool,
160
161    /// Debug-only instrumentation: count attempts to read *cell/formula* graph values while
162    /// caching is disabled (canonical mode guard).
163    #[cfg(debug_assertions)]
164    graph_value_read_attempts: AtomicU64,
165
166    // Address mappings using a hasher tuned for packed Coord / PackedSheetCell
167    // keys. FxHasher's weak avalanche produces O(N^2) collision cascades on
168    // row-major bulk ingest; CoordBuildHasher keeps these strictly O(N).
169    cell_to_vertex: std::collections::HashMap<CellRef, VertexId, CoordBuildHasher>,
170    load_packed_to_vertex: std::collections::HashMap<PackedSheetCell, VertexId, CoordBuildHasher>,
171
172    // Scheduling state - using HashSet for O(1) operations
173    dirty_vertices: FxHashSet<VertexId>,
174    volatile_vertices: FxHashSet<VertexId>,
175
176    /// Monotonic count of vertices processed by dirty-propagation BFS loops
177    /// (`mark_dirty_many` / `mark_dirty_many_value_cells`). Cheap plain
178    /// counter used by perf-shape tests to assert propagation work is
179    /// O(component), not O(sources × component).
180    dirty_propagation_visits: u64,
181
182    /// Nesting depth of active deferred-dirty scopes (`begin_deferred_dirty`
183    /// / `end_deferred_dirty`). While > 0, dirty-propagation entry points
184    /// queue their sources in `deferred_dirty_pending` instead of running a
185    /// BFS per call; the outermost `end_deferred_dirty` flushes the union in
186    /// ONE multi-source `mark_dirty_many`.
187    deferred_dirty_depth: u32,
188    /// Sources queued while a deferred-dirty scope is active.
189    deferred_dirty_pending: Vec<VertexId>,
190
191    /// Vertices explicitly marked as #REF! by structural operations.
192    ///
193    /// In Arrow-truth mode, the dependency graph does not cache cell/formula values.
194    /// We still need a place to record deterministic #REF! invalidations for editor
195    /// operations and structural transforms.
196    ref_error_vertices: FxHashSet<VertexId>,
197
198    // NEW: Specialized managers for range dependencies (Hybrid Model)
199    /// Maps a formula vertex to the ranges it depends on.
200    formula_to_range_deps: FxHashMap<VertexId, Vec<SharedRangeRef<'static>>>,
201
202    /// Maps a stripe to formulas that depend on it via a compressed range.
203    /// CRITICAL: VertexIds are deduplicated within each stripe to avoid quadratic blow-ups.
204    stripe_to_dependents: FxHashMap<StripeKey, FxHashSet<VertexId>>,
205
206    // Sheet-level sparse indexes for O(log n + k) range queries
207    /// Maps sheet_id to its interval tree index for efficient row/column operations
208    sheet_indexes: FxHashMap<SheetId, SheetIndex>,
209
210    // Sheet name/ID mapping
211    sheet_reg: SheetRegistry,
212    default_sheet_id: SheetId,
213
214    // Named ranges support
215    /// Workbook-scoped named ranges
216    named_ranges: FxHashMap<String, NamedRange>,
217
218    /// Normalized-key lookup for workbook-scoped names.
219    ///
220    /// When `config.case_sensitive_names == false`, keys are ASCII-lowercased.
221    /// Values are the canonical (original-cased) name stored in `named_ranges`.
222    named_ranges_lookup: FxHashMap<String, String>,
223
224    /// Sheet-scoped named ranges  
225    sheet_named_ranges: FxHashMap<(SheetId, String), NamedRange>,
226
227    /// Normalized-key lookup for sheet-scoped names.
228    ///
229    /// Key is (SheetId, normalized_name_key). Value is the canonical (original-cased)
230    /// name stored in `sheet_named_ranges`.
231    sheet_named_ranges_lookup: FxHashMap<(SheetId, String), String>,
232
233    /// Reverse mapping: vertex -> names it uses (by vertex id)
234    vertex_to_names: FxHashMap<VertexId, Vec<VertexId>>,
235
236    /// Lookup for name vertex -> (scope, name) to avoid map scans
237    name_vertex_lookup: FxHashMap<VertexId, (NameScope, String)>,
238
239    /// Pending formula vertices referencing unresolved bare symbolic names.
240    ///
241    /// Keys are normalized through `name_lookup_key(...)` so workbook names and
242    /// source scalars can both wake the same waiting formulas when a symbol appears.
243    pending_name_links: FxHashMap<String, FxHashSet<(SheetId, VertexId)>>,
244
245    /// Reverse mapping used to clear stale pending-name registrations when a
246    /// formula is edited, overwritten with a value, or otherwise rebuilt.
247    vertex_to_pending_names: FxHashMap<VertexId, FxHashSet<String>>,
248
249    // Native workbook tables (ListObjects)
250    tables: FxHashMap<String, tables::TableEntry>,
251    /// Normalized-key lookup for tables.
252    tables_lookup: FxHashMap<String, String>,
253    table_vertex_lookup: FxHashMap<VertexId, String>,
254
255    // External sources (SourceVertex)
256    source_scalars: FxHashMap<String, sources::SourceScalarEntry>,
257    source_tables: FxHashMap<String, sources::SourceTableEntry>,
258    source_vertex_lookup: FxHashMap<VertexId, String>,
259
260    /// Monotonic counter to assign synthetic coordinates to name vertices
261    name_vertex_seq: u32,
262
263    /// Monotonic counter to assign synthetic coordinates to source vertices
264    source_vertex_seq: u32,
265
266    /// Mapping from cell vertices to named range vertices that depend on them
267    cell_to_name_dependents: FxHashMap<VertexId, FxHashSet<VertexId>>,
268    /// Cached list of cell dependencies per named range vertex (for teardown)
269    name_to_cell_dependencies: FxHashMap<VertexId, Vec<VertexId>>,
270
271    // Evaluation configuration
272    config: super::EvalConfig,
273
274    // Graph-owned FormulaPlane authority shell. Inert until a later runtime cut-over.
275    formula_authority: FormulaAuthority,
276
277    // Dynamic topology orderer (Pearce–Kelly) maintained alongside edges when enabled
278    pk_order: Option<DynamicTopo<VertexId>>,
279
280    // Spill registry: anchor -> cells, and reverse mapping for blockers.
281    // `spill_cell_to_anchor` is keyed by `CellRef` and uses the tuned hasher
282    // for the same reason as `cell_to_vertex`.
283    spill_anchor_to_cells: FxHashMap<VertexId, Vec<CellRef>>,
284    spill_cell_to_anchor: std::collections::HashMap<CellRef, VertexId, CoordBuildHasher>,
285
286    // Hint: during initial bulk load, many cells are guaranteed new; allow skipping existence checks per-sheet
287    first_load_assume_new: bool,
288    ensure_touched_sheets: FxHashSet<SheetId>,
289
290    // handled deleted references, in case they are reintroduced.
291    pub tombstone_registry: TombstoneRegistry,
292
293    #[cfg(test)]
294    instr: std::sync::Mutex<GraphInstrumentation>,
295}
296
297impl Default for DependencyGraph {
298    fn default() -> Self {
299        Self::new()
300    }
301}
302
303impl DependencyGraph {
304    /// Expose range expansion limit for planners
305    pub fn range_expansion_limit(&self) -> usize {
306        self.config.range_expansion_limit
307    }
308
309    pub fn get_config(&self) -> &super::EvalConfig {
310        &self.config
311    }
312
313    pub(crate) fn formula_authority(&self) -> &FormulaAuthority {
314        &self.formula_authority
315    }
316
317    pub(crate) fn formula_authority_mut(&mut self) -> &mut FormulaAuthority {
318        &mut self.formula_authority
319    }
320
321    /// Return read-only baseline counters for FormulaPlane/dispatch benchmarking.
322    pub fn baseline_stats(&self) -> GraphBaselineStats {
323        let data_stats = self.data_store.memory_usage();
324        GraphBaselineStats {
325            graph_vertex_count: self.store.len(),
326            graph_formula_vertex_count: self.vertex_formulas.len(),
327            graph_edge_count: self.edges.num_edges_exact(),
328            dirty_vertex_count: self.dirty_vertices.len(),
329            evaluation_vertex_count: self.get_evaluation_vertices().len(),
330            formula_ast_root_count: self.vertex_formulas.len(),
331            formula_ast_node_count: data_stats.total_ast_nodes,
332        }
333    }
334
335    #[inline]
336    pub(crate) fn value_cache_enabled(&self) -> bool {
337        self.value_cache_enabled
338    }
339
340    /// Debug-only: how many times `get_value`/`get_cell_value` were called while caching is disabled.
341    ///
342    /// In Arrow-canonical mode this should remain 0 for engine/interpreter reads.
343    #[cfg(test)]
344    pub fn debug_graph_value_read_attempts(&self) -> u64 {
345        #[cfg(debug_assertions)]
346        {
347            self.graph_value_read_attempts.load(Ordering::Relaxed)
348        }
349        #[cfg(not(debug_assertions))]
350        {
351            0
352        }
353    }
354
355    /// Build a dependency plan for a set of formulas on sheets
356    pub fn plan_dependencies<'a, I>(
357        &mut self,
358        items: I,
359        policy: &formualizer_parse::parser::CollectPolicy,
360        volatile: Option<&[bool]>,
361    ) -> Result<crate::engine::plan::DependencyPlan, formualizer_common::ExcelError>
362    where
363        I: IntoIterator<Item = (&'a str, u32, u32, &'a formualizer_parse::parser::ASTNode)>,
364    {
365        crate::engine::plan::build_dependency_plan(
366            &mut self.sheet_reg,
367            items.into_iter(),
368            policy,
369            volatile,
370        )
371    }
372
373    pub fn plan_dependencies_mixed<'a, I>(
374        &mut self,
375        items: I,
376        policy: &formualizer_parse::parser::CollectPolicy,
377        volatile: Option<&[bool]>,
378    ) -> Result<crate::engine::plan::DependencyPlan, formualizer_common::ExcelError>
379    where
380        I: IntoIterator<
381            Item = (
382                &'a str,
383                u32,
384                u32,
385                crate::engine::plan::DependencyPlanAst<'a>,
386            ),
387        >,
388    {
389        crate::engine::plan::build_dependency_plan_mixed(
390            &mut self.sheet_reg,
391            &self.data_store,
392            items.into_iter(),
393            policy,
394            volatile,
395        )
396    }
397
398    /// Ensure vertices exist for given coords; allocate missing in contiguous batches and add to edges/index.
399    /// Returns a list suitable for edges.add_vertices_batch.
400    pub fn ensure_vertices_batch(
401        &mut self,
402        coords: &[(SheetId, AbsCoord)],
403    ) -> Vec<(AbsCoord, u32)> {
404        self.ensure_vertices_batch_ordered(coords).1
405    }
406
407    /// Ensure vertices exist for given packed absolute cells and return vertex ids aligned to the
408    /// input order, plus the newly allocated `(coord, raw_vid)` items suitable for edge/index
409    /// population.
410    pub fn ensure_vertices_batch_packed_ordered(
411        &mut self,
412        packed_cells: &[PackedSheetCell],
413    ) -> (Vec<VertexId>, Vec<(AbsCoord, u32)>) {
414        #[cfg(feature = "perf_instrumentation")]
415        use crate::instant::FzInstant as PerfInstant;
416        use rustc_hash::FxHashMap;
417
418        #[cfg(feature = "perf_instrumentation")]
419        let debug = std::env::var("FZ_DEBUG_LOAD")
420            .ok()
421            .is_some_and(|v| v != "0");
422        #[cfg(feature = "perf_instrumentation")]
423        let t0 = PerfInstant::now();
424
425        let mut ordered: Vec<Option<VertexId>> = vec![None; packed_cells.len()];
426        if packed_cells.is_empty() {
427            return (Vec::new(), Vec::new());
428        }
429
430        let first_sid = packed_cells[0].sheet_id();
431        let single_sheet = packed_cells.iter().all(|cell| cell.sheet_id() == first_sid);
432        let mut add_batch: Vec<(AbsCoord, u32)> = Vec::new();
433
434        #[cfg(feature = "perf_instrumentation")]
435        let mut packed_hits = 0usize;
436        #[cfg(feature = "perf_instrumentation")]
437        let mut generic_hits = 0usize;
438        #[cfg(feature = "perf_instrumentation")]
439        let mut missing = 0usize;
440        #[cfg(feature = "perf_instrumentation")]
441        let mut t_packed_lookup_us = 0u128;
442        #[cfg(feature = "perf_instrumentation")]
443        let mut t_generic_lookup_us = 0u128;
444        #[cfg(feature = "perf_instrumentation")]
445        let mut t_alloc_us = 0u128;
446        #[cfg(feature = "perf_instrumentation")]
447        let mut t_map_insert_us = 0u128;
448        #[cfg(feature = "perf_instrumentation")]
449        let mut t_index_insert_us = 0u128;
450        #[cfg(feature = "perf_instrumentation")]
451        let mut t_edge_register_us = 0u128;
452
453        if single_sheet {
454            let sid = first_sid;
455            let mut missing_items: Vec<(usize, PackedSheetCell)> =
456                Vec::with_capacity(packed_cells.len());
457
458            for (idx, packed) in packed_cells.iter().copied().enumerate() {
459                #[cfg(feature = "perf_instrumentation")]
460                let tl0 = PerfInstant::now();
461                if self.first_load_assume_new
462                    && let Some(&existing) = self.load_packed_to_vertex.get(&packed)
463                {
464                    ordered[idx] = Some(existing);
465                    #[cfg(feature = "perf_instrumentation")]
466                    {
467                        packed_hits += 1;
468                        t_packed_lookup_us += tl0.elapsed().as_micros();
469                    }
470                    continue;
471                }
472                #[cfg(feature = "perf_instrumentation")]
473                {
474                    t_packed_lookup_us += tl0.elapsed().as_micros();
475                }
476
477                let pc = AbsCoord::new(packed.row0(), packed.col0());
478                let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
479                #[cfg(feature = "perf_instrumentation")]
480                let tg0 = PerfInstant::now();
481                if let Some(&existing) = self.cell_to_vertex.get(&addr) {
482                    ordered[idx] = Some(existing);
483                    if self.first_load_assume_new {
484                        self.load_packed_to_vertex.insert(packed, existing);
485                    }
486                    #[cfg(feature = "perf_instrumentation")]
487                    {
488                        generic_hits += 1;
489                    }
490                } else {
491                    missing_items.push((idx, packed));
492                    #[cfg(feature = "perf_instrumentation")]
493                    {
494                        missing += 1;
495                    }
496                }
497                #[cfg(feature = "perf_instrumentation")]
498                {
499                    t_generic_lookup_us += tg0.elapsed().as_micros();
500                }
501            }
502
503            if !missing_items.is_empty() {
504                self.ensure_touched_sheets.insert(sid);
505
506                let mut pcs: Vec<AbsCoord> = Vec::with_capacity(missing_items.len());
507                for (_, packed) in &missing_items {
508                    pcs.push(AbsCoord::new(packed.row0(), packed.col0()));
509                }
510
511                #[cfg(feature = "perf_instrumentation")]
512                let ta0 = PerfInstant::now();
513                let vids = self.store.allocate_contiguous(sid, &pcs, 0x00);
514                #[cfg(feature = "perf_instrumentation")]
515                {
516                    t_alloc_us += ta0.elapsed().as_micros();
517                }
518                add_batch.reserve(missing_items.len());
519
520                match self.config.sheet_index_mode {
521                    crate::engine::SheetIndexMode::Eager
522                    | crate::engine::SheetIndexMode::FastBatch => {
523                        for ((input_idx, packed), vid) in
524                            missing_items.into_iter().zip(vids.into_iter())
525                        {
526                            let pc = AbsCoord::new(packed.row0(), packed.col0());
527                            ordered[input_idx] = Some(vid);
528                            add_batch.push((pc, vid.0));
529
530                            #[cfg(feature = "perf_instrumentation")]
531                            let tm0 = PerfInstant::now();
532                            if self.first_load_assume_new {
533                                self.load_packed_to_vertex.insert(packed, vid);
534                            } else {
535                                let addr =
536                                    CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
537                                self.cell_to_vertex.insert(addr, vid);
538                            }
539                            #[cfg(feature = "perf_instrumentation")]
540                            {
541                                t_map_insert_us += tm0.elapsed().as_micros();
542                            }
543
544                            #[cfg(feature = "perf_instrumentation")]
545                            let ti0 = PerfInstant::now();
546                            self.sheet_index_mut(sid).add_vertex(pc, vid);
547                            #[cfg(feature = "perf_instrumentation")]
548                            {
549                                t_index_insert_us += ti0.elapsed().as_micros();
550                            }
551                        }
552                    }
553                    crate::engine::SheetIndexMode::Lazy => {
554                        for ((input_idx, packed), vid) in
555                            missing_items.into_iter().zip(vids.into_iter())
556                        {
557                            let pc = AbsCoord::new(packed.row0(), packed.col0());
558                            ordered[input_idx] = Some(vid);
559                            add_batch.push((pc, vid.0));
560
561                            #[cfg(feature = "perf_instrumentation")]
562                            let tm0 = PerfInstant::now();
563                            if self.first_load_assume_new {
564                                self.load_packed_to_vertex.insert(packed, vid);
565                            } else {
566                                let addr =
567                                    CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
568                                self.cell_to_vertex.insert(addr, vid);
569                            }
570                            #[cfg(feature = "perf_instrumentation")]
571                            {
572                                t_map_insert_us += tm0.elapsed().as_micros();
573                            }
574                        }
575                    }
576                }
577            }
578        } else {
579            let mut grouped: FxHashMap<SheetId, Vec<(usize, PackedSheetCell)>> =
580                FxHashMap::default();
581
582            for (idx, packed) in packed_cells.iter().copied().enumerate() {
583                #[cfg(feature = "perf_instrumentation")]
584                let tl0 = PerfInstant::now();
585                if self.first_load_assume_new
586                    && let Some(&existing) = self.load_packed_to_vertex.get(&packed)
587                {
588                    ordered[idx] = Some(existing);
589                    #[cfg(feature = "perf_instrumentation")]
590                    {
591                        packed_hits += 1;
592                        t_packed_lookup_us += tl0.elapsed().as_micros();
593                    }
594                    continue;
595                }
596                #[cfg(feature = "perf_instrumentation")]
597                {
598                    t_packed_lookup_us += tl0.elapsed().as_micros();
599                }
600
601                let sid = packed.sheet_id();
602                let pc = AbsCoord::new(packed.row0(), packed.col0());
603                let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
604                #[cfg(feature = "perf_instrumentation")]
605                let tg0 = PerfInstant::now();
606                if let Some(&existing) = self.cell_to_vertex.get(&addr) {
607                    ordered[idx] = Some(existing);
608                    if self.first_load_assume_new {
609                        self.load_packed_to_vertex.insert(packed, existing);
610                    }
611                    #[cfg(feature = "perf_instrumentation")]
612                    {
613                        generic_hits += 1;
614                    }
615                } else {
616                    grouped.entry(sid).or_default().push((idx, packed));
617                    #[cfg(feature = "perf_instrumentation")]
618                    {
619                        missing += 1;
620                    }
621                }
622                #[cfg(feature = "perf_instrumentation")]
623                {
624                    t_generic_lookup_us += tg0.elapsed().as_micros();
625                }
626            }
627
628            for (sid, items) in grouped {
629                if items.is_empty() {
630                    continue;
631                }
632                self.ensure_touched_sheets.insert(sid);
633
634                let mut pcs: Vec<AbsCoord> = Vec::with_capacity(items.len());
635                for (_, packed) in &items {
636                    pcs.push(AbsCoord::new(packed.row0(), packed.col0()));
637                }
638
639                #[cfg(feature = "perf_instrumentation")]
640                let ta0 = PerfInstant::now();
641                let vids = self.store.allocate_contiguous(sid, &pcs, 0x00);
642                #[cfg(feature = "perf_instrumentation")]
643                {
644                    t_alloc_us += ta0.elapsed().as_micros();
645                }
646
647                for ((input_idx, packed), vid) in items.into_iter().zip(vids.into_iter()) {
648                    let pc = AbsCoord::new(packed.row0(), packed.col0());
649                    ordered[input_idx] = Some(vid);
650                    add_batch.push((pc, vid.0));
651
652                    #[cfg(feature = "perf_instrumentation")]
653                    let tm0 = PerfInstant::now();
654                    if self.first_load_assume_new {
655                        self.load_packed_to_vertex.insert(packed, vid);
656                    } else {
657                        let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
658                        self.cell_to_vertex.insert(addr, vid);
659                    }
660                    #[cfg(feature = "perf_instrumentation")]
661                    {
662                        t_map_insert_us += tm0.elapsed().as_micros();
663                    }
664
665                    match self.config.sheet_index_mode {
666                        crate::engine::SheetIndexMode::Eager
667                        | crate::engine::SheetIndexMode::FastBatch => {
668                            #[cfg(feature = "perf_instrumentation")]
669                            let ti0 = PerfInstant::now();
670                            self.sheet_index_mut(sid).add_vertex(pc, vid);
671                            #[cfg(feature = "perf_instrumentation")]
672                            {
673                                t_index_insert_us += ti0.elapsed().as_micros();
674                            }
675                        }
676                        crate::engine::SheetIndexMode::Lazy => {
677                            // defer index build
678                        }
679                    }
680                }
681            }
682        }
683
684        if !add_batch.is_empty() {
685            #[cfg(feature = "perf_instrumentation")]
686            let te0 = PerfInstant::now();
687            self.edges.add_vertices_batch(&add_batch);
688            #[cfg(feature = "perf_instrumentation")]
689            {
690                t_edge_register_us += te0.elapsed().as_micros();
691            }
692        }
693
694        #[cfg(feature = "perf_instrumentation")]
695        if debug {
696            eprintln!(
697                "[fz][ensure] cells={} single_sheet={} packed_hits={} generic_hits={} missing={} packed_lookup={}us generic_lookup={}us alloc={}us map_insert={}us index_insert={}us edge_register={}us total={}ms",
698                packed_cells.len(),
699                single_sheet,
700                packed_hits,
701                generic_hits,
702                missing,
703                t_packed_lookup_us,
704                t_generic_lookup_us,
705                t_alloc_us,
706                t_map_insert_us,
707                t_index_insert_us,
708                t_edge_register_us,
709                t0.elapsed().as_millis(),
710            );
711        }
712
713        let ordered = ordered
714            .into_iter()
715            .map(|vid| vid.expect("ensure_vertices_batch_packed_ordered must resolve every coord"))
716            .collect();
717        (ordered, add_batch)
718    }
719
720    /// Ensure vertices exist for given coords and return vertex ids aligned to the input order,
721    /// plus the newly allocated `(coord, raw_vid)` items suitable for edge/index population.
722    pub fn ensure_vertices_batch_ordered(
723        &mut self,
724        coords: &[(SheetId, AbsCoord)],
725    ) -> (Vec<VertexId>, Vec<(AbsCoord, u32)>) {
726        let mut packed: Vec<PackedSheetCell> = Vec::with_capacity(coords.len());
727        for &(sid, coord) in coords {
728            packed.push(Self::packed_cell_key(sid, coord));
729        }
730        self.ensure_vertices_batch_packed_ordered(&packed)
731    }
732
733    #[inline]
734    fn packed_cell_key(sheet_id: SheetId, coord: AbsCoord) -> PackedSheetCell {
735        PackedSheetCell::try_new(sheet_id, coord.row(), coord.col())
736            .expect("graph coordinate must fit PackedSheetCell")
737    }
738
739    fn flush_load_packed_mappings(&mut self) {
740        if self.load_packed_to_vertex.is_empty() {
741            return;
742        }
743        let debug = std::env::var("FZ_DEBUG_LOAD")
744            .ok()
745            .is_some_and(|v| v != "0");
746        let t0 = crate::instant::FzInstant::now();
747        let count = self.load_packed_to_vertex.len();
748        self.cell_to_vertex.reserve(count);
749        for (&packed, &vid) in &self.load_packed_to_vertex {
750            let coord = AbsCoord::new(packed.row0(), packed.col0());
751            let addr = CellRef::new(
752                packed.sheet_id(),
753                Coord::new(coord.row(), coord.col(), true, true),
754            );
755            self.cell_to_vertex.insert(addr, vid);
756        }
757        self.load_packed_to_vertex.clear();
758        if debug {
759            eprintln!(
760                "[fz][load] flush_load_packed_mappings: {} entries in {:.1} ms",
761                count,
762                t0.elapsed().as_secs_f64() * 1000.0,
763            );
764        }
765    }
766
767    /// Enable/disable the first-load fast path for value inserts.
768    pub fn set_first_load_assume_new(&mut self, enabled: bool) {
769        if self.first_load_assume_new && !enabled {
770            self.flush_load_packed_mappings();
771        } else if enabled {
772            self.load_packed_to_vertex.clear();
773        }
774        self.first_load_assume_new = enabled;
775    }
776
777    pub fn first_load_assume_new(&self) -> bool {
778        self.first_load_assume_new
779    }
780
781    /// Reset the per-sheet ensure touch tracking.
782    pub fn reset_ensure_touched(&mut self) {
783        self.ensure_touched_sheets.clear();
784    }
785
786    /// Store an AST and return its arena id.
787    pub fn store_ast(&mut self, ast: &formualizer_parse::parser::ASTNode) -> AstNodeId {
788        self.data_store.store_ast(ast, &self.sheet_reg)
789    }
790
791    /// Store ASTs in batch and return their arena ids
792    pub fn store_asts_batch<'a, I>(&mut self, asts: I) -> Vec<AstNodeId>
793    where
794        I: IntoIterator<Item = &'a formualizer_parse::parser::ASTNode>,
795    {
796        self.data_store.store_asts_batch(asts, &self.sheet_reg)
797    }
798
799    /// Reserve metadata structures for upcoming formula assignments during bulk load.
800    pub fn reserve_formula_metadata(&mut self, additional: usize) {
801        self.vertex_formulas.reserve(additional);
802        self.dirty_vertices.reserve(additional);
803        self.volatile_vertices.reserve(additional);
804    }
805
806    /// Lookup VertexId for a (SheetId, AbsCoord)
807    pub fn vid_for_sid_pc(&self, sid: SheetId, pc: AbsCoord) -> Option<VertexId> {
808        let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
809        self.cell_to_vertex.get(&addr).copied()
810    }
811
812    /// Helper to map a global cell index in a plan to a VertexId
813    pub fn vid_for_plan_idx(
814        &self,
815        plan: &crate::engine::plan::DependencyPlan,
816        idx: u32,
817    ) -> Option<VertexId> {
818        let (sid, pc) = plan.global_cells.get(idx as usize).copied()?;
819        self.vid_for_sid_pc(sid, pc)
820    }
821    /// Assign a formula to an existing vertex, removing prior edges and setting flags
822    pub fn assign_formula_vertex(
823        &mut self,
824        vid: VertexId,
825        ast_id: AstNodeId,
826        volatile: bool,
827        dynamic: bool,
828    ) {
829        if self.vertex_formulas.contains_key(&vid) {
830            self.remove_dependent_edges(vid);
831        }
832        self.store
833            .set_kind(vid, crate::engine::vertex::VertexKind::FormulaScalar);
834        self.vertex_values.remove(&vid);
835        self.vertex_formulas.insert(vid, ast_id);
836        self.mark_volatile(vid, volatile);
837        self.store.set_dynamic(vid, dynamic);
838
839        // schedule evaluation
840        self.mark_vertex_dirty(vid);
841    }
842
843    /// Fast path for initial workbook load: assign a formula to a vertex that is known not to
844    /// already own dependency edges in the graph. Dirtiness is batched separately.
845    pub fn assign_formula_vertex_load_fast(
846        &mut self,
847        vid: VertexId,
848        ast_id: AstNodeId,
849        volatile: bool,
850        dynamic: bool,
851    ) {
852        debug_assert!(
853            !self.vertex_formulas.contains_key(&vid),
854            "load-fast formula assignment expects fresh/non-formula vertices"
855        );
856        self.store
857            .set_kind(vid, crate::engine::vertex::VertexKind::FormulaScalar);
858        self.vertex_values.remove(&vid);
859        self.vertex_formulas.insert(vid, ast_id);
860        self.mark_volatile(vid, volatile);
861        self.store.set_dynamic(vid, dynamic);
862    }
863
864    /// Public wrapper for adding edges without beginning a batch (caller manages batch)
865    pub fn add_edges_nobatch(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
866        self.add_dependent_edges_nobatch(dependent, dependencies);
867    }
868
869    /// Iterate all normal vertex ids
870    pub fn iter_vertex_ids(&self) -> impl Iterator<Item = VertexId> + '_ {
871        self.store.all_vertices()
872    }
873
874    /// Get current AbsCoord for a vertex
875    pub fn vertex_coord(&self, vid: VertexId) -> AbsCoord {
876        self.store.coord(vid)
877    }
878
879    /// Total number of allocated vertices (including deleted)
880    pub fn vertex_count(&self) -> usize {
881        self.store.len()
882    }
883
884    /// Replace CSR edges in one shot from adjacency and coords
885    pub fn build_edges_from_adjacency(
886        &mut self,
887        adjacency: Vec<(u32, Vec<u32>)>,
888        coords: Vec<AbsCoord>,
889        vertex_ids: Vec<u32>,
890    ) {
891        // Merge in base/delta out-edges for vertices the formula-target
892        // adjacency doesn't cover (e.g. named-range pass-through vertices)
893        // before handing the final adjacency to the pure builder.
894        let adjacency = self.edges.adjacency_with_carried_forward_edges(adjacency);
895        self.edges
896            .build_from_adjacency(adjacency, coords, vertex_ids);
897    }
898    /// Compute min/max used row among vertices within [start_col..=end_col] on a sheet.
899    pub fn used_row_bounds_for_columns(
900        &self,
901        sheet_id: SheetId,
902        start_col: u32,
903        end_col: u32,
904    ) -> Option<(u32, u32)> {
905        // Prefer sheet index when available
906        if let Some(index) = self.sheet_indexes.get(&sheet_id)
907            && !index.is_empty()
908        {
909            let mut min_r: Option<u32> = None;
910            let mut max_r: Option<u32> = None;
911            for vid in index.vertices_in_col_range(start_col, end_col) {
912                let r = self.store.coord(vid).row();
913                min_r = Some(min_r.map(|m| m.min(r)).unwrap_or(r));
914                max_r = Some(max_r.map(|m| m.max(r)).unwrap_or(r));
915            }
916            return match (min_r, max_r) {
917                (Some(a), Some(b)) => Some((a, b)),
918                _ => None,
919            };
920        }
921        // Fallback: scan cell maps on the fly
922        let mut min_r: Option<u32> = None;
923        let mut max_r: Option<u32> = None;
924        for cref in self.cell_to_vertex.keys() {
925            if cref.sheet_id == sheet_id {
926                let c = cref.coord.col();
927                if c >= start_col && c <= end_col {
928                    let r = cref.coord.row();
929                    min_r = Some(min_r.map(|m| m.min(r)).unwrap_or(r));
930                    max_r = Some(max_r.map(|m| m.max(r)).unwrap_or(r));
931                }
932            }
933        }
934        for packed in self.load_packed_to_vertex.keys() {
935            if packed.sheet_id() == sheet_id {
936                let c = packed.col0();
937                if c >= start_col && c <= end_col {
938                    let r = packed.row0();
939                    min_r = Some(min_r.map(|m| m.min(r)).unwrap_or(r));
940                    max_r = Some(max_r.map(|m| m.max(r)).unwrap_or(r));
941                }
942            }
943        }
944        match (min_r, max_r) {
945            (Some(a), Some(b)) => Some((a, b)),
946            _ => None,
947        }
948    }
949
950    /// Build (or rebuild) the sheet index for a given sheet if running in Lazy mode.
951    pub fn finalize_sheet_index(&mut self, sheet: &str) {
952        let Some(sheet_id) = self.sheet_reg.get_id(sheet) else {
953            return;
954        };
955        // If already present and non-empty, skip
956        if let Some(idx) = self.sheet_indexes.get(&sheet_id)
957            && !idx.is_empty()
958        {
959            return;
960        }
961        let mut idx = SheetIndex::new();
962        // Collect coords for this sheet
963        let mut batch: Vec<(AbsCoord, VertexId)> =
964            Vec::with_capacity(self.cell_to_vertex.len() + self.load_packed_to_vertex.len());
965        for (cref, vid) in &self.cell_to_vertex {
966            if cref.sheet_id == sheet_id {
967                batch.push((AbsCoord::new(cref.coord.row(), cref.coord.col()), *vid));
968            }
969        }
970        for (&packed, &vid) in &self.load_packed_to_vertex {
971            if packed.sheet_id() != sheet_id {
972                continue;
973            }
974            let coord = AbsCoord::new(packed.row0(), packed.col0());
975            let addr = CellRef::new(sheet_id, Coord::new(coord.row(), coord.col(), true, true));
976            if self.cell_to_vertex.contains_key(&addr) {
977                continue;
978            }
979            batch.push((coord, vid));
980        }
981        // Use batch builder
982        idx.add_vertices_batch(&batch);
983        self.sheet_indexes.insert(sheet_id, idx);
984    }
985
986    pub fn set_sheet_index_mode(&mut self, mode: crate::engine::SheetIndexMode) {
987        self.config.sheet_index_mode = mode;
988    }
989
990    /// Compute min/max used column among vertices within [start_row..=end_row] on a sheet.
991    pub fn used_col_bounds_for_rows(
992        &self,
993        sheet_id: SheetId,
994        start_row: u32,
995        end_row: u32,
996    ) -> Option<(u32, u32)> {
997        if let Some(index) = self.sheet_indexes.get(&sheet_id)
998            && !index.is_empty()
999        {
1000            let mut min_c: Option<u32> = None;
1001            let mut max_c: Option<u32> = None;
1002            for vid in index.vertices_in_row_range(start_row, end_row) {
1003                let c = self.store.coord(vid).col();
1004                min_c = Some(min_c.map(|m| m.min(c)).unwrap_or(c));
1005                max_c = Some(max_c.map(|m| m.max(c)).unwrap_or(c));
1006            }
1007            return match (min_c, max_c) {
1008                (Some(a), Some(b)) => Some((a, b)),
1009                _ => None,
1010            };
1011        }
1012        // Fallback: scan cell maps on the fly
1013        let mut min_c: Option<u32> = None;
1014        let mut max_c: Option<u32> = None;
1015        for cref in self.cell_to_vertex.keys() {
1016            if cref.sheet_id == sheet_id {
1017                let r = cref.coord.row();
1018                if r >= start_row && r <= end_row {
1019                    let c = cref.coord.col();
1020                    min_c = Some(min_c.map(|m| m.min(c)).unwrap_or(c));
1021                    max_c = Some(max_c.map(|m| m.max(c)).unwrap_or(c));
1022                }
1023            }
1024        }
1025        for packed in self.load_packed_to_vertex.keys() {
1026            if packed.sheet_id() == sheet_id {
1027                let r = packed.row0();
1028                if r >= start_row && r <= end_row {
1029                    let c = packed.col0();
1030                    min_c = Some(min_c.map(|m| m.min(c)).unwrap_or(c));
1031                    max_c = Some(max_c.map(|m| m.max(c)).unwrap_or(c));
1032                }
1033            }
1034        }
1035        match (min_c, max_c) {
1036            (Some(a), Some(b)) => Some((a, b)),
1037            _ => None,
1038        }
1039    }
1040
1041    /// Returns true if the given sheet currently contains any formula vertices.
1042    pub fn sheet_has_formulas(&self, sheet_id: SheetId) -> bool {
1043        // Check vertex_formulas keys; they represent formula vertices
1044        for &vid in self.vertex_formulas.keys() {
1045            if self.store.sheet_id(vid) == sheet_id {
1046                return true;
1047            }
1048        }
1049        false
1050    }
1051    pub fn new() -> Self {
1052        Self::new_with_config(super::EvalConfig::default())
1053    }
1054
1055    pub fn new_with_config(config: super::EvalConfig) -> Self {
1056        let mut sheet_reg = SheetRegistry::new();
1057        let default_sheet_id = sheet_reg.id_for(&config.default_sheet_name);
1058
1059        let mut g = Self {
1060            store: VertexStore::new(),
1061            edges: CsrMutableEdges::new(),
1062            data_store: DataStore::new(),
1063            vertex_values: FxHashMap::default(),
1064            vertex_formulas: FxHashMap::default(),
1065            // Phase 1 (ticket 610): Arrow-truth is the only supported mode.
1066            // The dependency graph does not cache cell/formula literal payloads.
1067            value_cache_enabled: false,
1068            #[cfg(debug_assertions)]
1069            graph_value_read_attempts: AtomicU64::new(0),
1070            cell_to_vertex: std::collections::HashMap::with_hasher(CoordBuildHasher),
1071            load_packed_to_vertex: std::collections::HashMap::with_hasher(CoordBuildHasher),
1072            dirty_vertices: FxHashSet::default(),
1073            dirty_propagation_visits: 0,
1074            deferred_dirty_depth: 0,
1075            deferred_dirty_pending: Vec::new(),
1076            volatile_vertices: FxHashSet::default(),
1077            ref_error_vertices: FxHashSet::default(),
1078            formula_to_range_deps: FxHashMap::default(),
1079            stripe_to_dependents: FxHashMap::default(),
1080            sheet_indexes: FxHashMap::default(),
1081            sheet_reg,
1082            default_sheet_id,
1083            named_ranges: FxHashMap::default(),
1084            named_ranges_lookup: FxHashMap::default(),
1085            sheet_named_ranges: FxHashMap::default(),
1086            sheet_named_ranges_lookup: FxHashMap::default(),
1087            vertex_to_names: FxHashMap::default(),
1088            name_vertex_lookup: FxHashMap::default(),
1089            pending_name_links: FxHashMap::default(),
1090            vertex_to_pending_names: FxHashMap::default(),
1091            tables: FxHashMap::default(),
1092            tables_lookup: FxHashMap::default(),
1093            table_vertex_lookup: FxHashMap::default(),
1094            source_scalars: FxHashMap::default(),
1095            source_tables: FxHashMap::default(),
1096            source_vertex_lookup: FxHashMap::default(),
1097            name_vertex_seq: 0,
1098            source_vertex_seq: 0,
1099            cell_to_name_dependents: FxHashMap::default(),
1100            name_to_cell_dependencies: FxHashMap::default(),
1101            config: config.clone(),
1102            formula_authority: FormulaAuthority::default(),
1103            pk_order: None,
1104            spill_anchor_to_cells: FxHashMap::default(),
1105            spill_cell_to_anchor: std::collections::HashMap::with_hasher(CoordBuildHasher),
1106            first_load_assume_new: false,
1107            ensure_touched_sheets: FxHashSet::default(),
1108            tombstone_registry: TombstoneRegistry::default(),
1109            #[cfg(test)]
1110            instr: std::sync::Mutex::new(GraphInstrumentation::default()),
1111        };
1112
1113        if config.use_dynamic_topo {
1114            // Seed with currently active vertices (likely empty at startup)
1115            let nodes = g
1116                .store
1117                .all_vertices()
1118                .filter(|&id| g.store.vertex_exists_active(id));
1119            let mut pk = DynamicTopo::new(
1120                nodes,
1121                PkConfig {
1122                    visit_budget: config.pk_visit_budget,
1123                    compaction_interval_ops: config.pk_compaction_interval_ops,
1124                },
1125            );
1126            // Build an initial order using current graph
1127            let adapter = GraphAdapter { g: &g };
1128            pk.rebuild_full(&adapter);
1129            g.pk_order = Some(pk);
1130        }
1131
1132        g
1133    }
1134
1135    /// When dynamic topology is enabled, compute layers for a subset using PK ordering.
1136    pub(crate) fn pk_layers_for(&self, subset: &[VertexId]) -> Option<Vec<crate::engine::Layer>> {
1137        let pk = self.pk_order.as_ref()?;
1138        let adapter = crate::engine::topo::GraphAdapter { g: self };
1139        let layers = pk.layers_for(&adapter, subset, self.config.max_layer_width);
1140        Some(
1141            layers
1142                .into_iter()
1143                .map(|vs| crate::engine::Layer { vertices: vs })
1144                .collect(),
1145        )
1146    }
1147
1148    #[inline]
1149    pub(crate) fn dynamic_topo_enabled(&self) -> bool {
1150        self.pk_order.is_some()
1151    }
1152
1153    #[cfg(test)]
1154    pub fn reset_instr(&mut self) {
1155        if let Ok(mut g) = self.instr.lock() {
1156            *g = GraphInstrumentation::default();
1157        }
1158    }
1159
1160    #[cfg(test)]
1161    pub fn instr(&self) -> GraphInstrumentation {
1162        self.instr.lock().map(|g| g.clone()).unwrap_or_default()
1163    }
1164
1165    /// Begin batch operations - defer CSR rebuilds until end_batch() is called
1166    pub fn begin_batch(&mut self) {
1167        self.edges.begin_batch();
1168    }
1169
1170    /// End batch operations and trigger CSR rebuild if needed
1171    pub fn end_batch(&mut self) {
1172        self.edges.end_batch();
1173    }
1174
1175    pub fn default_sheet_id(&self) -> SheetId {
1176        self.default_sheet_id
1177    }
1178
1179    pub fn default_sheet_name(&self) -> &str {
1180        self.sheet_reg.name(self.default_sheet_id)
1181    }
1182
1183    pub fn set_default_sheet_by_name(&mut self, name: &str) {
1184        self.default_sheet_id = self.sheet_id_mut(name);
1185    }
1186
1187    pub fn set_default_sheet_by_id(&mut self, id: SheetId) {
1188        self.default_sheet_id = id;
1189    }
1190
1191    /// Returns the ID for a sheet name, creating one if it doesn't exist.
1192    pub fn sheet_id_mut(&mut self, name: &str) -> SheetId {
1193        self.sheet_reg.id_for(name)
1194    }
1195
1196    pub fn sheet_id(&self, name: &str) -> Option<SheetId> {
1197        self.sheet_reg.get_id(name)
1198    }
1199
1200    /// Resolve a sheet name to an existing ID or return a #REF! error.
1201    fn resolve_existing_sheet_id(&self, name: &str) -> Result<SheetId, ExcelError> {
1202        self.sheet_id(name).ok_or_else(|| {
1203            ExcelError::new(ExcelErrorKind::Ref).with_message(format!("Sheet not found: {name}"))
1204        })
1205    }
1206
1207    /// Returns the name of a sheet given its ID.
1208    pub fn sheet_name(&self, id: SheetId) -> &str {
1209        self.sheet_reg.name(id)
1210    }
1211
1212    /// Access the sheet registry (read-only) for external bindings
1213    pub fn sheet_reg(&self) -> &SheetRegistry {
1214        &self.sheet_reg
1215    }
1216
1217    pub(crate) fn data_store(&self) -> &DataStore {
1218        &self.data_store
1219    }
1220
1221    pub(crate) fn make_ingest_pipeline<'a>(
1222        &'a mut self,
1223        function_provider: &'a dyn crate::traits::FunctionProvider,
1224        policy: formualizer_parse::parser::CollectPolicy,
1225    ) -> crate::engine::ingest_pipeline::IngestPipeline<'a> {
1226        use crate::engine::ingest_pipeline::{
1227            NameRegistryView, NamedEntryRef, NamedTarget, SourceEntryRef, SourceRegistryView,
1228            TableEntrySnapshot, TableRegistryView,
1229        };
1230
1231        let DependencyGraph {
1232            data_store,
1233            sheet_reg,
1234            named_ranges,
1235            named_ranges_lookup,
1236            sheet_named_ranges,
1237            sheet_named_ranges_lookup,
1238            tables,
1239            tables_lookup,
1240            source_scalars,
1241            source_tables,
1242            config,
1243            ..
1244        } = self;
1245
1246        let case_sensitive_names = config.case_sensitive_names;
1247        let names = NameRegistryView::new(move |name, current_sheet| {
1248            let found = if case_sensitive_names {
1249                sheet_named_ranges
1250                    .get(&(current_sheet, name.to_string()))
1251                    .or_else(|| named_ranges.get(name))
1252            } else {
1253                let key = name.to_lowercase();
1254                sheet_named_ranges_lookup
1255                    .get(&(current_sheet, key.clone()))
1256                    .and_then(|canon| sheet_named_ranges.get(&(current_sheet, canon.clone())))
1257                    .or_else(|| {
1258                        named_ranges_lookup
1259                            .get(&key)
1260                            .and_then(|canon| named_ranges.get(canon))
1261                    })
1262            };
1263            found.map(|entry| NamedEntryRef {
1264                vertex: entry.vertex,
1265                target: match &entry.definition {
1266                    crate::engine::named_range::NamedDefinition::Cell(cell) => {
1267                        NamedTarget::Cell(*cell)
1268                    }
1269                    crate::engine::named_range::NamedDefinition::Range(range) => {
1270                        NamedTarget::Range(*range)
1271                    }
1272                    crate::engine::named_range::NamedDefinition::Literal(_)
1273                    | crate::engine::named_range::NamedDefinition::Formula { .. } => {
1274                        NamedTarget::Other
1275                    }
1276                },
1277            })
1278        });
1279
1280        let case_sensitive_tables = config.case_sensitive_tables;
1281        let tables_ref = &*tables;
1282        let tables_lookup_ref = &*tables_lookup;
1283        let snapshot_table = |entry: &tables::TableEntry| TableEntrySnapshot {
1284            name: entry.name.clone(),
1285            range: entry.range,
1286            header_row: entry.header_row,
1287            headers: entry.headers.clone(),
1288            vertex: entry.vertex,
1289        };
1290        let tables_view = TableRegistryView::new(
1291            move |name| {
1292                if case_sensitive_tables {
1293                    tables_ref.get(name).map(snapshot_table)
1294                } else {
1295                    let key = name.to_lowercase();
1296                    tables_lookup_ref
1297                        .get(&key)
1298                        .and_then(|canon| tables_ref.get(canon))
1299                        .map(snapshot_table)
1300                }
1301            },
1302            move |cell| {
1303                let row0 = cell.coord.row();
1304                let col0 = cell.coord.col();
1305                let mut best: Option<&tables::TableEntry> = None;
1306                let mut best_area = u64::MAX;
1307                let mut best_name = "";
1308                for table in tables_ref.values() {
1309                    if table.sheet_id() != cell.sheet_id {
1310                        continue;
1311                    }
1312                    let sr0 = table.range.start.coord.row();
1313                    let sc0 = table.range.start.coord.col();
1314                    let er0 = table.range.end.coord.row();
1315                    let ec0 = table.range.end.coord.col();
1316                    if row0 < sr0 || row0 > er0 || col0 < sc0 || col0 > ec0 {
1317                        continue;
1318                    }
1319                    let area = ((er0 - sr0 + 1) as u64).saturating_mul((ec0 - sc0 + 1) as u64);
1320                    let name = table.name.as_str();
1321                    if best.is_none() || area < best_area || (area == best_area && name < best_name)
1322                    {
1323                        best = Some(table);
1324                        best_area = area;
1325                        best_name = name;
1326                    }
1327                }
1328                best.map(snapshot_table)
1329            },
1330        );
1331
1332        let sources = SourceRegistryView::new(
1333            move |name| {
1334                source_scalars.get(name).map(|entry| SourceEntryRef {
1335                    vertex: entry.vertex,
1336                })
1337            },
1338            move |name| {
1339                source_tables.get(name).map(|entry| SourceEntryRef {
1340                    vertex: entry.vertex,
1341                })
1342            },
1343        );
1344
1345        crate::engine::ingest_pipeline::IngestPipeline::new(
1346            data_store,
1347            sheet_reg,
1348            names,
1349            tables_view,
1350            sources,
1351            function_provider,
1352            policy,
1353        )
1354    }
1355
1356    /// Converts a `CellRef` to a fully qualified A1-style string (e.g., "SheetName!A1").
1357    pub fn to_a1(&self, cell_ref: CellRef) -> String {
1358        format!("{}!{}", self.sheet_name(cell_ref.sheet_id), cell_ref.coord)
1359    }
1360
1361    pub(crate) fn vertex_len(&self) -> usize {
1362        self.store.len()
1363    }
1364
1365    /// Get mutable access to a sheet's index, creating it if it doesn't exist
1366    /// This is the primary way VertexEditor and internal operations access the index
1367    pub fn sheet_index_mut(&mut self, sheet_id: SheetId) -> &mut SheetIndex {
1368        self.sheet_indexes.entry(sheet_id).or_default()
1369    }
1370
1371    /// Get immutable access to a sheet's index, returns None if not initialized
1372    pub fn sheet_index(&self, sheet_id: SheetId) -> Option<&SheetIndex> {
1373        self.sheet_indexes.get(&sheet_id)
1374    }
1375
1376    /// Set a value in a cell, returns affected vertex IDs
1377    pub fn set_cell_value(
1378        &mut self,
1379        sheet: &str,
1380        row: u32,
1381        col: u32,
1382        value: LiteralValue,
1383    ) -> Result<OperationSummary, ExcelError> {
1384        let value = normalize_stored_literal(value);
1385        let sheet_id = self.sheet_id_mut(sheet);
1386        // External API is 1-based; store 0-based coords internally.
1387        let coord = Coord::from_excel(row, col, true, true);
1388        let addr = CellRef::new(sheet_id, coord);
1389        let mut created_placeholders = Vec::new();
1390
1391        let vertex_id = if let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
1392            // Check if it was a formula and remove dependencies
1393            let is_formula = matches!(
1394                self.store.kind(existing_id),
1395                VertexKind::FormulaScalar | VertexKind::FormulaArray
1396            );
1397
1398            if is_formula {
1399                self.remove_dependent_edges(existing_id);
1400                self.detach_vertex_from_names(existing_id);
1401                self.clear_pending_name_references(existing_id);
1402                self.vertex_formulas.remove(&existing_id);
1403            }
1404
1405            // Update to value kind
1406            self.store.set_kind(existing_id, VertexKind::Cell);
1407            if self.value_cache_enabled {
1408                let value_ref = self.data_store.store_value(value);
1409                self.vertex_values.insert(existing_id, value_ref);
1410            } else {
1411                // Ensure no stale payload remains if cache is disabled.
1412                self.vertex_values.remove(&existing_id);
1413            }
1414            existing_id
1415        } else {
1416            // Create new vertex
1417            created_placeholders.push(addr);
1418            let packed_coord = AbsCoord::from_excel(row, col);
1419            let vertex_id = self.store.allocate(packed_coord, sheet_id, 0x01); // dirty flag
1420
1421            // Add vertex coordinate for CSR
1422            self.edges.add_vertex(packed_coord, vertex_id.0);
1423
1424            // Add to sheet index for O(log n + k) range queries
1425            self.sheet_index_mut(sheet_id)
1426                .add_vertex(packed_coord, vertex_id);
1427
1428            self.store.set_kind(vertex_id, VertexKind::Cell);
1429            if self.value_cache_enabled {
1430                let value_ref = self.data_store.store_value(value);
1431                self.vertex_values.insert(vertex_id, value_ref);
1432            }
1433            self.cell_to_vertex.insert(addr, vertex_id);
1434            vertex_id
1435        };
1436
1437        // Cell edits clear any structural #REF! marking for this vertex.
1438        self.ref_error_vertices.remove(&vertex_id);
1439
1440        Ok(OperationSummary {
1441            affected_vertices: self.mark_dirty(vertex_id),
1442            created_placeholders,
1443        })
1444    }
1445
1446    /// Reserve capacity hints for upcoming bulk cell inserts (values only for now).
1447    pub fn reserve_cells(&mut self, additional: usize) {
1448        self.store.reserve(additional);
1449        if self.value_cache_enabled {
1450            self.vertex_values.reserve(additional);
1451        }
1452        self.cell_to_vertex.reserve(additional);
1453        // sheet_indexes: cannot easily reserve per-sheet without distribution; skip.
1454    }
1455
1456    /// Fast path for initial bulk load of value cells: avoids dirty propagation & dependency work.
1457    pub fn set_cell_value_bulk_untracked(
1458        &mut self,
1459        sheet: &str,
1460        row: u32,
1461        col: u32,
1462        value: LiteralValue,
1463    ) {
1464        let value = normalize_stored_literal(value);
1465        let sheet_id = self.sheet_id_mut(sheet);
1466        let coord = Coord::from_excel(row, col, true, true);
1467        let addr = CellRef::new(sheet_id, coord);
1468        if let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
1469            // Overwrite existing value vertex only (ignore formulas in bulk path)
1470            if matches!(
1471                self.store.kind(existing_id),
1472                VertexKind::FormulaScalar | VertexKind::FormulaArray
1473            ) {
1474                self.remove_dependent_edges(existing_id);
1475                self.detach_vertex_from_names(existing_id);
1476                self.clear_pending_name_references(existing_id);
1477                self.vertex_formulas.remove(&existing_id);
1478            }
1479            if self.value_cache_enabled {
1480                let value_ref = self.data_store.store_value(value);
1481                self.vertex_values.insert(existing_id, value_ref);
1482            } else {
1483                self.vertex_values.remove(&existing_id);
1484            }
1485            self.store.set_kind(existing_id, VertexKind::Cell);
1486            self.ref_error_vertices.remove(&existing_id);
1487            return;
1488        }
1489        let packed_coord = AbsCoord::from_excel(row, col);
1490        let vertex_id = self.store.allocate(packed_coord, sheet_id, 0x00); // not dirty
1491        self.edges.add_vertex(packed_coord, vertex_id.0);
1492        self.sheet_index_mut(sheet_id)
1493            .add_vertex(packed_coord, vertex_id);
1494        self.store.set_kind(vertex_id, VertexKind::Cell);
1495        self.ref_error_vertices.remove(&vertex_id);
1496        if self.value_cache_enabled {
1497            let value_ref = self.data_store.store_value(value);
1498            self.vertex_values.insert(vertex_id, value_ref);
1499        }
1500        self.cell_to_vertex.insert(addr, vertex_id);
1501    }
1502
1503    /// Bulk insert a collection of plain value cells (no formulas) more efficiently.
1504    pub fn bulk_insert_values<I>(&mut self, sheet: &str, cells: I)
1505    where
1506        I: IntoIterator<Item = (u32, u32, LiteralValue)>,
1507    {
1508        use crate::instant::FzInstant as Instant;
1509        let t0 = Instant::now();
1510        // Collect first to know size
1511        let collected: Vec<(u32, u32, LiteralValue)> = cells.into_iter().collect();
1512        if collected.is_empty() {
1513            return;
1514        }
1515        let sheet_id = self.sheet_id_mut(sheet);
1516        self.reserve_cells(collected.len());
1517        let t_reserve = Instant::now();
1518        let mut new_vertices: Vec<(AbsCoord, u32)> = Vec::with_capacity(collected.len());
1519        let mut index_items: Vec<(AbsCoord, VertexId)> = Vec::with_capacity(collected.len());
1520        // For new allocations, accumulate values and assign after a single batch store
1521        let mut new_value_coords: Vec<(AbsCoord, VertexId)> = Vec::with_capacity(collected.len());
1522        let mut new_value_literals: Vec<LiteralValue> = Vec::with_capacity(collected.len());
1523        // Detect fast path: during initial ingest, caller may guarantee most cells are new.
1524        let assume_new = self.first_load_assume_new
1525            && self
1526                .sheet_id(sheet)
1527                .map(|sid| !self.ensure_touched_sheets.contains(&sid))
1528                .unwrap_or(false);
1529
1530        for (row, col, value) in collected {
1531            let value = normalize_stored_literal(value);
1532            let coord = Coord::from_excel(row, col, true, true);
1533            let addr = CellRef::new(sheet_id, coord);
1534            if !assume_new && let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
1535                if matches!(
1536                    self.store.kind(existing_id),
1537                    VertexKind::FormulaScalar | VertexKind::FormulaArray
1538                ) {
1539                    self.remove_dependent_edges(existing_id);
1540                    self.detach_vertex_from_names(existing_id);
1541                    self.clear_pending_name_references(existing_id);
1542                    self.vertex_formulas.remove(&existing_id);
1543                }
1544                if self.value_cache_enabled {
1545                    let value_ref = self.data_store.store_value(value);
1546                    self.vertex_values.insert(existing_id, value_ref);
1547                } else {
1548                    self.vertex_values.remove(&existing_id);
1549                }
1550                self.store.set_kind(existing_id, VertexKind::Cell);
1551                continue;
1552            }
1553            let packed = AbsCoord::from_excel(row, col);
1554            let vertex_id = self.store.allocate(packed, sheet_id, 0x00);
1555            self.store.set_kind(vertex_id, VertexKind::Cell);
1556            // Defer value arena storage to a single batch
1557            new_value_coords.push((packed, vertex_id));
1558            new_value_literals.push(value);
1559            self.cell_to_vertex.insert(addr, vertex_id);
1560            new_vertices.push((packed, vertex_id.0));
1561            index_items.push((packed, vertex_id));
1562        }
1563        // Perform a single batch store for newly allocated values
1564        if self.value_cache_enabled && !new_value_literals.is_empty() {
1565            let vrefs = self.data_store.store_values_batch(new_value_literals);
1566            debug_assert_eq!(vrefs.len(), new_value_coords.len());
1567            for (i, (_pc, vid)) in new_value_coords.iter().enumerate() {
1568                self.vertex_values.insert(*vid, vrefs[i]);
1569            }
1570        }
1571        let t_after_alloc = Instant::now();
1572        if !new_vertices.is_empty() {
1573            let t_edges_start = Instant::now();
1574            self.edges.add_vertices_batch(&new_vertices);
1575            let t_edges_done = Instant::now();
1576
1577            match self.config.sheet_index_mode {
1578                crate::engine::SheetIndexMode::Eager => {
1579                    self.sheet_index_mut(sheet_id)
1580                        .add_vertices_batch(&index_items);
1581                }
1582                crate::engine::SheetIndexMode::Lazy => {
1583                    // Skip building index now; will be built on-demand
1584                }
1585                crate::engine::SheetIndexMode::FastBatch => {
1586                    // FastBatch for now delegates to same batch insert (future: build from sorted arrays)
1587                    self.sheet_index_mut(sheet_id)
1588                        .add_vertices_batch(&index_items);
1589                }
1590            }
1591            let t_index_done = Instant::now();
1592        }
1593    }
1594
1595    /// Set a formula in a cell, returns affected vertex IDs
1596    pub fn set_cell_formula(
1597        &mut self,
1598        sheet: &str,
1599        row: u32,
1600        col: u32,
1601        ast: ASTNode,
1602    ) -> Result<OperationSummary, ExcelError> {
1603        self.set_cell_formula_with_volatility(sheet, row, col, ast, false)
1604    }
1605
1606    /// Set a formula in a cell. The volatility argument is retained for API compatibility;
1607    /// dependency flags now come from `IngestPipeline`.
1608    pub fn set_cell_formula_with_volatility(
1609        &mut self,
1610        sheet: &str,
1611        row: u32,
1612        col: u32,
1613        ast: ASTNode,
1614        _volatile: bool,
1615    ) -> Result<OperationSummary, ExcelError> {
1616        let sheet_id = self.sheet_id_mut(sheet);
1617        let placement = CellRef::new(sheet_id, Coord::from_excel(row, col, true, true));
1618        let provider = RegistryFunctionProvider;
1619        let ingested = {
1620            let mut pipeline = self.ingest_pipeline(&provider);
1621            pipeline.ingest_formula(FormulaAstInput::Tree(ast), placement, None)?
1622        };
1623        self.set_cell_formula_with_plan(
1624            sheet,
1625            row,
1626            col,
1627            ingested.ast_id,
1628            &ingested.dep_plan,
1629            ingested.dep_plan.volatile,
1630            ingested.dep_plan.dynamic,
1631        )
1632    }
1633
1634    pub(crate) fn set_cell_formula_with_plan(
1635        &mut self,
1636        sheet: &str,
1637        row: u32,
1638        col: u32,
1639        ast_id: AstNodeId,
1640        plan: &DependencyPlanRow,
1641        volatile: bool,
1642        dynamic: bool,
1643    ) -> Result<OperationSummary, ExcelError> {
1644        let dbg = std::env::var("FZ_DEBUG_LOAD")
1645            .ok()
1646            .is_some_and(|v| v != "0");
1647        let dep_ms_thresh: u128 = std::env::var("FZ_DEBUG_DEP_MS")
1648            .ok()
1649            .and_then(|s| s.parse().ok())
1650            .unwrap_or(0);
1651        let sample_n: usize = std::env::var("FZ_DEBUG_SAMPLE_N")
1652            .ok()
1653            .and_then(|s| s.parse().ok())
1654            .unwrap_or(0);
1655        let t0 = if dbg {
1656            Some(crate::instant::FzInstant::now())
1657        } else {
1658            None
1659        };
1660        let sheet_id = self.sheet_id_mut(sheet);
1661        let coord = Coord::from_excel(row, col, true, true);
1662        let addr = CellRef::new(sheet_id, coord);
1663
1664        let t_dep0 = if dbg {
1665            Some(crate::instant::FzInstant::now())
1666        } else {
1667            None
1668        };
1669        let mut created_placeholders = Vec::new();
1670        let mut new_dependencies = Vec::with_capacity(plan.direct_cell_deps.len());
1671        for dep in &plan.direct_cell_deps {
1672            let dep_vid = self.get_or_create_vertex(dep, &mut created_placeholders);
1673            if !new_dependencies.contains(&dep_vid) {
1674                new_dependencies.push(dep_vid);
1675            }
1676        }
1677        let mut named_dependencies = Vec::new();
1678        let mut unresolved_names = Vec::new();
1679        for name in plan
1680            .resolved_named_refs
1681            .iter()
1682            .chain(plan.named_refs.iter())
1683        {
1684            if let Some(named) = self.resolve_name_entry(name, sheet_id) {
1685                if !new_dependencies.contains(&named.vertex) {
1686                    new_dependencies.push(named.vertex);
1687                }
1688                if !named_dependencies.contains(&named.vertex) {
1689                    named_dependencies.push(named.vertex);
1690                }
1691            } else if let Some(source) = self.resolve_source_scalar_entry(name) {
1692                if !new_dependencies.contains(&source.vertex) {
1693                    new_dependencies.push(source.vertex);
1694                }
1695            } else {
1696                unresolved_names.push(name.clone());
1697            }
1698        }
1699        for source_name in &plan.source_refs {
1700            if let Some(source) = self.resolve_source_scalar_entry(source_name) {
1701                if !new_dependencies.contains(&source.vertex) {
1702                    new_dependencies.push(source.vertex);
1703                }
1704            } else if let Some(source) = self.resolve_source_table_entry(source_name)
1705                && !new_dependencies.contains(&source.vertex)
1706            {
1707                new_dependencies.push(source.vertex);
1708            }
1709        }
1710        for table_name in &plan.table_refs {
1711            if let Some(table) = self.resolve_table_entry(table_name) {
1712                if !new_dependencies.contains(&table.vertex) {
1713                    new_dependencies.push(table.vertex);
1714                }
1715            } else if let Some(source) = self.resolve_source_table_entry(table_name)
1716                && !new_dependencies.contains(&source.vertex)
1717            {
1718                new_dependencies.push(source.vertex);
1719            }
1720        }
1721        if let (true, Some(t)) = (dbg, t_dep0) {
1722            let elapsed = t.elapsed().as_millis();
1723            let do_log = (dep_ms_thresh > 0 && elapsed >= dep_ms_thresh)
1724                || (sample_n > 0 && (row as usize).is_multiple_of(sample_n));
1725            if (dep_ms_thresh == 0 && sample_n == 0 && row.is_multiple_of(1000)) || do_log {
1726                eprintln!(
1727                    "[fz][dep] {}!{} planned: deps={}, ranges={}, placeholders={}, names={} in {} ms",
1728                    self.sheet_name(sheet_id),
1729                    crate::reference::Coord::from_excel(row, col, true, true),
1730                    new_dependencies.len(),
1731                    plan.range_deps.len(),
1732                    created_placeholders.len(),
1733                    named_dependencies.len(),
1734                    elapsed
1735                );
1736            }
1737        }
1738
1739        // Check for self-reference (immediate cycle detection)
1740        let addr_vertex_id = self.get_or_create_vertex(&addr, &mut created_placeholders);
1741
1742        // Editing a formula clears any prior structural #REF! marking for this vertex.
1743        self.ref_error_vertices.remove(&addr_vertex_id);
1744
1745        // Under `CyclePolicy::Iterate` (Runtime detection) self-dependencies
1746        // are accepted, mirroring Excel with iterative calculation enabled:
1747        // the self-edge forms a single-vertex SCC that the scheduler emits as
1748        // a Cycle unit and `evaluate_scc_unit` iterates (RFC #113, spec §7.1/
1749        // §7.6/§7.8). Everywhere else the edit-time rejection stands.
1750        //
1751        // Scope note (persistence contract, pinned by
1752        // `formualizer-workbook/tests/cycle_persistence.rs`): this rejection
1753        // is an INTERACTIVE-EDIT nicety only. Bulk load paths
1754        // (`ingest_formula_batches` → `BulkIngestBuilder`, incl. staged
1755        // `build_graph_all`) intentionally do not perform it, so workbooks
1756        // saved with self-references under an Iterate config always reload —
1757        // under any cycle config — and resolve to `#CIRC!`/iteration at
1758        // evaluation time per the loaded policy.
1759        if new_dependencies.contains(&addr_vertex_id) && !self.config.cycle.allows_self_dependency()
1760        {
1761            return Err(ExcelError::new(ExcelErrorKind::Circ)
1762                .with_message("Self-reference detected".to_string()));
1763        }
1764
1765        for &name_vertex in &named_dependencies {
1766            let mut visited = FxHashSet::default();
1767            if self.name_depends_on_vertex(name_vertex, addr_vertex_id, &mut visited) {
1768                return Err(ExcelError::new(ExcelErrorKind::Circ)
1769                    .with_message("Circular reference through named range".to_string()));
1770            }
1771        }
1772
1773        // Remove old dependencies first
1774        self.remove_dependent_edges(addr_vertex_id);
1775        self.detach_vertex_from_names(addr_vertex_id);
1776        self.clear_pending_name_references(addr_vertex_id);
1777
1778        // Update vertex properties
1779        self.store
1780            .set_kind(addr_vertex_id, VertexKind::FormulaScalar);
1781        self.vertex_formulas.insert(addr_vertex_id, ast_id);
1782        self.store.set_dirty(addr_vertex_id, true);
1783
1784        // Clear any cached value since this is now a formula
1785        self.vertex_values.remove(&addr_vertex_id);
1786
1787        self.mark_volatile(addr_vertex_id, volatile);
1788        self.store.set_dynamic(addr_vertex_id, dynamic);
1789
1790        if !named_dependencies.is_empty() {
1791            self.attach_vertex_to_names(addr_vertex_id, &named_dependencies);
1792        }
1793        for unresolved_name in &unresolved_names {
1794            self.record_pending_name_reference(sheet_id, unresolved_name, addr_vertex_id);
1795        }
1796
1797        if let (true, Some(t)) = (dbg, t0) {
1798            let elapsed = t.elapsed().as_millis();
1799            let log_set = dep_ms_thresh > 0 && elapsed >= dep_ms_thresh;
1800            if log_set {
1801                eprintln!(
1802                    "[fz][set] {}!{} total {} ms",
1803                    self.sheet_name(sheet_id),
1804                    crate::reference::Coord::from_excel(row, col, true, true),
1805                    elapsed
1806                );
1807            }
1808        }
1809
1810        // Add new dependency edges
1811        self.add_dependent_edges(addr_vertex_id, &new_dependencies);
1812        self.add_range_dependent_edges(addr_vertex_id, &plan.range_deps, sheet_id);
1813
1814        Ok(OperationSummary {
1815            affected_vertices: self.mark_dirty(addr_vertex_id),
1816            created_placeholders,
1817        })
1818    }
1819
1820    pub(crate) fn rewrite_structured_references_for_cell(
1821        &self,
1822        ast: &mut ASTNode,
1823        cell: CellRef,
1824    ) -> Result<bool, ExcelError> {
1825        self.rewrite_structured_references_node(ast, cell)
1826    }
1827
1828    fn rewrite_structured_references_node(
1829        &self,
1830        node: &mut ASTNode,
1831        cell: CellRef,
1832    ) -> Result<bool, ExcelError> {
1833        match &mut node.node_type {
1834            ASTNodeType::Reference { reference, .. } => {
1835                self.rewrite_structured_reference(reference, cell)
1836            }
1837            ASTNodeType::UnaryOp { expr, .. } => {
1838                self.rewrite_structured_references_node(expr, cell)
1839            }
1840            ASTNodeType::BinaryOp { left, right, .. } => {
1841                let left_rewritten = self.rewrite_structured_references_node(left, cell)?;
1842                let right_rewritten = self.rewrite_structured_references_node(right, cell)?;
1843                Ok(left_rewritten || right_rewritten)
1844            }
1845            ASTNodeType::Function { args, .. } => {
1846                let mut rewritten = false;
1847                for a in args.iter_mut() {
1848                    rewritten |= self.rewrite_structured_references_node(a, cell)?;
1849                }
1850                Ok(rewritten)
1851            }
1852            ASTNodeType::Call { callee, args } => {
1853                let mut rewritten = self.rewrite_structured_references_node(callee, cell)?;
1854                for a in args.iter_mut() {
1855                    rewritten |= self.rewrite_structured_references_node(a, cell)?;
1856                }
1857                Ok(rewritten)
1858            }
1859            ASTNodeType::Array(rows) => {
1860                let mut rewritten = false;
1861                for r in rows.iter_mut() {
1862                    for item in r.iter_mut() {
1863                        rewritten |= self.rewrite_structured_references_node(item, cell)?;
1864                    }
1865                }
1866                Ok(rewritten)
1867            }
1868            ASTNodeType::Literal(_) => Ok(false),
1869        }
1870    }
1871
1872    fn rewrite_structured_reference(
1873        &self,
1874        reference: &mut ReferenceType,
1875        cell: CellRef,
1876    ) -> Result<bool, ExcelError> {
1877        use formualizer_parse::parser::{SpecialItem, TableSpecifier};
1878
1879        let ReferenceType::Table(tref) = reference else {
1880            return Ok(false);
1881        };
1882
1883        // This-row shorthand: parsed as an unnamed table reference with a Combination specifier.
1884        if !tref.name.is_empty() {
1885            return Ok(false);
1886        }
1887
1888        let col_name = match &tref.specifier {
1889            Some(TableSpecifier::Combination(parts)) => {
1890                let mut saw_this_row = false;
1891                let mut col: Option<&str> = None;
1892                for p in parts {
1893                    match p.as_ref() {
1894                        TableSpecifier::SpecialItem(SpecialItem::ThisRow) => {
1895                            saw_this_row = true;
1896                        }
1897                        TableSpecifier::Column(c) => {
1898                            if col.is_some() {
1899                                return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1900                                    "This-row structured reference with multiple columns is not supported"
1901                                        .to_string(),
1902                                ));
1903                            }
1904                            col = Some(c.as_str());
1905                        }
1906                        other => {
1907                            return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1908                                format!(
1909                                    "Unsupported this-row structured reference component: {other}"
1910                                ),
1911                            ));
1912                        }
1913                    }
1914                }
1915                if !saw_this_row {
1916                    return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1917                        "Unnamed structured reference requires a this-row selector".to_string(),
1918                    ));
1919                }
1920                col.ok_or_else(|| {
1921                    ExcelError::new(ExcelErrorKind::NImpl).with_message(
1922                        "This-row structured reference missing column selector".to_string(),
1923                    )
1924                })?
1925            }
1926            _ => {
1927                return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1928                    "Unnamed structured reference form is not supported".to_string(),
1929                ));
1930            }
1931        };
1932
1933        let Some(table) = self.find_table_containing_cell(cell) else {
1934            return Err(ExcelError::new(ExcelErrorKind::Name)
1935                .with_message("This-row structured reference used outside a table".to_string()));
1936        };
1937
1938        let row0 = cell.coord.row();
1939        let col0 = cell.coord.col();
1940        let sr0 = table.range.start.coord.row();
1941        let sc0 = table.range.start.coord.col();
1942        let er0 = table.range.end.coord.row();
1943        let ec0 = table.range.end.coord.col();
1944
1945        if row0 < sr0 || row0 > er0 || col0 < sc0 || col0 > ec0 {
1946            return Err(ExcelError::new(ExcelErrorKind::Name)
1947                .with_message("This-row structured reference used outside a table".to_string()));
1948        }
1949
1950        if table.header_row && row0 == sr0 {
1951            return Err(ExcelError::new(ExcelErrorKind::Ref).with_message(
1952                "This-row structured references are not valid in the table header row".to_string(),
1953            ));
1954        }
1955
1956        let data_start = if table.header_row { sr0 + 1 } else { sr0 };
1957        if row0 < data_start {
1958            return Err(ExcelError::new(ExcelErrorKind::Ref).with_message(
1959                "This-row structured references require a data/totals row context".to_string(),
1960            ));
1961        }
1962
1963        let Some(idx) = table.col_index(col_name) else {
1964            return Err(ExcelError::new(ExcelErrorKind::Ref).with_message(format!(
1965                "Unknown table column in this-row reference: {col_name}"
1966            )));
1967        };
1968        let target_col0 = sc0 + (idx as u32);
1969        let target_row = row0 + 1;
1970        let target_col = target_col0 + 1;
1971
1972        *reference = ReferenceType::Cell {
1973            sheet: None,
1974            row: target_row,
1975            col: target_col,
1976            row_abs: true,
1977            col_abs: true,
1978        };
1979
1980        Ok(true)
1981    }
1982
1983    fn find_table_containing_cell(&self, cell: CellRef) -> Option<&tables::TableEntry> {
1984        let row0 = cell.coord.row();
1985        let col0 = cell.coord.col();
1986
1987        let mut best: Option<&tables::TableEntry> = None;
1988        let mut best_area: u64 = u64::MAX;
1989        let mut best_name: &str = "";
1990
1991        for t in self.tables.values() {
1992            if t.sheet_id() != cell.sheet_id {
1993                continue;
1994            }
1995            let sr0 = t.range.start.coord.row();
1996            let sc0 = t.range.start.coord.col();
1997            let er0 = t.range.end.coord.row();
1998            let ec0 = t.range.end.coord.col();
1999            if row0 < sr0 || row0 > er0 || col0 < sc0 || col0 > ec0 {
2000                continue;
2001            }
2002
2003            let h = (er0 - sr0 + 1) as u64;
2004            let w = (ec0 - sc0 + 1) as u64;
2005            let area = h.saturating_mul(w);
2006            let name = t.name.as_str();
2007            let better = match best {
2008                None => true,
2009                Some(_) => area < best_area || (area == best_area && name < best_name),
2010            };
2011            if better {
2012                best = Some(t);
2013                best_area = area;
2014                best_name = name;
2015            }
2016        }
2017
2018        best
2019    }
2020
2021    #[allow(clippy::type_complexity)]
2022    pub(crate) fn fp8_parity_extract_dependencies_with_pending_names(
2023        &mut self,
2024        ast: &ASTNode,
2025        current_sheet_id: SheetId,
2026    ) -> Result<
2027        (
2028            Vec<VertexId>,
2029            Vec<SharedRangeRef<'static>>,
2030            Vec<CellRef>,
2031            Vec<VertexId>,
2032            Vec<String>,
2033        ),
2034        ExcelError,
2035    > {
2036        self.extract_dependencies_with_pending_names(ast, current_sheet_id)
2037    }
2038
2039    pub(crate) fn fp8_parity_is_ast_volatile(&self, ast: &ASTNode) -> bool {
2040        self.is_ast_volatile(ast)
2041    }
2042
2043    pub fn set_cell_value_ref(
2044        &mut self,
2045        cell: formualizer_common::SheetCellRef<'_>,
2046        value: LiteralValue,
2047    ) -> Result<OperationSummary, ExcelError> {
2048        let owned = cell.into_owned();
2049        let sheet_id = match owned.sheet {
2050            formualizer_common::SheetLocator::Id(id) => id,
2051            formualizer_common::SheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
2052            formualizer_common::SheetLocator::Current => self.default_sheet_id,
2053        };
2054        let sheet_name = self.sheet_name(sheet_id).to_string();
2055        self.set_cell_value(
2056            &sheet_name,
2057            owned.coord.row() + 1,
2058            owned.coord.col() + 1,
2059            value,
2060        )
2061    }
2062
2063    pub fn set_cell_formula_ref(
2064        &mut self,
2065        cell: formualizer_common::SheetCellRef<'_>,
2066        ast: ASTNode,
2067    ) -> Result<OperationSummary, ExcelError> {
2068        let owned = cell.into_owned();
2069        let sheet_id = match owned.sheet {
2070            formualizer_common::SheetLocator::Id(id) => id,
2071            formualizer_common::SheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
2072            formualizer_common::SheetLocator::Current => self.default_sheet_id,
2073        };
2074        let sheet_name = self.sheet_name(sheet_id).to_string();
2075        self.set_cell_formula(
2076            &sheet_name,
2077            owned.coord.row() + 1,
2078            owned.coord.col() + 1,
2079            ast,
2080        )
2081    }
2082
2083    pub fn get_cell_value_ref(
2084        &self,
2085        cell: formualizer_common::SheetCellRef<'_>,
2086    ) -> Option<LiteralValue> {
2087        let owned = cell.into_owned();
2088        let sheet_id = match owned.sheet {
2089            formualizer_common::SheetLocator::Id(id) => id,
2090            formualizer_common::SheetLocator::Name(name) => self.sheet_id(name.as_ref())?,
2091            formualizer_common::SheetLocator::Current => self.default_sheet_id,
2092        };
2093        let sheet_name = self.sheet_name(sheet_id);
2094        self.get_cell_value(sheet_name, owned.coord.row() + 1, owned.coord.col() + 1)
2095    }
2096
2097    /// Get current value from a cell
2098    pub fn get_cell_value(&self, sheet: &str, row: u32, col: u32) -> Option<LiteralValue> {
2099        if !self.value_cache_enabled {
2100            #[cfg(debug_assertions)]
2101            {
2102                self.graph_value_read_attempts
2103                    .fetch_add(1, Ordering::Relaxed);
2104            }
2105            return None;
2106        }
2107        let sheet_id = self.sheet_reg.get_id(sheet)?;
2108        let coord = Coord::from_excel(row, col, true, true);
2109        let addr = CellRef::new(sheet_id, coord);
2110
2111        self.get_vertex_id_for_address(&addr)
2112            .and_then(|&vertex_id| {
2113                // Check values hashmap (stores both cell values and formula results)
2114                self.vertex_values
2115                    .get(&vertex_id)
2116                    .map(|&value_ref| self.data_store.retrieve_value(value_ref))
2117            })
2118    }
2119
2120    /// Mark vertex dirty and propagate to dependents
2121    fn mark_dirty(&mut self, vertex_id: VertexId) -> Vec<VertexId> {
2122        self.mark_dirty_many(&[vertex_id])
2123    }
2124
2125    /// Multi-source `mark_dirty`: one BFS with a shared seen-set across all
2126    /// sources, marking exactly the union of per-source `mark_dirty` calls
2127    /// but visiting every vertex at most once per call.
2128    ///
2129    /// Loop-of-`mark_dirty` callers (volatile redirty, iterative-SCC redirty)
2130    /// pay O(sources × component) without this — measured quadratic by the
2131    /// iterate edge corpus. A BFS that early-stops at already-`is_dirty`
2132    /// vertices would also fix that, but it is NOT safe in general: several
2133    /// call sites set the dirty flag WITHOUT propagating to dependents
2134    /// (`DependencyGraph::set_dirty`, `mark_dependents_dirty`, names.rs
2135    /// binding invalidation, eval.rs demand-driven re-marks), so "dirty"
2136    /// does not imply "my dependents are already dirty". The per-call shared
2137    /// seen-set needs no such invariant.
2138    ///
2139    /// While a deferred-dirty scope is active (`begin_deferred_dirty`), the
2140    /// call queues its sources for the end-of-scope flush and returns ONLY
2141    /// the sources as the "affected" set (the full transitive set is
2142    /// produced once by the flush). Loop-of-edits callers must not rely on
2143    /// per-edit transitive affected sets inside such a scope.
2144    pub(crate) fn mark_dirty_many(&mut self, vertex_ids: &[VertexId]) -> Vec<VertexId> {
2145        if self.deferred_dirty_depth > 0 {
2146            self.deferred_dirty_pending.extend_from_slice(vertex_ids);
2147            return vertex_ids.to_vec();
2148        }
2149        let mut affected = FxHashSet::default();
2150        let mut to_visit = Vec::new();
2151        let mut visited_for_propagation = FxHashSet::default();
2152
2153        for &vertex_id in vertex_ids {
2154            // Only mark the source vertex as dirty if it's a formula.
2155            // Value cells don't get marked dirty themselves but are still
2156            // affected.
2157            let is_formula = matches!(
2158                self.store.kind(vertex_id),
2159                VertexKind::FormulaScalar
2160                    | VertexKind::FormulaArray
2161                    | VertexKind::NamedScalar
2162                    | VertexKind::NamedArray
2163            );
2164
2165            if is_formula {
2166                to_visit.push(vertex_id);
2167            } else {
2168                // Value cells are affected (for tracking) but not marked dirty
2169                affected.insert(vertex_id);
2170            }
2171
2172            // Initial propagation from direct and range dependents
2173            {
2174                // Get dependents (vertices that depend on this vertex)
2175                if let Some(dependents) = self.dependents_slice(vertex_id) {
2176                    to_visit.extend(dependents.iter().copied());
2177                } else {
2178                    let dependents = self.get_dependents(vertex_id);
2179                    to_visit.extend(dependents);
2180                }
2181
2182                if let Some(name_set) = self.cell_to_name_dependents.get(&vertex_id) {
2183                    for &name_vertex in name_set {
2184                        to_visit.push(name_vertex);
2185                    }
2186                }
2187
2188                to_visit.extend(self.collect_range_dependents_for_vertex(vertex_id));
2189            }
2190        }
2191
2192        while let Some(id) = to_visit.pop() {
2193            if !visited_for_propagation.insert(id) {
2194                continue; // Already processed
2195            }
2196            self.dirty_propagation_visits += 1;
2197            affected.insert(id);
2198
2199            // Mark vertex as dirty
2200            self.store.set_dirty(id, true);
2201
2202            // Add direct dependents to visit list
2203            if let Some(dependents) = self.dependents_slice(id) {
2204                to_visit.extend(dependents.iter().copied());
2205            } else {
2206                let dependents = self.get_dependents(id);
2207                to_visit.extend(dependents);
2208            }
2209            to_visit.extend(self.collect_range_dependents_for_vertex(id));
2210        }
2211
2212        // Add to dirty set
2213        self.dirty_vertices.extend(&affected);
2214
2215        // Return as Vec for compatibility
2216        affected.into_iter().collect()
2217    }
2218
2219    /// Total vertices processed by dirty-propagation BFS loops since graph
2220    /// creation (perf-shape observability; see `dirty_propagation_visits`).
2221    pub(crate) fn dirty_propagation_visits(&self) -> u64 {
2222        self.dirty_propagation_visits
2223    }
2224
2225    /// Begin a deferred-dirty scope for a multi-edit batch.
2226    ///
2227    /// While active, `mark_dirty` / `mark_dirty_many` /
2228    /// `mark_dirty_many_value_cells` queue their sources instead of running a
2229    /// BFS per call; the outermost `end_deferred_dirty` flushes the queued
2230    /// union with ONE multi-source `mark_dirty_many`. Union semantics equal
2231    /// the sequential per-edit calls (pinned by
2232    /// `mark_dirty_many_equals_sequential_single_source_marks` plus the
2233    /// deferred-scope tests): any dependent edge removed mid-batch belongs to
2234    /// a vertex that was itself edited mid-batch, and edited vertices are
2235    /// themselves pending sources, so the flush covers everything a per-edit
2236    /// propagation would have reached.
2237    ///
2238    /// Nesting is depth-counted. The scope also enters the CSR edge batch
2239    /// (`begin_batch`) so edge-heavy batches amortize delta rebuilds (#127).
2240    ///
2241    /// Callers MUST guarantee `end_deferred_dirty` runs on every exit path
2242    /// (including `?` early returns): a leaked scope would silently swallow
2243    /// future propagations. Evaluation entry points `debug_assert` that no
2244    /// scope is active.
2245    pub fn begin_deferred_dirty(&mut self) {
2246        self.edges.begin_batch();
2247        self.deferred_dirty_depth += 1;
2248    }
2249
2250    /// End a deferred-dirty scope. When the outermost scope ends, runs ONE
2251    /// multi-source propagation over every source queued while deferred and
2252    /// returns its full affected set (sources pointing at vertices deleted
2253    /// mid-batch are skipped). Inner (nested) ends return an empty set.
2254    pub fn end_deferred_dirty(&mut self) -> Vec<VertexId> {
2255        debug_assert!(
2256            self.deferred_dirty_depth > 0,
2257            "end_deferred_dirty without matching begin_deferred_dirty"
2258        );
2259        self.edges.end_batch();
2260        self.deferred_dirty_depth = self.deferred_dirty_depth.saturating_sub(1);
2261        if self.deferred_dirty_depth > 0 {
2262            return Vec::new();
2263        }
2264        let pending = std::mem::take(&mut self.deferred_dirty_pending);
2265        if pending.is_empty() {
2266            return Vec::new();
2267        }
2268        let live: Vec<VertexId> = pending
2269            .into_iter()
2270            .filter(|&id| self.vertex_exists(id))
2271            .collect();
2272        self.mark_dirty_many(&live)
2273    }
2274
2275    /// True while a deferred-dirty scope is active (see
2276    /// `begin_deferred_dirty`). Evaluation must never start in this state.
2277    pub fn deferred_dirty_active(&self) -> bool {
2278        self.deferred_dirty_depth > 0
2279    }
2280
2281    /// Get all vertices that need evaluation
2282    pub fn get_evaluation_vertices(&self) -> Vec<VertexId> {
2283        let mut combined = FxHashSet::default();
2284        combined.extend(&self.dirty_vertices);
2285        combined.extend(&self.volatile_vertices);
2286
2287        let mut result: Vec<VertexId> = combined
2288            .into_iter()
2289            .filter(|&id| {
2290                // Only include active formula/name vertices; tombstoned vertices can retain stable
2291                // IDs in the store, but must never be scheduled for evaluation.
2292                self.store.vertex_exists_active(id)
2293                    && matches!(
2294                        self.store.kind(id),
2295                        VertexKind::FormulaScalar
2296                            | VertexKind::FormulaArray
2297                            | VertexKind::NamedScalar
2298                            | VertexKind::NamedArray
2299                    )
2300            })
2301            .collect();
2302        result.sort_unstable();
2303        result
2304    }
2305
2306    /// Clear dirty flags after successful evaluation
2307    pub fn clear_dirty_flags(&mut self, vertices: &[VertexId]) {
2308        for &vertex_id in vertices {
2309            self.store.set_dirty(vertex_id, false);
2310            self.dirty_vertices.remove(&vertex_id);
2311        }
2312    }
2313
2314    /// 🔮 Scalability Hook: Clear volatile vertices after evaluation cycle
2315    pub fn clear_volatile_flags(&mut self) {
2316        self.volatile_vertices.clear();
2317    }
2318
2319    /// Re-marks all volatile vertices as dirty for the next evaluation cycle.
2320    /// One multi-source propagation: many volatiles feeding one dependent
2321    /// component used to pay O(volatiles × component) (a full `mark_dirty`
2322    /// BFS per volatile); `mark_dirty_many` visits the component once.
2323    pub(crate) fn redirty_volatiles(&mut self) {
2324        let volatile_ids: Vec<VertexId> = self.volatile_vertices.iter().copied().collect();
2325        let _ = self.mark_dirty_many(&volatile_ids);
2326    }
2327
2328    /// Re-marks members of iterating SCCs (and, via propagation, their
2329    /// dependents) dirty for the next evaluation cycle — the volatile-like
2330    /// redirty that keeps `CyclePolicy::Iterate` cells re-evaluating every
2331    /// recalc (RFC #113; spec §4/§7.6). Vertices deleted since the recalc
2332    /// are skipped.
2333    ///
2334    /// One multi-source propagation: the old per-member `mark_dirty` loop was
2335    /// O(|SCC|²) per recalc for a large SCC (a converged 1000-member ring
2336    /// cost ~42 ms per no-op recalc, release); an interim `!is_dirty` skip
2337    /// fixed that but leaned on dirty-flag semantics that non-propagating
2338    /// `set_dirty` callers do not uphold. The shared seen-set in
2339    /// `mark_dirty_many` is O(component) without any such invariant.
2340    pub(crate) fn redirty_iterative_members(&mut self, members: &[VertexId]) {
2341        let live: Vec<VertexId> = members
2342            .iter()
2343            .copied()
2344            .filter(|&id| self.vertex_exists(id))
2345            .collect();
2346        let _ = self.mark_dirty_many(&live);
2347    }
2348
2349    fn get_or_create_vertex(
2350        &mut self,
2351        addr: &CellRef,
2352        created_placeholders: &mut Vec<CellRef>,
2353    ) -> VertexId {
2354        if let Some(&vertex_id) = self.cell_to_vertex.get(addr) {
2355            return vertex_id;
2356        }
2357
2358        // During first-load bulk ingest the fast path populates
2359        // ``load_packed_to_vertex`` but skips ``cell_to_vertex``. Promote
2360        // the entry into ``cell_to_vertex`` so subsequent lookups are O(1)
2361        // and consistent across the two maps.
2362        if self.first_load_assume_new {
2363            let packed = Self::packed_cell_key(
2364                addr.sheet_id,
2365                AbsCoord::new(addr.coord.row(), addr.coord.col()),
2366            );
2367            if let Some(&existing) = self.load_packed_to_vertex.get(&packed) {
2368                self.cell_to_vertex.insert(*addr, existing);
2369                return existing;
2370            }
2371        }
2372
2373        created_placeholders.push(*addr);
2374        let packed_coord = AbsCoord::new(addr.coord.row(), addr.coord.col());
2375        let vertex_id = self.store.allocate(packed_coord, addr.sheet_id, 0x00);
2376
2377        // Add vertex coordinate for CSR
2378        self.edges.add_vertex(packed_coord, vertex_id.0);
2379
2380        // Add to sheet index for O(log n + k) range queries
2381        self.sheet_index_mut(addr.sheet_id)
2382            .add_vertex(packed_coord, vertex_id);
2383
2384        self.store.set_kind(vertex_id, VertexKind::Empty);
2385        self.cell_to_vertex.insert(*addr, vertex_id);
2386        vertex_id
2387    }
2388
2389    fn add_dependent_edges(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
2390        // Batch to avoid repeated CSR rebuilds and keep reverse edges current
2391        self.edges.begin_batch();
2392
2393        // If PK enabled, update order using a short-lived adapter without holding &mut self
2394        // Track dependencies that should be skipped if rejecting cycle-creating edges
2395        let mut skip_deps: rustc_hash::FxHashSet<VertexId> = rustc_hash::FxHashSet::default();
2396        if self.pk_order.is_some()
2397            && let Some(mut pk) = self.pk_order.take()
2398        {
2399            pk.ensure_nodes(std::iter::once(dependent));
2400            pk.ensure_nodes(dependencies.iter().copied());
2401            {
2402                let adapter = GraphAdapter { g: self };
2403                for &dep_id in dependencies {
2404                    match pk.try_add_edge(&adapter, dep_id, dependent) {
2405                        Ok(_) => {}
2406                        Err(_cycle) => {
2407                            if self.config.pk_reject_cycle_edges {
2408                                skip_deps.insert(dep_id);
2409                            } else {
2410                                pk.rebuild_full(&adapter);
2411                            }
2412                        }
2413                    }
2414                }
2415            } // drop adapter
2416            self.pk_order = Some(pk);
2417        }
2418
2419        // Now mutate engine edges; if rejecting cycles, re-check and skip those that would create cycles
2420        for &dep_id in dependencies {
2421            if self.config.pk_reject_cycle_edges && skip_deps.contains(&dep_id) {
2422                continue;
2423            }
2424            self.edges.add_edge(dependent, dep_id);
2425            #[cfg(test)]
2426            {
2427                if let Ok(mut g) = self.instr.lock() {
2428                    g.edges_added += 1;
2429                }
2430            }
2431        }
2432
2433        self.edges.end_batch();
2434    }
2435
2436    /// Like add_dependent_edges, but assumes caller is managing edges.begin_batch/end_batch
2437    fn add_dependent_edges_nobatch(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
2438        // If PK enabled, update order using a short-lived adapter without holding &mut self
2439        let mut skip_deps: rustc_hash::FxHashSet<VertexId> = rustc_hash::FxHashSet::default();
2440        if self.pk_order.is_some()
2441            && let Some(mut pk) = self.pk_order.take()
2442        {
2443            pk.ensure_nodes(std::iter::once(dependent));
2444            pk.ensure_nodes(dependencies.iter().copied());
2445            {
2446                let adapter = GraphAdapter { g: self };
2447                for &dep_id in dependencies {
2448                    match pk.try_add_edge(&adapter, dep_id, dependent) {
2449                        Ok(_) => {}
2450                        Err(_cycle) => {
2451                            if self.config.pk_reject_cycle_edges {
2452                                skip_deps.insert(dep_id);
2453                            } else {
2454                                pk.rebuild_full(&adapter);
2455                            }
2456                        }
2457                    }
2458                }
2459            }
2460            self.pk_order = Some(pk);
2461        }
2462
2463        for &dep_id in dependencies {
2464            if self.config.pk_reject_cycle_edges && skip_deps.contains(&dep_id) {
2465                continue;
2466            }
2467            self.edges.add_edge(dependent, dep_id);
2468            #[cfg(test)]
2469            {
2470                if let Ok(mut g) = self.instr.lock() {
2471                    g.edges_added += 1;
2472                }
2473            }
2474        }
2475    }
2476
2477    /// Bulk set formulas on a sheet using a single dependency plan and batched edge updates.
2478    pub fn bulk_set_formulas<I>(&mut self, sheet: &str, items: I) -> Result<usize, ExcelError>
2479    where
2480        I: IntoIterator<Item = (u32, u32, ASTNode)>,
2481    {
2482        let collected: Vec<(u32, u32, ASTNode)> = items.into_iter().collect();
2483        if collected.is_empty() {
2484            return Ok(0);
2485        }
2486        let vol_flags: Vec<bool> = collected
2487            .iter()
2488            .map(|(_, _, ast)| self.is_ast_volatile(ast))
2489            .collect();
2490        self.bulk_set_formulas_with_volatility(sheet, collected, vol_flags)
2491    }
2492
2493    pub fn bulk_set_formulas_with_volatility(
2494        &mut self,
2495        sheet: &str,
2496        collected: Vec<(u32, u32, ASTNode)>,
2497        _vol_flags: Vec<bool>,
2498    ) -> Result<usize, ExcelError> {
2499        let sheet_id = self.sheet_id_mut(sheet);
2500        if collected.is_empty() {
2501            return Ok(0);
2502        }
2503        let provider = RegistryFunctionProvider;
2504        let ingested = {
2505            let mut pipeline = self.ingest_pipeline(&provider);
2506            let inputs = collected.into_iter().map(|(row, col, ast)| {
2507                let placement = CellRef::new(sheet_id, Coord::from_excel(row, col, true, true));
2508                (FormulaAstInput::Tree(ast), placement, None)
2509            });
2510            pipeline.ingest_batch(inputs)?
2511        };
2512        let planned = ingested
2513            .into_iter()
2514            .map(|formula| {
2515                (
2516                    formula.placement.coord.row() + 1,
2517                    formula.placement.coord.col() + 1,
2518                    formula.ast_id,
2519                    formula.dep_plan,
2520                )
2521            })
2522            .collect();
2523        self.bulk_set_formulas_with_plans(sheet, planned)
2524    }
2525
2526    pub(crate) fn bulk_set_formulas_with_plans(
2527        &mut self,
2528        sheet: &str,
2529        planned: Vec<(u32, u32, AstNodeId, DependencyPlanRow)>,
2530    ) -> Result<usize, ExcelError> {
2531        let sheet_id = self.sheet_id_mut(sheet);
2532        if planned.is_empty() {
2533            return Ok(0);
2534        }
2535        let mut created_placeholders: Vec<CellRef> = Vec::new();
2536        let mut target_vids: Vec<VertexId> = Vec::with_capacity(planned.len());
2537        for (row, col, _, _) in &planned {
2538            let addr = CellRef::new(sheet_id, Coord::from_excel(*row, *col, true, true));
2539            target_vids.push(self.get_or_create_vertex(&addr, &mut created_placeholders));
2540        }
2541        // Create direct-dependency placeholders before edge batching starts. If a formula-plane
2542        // demotion materializes formulas into an otherwise Arrow-only graph, interleaving
2543        // dependency vertex creation with edge insertion forces the CSR delta slab to rebuild on
2544        // every new dependency vertex. Pre-creating these vertices keeps bulk edge insertion O(n).
2545        for (_, _, _, plan) in &planned {
2546            for cell in &plan.direct_cell_deps {
2547                self.get_or_create_vertex(cell, &mut created_placeholders);
2548            }
2549        }
2550
2551        for (i, &tvid) in target_vids.iter().enumerate() {
2552            if self.vertex_formulas.contains_key(&tvid) {
2553                self.remove_dependent_edges(tvid);
2554            }
2555            self.detach_vertex_from_names(tvid);
2556            self.clear_pending_name_references(tvid);
2557            self.store.set_kind(tvid, VertexKind::FormulaScalar);
2558            self.store.set_dirty(tvid, true);
2559            self.vertex_values.remove(&tvid);
2560            self.vertex_formulas.insert(tvid, planned[i].2);
2561            self.mark_volatile(tvid, planned[i].3.volatile);
2562            self.store.set_dynamic(tvid, planned[i].3.dynamic);
2563        }
2564        self.dirty_vertices.extend(target_vids.iter().copied());
2565
2566        self.edges.begin_batch();
2567        for (i, tvid) in target_vids.iter().copied().enumerate() {
2568            let plan = &planned[i].3;
2569            let mut deps: Vec<VertexId> = Vec::new();
2570            for cell in &plan.direct_cell_deps {
2571                let dep_vid = self.get_or_create_vertex(cell, &mut created_placeholders);
2572                if !deps.contains(&dep_vid) {
2573                    deps.push(dep_vid);
2574                }
2575            }
2576
2577            let mut name_vertices = Vec::new();
2578            for name in plan
2579                .resolved_named_refs
2580                .iter()
2581                .chain(plan.named_refs.iter())
2582            {
2583                if let Some(named) = self.resolve_name_entry(name, sheet_id) {
2584                    if !deps.contains(&named.vertex) {
2585                        deps.push(named.vertex);
2586                    }
2587                    if !name_vertices.contains(&named.vertex) {
2588                        name_vertices.push(named.vertex);
2589                    }
2590                } else if let Some(source) = self.resolve_source_scalar_entry(name) {
2591                    if !deps.contains(&source.vertex) {
2592                        deps.push(source.vertex);
2593                    }
2594                } else {
2595                    self.record_pending_name_reference(sheet_id, name, tvid);
2596                }
2597            }
2598            for source_name in &plan.source_refs {
2599                if let Some(source) = self.resolve_source_scalar_entry(source_name) {
2600                    if !deps.contains(&source.vertex) {
2601                        deps.push(source.vertex);
2602                    }
2603                } else if let Some(source) = self.resolve_source_table_entry(source_name)
2604                    && !deps.contains(&source.vertex)
2605                {
2606                    deps.push(source.vertex);
2607                }
2608            }
2609            for table_name in &plan.table_refs {
2610                if let Some(table) = self.resolve_table_entry(table_name) {
2611                    if !deps.contains(&table.vertex) {
2612                        deps.push(table.vertex);
2613                    }
2614                } else if let Some(source) = self.resolve_source_table_entry(table_name)
2615                    && !deps.contains(&source.vertex)
2616                {
2617                    deps.push(source.vertex);
2618                }
2619            }
2620            if !name_vertices.is_empty() {
2621                self.attach_vertex_to_names(tvid, &name_vertices);
2622            }
2623            if !deps.is_empty() {
2624                self.add_dependent_edges_nobatch(tvid, &deps);
2625            }
2626            self.add_range_dependent_edges(tvid, &plan.range_deps, sheet_id);
2627        }
2628        self.edges.end_batch();
2629
2630        Ok(planned.len())
2631    }
2632
2633    /// Public (crate) helper to add a single dependency edge (dependent -> dependency) used for restoration/undo.
2634    pub fn add_dependency_edge(&mut self, dependent: VertexId, dependency: VertexId) {
2635        if dependent == dependency {
2636            return;
2637        }
2638        // If PK enabled attempt to add maintaining ordering; fallback to rebuild if cycle
2639        if self.pk_order.is_some()
2640            && let Some(mut pk) = self.pk_order.take()
2641        {
2642            pk.ensure_nodes(std::iter::once(dependent));
2643            pk.ensure_nodes(std::iter::once(dependency));
2644            let adapter = GraphAdapter { g: self };
2645            if pk.try_add_edge(&adapter, dependency, dependent).is_err() {
2646                // Cycle: rebuild full (conservative)
2647                pk.rebuild_full(&adapter);
2648            }
2649            self.pk_order = Some(pk);
2650        }
2651        self.edges.add_edge(dependent, dependency);
2652        self.store.set_dirty(dependent, true);
2653        self.dirty_vertices.insert(dependent);
2654    }
2655
2656    fn remove_dependent_edges(&mut self, vertex: VertexId) {
2657        // Remove all outgoing edges from this vertex (its dependencies)
2658        let dependencies = self.edges.out_edges(vertex);
2659
2660        self.edges.begin_batch();
2661        if self.pk_order.is_some()
2662            && let Some(mut pk) = self.pk_order.take()
2663        {
2664            for dep in &dependencies {
2665                pk.remove_edge(*dep, vertex);
2666            }
2667            self.pk_order = Some(pk);
2668        }
2669        for dep in dependencies {
2670            self.edges.remove_edge(vertex, dep);
2671        }
2672        self.edges.end_batch();
2673
2674        // Remove range dependencies and clean up stripes
2675        if let Some(old_ranges) = self.formula_to_range_deps.remove(&vertex) {
2676            let old_sheet_id = self.store.sheet_id(vertex);
2677
2678            for range in &old_ranges {
2679                let sheet_id = match range.sheet {
2680                    SharedSheetLocator::Id(id) => id,
2681                    _ => old_sheet_id,
2682                };
2683                let s_row = range.start_row.map(|b| b.index);
2684                let e_row = range.end_row.map(|b| b.index);
2685                let s_col = range.start_col.map(|b| b.index);
2686                let e_col = range.end_col.map(|b| b.index);
2687
2688                let mut keys_to_clean = FxHashSet::default();
2689
2690                let col_stripes = (s_row.is_none() && e_row.is_none())
2691                    || (s_col.is_some() && e_col.is_some() && (s_row.is_none() || e_row.is_none()));
2692                let row_stripes = (s_col.is_none() && e_col.is_none())
2693                    || (s_row.is_some() && e_row.is_some() && (s_col.is_none() || e_col.is_none()));
2694
2695                if col_stripes && !row_stripes {
2696                    let sc = s_col.unwrap_or(0);
2697                    let ec = e_col.unwrap_or(sc);
2698                    for col in sc..=ec {
2699                        keys_to_clean.insert(StripeKey {
2700                            sheet_id,
2701                            stripe_type: StripeType::Column,
2702                            index: col,
2703                        });
2704                    }
2705                } else if row_stripes && !col_stripes {
2706                    let sr = s_row.unwrap_or(0);
2707                    let er = e_row.unwrap_or(sr);
2708                    for row in sr..=er {
2709                        keys_to_clean.insert(StripeKey {
2710                            sheet_id,
2711                            stripe_type: StripeType::Row,
2712                            index: row,
2713                        });
2714                    }
2715                } else {
2716                    let start_row = s_row.unwrap_or(0);
2717                    let start_col = s_col.unwrap_or(0);
2718                    let end_row = e_row.unwrap_or(start_row);
2719                    let end_col = e_col.unwrap_or(start_col);
2720
2721                    let height = end_row.saturating_sub(start_row) + 1;
2722                    let width = end_col.saturating_sub(start_col) + 1;
2723
2724                    if self.config.enable_block_stripes && height > 1 && width > 1 {
2725                        let start_block_row = start_row / BLOCK_H;
2726                        let end_block_row = end_row / BLOCK_H;
2727                        let start_block_col = start_col / BLOCK_W;
2728                        let end_block_col = end_col / BLOCK_W;
2729
2730                        for block_row in start_block_row..=end_block_row {
2731                            for block_col in start_block_col..=end_block_col {
2732                                keys_to_clean.insert(StripeKey {
2733                                    sheet_id,
2734                                    stripe_type: StripeType::Block,
2735                                    index: block_index(block_row * BLOCK_H, block_col * BLOCK_W),
2736                                });
2737                            }
2738                        }
2739                    } else if height > width {
2740                        for col in start_col..=end_col {
2741                            keys_to_clean.insert(StripeKey {
2742                                sheet_id,
2743                                stripe_type: StripeType::Column,
2744                                index: col,
2745                            });
2746                        }
2747                    } else {
2748                        for row in start_row..=end_row {
2749                            keys_to_clean.insert(StripeKey {
2750                                sheet_id,
2751                                stripe_type: StripeType::Row,
2752                                index: row,
2753                            });
2754                        }
2755                    }
2756                }
2757
2758                for key in keys_to_clean {
2759                    if let Some(dependents) = self.stripe_to_dependents.get_mut(&key) {
2760                        dependents.remove(&vertex);
2761                        if dependents.is_empty() {
2762                            self.stripe_to_dependents.remove(&key);
2763                            #[cfg(test)]
2764                            {
2765                                if let Ok(mut g) = self.instr.lock() {
2766                                    g.stripe_removes += 1;
2767                                }
2768                            }
2769                        }
2770                    }
2771                }
2772            }
2773        }
2774    }
2775
2776    // Removed: vertices() and get_vertex() methods - no longer needed with SoA
2777    // The old AoS Vertex struct has been eliminated in favor of direct
2778    // access to columnar data through the VertexStore
2779
2780    /// Updates the cached value of a formula vertex.
2781    pub(crate) fn update_vertex_value(&mut self, vertex_id: VertexId, value: LiteralValue) {
2782        if !self.value_cache_enabled {
2783            // Canonical mode: cell/formula vertices must not store values in the graph.
2784            match self.store.kind(vertex_id) {
2785                VertexKind::Cell
2786                | VertexKind::FormulaScalar
2787                | VertexKind::FormulaArray
2788                | VertexKind::Empty => {
2789                    self.vertex_values.remove(&vertex_id);
2790                    return;
2791                }
2792                _ => {
2793                    // Allow non-cell vertices to cache values (e.g. named-range formulas).
2794                }
2795            }
2796        }
2797        let value_ref = self.data_store.store_value(normalize_stored_literal(value));
2798        self.vertex_values.insert(vertex_id, value_ref);
2799    }
2800
2801    /// Plan a spill region for an anchor; returns #SPILL! if blocked
2802    pub fn plan_spill_region(
2803        &self,
2804        anchor: VertexId,
2805        target_cells: &[CellRef],
2806    ) -> Result<(), ExcelError> {
2807        self.plan_spill_region_allowing_formula_overwrite(anchor, target_cells, None)
2808    }
2809
2810    /// Plan a spill region, optionally allowing specific formula vertices to be overwritten.
2811    ///
2812    /// This is used by parallel evaluation to allow spill anchors to take precedence over
2813    /// other formula vertices that are being evaluated in the same layer.
2814    pub(crate) fn plan_spill_region_allowing_formula_overwrite(
2815        &self,
2816        anchor: VertexId,
2817        target_cells: &[CellRef],
2818        overwritable_formulas: Option<&rustc_hash::FxHashSet<VertexId>>,
2819    ) -> Result<(), ExcelError> {
2820        use formualizer_common::{ExcelErrorExtra, ExcelErrorKind};
2821        // Compute expected spill shape from the target rectangle for better diagnostics
2822        let (expected_rows, expected_cols) = if target_cells.is_empty() {
2823            (0u32, 0u32)
2824        } else {
2825            let mut min_r = u32::MAX;
2826            let mut max_r = 0u32;
2827            let mut min_c = u32::MAX;
2828            let mut max_c = 0u32;
2829            for cell in target_cells {
2830                let r = cell.coord.row();
2831                let c = cell.coord.col();
2832                if r < min_r {
2833                    min_r = r;
2834                }
2835                if r > max_r {
2836                    max_r = r;
2837                }
2838                if c < min_c {
2839                    min_c = c;
2840                }
2841                if c > max_c {
2842                    max_c = c;
2843                }
2844            }
2845            (
2846                max_r.saturating_sub(min_r).saturating_add(1),
2847                max_c.saturating_sub(min_c).saturating_add(1),
2848            )
2849        };
2850        // Allow overlapping with previously owned spill cells by this anchor
2851        for cell in target_cells {
2852            // If cell is already owned by this anchor's previous spill, it's allowed.
2853            let owned_by_anchor = match self.spill_cell_to_anchor.get(cell) {
2854                Some(&existing_anchor) if existing_anchor == anchor => true,
2855                Some(_other) => {
2856                    return Err(ExcelError::new(ExcelErrorKind::Spill)
2857                        .with_message("BlockedBySpill")
2858                        .with_extra(ExcelErrorExtra::Spill {
2859                            expected_rows,
2860                            expected_cols,
2861                        }));
2862                }
2863                None => false,
2864            };
2865
2866            if owned_by_anchor {
2867                continue;
2868            }
2869
2870            // If cell is occupied by another formula anchor, block unless explicitly allowed.
2871            if let Some(&vid) = self.cell_to_vertex.get(cell)
2872                && vid != anchor
2873            {
2874                // Prevent clobbering formulas (array or scalar) in the target area
2875                match self.store.kind(vid) {
2876                    VertexKind::FormulaScalar | VertexKind::FormulaArray => {
2877                        if let Some(allow) = overwritable_formulas
2878                            && allow.contains(&vid)
2879                        {
2880                            continue;
2881                        }
2882                        return Err(ExcelError::new(ExcelErrorKind::Spill)
2883                            .with_message("BlockedByFormula")
2884                            .with_extra(ExcelErrorExtra::Spill {
2885                                expected_rows,
2886                                expected_cols,
2887                            }));
2888                    }
2889                    _ => {
2890                        // If a non-empty value exists (and not this anchor), block
2891                        if let Some(vref) = self.vertex_values.get(&vid) {
2892                            let v = self.data_store.retrieve_value(*vref);
2893                            if !matches!(v, LiteralValue::Empty) {
2894                                return Err(ExcelError::new(ExcelErrorKind::Spill)
2895                                    .with_message("BlockedByValue")
2896                                    .with_extra(ExcelErrorExtra::Spill {
2897                                        expected_rows,
2898                                        expected_cols,
2899                                    }));
2900                            }
2901                        }
2902                    }
2903                }
2904            }
2905        }
2906        Ok(())
2907    }
2908
2909    // Note: non-atomic commit_spill_region has been removed. All callers must use
2910    // commit_spill_region_atomic_with_fault for atomicity and rollback on failure.
2911
2912    /// Commit a spill atomically with an internal shadow buffer and optional fault injection.
2913    /// If a fault is injected partway through, all changes are rolled back to the pre-commit state.
2914    /// This does not change behavior under normal operation; it's primarily for Phase 3 guarantees and tests.
2915    pub fn commit_spill_region_atomic_with_fault(
2916        &mut self,
2917        anchor: VertexId,
2918        target_cells: Vec<CellRef>,
2919        values: Vec<Vec<LiteralValue>>,
2920        fault_after_ops: Option<usize>,
2921    ) -> Result<(), ExcelError> {
2922        // Anchor cell coordinates (0-based) for special-casing writes.
2923        // We must never overwrite the anchor via set_cell_value(), because that would
2924        // strip the formula and break incremental recalculation.
2925        let anchor_cell = self
2926            .get_cell_ref(anchor)
2927            .expect("anchor cell ref for spill commit");
2928        let anchor_sheet_name = self.sheet_name(anchor_cell.sheet_id).to_string();
2929        let anchor_row = anchor_cell.coord.row();
2930        let anchor_col = anchor_cell.coord.col();
2931
2932        // Capture previous owned cells for this anchor
2933        let prev_cells = self
2934            .spill_anchor_to_cells
2935            .get(&anchor)
2936            .cloned()
2937            .unwrap_or_default();
2938        // Use CoordBuildHasher on CellRef keys to avoid FxHasher clustering on
2939        // packed Coord values.
2940        let new_set: std::collections::HashSet<CellRef, CoordBuildHasher> =
2941            target_cells.iter().copied().collect();
2942        let prev_set: std::collections::HashSet<CellRef, CoordBuildHasher> =
2943            prev_cells.iter().copied().collect();
2944
2945        // Compose operation list: clears first (prev - new), then writes for new rectangle
2946        #[derive(Clone)]
2947        struct Op {
2948            sheet: String,
2949            row: u32,
2950            col: u32,
2951            new_value: LiteralValue,
2952        }
2953        let mut ops: Vec<Op> = Vec::new();
2954
2955        // Clears for cells no longer used
2956        for cell in prev_cells.iter() {
2957            if !new_set.contains(cell) {
2958                let sheet = self.sheet_name(cell.sheet_id).to_string();
2959                ops.push(Op {
2960                    sheet,
2961                    row: cell.coord.row(),
2962                    col: cell.coord.col(),
2963                    new_value: LiteralValue::Empty,
2964                });
2965            }
2966        }
2967
2968        // Writes for new values (row-major to match target rectangle)
2969        if !target_cells.is_empty() {
2970            let first = target_cells.first().copied().unwrap();
2971            let row0 = first.coord.row();
2972            let col0 = first.coord.col();
2973            let sheet = self.sheet_name(first.sheet_id).to_string();
2974            for (r_off, row_vals) in values.iter().enumerate() {
2975                for (c_off, v) in row_vals.iter().enumerate() {
2976                    ops.push(Op {
2977                        sheet: sheet.clone(),
2978                        row: row0 + r_off as u32,
2979                        col: col0 + c_off as u32,
2980                        new_value: v.clone(),
2981                    });
2982                }
2983            }
2984        }
2985
2986        // Shadow buffer of old values for rollback
2987        #[derive(Clone)]
2988        struct OldVal {
2989            present: bool,
2990            value: LiteralValue,
2991        }
2992        let mut old_values: Vec<((String, u32, u32), OldVal)> = Vec::with_capacity(ops.len());
2993
2994        // Capture old values before applying
2995        for op in &ops {
2996            // op.row/op.col are internal 0-based; get_cell_value is a public 1-based API.
2997            let old = self
2998                .get_cell_value(&op.sheet, op.row + 1, op.col + 1)
2999                .unwrap_or(LiteralValue::Empty);
3000            let present = true; // unified model: we always treat as present
3001            old_values.push((
3002                (op.sheet.clone(), op.row, op.col),
3003                OldVal {
3004                    present,
3005                    value: old,
3006                },
3007            ));
3008        }
3009
3010        // Apply with optional injected fault
3011        for (applied, op) in ops.iter().enumerate() {
3012            if let Some(n) = fault_after_ops
3013                && applied == n
3014            {
3015                for idx in (0..applied).rev() {
3016                    let ((ref sheet, row, col), ref old) = old_values[idx];
3017                    if sheet == &anchor_sheet_name && row == anchor_row && col == anchor_col {
3018                        self.update_vertex_value(anchor, old.value.clone());
3019                    } else {
3020                        let _ = self.set_cell_value(sheet, row + 1, col + 1, old.value.clone());
3021                    }
3022                }
3023                return Err(ExcelError::new(ExcelErrorKind::Error)
3024                    .with_message("Injected persistence fault during spill commit"));
3025            }
3026            if op.sheet == anchor_sheet_name && op.row == anchor_row && op.col == anchor_col {
3027                self.update_vertex_value(anchor, op.new_value.clone());
3028            } else {
3029                let _ =
3030                    self.set_cell_value(&op.sheet, op.row + 1, op.col + 1, op.new_value.clone());
3031            }
3032        }
3033
3034        // Update spill ownership maps only on success
3035        // Clear previous ownership not reused
3036        for cell in prev_cells.iter() {
3037            if !new_set.contains(cell) {
3038                self.spill_cell_to_anchor.remove(cell);
3039            }
3040        }
3041        // Mark ownership for new rectangle using the declared target cells only
3042        for cell in &target_cells {
3043            self.spill_cell_to_anchor.insert(*cell, anchor);
3044        }
3045        self.spill_anchor_to_cells.insert(anchor, target_cells);
3046        Ok(())
3047    }
3048
3049    pub(crate) fn spill_cells_for_anchor(&self, anchor: VertexId) -> Option<&[CellRef]> {
3050        self.spill_anchor_to_cells
3051            .get(&anchor)
3052            .map(|v| v.as_slice())
3053    }
3054
3055    pub(crate) fn spill_registry_has_anchor(&self, anchor: VertexId) -> bool {
3056        self.spill_anchor_to_cells.contains_key(&anchor)
3057    }
3058
3059    pub(crate) fn spill_registry_anchor_for_cell(&self, cell: CellRef) -> Option<VertexId> {
3060        self.spill_cell_to_anchor.get(&cell).copied()
3061    }
3062
3063    pub(crate) fn spill_registry_counts(&self) -> (usize, usize) {
3064        (
3065            self.spill_anchor_to_cells.len(),
3066            self.spill_cell_to_anchor.len(),
3067        )
3068    }
3069
3070    /// Clear an existing spill region for an anchor (set cells to Empty and forget ownership)
3071    pub fn clear_spill_region(&mut self, anchor: VertexId) {
3072        let _ = self.clear_spill_region_bulk(anchor);
3073    }
3074
3075    /// Bulk clear an existing spill region for an anchor.
3076    ///
3077    /// This avoids calling `set_cell_value()` per spill child (which can trigger O(N*V)
3078    /// dependent scans when `edges.delta_size() > 0`). Instead, it clears values directly and
3079    /// performs a single dirty propagation over the affected spill children.
3080    ///
3081    /// Returns the previously registered spill cells (including the anchor cell) for callers that
3082    /// want to mirror/record deltas.
3083    pub fn clear_spill_region_bulk(&mut self, anchor: VertexId) -> Vec<CellRef> {
3084        let anchor_cell = self.get_cell_ref(anchor);
3085        let Some(cells) = self.spill_anchor_to_cells.remove(&anchor) else {
3086            return Vec::new();
3087        };
3088
3089        // Remove ownership for all cells first.
3090        for cell in cells.iter() {
3091            self.spill_cell_to_anchor.remove(cell);
3092        }
3093
3094        // Prepare a single arena value ref for Empty (only when caching is enabled).
3095        let empty_ref = if self.value_cache_enabled {
3096            Some(self.data_store.store_value(LiteralValue::Empty))
3097        } else {
3098            None
3099        };
3100
3101        // Clear all spill children (excluding the anchor cell).
3102        let mut changed_vertices: Vec<VertexId> = Vec::new();
3103        for cell in cells.iter().copied() {
3104            let is_anchor = anchor_cell.map(|a| a == cell).unwrap_or(false);
3105            if is_anchor {
3106                continue;
3107            }
3108            let Some(&vid) = self.cell_to_vertex.get(&cell) else {
3109                continue;
3110            };
3111            // Ensure this vertex is a plain value cell.
3112            if self.vertex_formulas.remove(&vid).is_some() {
3113                // Be conservative: remove outgoing edges if this was a formula vertex.
3114                // This should be rare for spill children under normal policies.
3115                self.remove_dependent_edges(vid);
3116            }
3117            self.store.set_kind(vid, VertexKind::Cell);
3118            if let Some(er) = empty_ref {
3119                self.vertex_values.insert(vid, er);
3120            } else {
3121                self.vertex_values.remove(&vid);
3122            }
3123            self.store.set_dirty(vid, false);
3124            self.dirty_vertices.remove(&vid);
3125            changed_vertices.push(vid);
3126        }
3127
3128        // Single dirty propagation for all changed spill children.
3129        if !changed_vertices.is_empty() {
3130            self.mark_dirty_many_value_cells(&changed_vertices);
3131        }
3132
3133        cells
3134    }
3135
3136    fn mark_dirty_many_value_cells(&mut self, vertex_ids: &[VertexId]) -> Vec<VertexId> {
3137        if vertex_ids.is_empty() {
3138            return Vec::new();
3139        }
3140
3141        // Deferred-dirty scope (e.g. a spill clear inside a batched
3142        // `set_values`): queue the sources for the end-of-scope flush. The
3143        // general `mark_dirty_many` flush handles value-cell sources via its
3144        // per-source kind check, so one pending list serves both entry
3145        // points. (The flush's per-source range-dependent collection is a
3146        // subset of this path's bounding-rect collection, which conservatively
3147        // over-dirties; the per-source union is the exact required set.)
3148        if self.deferred_dirty_depth > 0 {
3149            self.deferred_dirty_pending.extend_from_slice(vertex_ids);
3150            return vertex_ids.to_vec();
3151        }
3152
3153        // Fold pending deltas once so the propagation loop below can use the
3154        // zero-allocation base `in_edges` slices. This is a deliberate
3155        // rebuild-on-read seam: one rebuild per bulk propagation, amortized
3156        // (the per-vertex alternative would allocate a merged Vec per visit).
3157        if self.edges.delta_size() > 0 {
3158            self.edges.rebuild();
3159        }
3160
3161        let mut affected: FxHashSet<VertexId> = FxHashSet::default();
3162        let mut to_visit: Vec<VertexId> = Vec::new();
3163        let mut visited_for_propagation: FxHashSet<VertexId> = FxHashSet::default();
3164
3165        // Value sources are affected but not marked dirty themselves.
3166        for &src in vertex_ids {
3167            affected.insert(src);
3168        }
3169
3170        // Collect initial direct dependents and name dependents.
3171        for &src in vertex_ids {
3172            to_visit.extend(self.edges.in_edges(src));
3173            if let Some(name_set) = self.cell_to_name_dependents.get(&src) {
3174                for &name_vertex in name_set {
3175                    to_visit.push(name_vertex);
3176                }
3177            }
3178        }
3179
3180        // Collect range dependents in bulk using spill rect bounds per sheet.
3181        let mut bounds_by_sheet: FxHashMap<SheetId, (u32, u32, u32, u32)> = FxHashMap::default();
3182        for &src in vertex_ids {
3183            let view = self.store.view(src);
3184            let sid = view.sheet_id();
3185            let r = view.row();
3186            let c = view.col();
3187            bounds_by_sheet
3188                .entry(sid)
3189                .and_modify(|b| {
3190                    b.0 = b.0.min(r);
3191                    b.1 = b.1.max(r);
3192                    b.2 = b.2.min(c);
3193                    b.3 = b.3.max(c);
3194                })
3195                .or_insert((r, r, c, c));
3196        }
3197
3198        for (sid, (sr, er, sc, ec)) in bounds_by_sheet {
3199            to_visit.extend(self.collect_range_dependents_for_rect(sid, sr, sc, er, ec));
3200        }
3201
3202        while let Some(id) = to_visit.pop() {
3203            if !visited_for_propagation.insert(id) {
3204                continue;
3205            }
3206            self.dirty_propagation_visits += 1;
3207            affected.insert(id);
3208            self.store.set_dirty(id, true);
3209            to_visit.extend(self.edges.in_edges(id));
3210            to_visit.extend(self.collect_range_dependents_for_vertex(id));
3211        }
3212
3213        self.dirty_vertices.extend(&affected);
3214        affected.into_iter().collect()
3215    }
3216
3217    fn collect_range_dependents_for_vertex(&self, vertex_id: VertexId) -> Vec<VertexId> {
3218        match self.store.kind(vertex_id) {
3219            VertexKind::Cell
3220            | VertexKind::Empty
3221            | VertexKind::FormulaScalar
3222            | VertexKind::FormulaArray => {
3223                let view = self.store.view(vertex_id);
3224                self.collect_range_dependents_for_rect(
3225                    view.sheet_id(),
3226                    view.row(),
3227                    view.col(),
3228                    view.row(),
3229                    view.col(),
3230                )
3231            }
3232            _ => Vec::new(),
3233        }
3234    }
3235
3236    fn collect_range_dependents_for_rect(
3237        &self,
3238        sheet_id: SheetId,
3239        start_row: u32,
3240        start_col: u32,
3241        end_row: u32,
3242        end_col: u32,
3243    ) -> Vec<VertexId> {
3244        if self.stripe_to_dependents.is_empty() {
3245            return Vec::new();
3246        }
3247        let mut candidates: FxHashSet<VertexId> = FxHashSet::default();
3248
3249        for col in start_col..=end_col {
3250            let key = StripeKey {
3251                sheet_id,
3252                stripe_type: StripeType::Column,
3253                index: col,
3254            };
3255            if let Some(deps) = self.stripe_to_dependents.get(&key) {
3256                candidates.extend(deps);
3257            }
3258        }
3259        for row in start_row..=end_row {
3260            let key = StripeKey {
3261                sheet_id,
3262                stripe_type: StripeType::Row,
3263                index: row,
3264            };
3265            if let Some(deps) = self.stripe_to_dependents.get(&key) {
3266                candidates.extend(deps);
3267            }
3268        }
3269        if self.config.enable_block_stripes {
3270            let br0 = start_row / BLOCK_H;
3271            let br1 = end_row / BLOCK_H;
3272            let bc0 = start_col / BLOCK_W;
3273            let bc1 = end_col / BLOCK_W;
3274            for br in br0..=br1 {
3275                for bc in bc0..=bc1 {
3276                    let key = StripeKey {
3277                        sheet_id,
3278                        stripe_type: StripeType::Block,
3279                        index: block_index(br * BLOCK_H, bc * BLOCK_W),
3280                    };
3281                    if let Some(deps) = self.stripe_to_dependents.get(&key) {
3282                        candidates.extend(deps);
3283                    }
3284                }
3285            }
3286        }
3287
3288        // Precision check: the dirty rect must overlap at least one of the formula's registered ranges.
3289        let mut out: Vec<VertexId> = Vec::new();
3290        for dep_id in candidates {
3291            let Some(ranges) = self.formula_to_range_deps.get(&dep_id) else {
3292                continue;
3293            };
3294            let mut hit = false;
3295            for range in ranges {
3296                let range_sheet_id = match range.sheet {
3297                    SharedSheetLocator::Id(id) => id,
3298                    _ => sheet_id,
3299                };
3300                if range_sheet_id != sheet_id {
3301                    continue;
3302                }
3303                let sr0 = range.start_row.map(|b| b.index).unwrap_or(0);
3304                let er0 = range.end_row.map(|b| b.index).unwrap_or(u32::MAX);
3305                let sc0 = range.start_col.map(|b| b.index).unwrap_or(0);
3306                let ec0 = range.end_col.map(|b| b.index).unwrap_or(u32::MAX);
3307                let overlap =
3308                    sr0 <= end_row && er0 >= start_row && sc0 <= end_col && ec0 >= start_col;
3309                if overlap {
3310                    hit = true;
3311                    break;
3312                }
3313            }
3314            if hit {
3315                out.push(dep_id);
3316            }
3317        }
3318        out
3319    }
3320
3321    /// Check if a vertex exists
3322    pub(crate) fn vertex_exists(&self, vertex_id: VertexId) -> bool {
3323        if vertex_id.0 < FIRST_NORMAL_VERTEX {
3324            return false;
3325        }
3326        let index = (vertex_id.0 - FIRST_NORMAL_VERTEX) as usize;
3327        index < self.store.len()
3328    }
3329
3330    /// Get the kind of a vertex
3331    pub(crate) fn get_vertex_kind(&self, vertex_id: VertexId) -> VertexKind {
3332        self.store.kind(vertex_id)
3333    }
3334
3335    /// Get the sheet ID of a vertex
3336    pub(crate) fn get_vertex_sheet_id(&self, vertex_id: VertexId) -> SheetId {
3337        self.store.sheet_id(vertex_id)
3338    }
3339
3340    pub fn get_formula_id(&self, vertex_id: VertexId) -> Option<AstNodeId> {
3341        self.vertex_formulas.get(&vertex_id).copied()
3342    }
3343
3344    pub(crate) fn formula_vertices(&self) -> Vec<VertexId> {
3345        let mut vertices = self.vertex_formulas.keys().copied().collect::<Vec<_>>();
3346        vertices.sort_unstable();
3347        vertices
3348    }
3349
3350    pub fn get_formula_id_and_volatile(&self, vertex_id: VertexId) -> Option<(AstNodeId, bool)> {
3351        let ast_id = self.get_formula_id(vertex_id)?;
3352        Some((ast_id, self.is_volatile(vertex_id)))
3353    }
3354
3355    pub fn get_formula_node(&self, vertex_id: VertexId) -> Option<&super::arena::AstNodeData> {
3356        let ast_id = self.get_formula_id(vertex_id)?;
3357        self.data_store.get_node(ast_id)
3358    }
3359
3360    pub fn get_formula_node_and_volatile(
3361        &self,
3362        vertex_id: VertexId,
3363    ) -> Option<(&super::arena::AstNodeData, bool)> {
3364        let (ast_id, vol) = self.get_formula_id_and_volatile(vertex_id)?;
3365        let node = self.data_store.get_node(ast_id)?;
3366        Some((node, vol))
3367    }
3368
3369    /// Get the formula AST for a vertex.
3370    ///
3371    /// Not used in hot paths; reconstructs from arena.
3372    pub fn get_formula(&self, vertex_id: VertexId) -> Option<ASTNode> {
3373        let ast_id = self.get_formula_id(vertex_id)?;
3374        self.data_store.retrieve_ast(ast_id, &self.sheet_reg)
3375    }
3376
3377    /// Get the value stored for a vertex
3378    pub fn get_value(&self, vertex_id: VertexId) -> Option<LiteralValue> {
3379        if !self.value_cache_enabled {
3380            // In canonical mode, cell/formula values must not be read from the graph.
3381            // Non-cell vertices (e.g. named ranges, external sources) may still use graph storage.
3382            match self.store.kind(vertex_id) {
3383                VertexKind::Cell
3384                | VertexKind::FormulaScalar
3385                | VertexKind::FormulaArray
3386                | VertexKind::Empty => {
3387                    #[cfg(debug_assertions)]
3388                    {
3389                        self.graph_value_read_attempts
3390                            .fetch_add(1, Ordering::Relaxed);
3391                    }
3392                    return None;
3393                }
3394                _ => {
3395                    // Allow non-cell vertices to use vertex_values.
3396                }
3397            }
3398        }
3399        self.vertex_values
3400            .get(&vertex_id)
3401            .map(|&value_ref| self.data_store.retrieve_value(value_ref))
3402    }
3403
3404    /// Get the cell reference for a vertex
3405    pub(crate) fn get_cell_ref(&self, vertex_id: VertexId) -> Option<CellRef> {
3406        let packed_coord = self.store.coord(vertex_id);
3407        let sheet_id = self.store.sheet_id(vertex_id);
3408        let coord = Coord::new(packed_coord.row(), packed_coord.col(), true, true);
3409        Some(CellRef::new(sheet_id, coord))
3410    }
3411
3412    /// Create a cell reference (helper for internal use)
3413    pub(crate) fn make_cell_ref_internal(&self, sheet_id: SheetId, row: u32, col: u32) -> CellRef {
3414        let coord = Coord::new(row, col, true, true);
3415        CellRef::new(sheet_id, coord)
3416    }
3417
3418    /// Create a cell reference from sheet name and Excel 1-based coordinates.
3419    pub fn make_cell_ref(&self, sheet_name: &str, row: u32, col: u32) -> CellRef {
3420        let sheet_id = self.sheet_reg.get_id(sheet_name).unwrap_or(0);
3421        let coord = Coord::from_excel(row, col, true, true);
3422        CellRef::new(sheet_id, coord)
3423    }
3424
3425    /// Check if a vertex is dirty
3426    pub(crate) fn is_dirty(&self, vertex_id: VertexId) -> bool {
3427        self.store.is_dirty(vertex_id)
3428    }
3429
3430    /// Check if a vertex is volatile
3431    pub(crate) fn is_volatile(&self, vertex_id: VertexId) -> bool {
3432        self.store.is_volatile(vertex_id)
3433    }
3434
3435    pub(crate) fn is_dynamic(&self, vertex_id: VertexId) -> bool {
3436        self.store.is_dynamic(vertex_id)
3437    }
3438
3439    /// Get vertex ID for a cell address
3440    pub fn get_vertex_id_for_address(&self, addr: &CellRef) -> Option<&VertexId> {
3441        self.cell_to_vertex.get(addr)
3442    }
3443
3444    #[cfg(test)]
3445    pub fn cell_to_vertex(
3446        &self,
3447    ) -> &std::collections::HashMap<CellRef, VertexId, CoordBuildHasher> {
3448        &self.cell_to_vertex
3449    }
3450
3451    /// Borrow dependencies of a vertex when no pending edge delta exists.
3452    ///
3453    /// This enables zero-allocation traversal in hot scheduler paths.
3454    #[inline]
3455    pub(crate) fn dependencies_slice(&self, vertex_id: VertexId) -> Option<&[VertexId]> {
3456        self.edges.out_edges_ref(vertex_id)
3457    }
3458
3459    /// Get the dependencies of a vertex (for scheduler)
3460    pub(crate) fn get_dependencies(&self, vertex_id: VertexId) -> Vec<VertexId> {
3461        self.edges.out_edges(vertex_id)
3462    }
3463
3464    /// Check if a vertex has a self-loop
3465    pub(crate) fn has_self_loop(&self, vertex_id: VertexId) -> bool {
3466        if let Some(deps) = self.dependencies_slice(vertex_id) {
3467            deps.contains(&vertex_id)
3468        } else {
3469            self.edges.out_edges(vertex_id).contains(&vertex_id)
3470        }
3471    }
3472
3473    /// Borrow dependents of a vertex when no pending edge delta exists.
3474    ///
3475    /// This enables zero-allocation traversal in hot scheduler paths.
3476    #[inline]
3477    pub(crate) fn dependents_slice(&self, vertex_id: VertexId) -> Option<&[VertexId]> {
3478        self.edges.in_edges_ref(vertex_id)
3479    }
3480
3481    /// Get dependents of a vertex (vertices that depend on this vertex)
3482    ///
3483    /// Delta-aware: pending edge mutations that have not been folded into the
3484    /// CSR base yet are merged in via the delta slab's reverse index, so this
3485    /// is O(in-degree) even mid-edit (no O(V) scan, no forced rebuild; #125).
3486    pub(crate) fn get_dependents(&self, vertex_id: VertexId) -> Vec<VertexId> {
3487        self.edges.in_edges_merged(vertex_id)
3488    }
3489
3490    // Internal helper methods for Milestone 0.4
3491
3492    /// Internal: Create a snapshot of vertex state for rollback
3493    #[doc(hidden)]
3494    pub fn snapshot_vertex(&self, id: VertexId) -> crate::engine::VertexSnapshot {
3495        let coord = self.store.coord(id);
3496        let sheet_id = self.store.sheet_id(id);
3497        let kind = self.store.kind(id);
3498        let flags = self.store.flags(id);
3499
3500        // Get value and formula references
3501        let value_ref = self.vertex_values.get(&id).copied();
3502        let formula_ref = self.vertex_formulas.get(&id).copied();
3503
3504        // Get outgoing edges (dependencies)
3505        let out_edges = self.get_dependencies(id);
3506
3507        crate::engine::VertexSnapshot {
3508            coord,
3509            sheet_id,
3510            kind,
3511            flags,
3512            value_ref,
3513            formula_ref,
3514            out_edges,
3515        }
3516    }
3517
3518    /// Internal: Remove all edges for a vertex
3519    #[doc(hidden)]
3520    pub fn remove_all_edges(&mut self, id: VertexId) {
3521        // Enter batch mode to avoid intermediate rebuilds
3522        self.edges.begin_batch();
3523
3524        // Remove outgoing edges (this vertex's dependencies)
3525        self.remove_dependent_edges(id);
3526
3527        // Remove incoming edges (vertices that depend on this vertex).
3528        // get_dependents is delta-aware, so no rebuild is needed here (#125).
3529        let dependents = self.get_dependents(id);
3530        if self.pk_order.is_some()
3531            && let Some(mut pk) = self.pk_order.take()
3532        {
3533            for dependent in &dependents {
3534                pk.remove_edge(id, *dependent);
3535            }
3536            self.pk_order = Some(pk);
3537        }
3538        for dependent in dependents {
3539            self.edges.remove_edge(dependent, id);
3540        }
3541
3542        // Exit batch mode and rebuild once with all changes
3543        self.edges.end_batch();
3544    }
3545
3546    /// Internal: Mark vertex as having #REF! error
3547    #[doc(hidden)]
3548    pub fn mark_as_ref_error(&mut self, id: VertexId) {
3549        if !self.value_cache_enabled {
3550            match self.store.kind(id) {
3551                VertexKind::Cell
3552                | VertexKind::FormulaScalar
3553                | VertexKind::FormulaArray
3554                | VertexKind::Empty => {
3555                    self.ref_error_vertices.insert(id);
3556                    // Canonical-only: graph does not cache cell/formula values.
3557                    // Ensure the dependent subgraph is dirtied so evaluation updates Arrow truth.
3558                    self.vertex_values.remove(&id);
3559                    let _ = self.mark_dirty(id);
3560                    return;
3561                }
3562                _ => {
3563                    // Allow non-cell vertices to use cached values.
3564                }
3565            }
3566        }
3567        let error = LiteralValue::Error(ExcelError::new(ExcelErrorKind::Ref));
3568        let value_ref = self.data_store.store_value(error);
3569        self.vertex_values.insert(id, value_ref);
3570        let _ = self.mark_dirty(id);
3571    }
3572
3573    /// Check if a vertex has a #REF! error
3574    pub fn is_ref_error(&self, id: VertexId) -> bool {
3575        if !self.value_cache_enabled {
3576            match self.store.kind(id) {
3577                VertexKind::Cell
3578                | VertexKind::FormulaScalar
3579                | VertexKind::FormulaArray
3580                | VertexKind::Empty => {
3581                    return self.ref_error_vertices.contains(&id);
3582                }
3583                _ => {
3584                    // Non-cell vertices may still have cached values.
3585                }
3586            }
3587        }
3588        if let Some(value_ref) = self.vertex_values.get(&id) {
3589            let value = self.data_store.retrieve_value(*value_ref);
3590            if let LiteralValue::Error(err) = value {
3591                return err.kind == ExcelErrorKind::Ref;
3592            }
3593        }
3594        false
3595    }
3596
3597    /// Internal: Mark all direct dependents as dirty
3598    #[doc(hidden)]
3599    pub fn mark_dependents_dirty(&mut self, id: VertexId) {
3600        let dependents = self.get_dependents(id);
3601        for dep_id in dependents {
3602            self.store.set_dirty(dep_id, true);
3603            self.dirty_vertices.insert(dep_id);
3604        }
3605    }
3606
3607    /// Internal: Mark a vertex as volatile
3608    #[doc(hidden)]
3609    pub fn mark_volatile(&mut self, id: VertexId, volatile: bool) {
3610        self.store.set_volatile(id, volatile);
3611        if volatile {
3612            self.volatile_vertices.insert(id);
3613        } else {
3614            self.volatile_vertices.remove(&id);
3615        }
3616    }
3617
3618    /// Update vertex coordinate
3619    #[doc(hidden)]
3620    pub fn set_coord(&mut self, id: VertexId, coord: AbsCoord) {
3621        self.store.set_coord(id, coord);
3622    }
3623
3624    /// Update edge cache coordinate
3625    #[doc(hidden)]
3626    pub fn update_edge_coord(&mut self, id: VertexId, coord: AbsCoord) {
3627        self.edges.update_coord(id, coord);
3628    }
3629
3630    /// Mark vertex as deleted (tombstone)
3631    #[doc(hidden)]
3632    pub fn mark_deleted(&mut self, id: VertexId, deleted: bool) {
3633        self.store.mark_deleted(id, deleted);
3634    }
3635
3636    /// Set vertex kind
3637    #[doc(hidden)]
3638    pub fn set_kind(&mut self, id: VertexId, kind: VertexKind) {
3639        self.store.set_kind(id, kind);
3640    }
3641
3642    /// Set vertex dirty flag
3643    #[doc(hidden)]
3644    pub fn set_dirty(&mut self, id: VertexId, dirty: bool) {
3645        self.store.set_dirty(id, dirty);
3646        if dirty {
3647            self.dirty_vertices.insert(id);
3648        } else {
3649            self.dirty_vertices.remove(&id);
3650        }
3651    }
3652
3653    /// Get vertex kind (for testing)
3654    #[cfg(test)]
3655    pub(crate) fn get_kind(&self, id: VertexId) -> VertexKind {
3656        self.store.kind(id)
3657    }
3658
3659    /// Get vertex flags (for testing)
3660    #[cfg(test)]
3661    pub(crate) fn get_flags(&self, id: VertexId) -> u8 {
3662        self.store.flags(id)
3663    }
3664
3665    /// Check if vertex is deleted (for testing)
3666    #[cfg(test)]
3667    pub(crate) fn is_deleted(&self, id: VertexId) -> bool {
3668        self.store.is_deleted(id)
3669    }
3670
3671    /// Force edge rebuild (internal use)
3672    #[doc(hidden)]
3673    pub fn rebuild_edges(&mut self) {
3674        self.edges.rebuild();
3675    }
3676
3677    /// Fold pending edge deltas into the CSR base ahead of a read-heavy phase
3678    /// (scheduling/evaluation), restoring the zero-allocation slice fast
3679    /// paths. No-op when no deltas are pending. This is the read-side half of
3680    /// the #125 amortization: writes defer rebuilds, read bursts pay for at
3681    /// most one.
3682    pub fn flush_pending_edge_deltas(&mut self) {
3683        self.edges.rebuild();
3684    }
3685
3686    /// Get delta size (internal use)
3687    #[doc(hidden)]
3688    pub fn edges_delta_size(&self) -> usize {
3689        self.edges.delta_size()
3690    }
3691
3692    /// Number of full CSR rebuilds performed so far (observability; used by
3693    /// the #125 rebuild-amortization regression tests).
3694    #[doc(hidden)]
3695    pub fn edges_rebuild_count(&self) -> u64 {
3696        self.edges.rebuild_count()
3697    }
3698
3699    /// Get vertex ID for specific cell address
3700    pub fn get_vertex_for_cell(&self, addr: &CellRef) -> Option<VertexId> {
3701        self.cell_to_vertex.get(addr).copied()
3702    }
3703
3704    /// Get coord for a vertex (public for VertexEditor)
3705    pub fn get_coord(&self, id: VertexId) -> AbsCoord {
3706        self.store.coord(id)
3707    }
3708
3709    /// Get sheet_id for a vertex (public for VertexEditor)
3710    pub fn get_sheet_id(&self, id: VertexId) -> SheetId {
3711        self.store.sheet_id(id)
3712    }
3713
3714    /// Get all vertices in a sheet
3715    pub fn vertices_in_sheet(&self, sheet_id: SheetId) -> impl Iterator<Item = VertexId> + '_ {
3716        self.store
3717            .all_vertices()
3718            .filter(move |&id| self.vertex_exists(id) && self.store.sheet_id(id) == sheet_id)
3719    }
3720
3721    /// Does a vertex have a formula associated
3722    pub fn vertex_has_formula(&self, id: VertexId) -> bool {
3723        self.vertex_formulas.contains_key(&id)
3724    }
3725
3726    /// Get all vertices with formulas
3727    pub fn vertices_with_formulas(&self) -> impl Iterator<Item = VertexId> + '_ {
3728        self.vertex_formulas.keys().copied()
3729    }
3730
3731    /// Update a vertex's formula
3732    pub fn update_vertex_formula(&mut self, id: VertexId, ast: ASTNode) -> Result<(), ExcelError> {
3733        // Get the sheet_id for this vertex
3734        let sheet_id = self.store.sheet_id(id);
3735
3736        // If the adjusted AST contains special #REF markers (from structural edits),
3737        // treat this as a REF error on the vertex instead of attempting to resolve.
3738        // This prevents failures when reference_adjuster injected placeholder refs.
3739        let has_ref_marker = ast.get_dependencies().into_iter().any(|r| {
3740            matches!(
3741                r,
3742                ReferenceType::Cell { sheet: Some(s), .. }
3743                    | ReferenceType::Range { sheet: Some(s), .. } if s == "#REF"
3744            )
3745        });
3746        if has_ref_marker {
3747            // Store the adjusted AST for round-tripping/display, but set value state to #REF!
3748            let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
3749            self.vertex_formulas.insert(id, ast_id);
3750            self.mark_as_ref_error(id);
3751            self.store.set_kind(id, VertexKind::FormulaScalar);
3752            return Ok(());
3753        }
3754
3755        // Extract dependencies from AST
3756        let (new_dependencies, new_range_dependencies, _, named_dependencies) =
3757            self.extract_dependencies(&ast, sheet_id)?;
3758
3759        // Remove old dependencies first
3760        self.remove_dependent_edges(id);
3761        self.detach_vertex_from_names(id);
3762
3763        // Store the new formula
3764        let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
3765        self.vertex_formulas.insert(id, ast_id);
3766
3767        // Add new dependency edges
3768        self.add_dependent_edges(id, &new_dependencies);
3769        self.add_range_dependent_edges(id, &new_range_dependencies, sheet_id);
3770
3771        if !named_dependencies.is_empty() {
3772            self.attach_vertex_to_names(id, &named_dependencies);
3773        }
3774
3775        // Mark as formula vertex
3776        self.store.set_kind(id, VertexKind::FormulaScalar);
3777
3778        Ok(())
3779    }
3780
3781    /// Mark a vertex as dirty without propagation (for VertexEditor)
3782    pub fn mark_vertex_dirty(&mut self, vertex_id: VertexId) {
3783        self.store.set_dirty(vertex_id, true);
3784        self.dirty_vertices.insert(vertex_id);
3785    }
3786
3787    /// Batch-mark vertices dirty without propagation.
3788    pub fn mark_vertices_dirty_batch(&mut self, vertices: &[VertexId]) {
3789        self.dirty_vertices.reserve(vertices.len());
3790        for &vertex_id in vertices {
3791            self.store.set_dirty(vertex_id, true);
3792        }
3793        self.dirty_vertices.extend(vertices.iter().copied());
3794    }
3795
3796    /// Update cell mapping for a vertex (for VertexEditor)
3797    pub fn update_cell_mapping(
3798        &mut self,
3799        id: VertexId,
3800        old_addr: Option<CellRef>,
3801        new_addr: CellRef,
3802    ) {
3803        // Remove old mapping if it exists
3804        if let Some(old) = old_addr {
3805            self.cell_to_vertex.remove(&old);
3806        }
3807        // Add new mapping
3808        self.cell_to_vertex.insert(new_addr, id);
3809    }
3810
3811    /// Remove cell mapping (for VertexEditor)
3812    pub fn remove_cell_mapping(&mut self, addr: &CellRef) {
3813        self.cell_to_vertex.remove(addr);
3814    }
3815
3816    /// Get the cell reference for a vertex
3817    pub fn get_cell_ref_for_vertex(&self, id: VertexId) -> Option<CellRef> {
3818        let coord = self.store.coord(id);
3819        let sheet_id = self.store.sheet_id(id);
3820        // Find the cell reference in the mapping
3821        let cell_ref = CellRef::new(sheet_id, Coord::new(coord.row(), coord.col(), true, true));
3822        // Verify it actually maps to this vertex
3823        if self.cell_to_vertex.get(&cell_ref) == Some(&id) {
3824            Some(cell_ref)
3825        } else {
3826            None
3827        }
3828    }
3829
3830    /// Rebuild dependency edges/range links for an existing formula vertex after AST changes.
3831    ///
3832    /// This intentionally reuses the same extraction and edge wiring machinery as
3833    /// `set_cell_formula[_with_volatility]` to preserve edge orientation, placeholder
3834    /// behavior, and name/range dependency semantics.
3835    pub(crate) fn rebuild_formula_dependencies(&mut self, vertex_id: VertexId, ast: &ASTNode) {
3836        let sheet_id = self.store.sheet_id(vertex_id);
3837
3838        // Remove old dependency, name, and pending-name links first.
3839        self.remove_dependent_edges(vertex_id);
3840        self.detach_vertex_from_names(vertex_id);
3841        self.clear_pending_name_references(vertex_id);
3842
3843        let (
3844            new_dependencies,
3845            new_range_dependencies,
3846            _created_placeholders,
3847            named_dependencies,
3848            unresolved_names,
3849        ) = match self.extract_dependencies_with_pending_names(ast, sheet_id) {
3850            Ok(v) => v,
3851            Err(_) => {
3852                self.mark_as_ref_error(vertex_id);
3853                return;
3854            }
3855        };
3856
3857        // Self-reference / name-cycle safety parity with set_cell_formula
3858        // (including the `CyclePolicy::Iterate` self-dependency relaxation).
3859        if new_dependencies.contains(&vertex_id) && !self.config.cycle.allows_self_dependency() {
3860            self.mark_as_ref_error(vertex_id);
3861            return;
3862        }
3863
3864        for &name_vertex in &named_dependencies {
3865            let mut visited = FxHashSet::default();
3866            if self.name_depends_on_vertex(name_vertex, vertex_id, &mut visited) {
3867                self.mark_as_ref_error(vertex_id);
3868                return;
3869            }
3870        }
3871
3872        // Formula is now recoverable again.
3873        self.ref_error_vertices.remove(&vertex_id);
3874        self.vertex_values.remove(&vertex_id);
3875
3876        if !named_dependencies.is_empty() {
3877            self.attach_vertex_to_names(vertex_id, &named_dependencies);
3878        }
3879        for unresolved_name in &unresolved_names {
3880            self.record_pending_name_reference(sheet_id, unresolved_name, vertex_id);
3881        }
3882
3883        self.add_dependent_edges(vertex_id, &new_dependencies);
3884        self.add_range_dependent_edges(vertex_id, &new_range_dependencies, sheet_id);
3885        let _ = self.mark_dirty(vertex_id);
3886    }
3887}
3888
3889// ========== Sheet Management Operations ==========