Skip to main content

formualizer_eval/engine/graph/
mod.rs

1use crate::SheetId;
2use crate::engine::TombstoneRegistry;
3use crate::engine::named_range::{NameScope, NamedDefinition, NamedRange};
4use crate::engine::sheet_registry::SheetRegistry;
5use formualizer_common::{
6    CoordBuildHasher, ExcelError, ExcelErrorKind, LiteralValue, PackedSheetCell,
7};
8use formualizer_parse::parser::{ASTNode, ASTNodeType, ReferenceType};
9use rustc_hash::{FxHashMap, FxHashSet};
10
11#[cfg(debug_assertions)]
12use std::sync::atomic::{AtomicU64, Ordering};
13
14#[cfg(test)]
15#[derive(Debug, Default, Clone)]
16pub struct GraphInstrumentation {
17    pub edges_added: u64,
18    pub stripe_inserts: u64,
19    pub stripe_removes: u64,
20    pub dependents_scan_fallback_calls: u64,
21    pub dependents_scan_vertices_scanned: u64,
22}
23
24mod ast_utils;
25pub mod editor;
26mod formula_analysis;
27mod names;
28mod range_deps;
29mod sheets;
30pub mod snapshot;
31mod sources;
32mod tables;
33
34use super::arena::{AstNodeId, DataStore, ValueRef};
35use super::delta_edges::CsrMutableEdges;
36use super::sheet_index::SheetIndex;
37use super::vertex::{VertexId, VertexKind};
38use super::vertex_store::{FIRST_NORMAL_VERTEX, VertexStore};
39use crate::engine::topo::{
40    GraphAdapter,
41    pk::{DynamicTopo, PkConfig},
42};
43use crate::reference::{CellRef, Coord, SharedRangeRef, SharedRef, SharedSheetLocator};
44use formualizer_common::Coord as AbsCoord;
45// topo::pk wiring will be integrated behind config.use_dynamic_topo in a follow-up step
46
47#[inline]
48fn normalize_stored_literal(value: LiteralValue) -> LiteralValue {
49    match value {
50        // Public contract: store numerics as Number(f64).
51        LiteralValue::Int(i) => LiteralValue::Number(i as f64),
52        other => other,
53    }
54}
55
56pub use editor::change_log::{ChangeEvent, ChangeLog};
57
58// ChangeEvent is now imported from change_log module
59
60/// đź”® Scalability Hook: Dependency reference types for range compression
61#[derive(Debug, Clone, PartialEq, Eq, Hash)]
62pub enum DependencyRef {
63    /// A specific cell dependency
64    Cell(VertexId),
65    /// A dependency on a finite, rectangular range
66    Range {
67        sheet: String,
68        start_row: u32,
69        start_col: u32,
70        end_row: u32, // Inclusive
71        end_col: u32, // Inclusive
72    },
73    /// A whole column dependency (A:A) - future range compression
74    WholeColumn { sheet: String, col: u32 },
75    /// A whole row dependency (1:1) - future range compression  
76    WholeRow { sheet: String, row: u32 },
77}
78
79/// A key representing a coarse-grained section of a sheet
80#[derive(Debug, Clone, Hash, PartialEq, Eq)]
81pub struct StripeKey {
82    pub sheet_id: SheetId,
83    pub stripe_type: StripeType,
84    pub index: u32, // The index of the row, column, or block stripe
85}
86
87#[derive(Debug, Clone, Hash, PartialEq, Eq)]
88pub enum StripeType {
89    Row,
90    Column,
91    Block, // For dense, square-like ranges
92}
93
94/// Block stripe indexing mathematics
95const BLOCK_H: u32 = 256;
96const BLOCK_W: u32 = 256;
97
98pub fn block_index(row: u32, col: u32) -> u32 {
99    (row / BLOCK_H) << 16 | (col / BLOCK_W)
100}
101
102/// A summary of the results of a mutating operation on the graph.
103/// This serves as a "changelog" to the application layer.
104#[derive(Debug, Clone)]
105pub struct OperationSummary {
106    /// Vertices whose values have been directly or indirectly affected.
107    pub affected_vertices: Vec<VertexId>,
108    /// Placeholder cells that were newly created to satisfy dependencies.
109    pub created_placeholders: Vec<CellRef>,
110}
111
112/// SoA-based dependency graph implementation
113#[derive(Debug)]
114pub struct DependencyGraph {
115    // Core columnar storage
116    store: VertexStore,
117
118    // Edge storage with delta slab
119    edges: CsrMutableEdges,
120
121    // Arena-based value and formula storage
122    data_store: DataStore,
123    vertex_values: FxHashMap<VertexId, ValueRef>,
124    vertex_formulas: FxHashMap<VertexId, AstNodeId>,
125
126    /// Gate for storing grid-backed (cell/formula) LiteralValue payloads inside the dependency graph.
127    ///
128    /// When `false` (Arrow-canonical mode), the graph does not store values for cell/formula
129    /// vertices. Arrow (base + overlays) is the sole value store for sheet cells.
130    value_cache_enabled: bool,
131
132    /// Debug-only instrumentation: count attempts to read *cell/formula* graph values while
133    /// caching is disabled (canonical mode guard).
134    #[cfg(debug_assertions)]
135    graph_value_read_attempts: AtomicU64,
136
137    // Address mappings using a hasher tuned for packed Coord / PackedSheetCell
138    // keys. FxHasher's weak avalanche produces O(N^2) collision cascades on
139    // row-major bulk ingest; CoordBuildHasher keeps these strictly O(N).
140    cell_to_vertex: std::collections::HashMap<CellRef, VertexId, CoordBuildHasher>,
141    load_packed_to_vertex: std::collections::HashMap<PackedSheetCell, VertexId, CoordBuildHasher>,
142
143    // Scheduling state - using HashSet for O(1) operations
144    dirty_vertices: FxHashSet<VertexId>,
145    volatile_vertices: FxHashSet<VertexId>,
146
147    /// Vertices explicitly marked as #REF! by structural operations.
148    ///
149    /// In Arrow-truth mode, the dependency graph does not cache cell/formula values.
150    /// We still need a place to record deterministic #REF! invalidations for editor
151    /// operations and structural transforms.
152    ref_error_vertices: FxHashSet<VertexId>,
153
154    // NEW: Specialized managers for range dependencies (Hybrid Model)
155    /// Maps a formula vertex to the ranges it depends on.
156    formula_to_range_deps: FxHashMap<VertexId, Vec<SharedRangeRef<'static>>>,
157
158    /// Maps a stripe to formulas that depend on it via a compressed range.
159    /// CRITICAL: VertexIds are deduplicated within each stripe to avoid quadratic blow-ups.
160    stripe_to_dependents: FxHashMap<StripeKey, FxHashSet<VertexId>>,
161
162    // Sheet-level sparse indexes for O(log n + k) range queries
163    /// Maps sheet_id to its interval tree index for efficient row/column operations
164    sheet_indexes: FxHashMap<SheetId, SheetIndex>,
165
166    // Sheet name/ID mapping
167    sheet_reg: SheetRegistry,
168    default_sheet_id: SheetId,
169
170    // Named ranges support
171    /// Workbook-scoped named ranges
172    named_ranges: FxHashMap<String, NamedRange>,
173
174    /// Normalized-key lookup for workbook-scoped names.
175    ///
176    /// When `config.case_sensitive_names == false`, keys are ASCII-lowercased.
177    /// Values are the canonical (original-cased) name stored in `named_ranges`.
178    named_ranges_lookup: FxHashMap<String, String>,
179
180    /// Sheet-scoped named ranges  
181    sheet_named_ranges: FxHashMap<(SheetId, String), NamedRange>,
182
183    /// Normalized-key lookup for sheet-scoped names.
184    ///
185    /// Key is (SheetId, normalized_name_key). Value is the canonical (original-cased)
186    /// name stored in `sheet_named_ranges`.
187    sheet_named_ranges_lookup: FxHashMap<(SheetId, String), String>,
188
189    /// Reverse mapping: vertex -> names it uses (by vertex id)
190    vertex_to_names: FxHashMap<VertexId, Vec<VertexId>>,
191
192    /// Lookup for name vertex -> (scope, name) to avoid map scans
193    name_vertex_lookup: FxHashMap<VertexId, (NameScope, String)>,
194
195    /// Pending formula vertices referencing unresolved bare symbolic names.
196    ///
197    /// Keys are normalized through `name_lookup_key(...)` so workbook names and
198    /// source scalars can both wake the same waiting formulas when a symbol appears.
199    pending_name_links: FxHashMap<String, FxHashSet<(SheetId, VertexId)>>,
200
201    /// Reverse mapping used to clear stale pending-name registrations when a
202    /// formula is edited, overwritten with a value, or otherwise rebuilt.
203    vertex_to_pending_names: FxHashMap<VertexId, FxHashSet<String>>,
204
205    // Native workbook tables (ListObjects)
206    tables: FxHashMap<String, tables::TableEntry>,
207    /// Normalized-key lookup for tables.
208    tables_lookup: FxHashMap<String, String>,
209    table_vertex_lookup: FxHashMap<VertexId, String>,
210
211    // External sources (SourceVertex)
212    source_scalars: FxHashMap<String, sources::SourceScalarEntry>,
213    source_tables: FxHashMap<String, sources::SourceTableEntry>,
214    source_vertex_lookup: FxHashMap<VertexId, String>,
215
216    /// Monotonic counter to assign synthetic coordinates to name vertices
217    name_vertex_seq: u32,
218
219    /// Monotonic counter to assign synthetic coordinates to source vertices
220    source_vertex_seq: u32,
221
222    /// Mapping from cell vertices to named range vertices that depend on them
223    cell_to_name_dependents: FxHashMap<VertexId, FxHashSet<VertexId>>,
224    /// Cached list of cell dependencies per named range vertex (for teardown)
225    name_to_cell_dependencies: FxHashMap<VertexId, Vec<VertexId>>,
226
227    // Evaluation configuration
228    config: super::EvalConfig,
229
230    // Dynamic topology orderer (Pearce–Kelly) maintained alongside edges when enabled
231    pk_order: Option<DynamicTopo<VertexId>>,
232
233    // Spill registry: anchor -> cells, and reverse mapping for blockers.
234    // `spill_cell_to_anchor` is keyed by `CellRef` and uses the tuned hasher
235    // for the same reason as `cell_to_vertex`.
236    spill_anchor_to_cells: FxHashMap<VertexId, Vec<CellRef>>,
237    spill_cell_to_anchor: std::collections::HashMap<CellRef, VertexId, CoordBuildHasher>,
238
239    // Hint: during initial bulk load, many cells are guaranteed new; allow skipping existence checks per-sheet
240    first_load_assume_new: bool,
241    ensure_touched_sheets: FxHashSet<SheetId>,
242
243    // handled deleted references, in case they are reintroduced.
244    pub tombstone_registry: TombstoneRegistry,
245
246    #[cfg(test)]
247    instr: std::sync::Mutex<GraphInstrumentation>,
248}
249
250impl Default for DependencyGraph {
251    fn default() -> Self {
252        Self::new()
253    }
254}
255
256impl DependencyGraph {
257    /// Expose range expansion limit for planners
258    pub fn range_expansion_limit(&self) -> usize {
259        self.config.range_expansion_limit
260    }
261
262    pub fn get_config(&self) -> &super::EvalConfig {
263        &self.config
264    }
265
266    #[inline]
267    pub(crate) fn value_cache_enabled(&self) -> bool {
268        self.value_cache_enabled
269    }
270
271    /// Debug-only: how many times `get_value`/`get_cell_value` were called while caching is disabled.
272    ///
273    /// In Arrow-canonical mode this should remain 0 for engine/interpreter reads.
274    #[cfg(test)]
275    pub fn debug_graph_value_read_attempts(&self) -> u64 {
276        #[cfg(debug_assertions)]
277        {
278            self.graph_value_read_attempts.load(Ordering::Relaxed)
279        }
280        #[cfg(not(debug_assertions))]
281        {
282            0
283        }
284    }
285
286    /// Build a dependency plan for a set of formulas on sheets
287    pub fn plan_dependencies<'a, I>(
288        &mut self,
289        items: I,
290        policy: &formualizer_parse::parser::CollectPolicy,
291        volatile: Option<&[bool]>,
292    ) -> Result<crate::engine::plan::DependencyPlan, formualizer_common::ExcelError>
293    where
294        I: IntoIterator<Item = (&'a str, u32, u32, &'a formualizer_parse::parser::ASTNode)>,
295    {
296        crate::engine::plan::build_dependency_plan(
297            &mut self.sheet_reg,
298            items.into_iter(),
299            policy,
300            volatile,
301        )
302    }
303
304    /// Ensure vertices exist for given coords; allocate missing in contiguous batches and add to edges/index.
305    /// Returns a list suitable for edges.add_vertices_batch.
306    pub fn ensure_vertices_batch(
307        &mut self,
308        coords: &[(SheetId, AbsCoord)],
309    ) -> Vec<(AbsCoord, u32)> {
310        self.ensure_vertices_batch_ordered(coords).1
311    }
312
313    /// Ensure vertices exist for given packed absolute cells and return vertex ids aligned to the
314    /// input order, plus the newly allocated `(coord, raw_vid)` items suitable for edge/index
315    /// population.
316    pub fn ensure_vertices_batch_packed_ordered(
317        &mut self,
318        packed_cells: &[PackedSheetCell],
319    ) -> (Vec<VertexId>, Vec<(AbsCoord, u32)>) {
320        #[cfg(feature = "perf_instrumentation")]
321        use crate::instant::FzInstant as PerfInstant;
322        use rustc_hash::FxHashMap;
323
324        #[cfg(feature = "perf_instrumentation")]
325        let debug = std::env::var("FZ_DEBUG_LOAD")
326            .ok()
327            .is_some_and(|v| v != "0");
328        #[cfg(feature = "perf_instrumentation")]
329        let t0 = PerfInstant::now();
330
331        let mut ordered: Vec<Option<VertexId>> = vec![None; packed_cells.len()];
332        if packed_cells.is_empty() {
333            return (Vec::new(), Vec::new());
334        }
335
336        let first_sid = packed_cells[0].sheet_id();
337        let single_sheet = packed_cells.iter().all(|cell| cell.sheet_id() == first_sid);
338        let mut add_batch: Vec<(AbsCoord, u32)> = Vec::new();
339
340        #[cfg(feature = "perf_instrumentation")]
341        let mut packed_hits = 0usize;
342        #[cfg(feature = "perf_instrumentation")]
343        let mut generic_hits = 0usize;
344        #[cfg(feature = "perf_instrumentation")]
345        let mut missing = 0usize;
346        #[cfg(feature = "perf_instrumentation")]
347        let mut t_packed_lookup_us = 0u128;
348        #[cfg(feature = "perf_instrumentation")]
349        let mut t_generic_lookup_us = 0u128;
350        #[cfg(feature = "perf_instrumentation")]
351        let mut t_alloc_us = 0u128;
352        #[cfg(feature = "perf_instrumentation")]
353        let mut t_map_insert_us = 0u128;
354        #[cfg(feature = "perf_instrumentation")]
355        let mut t_index_insert_us = 0u128;
356        #[cfg(feature = "perf_instrumentation")]
357        let mut t_edge_register_us = 0u128;
358
359        if single_sheet {
360            let sid = first_sid;
361            let mut missing_items: Vec<(usize, PackedSheetCell)> =
362                Vec::with_capacity(packed_cells.len());
363
364            for (idx, packed) in packed_cells.iter().copied().enumerate() {
365                #[cfg(feature = "perf_instrumentation")]
366                let tl0 = PerfInstant::now();
367                if self.first_load_assume_new
368                    && let Some(&existing) = self.load_packed_to_vertex.get(&packed)
369                {
370                    ordered[idx] = Some(existing);
371                    #[cfg(feature = "perf_instrumentation")]
372                    {
373                        packed_hits += 1;
374                        t_packed_lookup_us += tl0.elapsed().as_micros();
375                    }
376                    continue;
377                }
378                #[cfg(feature = "perf_instrumentation")]
379                {
380                    t_packed_lookup_us += tl0.elapsed().as_micros();
381                }
382
383                let pc = AbsCoord::new(packed.row0(), packed.col0());
384                let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
385                #[cfg(feature = "perf_instrumentation")]
386                let tg0 = PerfInstant::now();
387                if let Some(&existing) = self.cell_to_vertex.get(&addr) {
388                    ordered[idx] = Some(existing);
389                    if self.first_load_assume_new {
390                        self.load_packed_to_vertex.insert(packed, existing);
391                    }
392                    #[cfg(feature = "perf_instrumentation")]
393                    {
394                        generic_hits += 1;
395                    }
396                } else {
397                    missing_items.push((idx, packed));
398                    #[cfg(feature = "perf_instrumentation")]
399                    {
400                        missing += 1;
401                    }
402                }
403                #[cfg(feature = "perf_instrumentation")]
404                {
405                    t_generic_lookup_us += tg0.elapsed().as_micros();
406                }
407            }
408
409            if !missing_items.is_empty() {
410                self.ensure_touched_sheets.insert(sid);
411
412                let mut pcs: Vec<AbsCoord> = Vec::with_capacity(missing_items.len());
413                for (_, packed) in &missing_items {
414                    pcs.push(AbsCoord::new(packed.row0(), packed.col0()));
415                }
416
417                #[cfg(feature = "perf_instrumentation")]
418                let ta0 = PerfInstant::now();
419                let vids = self.store.allocate_contiguous(sid, &pcs, 0x00);
420                #[cfg(feature = "perf_instrumentation")]
421                {
422                    t_alloc_us += ta0.elapsed().as_micros();
423                }
424                add_batch.reserve(missing_items.len());
425
426                match self.config.sheet_index_mode {
427                    crate::engine::SheetIndexMode::Eager
428                    | crate::engine::SheetIndexMode::FastBatch => {
429                        for ((input_idx, packed), vid) in
430                            missing_items.into_iter().zip(vids.into_iter())
431                        {
432                            let pc = AbsCoord::new(packed.row0(), packed.col0());
433                            ordered[input_idx] = Some(vid);
434                            add_batch.push((pc, vid.0));
435
436                            #[cfg(feature = "perf_instrumentation")]
437                            let tm0 = PerfInstant::now();
438                            if self.first_load_assume_new {
439                                self.load_packed_to_vertex.insert(packed, vid);
440                            } else {
441                                let addr =
442                                    CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
443                                self.cell_to_vertex.insert(addr, vid);
444                            }
445                            #[cfg(feature = "perf_instrumentation")]
446                            {
447                                t_map_insert_us += tm0.elapsed().as_micros();
448                            }
449
450                            #[cfg(feature = "perf_instrumentation")]
451                            let ti0 = PerfInstant::now();
452                            self.sheet_index_mut(sid).add_vertex(pc, vid);
453                            #[cfg(feature = "perf_instrumentation")]
454                            {
455                                t_index_insert_us += ti0.elapsed().as_micros();
456                            }
457                        }
458                    }
459                    crate::engine::SheetIndexMode::Lazy => {
460                        for ((input_idx, packed), vid) in
461                            missing_items.into_iter().zip(vids.into_iter())
462                        {
463                            let pc = AbsCoord::new(packed.row0(), packed.col0());
464                            ordered[input_idx] = Some(vid);
465                            add_batch.push((pc, vid.0));
466
467                            #[cfg(feature = "perf_instrumentation")]
468                            let tm0 = PerfInstant::now();
469                            if self.first_load_assume_new {
470                                self.load_packed_to_vertex.insert(packed, vid);
471                            } else {
472                                let addr =
473                                    CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
474                                self.cell_to_vertex.insert(addr, vid);
475                            }
476                            #[cfg(feature = "perf_instrumentation")]
477                            {
478                                t_map_insert_us += tm0.elapsed().as_micros();
479                            }
480                        }
481                    }
482                }
483            }
484        } else {
485            let mut grouped: FxHashMap<SheetId, Vec<(usize, PackedSheetCell)>> =
486                FxHashMap::default();
487
488            for (idx, packed) in packed_cells.iter().copied().enumerate() {
489                #[cfg(feature = "perf_instrumentation")]
490                let tl0 = PerfInstant::now();
491                if self.first_load_assume_new
492                    && let Some(&existing) = self.load_packed_to_vertex.get(&packed)
493                {
494                    ordered[idx] = Some(existing);
495                    #[cfg(feature = "perf_instrumentation")]
496                    {
497                        packed_hits += 1;
498                        t_packed_lookup_us += tl0.elapsed().as_micros();
499                    }
500                    continue;
501                }
502                #[cfg(feature = "perf_instrumentation")]
503                {
504                    t_packed_lookup_us += tl0.elapsed().as_micros();
505                }
506
507                let sid = packed.sheet_id();
508                let pc = AbsCoord::new(packed.row0(), packed.col0());
509                let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
510                #[cfg(feature = "perf_instrumentation")]
511                let tg0 = PerfInstant::now();
512                if let Some(&existing) = self.cell_to_vertex.get(&addr) {
513                    ordered[idx] = Some(existing);
514                    if self.first_load_assume_new {
515                        self.load_packed_to_vertex.insert(packed, existing);
516                    }
517                    #[cfg(feature = "perf_instrumentation")]
518                    {
519                        generic_hits += 1;
520                    }
521                } else {
522                    grouped.entry(sid).or_default().push((idx, packed));
523                    #[cfg(feature = "perf_instrumentation")]
524                    {
525                        missing += 1;
526                    }
527                }
528                #[cfg(feature = "perf_instrumentation")]
529                {
530                    t_generic_lookup_us += tg0.elapsed().as_micros();
531                }
532            }
533
534            for (sid, items) in grouped {
535                if items.is_empty() {
536                    continue;
537                }
538                self.ensure_touched_sheets.insert(sid);
539
540                let mut pcs: Vec<AbsCoord> = Vec::with_capacity(items.len());
541                for (_, packed) in &items {
542                    pcs.push(AbsCoord::new(packed.row0(), packed.col0()));
543                }
544
545                #[cfg(feature = "perf_instrumentation")]
546                let ta0 = PerfInstant::now();
547                let vids = self.store.allocate_contiguous(sid, &pcs, 0x00);
548                #[cfg(feature = "perf_instrumentation")]
549                {
550                    t_alloc_us += ta0.elapsed().as_micros();
551                }
552
553                for ((input_idx, packed), vid) in items.into_iter().zip(vids.into_iter()) {
554                    let pc = AbsCoord::new(packed.row0(), packed.col0());
555                    ordered[input_idx] = Some(vid);
556                    add_batch.push((pc, vid.0));
557
558                    #[cfg(feature = "perf_instrumentation")]
559                    let tm0 = PerfInstant::now();
560                    if self.first_load_assume_new {
561                        self.load_packed_to_vertex.insert(packed, vid);
562                    } else {
563                        let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
564                        self.cell_to_vertex.insert(addr, vid);
565                    }
566                    #[cfg(feature = "perf_instrumentation")]
567                    {
568                        t_map_insert_us += tm0.elapsed().as_micros();
569                    }
570
571                    match self.config.sheet_index_mode {
572                        crate::engine::SheetIndexMode::Eager
573                        | crate::engine::SheetIndexMode::FastBatch => {
574                            #[cfg(feature = "perf_instrumentation")]
575                            let ti0 = PerfInstant::now();
576                            self.sheet_index_mut(sid).add_vertex(pc, vid);
577                            #[cfg(feature = "perf_instrumentation")]
578                            {
579                                t_index_insert_us += ti0.elapsed().as_micros();
580                            }
581                        }
582                        crate::engine::SheetIndexMode::Lazy => {
583                            // defer index build
584                        }
585                    }
586                }
587            }
588        }
589
590        if !add_batch.is_empty() {
591            #[cfg(feature = "perf_instrumentation")]
592            let te0 = PerfInstant::now();
593            self.edges.add_vertices_batch(&add_batch);
594            #[cfg(feature = "perf_instrumentation")]
595            {
596                t_edge_register_us += te0.elapsed().as_micros();
597            }
598        }
599
600        #[cfg(feature = "perf_instrumentation")]
601        if debug {
602            eprintln!(
603                "[fz][ensure] cells={} single_sheet={} packed_hits={} generic_hits={} missing={} packed_lookup={}us generic_lookup={}us alloc={}us map_insert={}us index_insert={}us edge_register={}us total={}ms",
604                packed_cells.len(),
605                single_sheet,
606                packed_hits,
607                generic_hits,
608                missing,
609                t_packed_lookup_us,
610                t_generic_lookup_us,
611                t_alloc_us,
612                t_map_insert_us,
613                t_index_insert_us,
614                t_edge_register_us,
615                t0.elapsed().as_millis(),
616            );
617        }
618
619        let ordered = ordered
620            .into_iter()
621            .map(|vid| vid.expect("ensure_vertices_batch_packed_ordered must resolve every coord"))
622            .collect();
623        (ordered, add_batch)
624    }
625
626    /// Ensure vertices exist for given coords and return vertex ids aligned to the input order,
627    /// plus the newly allocated `(coord, raw_vid)` items suitable for edge/index population.
628    pub fn ensure_vertices_batch_ordered(
629        &mut self,
630        coords: &[(SheetId, AbsCoord)],
631    ) -> (Vec<VertexId>, Vec<(AbsCoord, u32)>) {
632        let mut packed: Vec<PackedSheetCell> = Vec::with_capacity(coords.len());
633        for &(sid, coord) in coords {
634            packed.push(Self::packed_cell_key(sid, coord));
635        }
636        self.ensure_vertices_batch_packed_ordered(&packed)
637    }
638
639    #[inline]
640    fn packed_cell_key(sheet_id: SheetId, coord: AbsCoord) -> PackedSheetCell {
641        PackedSheetCell::try_new(sheet_id, coord.row(), coord.col())
642            .expect("graph coordinate must fit PackedSheetCell")
643    }
644
645    fn flush_load_packed_mappings(&mut self) {
646        if self.load_packed_to_vertex.is_empty() {
647            return;
648        }
649        let debug = std::env::var("FZ_DEBUG_LOAD")
650            .ok()
651            .is_some_and(|v| v != "0");
652        let t0 = crate::instant::FzInstant::now();
653        let count = self.load_packed_to_vertex.len();
654        self.cell_to_vertex.reserve(count);
655        for (&packed, &vid) in &self.load_packed_to_vertex {
656            let coord = AbsCoord::new(packed.row0(), packed.col0());
657            let addr = CellRef::new(
658                packed.sheet_id(),
659                Coord::new(coord.row(), coord.col(), true, true),
660            );
661            self.cell_to_vertex.insert(addr, vid);
662        }
663        self.load_packed_to_vertex.clear();
664        if debug {
665            eprintln!(
666                "[fz][load] flush_load_packed_mappings: {} entries in {:.1} ms",
667                count,
668                t0.elapsed().as_secs_f64() * 1000.0,
669            );
670        }
671    }
672
673    /// Enable/disable the first-load fast path for value inserts.
674    pub fn set_first_load_assume_new(&mut self, enabled: bool) {
675        if self.first_load_assume_new && !enabled {
676            self.flush_load_packed_mappings();
677        } else if enabled {
678            self.load_packed_to_vertex.clear();
679        }
680        self.first_load_assume_new = enabled;
681    }
682
683    pub fn first_load_assume_new(&self) -> bool {
684        self.first_load_assume_new
685    }
686
687    /// Reset the per-sheet ensure touch tracking.
688    pub fn reset_ensure_touched(&mut self) {
689        self.ensure_touched_sheets.clear();
690    }
691
692    /// Store ASTs in batch and return their arena ids
693    pub fn store_asts_batch<'a, I>(&mut self, asts: I) -> Vec<AstNodeId>
694    where
695        I: IntoIterator<Item = &'a formualizer_parse::parser::ASTNode>,
696    {
697        self.data_store.store_asts_batch(asts, &self.sheet_reg)
698    }
699
700    /// Reserve metadata structures for upcoming formula assignments during bulk load.
701    pub fn reserve_formula_metadata(&mut self, additional: usize) {
702        self.vertex_formulas.reserve(additional);
703        self.dirty_vertices.reserve(additional);
704        self.volatile_vertices.reserve(additional);
705    }
706
707    /// Lookup VertexId for a (SheetId, AbsCoord)
708    pub fn vid_for_sid_pc(&self, sid: SheetId, pc: AbsCoord) -> Option<VertexId> {
709        let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
710        self.cell_to_vertex.get(&addr).copied()
711    }
712
713    /// Helper to map a global cell index in a plan to a VertexId
714    pub fn vid_for_plan_idx(
715        &self,
716        plan: &crate::engine::plan::DependencyPlan,
717        idx: u32,
718    ) -> Option<VertexId> {
719        let (sid, pc) = plan.global_cells.get(idx as usize).copied()?;
720        self.vid_for_sid_pc(sid, pc)
721    }
722    /// Assign a formula to an existing vertex, removing prior edges and setting flags
723    pub fn assign_formula_vertex(
724        &mut self,
725        vid: VertexId,
726        ast_id: AstNodeId,
727        volatile: bool,
728        dynamic: bool,
729    ) {
730        if self.vertex_formulas.contains_key(&vid) {
731            self.remove_dependent_edges(vid);
732        }
733        self.store
734            .set_kind(vid, crate::engine::vertex::VertexKind::FormulaScalar);
735        self.vertex_values.remove(&vid);
736        self.vertex_formulas.insert(vid, ast_id);
737        self.mark_volatile(vid, volatile);
738        self.store.set_dynamic(vid, dynamic);
739
740        // schedule evaluation
741        self.mark_vertex_dirty(vid);
742    }
743
744    /// Fast path for initial workbook load: assign a formula to a vertex that is known not to
745    /// already own dependency edges in the graph. Dirtiness is batched separately.
746    pub fn assign_formula_vertex_load_fast(
747        &mut self,
748        vid: VertexId,
749        ast_id: AstNodeId,
750        volatile: bool,
751        dynamic: bool,
752    ) {
753        debug_assert!(
754            !self.vertex_formulas.contains_key(&vid),
755            "load-fast formula assignment expects fresh/non-formula vertices"
756        );
757        self.store
758            .set_kind(vid, crate::engine::vertex::VertexKind::FormulaScalar);
759        self.vertex_values.remove(&vid);
760        self.vertex_formulas.insert(vid, ast_id);
761        self.mark_volatile(vid, volatile);
762        self.store.set_dynamic(vid, dynamic);
763    }
764
765    /// Public wrapper for adding edges without beginning a batch (caller manages batch)
766    pub fn add_edges_nobatch(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
767        self.add_dependent_edges_nobatch(dependent, dependencies);
768    }
769
770    /// Iterate all normal vertex ids
771    pub fn iter_vertex_ids(&self) -> impl Iterator<Item = VertexId> + '_ {
772        self.store.all_vertices()
773    }
774
775    /// Get current AbsCoord for a vertex
776    pub fn vertex_coord(&self, vid: VertexId) -> AbsCoord {
777        self.store.coord(vid)
778    }
779
780    /// Total number of allocated vertices (including deleted)
781    pub fn vertex_count(&self) -> usize {
782        self.store.len()
783    }
784
785    /// Replace CSR edges in one shot from adjacency and coords
786    pub fn build_edges_from_adjacency(
787        &mut self,
788        adjacency: Vec<(u32, Vec<u32>)>,
789        coords: Vec<AbsCoord>,
790        vertex_ids: Vec<u32>,
791    ) {
792        self.edges
793            .build_from_adjacency(adjacency, coords, vertex_ids);
794    }
795    /// Compute min/max used row among vertices within [start_col..=end_col] on a sheet.
796    pub fn used_row_bounds_for_columns(
797        &self,
798        sheet_id: SheetId,
799        start_col: u32,
800        end_col: u32,
801    ) -> Option<(u32, u32)> {
802        // Prefer sheet index when available
803        if let Some(index) = self.sheet_indexes.get(&sheet_id)
804            && !index.is_empty()
805        {
806            let mut min_r: Option<u32> = None;
807            let mut max_r: Option<u32> = None;
808            for vid in index.vertices_in_col_range(start_col, end_col) {
809                let r = self.store.coord(vid).row();
810                min_r = Some(min_r.map(|m| m.min(r)).unwrap_or(r));
811                max_r = Some(max_r.map(|m| m.max(r)).unwrap_or(r));
812            }
813            return match (min_r, max_r) {
814                (Some(a), Some(b)) => Some((a, b)),
815                _ => None,
816            };
817        }
818        // Fallback: scan cell maps on the fly
819        let mut min_r: Option<u32> = None;
820        let mut max_r: Option<u32> = None;
821        for cref in self.cell_to_vertex.keys() {
822            if cref.sheet_id == sheet_id {
823                let c = cref.coord.col();
824                if c >= start_col && c <= end_col {
825                    let r = cref.coord.row();
826                    min_r = Some(min_r.map(|m| m.min(r)).unwrap_or(r));
827                    max_r = Some(max_r.map(|m| m.max(r)).unwrap_or(r));
828                }
829            }
830        }
831        for packed in self.load_packed_to_vertex.keys() {
832            if packed.sheet_id() == sheet_id {
833                let c = packed.col0();
834                if c >= start_col && c <= end_col {
835                    let r = packed.row0();
836                    min_r = Some(min_r.map(|m| m.min(r)).unwrap_or(r));
837                    max_r = Some(max_r.map(|m| m.max(r)).unwrap_or(r));
838                }
839            }
840        }
841        match (min_r, max_r) {
842            (Some(a), Some(b)) => Some((a, b)),
843            _ => None,
844        }
845    }
846
847    /// Build (or rebuild) the sheet index for a given sheet if running in Lazy mode.
848    pub fn finalize_sheet_index(&mut self, sheet: &str) {
849        let Some(sheet_id) = self.sheet_reg.get_id(sheet) else {
850            return;
851        };
852        // If already present and non-empty, skip
853        if let Some(idx) = self.sheet_indexes.get(&sheet_id)
854            && !idx.is_empty()
855        {
856            return;
857        }
858        let mut idx = SheetIndex::new();
859        // Collect coords for this sheet
860        let mut batch: Vec<(AbsCoord, VertexId)> =
861            Vec::with_capacity(self.cell_to_vertex.len() + self.load_packed_to_vertex.len());
862        for (cref, vid) in &self.cell_to_vertex {
863            if cref.sheet_id == sheet_id {
864                batch.push((AbsCoord::new(cref.coord.row(), cref.coord.col()), *vid));
865            }
866        }
867        for (&packed, &vid) in &self.load_packed_to_vertex {
868            if packed.sheet_id() != sheet_id {
869                continue;
870            }
871            let coord = AbsCoord::new(packed.row0(), packed.col0());
872            let addr = CellRef::new(sheet_id, Coord::new(coord.row(), coord.col(), true, true));
873            if self.cell_to_vertex.contains_key(&addr) {
874                continue;
875            }
876            batch.push((coord, vid));
877        }
878        // Use batch builder
879        idx.add_vertices_batch(&batch);
880        self.sheet_indexes.insert(sheet_id, idx);
881    }
882
883    pub fn set_sheet_index_mode(&mut self, mode: crate::engine::SheetIndexMode) {
884        self.config.sheet_index_mode = mode;
885    }
886
887    /// Compute min/max used column among vertices within [start_row..=end_row] on a sheet.
888    pub fn used_col_bounds_for_rows(
889        &self,
890        sheet_id: SheetId,
891        start_row: u32,
892        end_row: u32,
893    ) -> Option<(u32, u32)> {
894        if let Some(index) = self.sheet_indexes.get(&sheet_id)
895            && !index.is_empty()
896        {
897            let mut min_c: Option<u32> = None;
898            let mut max_c: Option<u32> = None;
899            for vid in index.vertices_in_row_range(start_row, end_row) {
900                let c = self.store.coord(vid).col();
901                min_c = Some(min_c.map(|m| m.min(c)).unwrap_or(c));
902                max_c = Some(max_c.map(|m| m.max(c)).unwrap_or(c));
903            }
904            return match (min_c, max_c) {
905                (Some(a), Some(b)) => Some((a, b)),
906                _ => None,
907            };
908        }
909        // Fallback: scan cell maps on the fly
910        let mut min_c: Option<u32> = None;
911        let mut max_c: Option<u32> = None;
912        for cref in self.cell_to_vertex.keys() {
913            if cref.sheet_id == sheet_id {
914                let r = cref.coord.row();
915                if r >= start_row && r <= end_row {
916                    let c = cref.coord.col();
917                    min_c = Some(min_c.map(|m| m.min(c)).unwrap_or(c));
918                    max_c = Some(max_c.map(|m| m.max(c)).unwrap_or(c));
919                }
920            }
921        }
922        for packed in self.load_packed_to_vertex.keys() {
923            if packed.sheet_id() == sheet_id {
924                let r = packed.row0();
925                if r >= start_row && r <= end_row {
926                    let c = packed.col0();
927                    min_c = Some(min_c.map(|m| m.min(c)).unwrap_or(c));
928                    max_c = Some(max_c.map(|m| m.max(c)).unwrap_or(c));
929                }
930            }
931        }
932        match (min_c, max_c) {
933            (Some(a), Some(b)) => Some((a, b)),
934            _ => None,
935        }
936    }
937
938    /// Returns true if the given sheet currently contains any formula vertices.
939    pub fn sheet_has_formulas(&self, sheet_id: SheetId) -> bool {
940        // Check vertex_formulas keys; they represent formula vertices
941        for &vid in self.vertex_formulas.keys() {
942            if self.store.sheet_id(vid) == sheet_id {
943                return true;
944            }
945        }
946        false
947    }
948    pub fn new() -> Self {
949        Self::new_with_config(super::EvalConfig::default())
950    }
951
952    pub fn new_with_config(config: super::EvalConfig) -> Self {
953        let mut sheet_reg = SheetRegistry::new();
954        let default_sheet_id = sheet_reg.id_for(&config.default_sheet_name);
955
956        let mut g = Self {
957            store: VertexStore::new(),
958            edges: CsrMutableEdges::new(),
959            data_store: DataStore::new(),
960            vertex_values: FxHashMap::default(),
961            vertex_formulas: FxHashMap::default(),
962            // Phase 1 (ticket 610): Arrow-truth is the only supported mode.
963            // The dependency graph does not cache cell/formula literal payloads.
964            value_cache_enabled: false,
965            #[cfg(debug_assertions)]
966            graph_value_read_attempts: AtomicU64::new(0),
967            cell_to_vertex: std::collections::HashMap::with_hasher(CoordBuildHasher),
968            load_packed_to_vertex: std::collections::HashMap::with_hasher(CoordBuildHasher),
969            dirty_vertices: FxHashSet::default(),
970            volatile_vertices: FxHashSet::default(),
971            ref_error_vertices: FxHashSet::default(),
972            formula_to_range_deps: FxHashMap::default(),
973            stripe_to_dependents: FxHashMap::default(),
974            sheet_indexes: FxHashMap::default(),
975            sheet_reg,
976            default_sheet_id,
977            named_ranges: FxHashMap::default(),
978            named_ranges_lookup: FxHashMap::default(),
979            sheet_named_ranges: FxHashMap::default(),
980            sheet_named_ranges_lookup: FxHashMap::default(),
981            vertex_to_names: FxHashMap::default(),
982            name_vertex_lookup: FxHashMap::default(),
983            pending_name_links: FxHashMap::default(),
984            vertex_to_pending_names: FxHashMap::default(),
985            tables: FxHashMap::default(),
986            tables_lookup: FxHashMap::default(),
987            table_vertex_lookup: FxHashMap::default(),
988            source_scalars: FxHashMap::default(),
989            source_tables: FxHashMap::default(),
990            source_vertex_lookup: FxHashMap::default(),
991            name_vertex_seq: 0,
992            source_vertex_seq: 0,
993            cell_to_name_dependents: FxHashMap::default(),
994            name_to_cell_dependencies: FxHashMap::default(),
995            config: config.clone(),
996            pk_order: None,
997            spill_anchor_to_cells: FxHashMap::default(),
998            spill_cell_to_anchor: std::collections::HashMap::with_hasher(CoordBuildHasher),
999            first_load_assume_new: false,
1000            ensure_touched_sheets: FxHashSet::default(),
1001            tombstone_registry: TombstoneRegistry::default(),
1002            #[cfg(test)]
1003            instr: std::sync::Mutex::new(GraphInstrumentation::default()),
1004        };
1005
1006        if config.use_dynamic_topo {
1007            // Seed with currently active vertices (likely empty at startup)
1008            let nodes = g
1009                .store
1010                .all_vertices()
1011                .filter(|&id| g.store.vertex_exists_active(id));
1012            let mut pk = DynamicTopo::new(
1013                nodes,
1014                PkConfig {
1015                    visit_budget: config.pk_visit_budget,
1016                    compaction_interval_ops: config.pk_compaction_interval_ops,
1017                },
1018            );
1019            // Build an initial order using current graph
1020            let adapter = GraphAdapter { g: &g };
1021            pk.rebuild_full(&adapter);
1022            g.pk_order = Some(pk);
1023        }
1024
1025        g
1026    }
1027
1028    /// When dynamic topology is enabled, compute layers for a subset using PK ordering.
1029    pub(crate) fn pk_layers_for(&self, subset: &[VertexId]) -> Option<Vec<crate::engine::Layer>> {
1030        let pk = self.pk_order.as_ref()?;
1031        let adapter = crate::engine::topo::GraphAdapter { g: self };
1032        let layers = pk.layers_for(&adapter, subset, self.config.max_layer_width);
1033        Some(
1034            layers
1035                .into_iter()
1036                .map(|vs| crate::engine::Layer { vertices: vs })
1037                .collect(),
1038        )
1039    }
1040
1041    #[inline]
1042    pub(crate) fn dynamic_topo_enabled(&self) -> bool {
1043        self.pk_order.is_some()
1044    }
1045
1046    #[cfg(test)]
1047    pub fn reset_instr(&mut self) {
1048        if let Ok(mut g) = self.instr.lock() {
1049            *g = GraphInstrumentation::default();
1050        }
1051    }
1052
1053    #[cfg(test)]
1054    pub fn instr(&self) -> GraphInstrumentation {
1055        self.instr.lock().map(|g| g.clone()).unwrap_or_default()
1056    }
1057
1058    /// Begin batch operations - defer CSR rebuilds until end_batch() is called
1059    pub fn begin_batch(&mut self) {
1060        self.edges.begin_batch();
1061    }
1062
1063    /// End batch operations and trigger CSR rebuild if needed
1064    pub fn end_batch(&mut self) {
1065        self.edges.end_batch();
1066    }
1067
1068    pub fn default_sheet_id(&self) -> SheetId {
1069        self.default_sheet_id
1070    }
1071
1072    pub fn default_sheet_name(&self) -> &str {
1073        self.sheet_reg.name(self.default_sheet_id)
1074    }
1075
1076    pub fn set_default_sheet_by_name(&mut self, name: &str) {
1077        self.default_sheet_id = self.sheet_id_mut(name);
1078    }
1079
1080    pub fn set_default_sheet_by_id(&mut self, id: SheetId) {
1081        self.default_sheet_id = id;
1082    }
1083
1084    /// Returns the ID for a sheet name, creating one if it doesn't exist.
1085    pub fn sheet_id_mut(&mut self, name: &str) -> SheetId {
1086        self.sheet_reg.id_for(name)
1087    }
1088
1089    pub fn sheet_id(&self, name: &str) -> Option<SheetId> {
1090        self.sheet_reg.get_id(name)
1091    }
1092
1093    /// Resolve a sheet name to an existing ID or return a #REF! error.
1094    fn resolve_existing_sheet_id(&self, name: &str) -> Result<SheetId, ExcelError> {
1095        self.sheet_id(name).ok_or_else(|| {
1096            ExcelError::new(ExcelErrorKind::Ref).with_message(format!("Sheet not found: {name}"))
1097        })
1098    }
1099
1100    /// Returns the name of a sheet given its ID.
1101    pub fn sheet_name(&self, id: SheetId) -> &str {
1102        self.sheet_reg.name(id)
1103    }
1104
1105    /// Access the sheet registry (read-only) for external bindings
1106    pub fn sheet_reg(&self) -> &SheetRegistry {
1107        &self.sheet_reg
1108    }
1109
1110    pub(crate) fn data_store(&self) -> &DataStore {
1111        &self.data_store
1112    }
1113
1114    /// Converts a `CellRef` to a fully qualified A1-style string (e.g., "SheetName!A1").
1115    pub fn to_a1(&self, cell_ref: CellRef) -> String {
1116        format!("{}!{}", self.sheet_name(cell_ref.sheet_id), cell_ref.coord)
1117    }
1118
1119    pub(crate) fn vertex_len(&self) -> usize {
1120        self.store.len()
1121    }
1122
1123    /// Get mutable access to a sheet's index, creating it if it doesn't exist
1124    /// This is the primary way VertexEditor and internal operations access the index
1125    pub fn sheet_index_mut(&mut self, sheet_id: SheetId) -> &mut SheetIndex {
1126        self.sheet_indexes.entry(sheet_id).or_default()
1127    }
1128
1129    /// Get immutable access to a sheet's index, returns None if not initialized
1130    pub fn sheet_index(&self, sheet_id: SheetId) -> Option<&SheetIndex> {
1131        self.sheet_indexes.get(&sheet_id)
1132    }
1133
1134    /// Set a value in a cell, returns affected vertex IDs
1135    pub fn set_cell_value(
1136        &mut self,
1137        sheet: &str,
1138        row: u32,
1139        col: u32,
1140        value: LiteralValue,
1141    ) -> Result<OperationSummary, ExcelError> {
1142        let value = normalize_stored_literal(value);
1143        let sheet_id = self.sheet_id_mut(sheet);
1144        // External API is 1-based; store 0-based coords internally.
1145        let coord = Coord::from_excel(row, col, true, true);
1146        let addr = CellRef::new(sheet_id, coord);
1147        let mut created_placeholders = Vec::new();
1148
1149        let vertex_id = if let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
1150            // Check if it was a formula and remove dependencies
1151            let is_formula = matches!(
1152                self.store.kind(existing_id),
1153                VertexKind::FormulaScalar | VertexKind::FormulaArray
1154            );
1155
1156            if is_formula {
1157                self.remove_dependent_edges(existing_id);
1158                self.detach_vertex_from_names(existing_id);
1159                self.clear_pending_name_references(existing_id);
1160                self.vertex_formulas.remove(&existing_id);
1161            }
1162
1163            // Update to value kind
1164            self.store.set_kind(existing_id, VertexKind::Cell);
1165            if self.value_cache_enabled {
1166                let value_ref = self.data_store.store_value(value);
1167                self.vertex_values.insert(existing_id, value_ref);
1168            } else {
1169                // Ensure no stale payload remains if cache is disabled.
1170                self.vertex_values.remove(&existing_id);
1171            }
1172            existing_id
1173        } else {
1174            // Create new vertex
1175            created_placeholders.push(addr);
1176            let packed_coord = AbsCoord::from_excel(row, col);
1177            let vertex_id = self.store.allocate(packed_coord, sheet_id, 0x01); // dirty flag
1178
1179            // Add vertex coordinate for CSR
1180            self.edges.add_vertex(packed_coord, vertex_id.0);
1181
1182            // Add to sheet index for O(log n + k) range queries
1183            self.sheet_index_mut(sheet_id)
1184                .add_vertex(packed_coord, vertex_id);
1185
1186            self.store.set_kind(vertex_id, VertexKind::Cell);
1187            if self.value_cache_enabled {
1188                let value_ref = self.data_store.store_value(value);
1189                self.vertex_values.insert(vertex_id, value_ref);
1190            }
1191            self.cell_to_vertex.insert(addr, vertex_id);
1192            vertex_id
1193        };
1194
1195        // Cell edits clear any structural #REF! marking for this vertex.
1196        self.ref_error_vertices.remove(&vertex_id);
1197
1198        Ok(OperationSummary {
1199            affected_vertices: self.mark_dirty(vertex_id),
1200            created_placeholders,
1201        })
1202    }
1203
1204    /// Reserve capacity hints for upcoming bulk cell inserts (values only for now).
1205    pub fn reserve_cells(&mut self, additional: usize) {
1206        self.store.reserve(additional);
1207        if self.value_cache_enabled {
1208            self.vertex_values.reserve(additional);
1209        }
1210        self.cell_to_vertex.reserve(additional);
1211        // sheet_indexes: cannot easily reserve per-sheet without distribution; skip.
1212    }
1213
1214    /// Fast path for initial bulk load of value cells: avoids dirty propagation & dependency work.
1215    pub fn set_cell_value_bulk_untracked(
1216        &mut self,
1217        sheet: &str,
1218        row: u32,
1219        col: u32,
1220        value: LiteralValue,
1221    ) {
1222        let value = normalize_stored_literal(value);
1223        let sheet_id = self.sheet_id_mut(sheet);
1224        let coord = Coord::from_excel(row, col, true, true);
1225        let addr = CellRef::new(sheet_id, coord);
1226        if let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
1227            // Overwrite existing value vertex only (ignore formulas in bulk path)
1228            if matches!(
1229                self.store.kind(existing_id),
1230                VertexKind::FormulaScalar | VertexKind::FormulaArray
1231            ) {
1232                self.remove_dependent_edges(existing_id);
1233                self.detach_vertex_from_names(existing_id);
1234                self.clear_pending_name_references(existing_id);
1235                self.vertex_formulas.remove(&existing_id);
1236            }
1237            if self.value_cache_enabled {
1238                let value_ref = self.data_store.store_value(value);
1239                self.vertex_values.insert(existing_id, value_ref);
1240            } else {
1241                self.vertex_values.remove(&existing_id);
1242            }
1243            self.store.set_kind(existing_id, VertexKind::Cell);
1244            self.ref_error_vertices.remove(&existing_id);
1245            return;
1246        }
1247        let packed_coord = AbsCoord::from_excel(row, col);
1248        let vertex_id = self.store.allocate(packed_coord, sheet_id, 0x00); // not dirty
1249        self.edges.add_vertex(packed_coord, vertex_id.0);
1250        self.sheet_index_mut(sheet_id)
1251            .add_vertex(packed_coord, vertex_id);
1252        self.store.set_kind(vertex_id, VertexKind::Cell);
1253        self.ref_error_vertices.remove(&vertex_id);
1254        if self.value_cache_enabled {
1255            let value_ref = self.data_store.store_value(value);
1256            self.vertex_values.insert(vertex_id, value_ref);
1257        }
1258        self.cell_to_vertex.insert(addr, vertex_id);
1259    }
1260
1261    /// Bulk insert a collection of plain value cells (no formulas) more efficiently.
1262    pub fn bulk_insert_values<I>(&mut self, sheet: &str, cells: I)
1263    where
1264        I: IntoIterator<Item = (u32, u32, LiteralValue)>,
1265    {
1266        use crate::instant::FzInstant as Instant;
1267        let t0 = Instant::now();
1268        // Collect first to know size
1269        let collected: Vec<(u32, u32, LiteralValue)> = cells.into_iter().collect();
1270        if collected.is_empty() {
1271            return;
1272        }
1273        let sheet_id = self.sheet_id_mut(sheet);
1274        self.reserve_cells(collected.len());
1275        let t_reserve = Instant::now();
1276        let mut new_vertices: Vec<(AbsCoord, u32)> = Vec::with_capacity(collected.len());
1277        let mut index_items: Vec<(AbsCoord, VertexId)> = Vec::with_capacity(collected.len());
1278        // For new allocations, accumulate values and assign after a single batch store
1279        let mut new_value_coords: Vec<(AbsCoord, VertexId)> = Vec::with_capacity(collected.len());
1280        let mut new_value_literals: Vec<LiteralValue> = Vec::with_capacity(collected.len());
1281        // Detect fast path: during initial ingest, caller may guarantee most cells are new.
1282        let assume_new = self.first_load_assume_new
1283            && self
1284                .sheet_id(sheet)
1285                .map(|sid| !self.ensure_touched_sheets.contains(&sid))
1286                .unwrap_or(false);
1287
1288        for (row, col, value) in collected {
1289            let value = normalize_stored_literal(value);
1290            let coord = Coord::from_excel(row, col, true, true);
1291            let addr = CellRef::new(sheet_id, coord);
1292            if !assume_new && let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
1293                if matches!(
1294                    self.store.kind(existing_id),
1295                    VertexKind::FormulaScalar | VertexKind::FormulaArray
1296                ) {
1297                    self.remove_dependent_edges(existing_id);
1298                    self.detach_vertex_from_names(existing_id);
1299                    self.clear_pending_name_references(existing_id);
1300                    self.vertex_formulas.remove(&existing_id);
1301                }
1302                if self.value_cache_enabled {
1303                    let value_ref = self.data_store.store_value(value);
1304                    self.vertex_values.insert(existing_id, value_ref);
1305                } else {
1306                    self.vertex_values.remove(&existing_id);
1307                }
1308                self.store.set_kind(existing_id, VertexKind::Cell);
1309                continue;
1310            }
1311            let packed = AbsCoord::from_excel(row, col);
1312            let vertex_id = self.store.allocate(packed, sheet_id, 0x00);
1313            self.store.set_kind(vertex_id, VertexKind::Cell);
1314            // Defer value arena storage to a single batch
1315            new_value_coords.push((packed, vertex_id));
1316            new_value_literals.push(value);
1317            self.cell_to_vertex.insert(addr, vertex_id);
1318            new_vertices.push((packed, vertex_id.0));
1319            index_items.push((packed, vertex_id));
1320        }
1321        // Perform a single batch store for newly allocated values
1322        if self.value_cache_enabled && !new_value_literals.is_empty() {
1323            let vrefs = self.data_store.store_values_batch(new_value_literals);
1324            debug_assert_eq!(vrefs.len(), new_value_coords.len());
1325            for (i, (_pc, vid)) in new_value_coords.iter().enumerate() {
1326                self.vertex_values.insert(*vid, vrefs[i]);
1327            }
1328        }
1329        let t_after_alloc = Instant::now();
1330        if !new_vertices.is_empty() {
1331            let t_edges_start = Instant::now();
1332            self.edges.add_vertices_batch(&new_vertices);
1333            let t_edges_done = Instant::now();
1334
1335            match self.config.sheet_index_mode {
1336                crate::engine::SheetIndexMode::Eager => {
1337                    self.sheet_index_mut(sheet_id)
1338                        .add_vertices_batch(&index_items);
1339                }
1340                crate::engine::SheetIndexMode::Lazy => {
1341                    // Skip building index now; will be built on-demand
1342                }
1343                crate::engine::SheetIndexMode::FastBatch => {
1344                    // FastBatch for now delegates to same batch insert (future: build from sorted arrays)
1345                    self.sheet_index_mut(sheet_id)
1346                        .add_vertices_batch(&index_items);
1347                }
1348            }
1349            let t_index_done = Instant::now();
1350        }
1351    }
1352
1353    /// Set a formula in a cell, returns affected vertex IDs
1354    pub fn set_cell_formula(
1355        &mut self,
1356        sheet: &str,
1357        row: u32,
1358        col: u32,
1359        ast: ASTNode,
1360    ) -> Result<OperationSummary, ExcelError> {
1361        let volatile = self.is_ast_volatile(&ast);
1362        self.set_cell_formula_with_volatility(sheet, row, col, ast, volatile)
1363    }
1364
1365    /// Set a formula in a cell with a known volatility flag (context-scoped detection upstream)
1366    pub fn set_cell_formula_with_volatility(
1367        &mut self,
1368        sheet: &str,
1369        row: u32,
1370        col: u32,
1371        ast: ASTNode,
1372        volatile: bool,
1373    ) -> Result<OperationSummary, ExcelError> {
1374        let dbg = std::env::var("FZ_DEBUG_LOAD")
1375            .ok()
1376            .is_some_and(|v| v != "0");
1377        let dep_ms_thresh: u128 = std::env::var("FZ_DEBUG_DEP_MS")
1378            .ok()
1379            .and_then(|s| s.parse().ok())
1380            .unwrap_or(0);
1381        let sample_n: usize = std::env::var("FZ_DEBUG_SAMPLE_N")
1382            .ok()
1383            .and_then(|s| s.parse().ok())
1384            .unwrap_or(0);
1385        let t0 = if dbg {
1386            Some(crate::instant::FzInstant::now())
1387        } else {
1388            None
1389        };
1390        let sheet_id = self.sheet_id_mut(sheet);
1391        let coord = Coord::from_excel(row, col, true, true);
1392        let addr = CellRef::new(sheet_id, coord);
1393
1394        // Rewrite context-dependent structured references (e.g., this-row selectors) into
1395        // concrete cell/range references for this formula cell.
1396        let mut ast = ast;
1397        self.rewrite_structured_references_for_cell(&mut ast, addr)?;
1398
1399        // Extract dependencies from AST, creating placeholders if needed
1400        let t_dep0 = if dbg {
1401            Some(crate::instant::FzInstant::now())
1402        } else {
1403            None
1404        };
1405        let (
1406            new_dependencies,
1407            new_range_dependencies,
1408            mut created_placeholders,
1409            named_dependencies,
1410            unresolved_names,
1411        ) = self.extract_dependencies_with_pending_names(&ast, sheet_id)?;
1412        if let (true, Some(t)) = (dbg, t_dep0) {
1413            let elapsed = t.elapsed().as_millis();
1414            // Only log if over threshold or sampled
1415            let do_log = (dep_ms_thresh > 0 && elapsed >= dep_ms_thresh)
1416                || (sample_n > 0 && (row as usize).is_multiple_of(sample_n));
1417            if dep_ms_thresh == 0 && sample_n == 0 {
1418                // default: very light sampling every 1000 rows
1419                if row.is_multiple_of(1000) {
1420                    eprintln!(
1421                        "[fz][dep] {}!{} extracted: deps={}, ranges={}, placeholders={}, names={} in {} ms",
1422                        self.sheet_name(sheet_id),
1423                        crate::reference::Coord::from_excel(row, col, true, true),
1424                        new_dependencies.len(),
1425                        new_range_dependencies.len(),
1426                        created_placeholders.len(),
1427                        named_dependencies.len(),
1428                        elapsed
1429                    );
1430                }
1431            } else if do_log {
1432                eprintln!(
1433                    "[fz][dep] {}!{} extracted: deps={}, ranges={}, placeholders={}, names={} in {} ms",
1434                    self.sheet_name(sheet_id),
1435                    crate::reference::Coord::from_excel(row, col, true, true),
1436                    new_dependencies.len(),
1437                    new_range_dependencies.len(),
1438                    created_placeholders.len(),
1439                    named_dependencies.len(),
1440                    elapsed
1441                );
1442            }
1443        }
1444
1445        // Check for self-reference (immediate cycle detection)
1446        let addr_vertex_id = self.get_or_create_vertex(&addr, &mut created_placeholders);
1447
1448        // Editing a formula clears any prior structural #REF! marking for this vertex.
1449        self.ref_error_vertices.remove(&addr_vertex_id);
1450
1451        if new_dependencies.contains(&addr_vertex_id) {
1452            return Err(ExcelError::new(ExcelErrorKind::Circ)
1453                .with_message("Self-reference detected".to_string()));
1454        }
1455
1456        for &name_vertex in &named_dependencies {
1457            let mut visited = FxHashSet::default();
1458            if self.name_depends_on_vertex(name_vertex, addr_vertex_id, &mut visited) {
1459                return Err(ExcelError::new(ExcelErrorKind::Circ)
1460                    .with_message("Circular reference through named range".to_string()));
1461            }
1462        }
1463
1464        // Remove old dependencies first
1465        self.remove_dependent_edges(addr_vertex_id);
1466        self.detach_vertex_from_names(addr_vertex_id);
1467        self.clear_pending_name_references(addr_vertex_id);
1468
1469        // Update vertex properties
1470        self.store
1471            .set_kind(addr_vertex_id, VertexKind::FormulaScalar);
1472        let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
1473        self.vertex_formulas.insert(addr_vertex_id, ast_id);
1474        self.store.set_dirty(addr_vertex_id, true);
1475
1476        // Clear any cached value since this is now a formula
1477        self.vertex_values.remove(&addr_vertex_id);
1478
1479        self.mark_volatile(addr_vertex_id, volatile);
1480        let dynamic = self.is_ast_dynamic(&ast);
1481        self.store.set_dynamic(addr_vertex_id, dynamic);
1482
1483        if !named_dependencies.is_empty() {
1484            self.attach_vertex_to_names(addr_vertex_id, &named_dependencies);
1485        }
1486        for unresolved_name in &unresolved_names {
1487            self.record_pending_name_reference(sheet_id, unresolved_name, addr_vertex_id);
1488        }
1489
1490        if let (true, Some(t)) = (dbg, t0) {
1491            let elapsed = t.elapsed().as_millis();
1492            let log_set = dep_ms_thresh > 0 && elapsed >= dep_ms_thresh;
1493            if log_set {
1494                eprintln!(
1495                    "[fz][set] {}!{} total {} ms",
1496                    self.sheet_name(sheet_id),
1497                    crate::reference::Coord::from_excel(row, col, true, true),
1498                    elapsed
1499                );
1500            }
1501        }
1502
1503        // Add new dependency edges
1504        self.add_dependent_edges(addr_vertex_id, &new_dependencies);
1505        self.add_range_dependent_edges(addr_vertex_id, &new_range_dependencies, sheet_id);
1506
1507        Ok(OperationSummary {
1508            affected_vertices: self.mark_dirty(addr_vertex_id),
1509            created_placeholders,
1510        })
1511    }
1512
1513    pub(crate) fn rewrite_structured_references_for_cell(
1514        &self,
1515        ast: &mut ASTNode,
1516        cell: CellRef,
1517    ) -> Result<(), ExcelError> {
1518        self.rewrite_structured_references_node(ast, cell)
1519    }
1520
1521    fn rewrite_structured_references_node(
1522        &self,
1523        node: &mut ASTNode,
1524        cell: CellRef,
1525    ) -> Result<(), ExcelError> {
1526        match &mut node.node_type {
1527            ASTNodeType::Reference { reference, .. } => {
1528                self.rewrite_structured_reference(reference, cell)
1529            }
1530            ASTNodeType::UnaryOp { expr, .. } => {
1531                self.rewrite_structured_references_node(expr, cell)
1532            }
1533            ASTNodeType::BinaryOp { left, right, .. } => {
1534                self.rewrite_structured_references_node(left, cell)?;
1535                self.rewrite_structured_references_node(right, cell)
1536            }
1537            ASTNodeType::Function { args, .. } => {
1538                for a in args.iter_mut() {
1539                    self.rewrite_structured_references_node(a, cell)?;
1540                }
1541                Ok(())
1542            }
1543            ASTNodeType::Array(rows) => {
1544                for r in rows.iter_mut() {
1545                    for item in r.iter_mut() {
1546                        self.rewrite_structured_references_node(item, cell)?;
1547                    }
1548                }
1549                Ok(())
1550            }
1551            ASTNodeType::Literal(_) => Ok(()),
1552        }
1553    }
1554
1555    fn rewrite_structured_reference(
1556        &self,
1557        reference: &mut ReferenceType,
1558        cell: CellRef,
1559    ) -> Result<(), ExcelError> {
1560        use formualizer_parse::parser::{SpecialItem, TableSpecifier};
1561
1562        let ReferenceType::Table(tref) = reference else {
1563            return Ok(());
1564        };
1565
1566        // This-row shorthand: parsed as an unnamed table reference with a Combination specifier.
1567        if !tref.name.is_empty() {
1568            return Ok(());
1569        }
1570
1571        let col_name = match &tref.specifier {
1572            Some(TableSpecifier::Combination(parts)) => {
1573                let mut saw_this_row = false;
1574                let mut col: Option<&str> = None;
1575                for p in parts {
1576                    match p.as_ref() {
1577                        TableSpecifier::SpecialItem(SpecialItem::ThisRow) => {
1578                            saw_this_row = true;
1579                        }
1580                        TableSpecifier::Column(c) => {
1581                            if col.is_some() {
1582                                return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1583                                    "This-row structured reference with multiple columns is not supported"
1584                                        .to_string(),
1585                                ));
1586                            }
1587                            col = Some(c.as_str());
1588                        }
1589                        other => {
1590                            return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1591                                format!(
1592                                    "Unsupported this-row structured reference component: {other}"
1593                                ),
1594                            ));
1595                        }
1596                    }
1597                }
1598                if !saw_this_row {
1599                    return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1600                        "Unnamed structured reference requires a this-row selector".to_string(),
1601                    ));
1602                }
1603                col.ok_or_else(|| {
1604                    ExcelError::new(ExcelErrorKind::NImpl).with_message(
1605                        "This-row structured reference missing column selector".to_string(),
1606                    )
1607                })?
1608            }
1609            _ => {
1610                return Err(ExcelError::new(ExcelErrorKind::NImpl).with_message(
1611                    "Unnamed structured reference form is not supported".to_string(),
1612                ));
1613            }
1614        };
1615
1616        let Some(table) = self.find_table_containing_cell(cell) else {
1617            return Err(ExcelError::new(ExcelErrorKind::Name)
1618                .with_message("This-row structured reference used outside a table".to_string()));
1619        };
1620
1621        let row0 = cell.coord.row();
1622        let col0 = cell.coord.col();
1623        let sr0 = table.range.start.coord.row();
1624        let sc0 = table.range.start.coord.col();
1625        let er0 = table.range.end.coord.row();
1626        let ec0 = table.range.end.coord.col();
1627
1628        if row0 < sr0 || row0 > er0 || col0 < sc0 || col0 > ec0 {
1629            return Err(ExcelError::new(ExcelErrorKind::Name)
1630                .with_message("This-row structured reference used outside a table".to_string()));
1631        }
1632
1633        if table.header_row && row0 == sr0 {
1634            return Err(ExcelError::new(ExcelErrorKind::Ref).with_message(
1635                "This-row structured references are not valid in the table header row".to_string(),
1636            ));
1637        }
1638
1639        let data_start = if table.header_row { sr0 + 1 } else { sr0 };
1640        if row0 < data_start {
1641            return Err(ExcelError::new(ExcelErrorKind::Ref).with_message(
1642                "This-row structured references require a data/totals row context".to_string(),
1643            ));
1644        }
1645
1646        let Some(idx) = table.col_index(col_name) else {
1647            return Err(ExcelError::new(ExcelErrorKind::Ref).with_message(format!(
1648                "Unknown table column in this-row reference: {col_name}"
1649            )));
1650        };
1651        let target_col0 = sc0 + (idx as u32);
1652        let target_row = row0 + 1;
1653        let target_col = target_col0 + 1;
1654
1655        *reference = ReferenceType::Cell {
1656            sheet: None,
1657            row: target_row,
1658            col: target_col,
1659            row_abs: true,
1660            col_abs: true,
1661        };
1662
1663        Ok(())
1664    }
1665
1666    fn find_table_containing_cell(&self, cell: CellRef) -> Option<&tables::TableEntry> {
1667        let row0 = cell.coord.row();
1668        let col0 = cell.coord.col();
1669
1670        let mut best: Option<&tables::TableEntry> = None;
1671        let mut best_area: u64 = u64::MAX;
1672        let mut best_name: &str = "";
1673
1674        for t in self.tables.values() {
1675            if t.sheet_id() != cell.sheet_id {
1676                continue;
1677            }
1678            let sr0 = t.range.start.coord.row();
1679            let sc0 = t.range.start.coord.col();
1680            let er0 = t.range.end.coord.row();
1681            let ec0 = t.range.end.coord.col();
1682            if row0 < sr0 || row0 > er0 || col0 < sc0 || col0 > ec0 {
1683                continue;
1684            }
1685
1686            let h = (er0 - sr0 + 1) as u64;
1687            let w = (ec0 - sc0 + 1) as u64;
1688            let area = h.saturating_mul(w);
1689            let name = t.name.as_str();
1690            let better = match best {
1691                None => true,
1692                Some(_) => area < best_area || (area == best_area && name < best_name),
1693            };
1694            if better {
1695                best = Some(t);
1696                best_area = area;
1697                best_name = name;
1698            }
1699        }
1700
1701        best
1702    }
1703
1704    pub fn set_cell_value_ref(
1705        &mut self,
1706        cell: formualizer_common::SheetCellRef<'_>,
1707        value: LiteralValue,
1708    ) -> Result<OperationSummary, ExcelError> {
1709        let owned = cell.into_owned();
1710        let sheet_id = match owned.sheet {
1711            formualizer_common::SheetLocator::Id(id) => id,
1712            formualizer_common::SheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
1713            formualizer_common::SheetLocator::Current => self.default_sheet_id,
1714        };
1715        let sheet_name = self.sheet_name(sheet_id).to_string();
1716        self.set_cell_value(
1717            &sheet_name,
1718            owned.coord.row() + 1,
1719            owned.coord.col() + 1,
1720            value,
1721        )
1722    }
1723
1724    pub fn set_cell_formula_ref(
1725        &mut self,
1726        cell: formualizer_common::SheetCellRef<'_>,
1727        ast: ASTNode,
1728    ) -> Result<OperationSummary, ExcelError> {
1729        let owned = cell.into_owned();
1730        let sheet_id = match owned.sheet {
1731            formualizer_common::SheetLocator::Id(id) => id,
1732            formualizer_common::SheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
1733            formualizer_common::SheetLocator::Current => self.default_sheet_id,
1734        };
1735        let sheet_name = self.sheet_name(sheet_id).to_string();
1736        self.set_cell_formula(
1737            &sheet_name,
1738            owned.coord.row() + 1,
1739            owned.coord.col() + 1,
1740            ast,
1741        )
1742    }
1743
1744    pub fn get_cell_value_ref(
1745        &self,
1746        cell: formualizer_common::SheetCellRef<'_>,
1747    ) -> Option<LiteralValue> {
1748        let owned = cell.into_owned();
1749        let sheet_id = match owned.sheet {
1750            formualizer_common::SheetLocator::Id(id) => id,
1751            formualizer_common::SheetLocator::Name(name) => self.sheet_id(name.as_ref())?,
1752            formualizer_common::SheetLocator::Current => self.default_sheet_id,
1753        };
1754        let sheet_name = self.sheet_name(sheet_id);
1755        self.get_cell_value(sheet_name, owned.coord.row() + 1, owned.coord.col() + 1)
1756    }
1757
1758    /// Get current value from a cell
1759    pub fn get_cell_value(&self, sheet: &str, row: u32, col: u32) -> Option<LiteralValue> {
1760        if !self.value_cache_enabled {
1761            #[cfg(debug_assertions)]
1762            {
1763                self.graph_value_read_attempts
1764                    .fetch_add(1, Ordering::Relaxed);
1765            }
1766            return None;
1767        }
1768        let sheet_id = self.sheet_reg.get_id(sheet)?;
1769        let coord = Coord::from_excel(row, col, true, true);
1770        let addr = CellRef::new(sheet_id, coord);
1771
1772        self.get_vertex_id_for_address(&addr)
1773            .and_then(|&vertex_id| {
1774                // Check values hashmap (stores both cell values and formula results)
1775                self.vertex_values
1776                    .get(&vertex_id)
1777                    .map(|&value_ref| self.data_store.retrieve_value(value_ref))
1778            })
1779    }
1780
1781    /// Mark vertex dirty and propagate to dependents
1782    fn mark_dirty(&mut self, vertex_id: VertexId) -> Vec<VertexId> {
1783        let mut affected = FxHashSet::default();
1784        let mut to_visit = Vec::new();
1785        let mut visited_for_propagation = FxHashSet::default();
1786
1787        // Only mark the source vertex as dirty if it's a formula
1788        // Value cells don't get marked dirty themselves but are still affected
1789        let is_formula = matches!(
1790            self.store.kind(vertex_id),
1791            VertexKind::FormulaScalar
1792                | VertexKind::FormulaArray
1793                | VertexKind::NamedScalar
1794                | VertexKind::NamedArray
1795        );
1796
1797        if is_formula {
1798            to_visit.push(vertex_id);
1799        } else {
1800            // Value cells are affected (for tracking) but not marked dirty
1801            affected.insert(vertex_id);
1802        }
1803
1804        // Initial propagation from direct and range dependents
1805        {
1806            // Get dependents (vertices that depend on this vertex)
1807            if let Some(dependents) = self.dependents_slice(vertex_id) {
1808                to_visit.extend(dependents.iter().copied());
1809            } else {
1810                let dependents = self.get_dependents(vertex_id);
1811                to_visit.extend(dependents);
1812            }
1813
1814            if let Some(name_set) = self.cell_to_name_dependents.get(&vertex_id) {
1815                for &name_vertex in name_set {
1816                    to_visit.push(name_vertex);
1817                }
1818            }
1819
1820            to_visit.extend(self.collect_range_dependents_for_vertex(vertex_id));
1821        }
1822
1823        while let Some(id) = to_visit.pop() {
1824            if !visited_for_propagation.insert(id) {
1825                continue; // Already processed
1826            }
1827            affected.insert(id);
1828
1829            // Mark vertex as dirty
1830            self.store.set_dirty(id, true);
1831
1832            // Add direct dependents to visit list
1833            if let Some(dependents) = self.dependents_slice(id) {
1834                to_visit.extend(dependents.iter().copied());
1835            } else {
1836                let dependents = self.get_dependents(id);
1837                to_visit.extend(dependents);
1838            }
1839            to_visit.extend(self.collect_range_dependents_for_vertex(id));
1840        }
1841
1842        // Add to dirty set
1843        self.dirty_vertices.extend(&affected);
1844
1845        // Return as Vec for compatibility
1846        affected.into_iter().collect()
1847    }
1848
1849    /// Get all vertices that need evaluation
1850    pub fn get_evaluation_vertices(&self) -> Vec<VertexId> {
1851        let mut combined = FxHashSet::default();
1852        combined.extend(&self.dirty_vertices);
1853        combined.extend(&self.volatile_vertices);
1854
1855        let mut result: Vec<VertexId> = combined
1856            .into_iter()
1857            .filter(|&id| {
1858                // Only include formula vertices
1859                matches!(
1860                    self.store.kind(id),
1861                    VertexKind::FormulaScalar
1862                        | VertexKind::FormulaArray
1863                        | VertexKind::NamedScalar
1864                        | VertexKind::NamedArray
1865                )
1866            })
1867            .collect();
1868        result.sort_unstable();
1869        result
1870    }
1871
1872    /// Clear dirty flags after successful evaluation
1873    pub fn clear_dirty_flags(&mut self, vertices: &[VertexId]) {
1874        for &vertex_id in vertices {
1875            self.store.set_dirty(vertex_id, false);
1876            self.dirty_vertices.remove(&vertex_id);
1877        }
1878    }
1879
1880    /// đź”® Scalability Hook: Clear volatile vertices after evaluation cycle
1881    pub fn clear_volatile_flags(&mut self) {
1882        self.volatile_vertices.clear();
1883    }
1884
1885    /// Re-marks all volatile vertices as dirty for the next evaluation cycle.
1886    pub(crate) fn redirty_volatiles(&mut self) {
1887        let volatile_ids: Vec<VertexId> = self.volatile_vertices.iter().copied().collect();
1888        for id in volatile_ids {
1889            self.mark_dirty(id);
1890        }
1891    }
1892
1893    fn get_or_create_vertex(
1894        &mut self,
1895        addr: &CellRef,
1896        created_placeholders: &mut Vec<CellRef>,
1897    ) -> VertexId {
1898        if let Some(&vertex_id) = self.cell_to_vertex.get(addr) {
1899            return vertex_id;
1900        }
1901
1902        created_placeholders.push(*addr);
1903        let packed_coord = AbsCoord::new(addr.coord.row(), addr.coord.col());
1904        let vertex_id = self.store.allocate(packed_coord, addr.sheet_id, 0x00);
1905
1906        // Add vertex coordinate for CSR
1907        self.edges.add_vertex(packed_coord, vertex_id.0);
1908
1909        // Add to sheet index for O(log n + k) range queries
1910        self.sheet_index_mut(addr.sheet_id)
1911            .add_vertex(packed_coord, vertex_id);
1912
1913        self.store.set_kind(vertex_id, VertexKind::Empty);
1914        self.cell_to_vertex.insert(*addr, vertex_id);
1915        vertex_id
1916    }
1917
1918    fn add_dependent_edges(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
1919        // Batch to avoid repeated CSR rebuilds and keep reverse edges current
1920        self.edges.begin_batch();
1921
1922        // If PK enabled, update order using a short-lived adapter without holding &mut self
1923        // Track dependencies that should be skipped if rejecting cycle-creating edges
1924        let mut skip_deps: rustc_hash::FxHashSet<VertexId> = rustc_hash::FxHashSet::default();
1925        if self.pk_order.is_some()
1926            && let Some(mut pk) = self.pk_order.take()
1927        {
1928            pk.ensure_nodes(std::iter::once(dependent));
1929            pk.ensure_nodes(dependencies.iter().copied());
1930            {
1931                let adapter = GraphAdapter { g: self };
1932                for &dep_id in dependencies {
1933                    match pk.try_add_edge(&adapter, dep_id, dependent) {
1934                        Ok(_) => {}
1935                        Err(_cycle) => {
1936                            if self.config.pk_reject_cycle_edges {
1937                                skip_deps.insert(dep_id);
1938                            } else {
1939                                pk.rebuild_full(&adapter);
1940                            }
1941                        }
1942                    }
1943                }
1944            } // drop adapter
1945            self.pk_order = Some(pk);
1946        }
1947
1948        // Now mutate engine edges; if rejecting cycles, re-check and skip those that would create cycles
1949        for &dep_id in dependencies {
1950            if self.config.pk_reject_cycle_edges && skip_deps.contains(&dep_id) {
1951                continue;
1952            }
1953            self.edges.add_edge(dependent, dep_id);
1954            #[cfg(test)]
1955            {
1956                if let Ok(mut g) = self.instr.lock() {
1957                    g.edges_added += 1;
1958                }
1959            }
1960        }
1961
1962        self.edges.end_batch();
1963    }
1964
1965    /// Like add_dependent_edges, but assumes caller is managing edges.begin_batch/end_batch
1966    fn add_dependent_edges_nobatch(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
1967        // If PK enabled, update order using a short-lived adapter without holding &mut self
1968        let mut skip_deps: rustc_hash::FxHashSet<VertexId> = rustc_hash::FxHashSet::default();
1969        if self.pk_order.is_some()
1970            && let Some(mut pk) = self.pk_order.take()
1971        {
1972            pk.ensure_nodes(std::iter::once(dependent));
1973            pk.ensure_nodes(dependencies.iter().copied());
1974            {
1975                let adapter = GraphAdapter { g: self };
1976                for &dep_id in dependencies {
1977                    match pk.try_add_edge(&adapter, dep_id, dependent) {
1978                        Ok(_) => {}
1979                        Err(_cycle) => {
1980                            if self.config.pk_reject_cycle_edges {
1981                                skip_deps.insert(dep_id);
1982                            } else {
1983                                pk.rebuild_full(&adapter);
1984                            }
1985                        }
1986                    }
1987                }
1988            }
1989            self.pk_order = Some(pk);
1990        }
1991
1992        for &dep_id in dependencies {
1993            if self.config.pk_reject_cycle_edges && skip_deps.contains(&dep_id) {
1994                continue;
1995            }
1996            self.edges.add_edge(dependent, dep_id);
1997            #[cfg(test)]
1998            {
1999                if let Ok(mut g) = self.instr.lock() {
2000                    g.edges_added += 1;
2001                }
2002            }
2003        }
2004    }
2005
2006    /// Bulk set formulas on a sheet using a single dependency plan and batched edge updates.
2007    pub fn bulk_set_formulas<I>(&mut self, sheet: &str, items: I) -> Result<usize, ExcelError>
2008    where
2009        I: IntoIterator<Item = (u32, u32, ASTNode)>,
2010    {
2011        let collected: Vec<(u32, u32, ASTNode)> = items.into_iter().collect();
2012        if collected.is_empty() {
2013            return Ok(0);
2014        }
2015        let vol_flags: Vec<bool> = collected
2016            .iter()
2017            .map(|(_, _, ast)| self.is_ast_volatile(ast))
2018            .collect();
2019        self.bulk_set_formulas_with_volatility(sheet, collected, vol_flags)
2020    }
2021
2022    pub fn bulk_set_formulas_with_volatility(
2023        &mut self,
2024        sheet: &str,
2025        collected: Vec<(u32, u32, ASTNode)>,
2026        vol_flags: Vec<bool>,
2027    ) -> Result<usize, ExcelError> {
2028        use formualizer_parse::parser::CollectPolicy;
2029        let sheet_id = self.sheet_id_mut(sheet);
2030
2031        if collected.is_empty() {
2032            return Ok(0);
2033        }
2034
2035        // 1) Build plan across all formulas (read-only, no graph mutation)
2036        let tiny_refs = collected.iter().map(|(r, c, ast)| (sheet, *r, *c, ast));
2037        let policy = CollectPolicy {
2038            expand_small_ranges: true,
2039            range_expansion_limit: self.config.range_expansion_limit,
2040            include_names: true,
2041        };
2042        let plan = crate::engine::plan::build_dependency_plan(
2043            &mut self.sheet_reg,
2044            tiny_refs,
2045            &policy,
2046            Some(&vol_flags),
2047        )?;
2048
2049        // 2) Ensure/create target vertices and referenced cells (placeholders) once
2050        let mut created_placeholders: Vec<CellRef> = Vec::new();
2051
2052        // Targets
2053        let mut target_vids: Vec<VertexId> = Vec::with_capacity(plan.formula_targets.len());
2054        for (sid, pc) in &plan.formula_targets {
2055            let addr = CellRef::new(*sid, Coord::new(pc.row(), pc.col(), true, true));
2056            let vid = if let Some(&existing) = self.cell_to_vertex.get(&addr) {
2057                existing
2058            } else {
2059                self.get_or_create_vertex(&addr, &mut created_placeholders)
2060            };
2061            target_vids.push(vid);
2062        }
2063
2064        // Global referenced cells
2065        let mut dep_vids: Vec<VertexId> = Vec::with_capacity(plan.global_cells.len());
2066        for (sid, pc) in &plan.global_cells {
2067            let addr = CellRef::new(*sid, Coord::new(pc.row(), pc.col(), true, true));
2068            let vid = if let Some(&existing) = self.cell_to_vertex.get(&addr) {
2069                existing
2070            } else {
2071                self.get_or_create_vertex(&addr, &mut created_placeholders)
2072            };
2073            dep_vids.push(vid);
2074        }
2075
2076        // 3) Store ASTs in batch and update kinds/flags/value map
2077        let ast_ids = self
2078            .data_store
2079            .store_asts_batch(collected.iter().map(|(_, _, ast)| ast), &self.sheet_reg);
2080        for (i, &tvid) in target_vids.iter().enumerate() {
2081            // If this cell already had a formula, remove its edges once here
2082            if self.vertex_formulas.contains_key(&tvid) {
2083                self.remove_dependent_edges(tvid);
2084            }
2085            self.store.set_kind(tvid, VertexKind::FormulaScalar);
2086            self.store.set_dirty(tvid, true);
2087            self.vertex_values.remove(&tvid);
2088            self.vertex_formulas.insert(tvid, ast_ids[i]);
2089            self.mark_volatile(tvid, vol_flags.get(i).copied().unwrap_or(false));
2090
2091            let dynamic = self.is_ast_dynamic(&collected[i].2);
2092            self.store.set_dynamic(tvid, dynamic);
2093        }
2094
2095        // 4) Add edges in one batch
2096        self.edges.begin_batch();
2097        for (i, tvid) in target_vids.iter().copied().enumerate() {
2098            let mut deps: Vec<VertexId> = Vec::new();
2099
2100            // Map per-formula indices into dep_vids
2101            if let Some(indices) = plan.per_formula_cells.get(i) {
2102                deps.reserve(indices.len());
2103                for &idx in indices {
2104                    if let Some(vid) = dep_vids.get(idx as usize) {
2105                        deps.push(*vid);
2106                    }
2107                }
2108            }
2109
2110            if let Some(names) = plan.per_formula_names.get(i)
2111                && !names.is_empty()
2112            {
2113                let mut name_vertices = Vec::new();
2114                let formula_sheet = plan
2115                    .formula_targets
2116                    .get(i)
2117                    .map(|(sid, _)| *sid)
2118                    .unwrap_or(sheet_id);
2119                for name in names {
2120                    if let Some(named) = self.resolve_name_entry(name, formula_sheet) {
2121                        deps.push(named.vertex);
2122                        name_vertices.push(named.vertex);
2123                    } else if let Some(source) = self.resolve_source_scalar_entry(name) {
2124                        deps.push(source.vertex);
2125                    } else {
2126                        self.record_pending_name_reference(formula_sheet, name, tvid);
2127                    }
2128                }
2129                if !name_vertices.is_empty() {
2130                    self.attach_vertex_to_names(tvid, &name_vertices);
2131                }
2132            }
2133
2134            if let Some(tables) = plan.per_formula_tables.get(i)
2135                && !tables.is_empty()
2136            {
2137                for table_name in tables {
2138                    if let Some(table) = self.resolve_table_entry(table_name) {
2139                        deps.push(table.vertex);
2140                    } else if let Some(source) = self.resolve_source_table_entry(table_name) {
2141                        deps.push(source.vertex);
2142                    }
2143                }
2144            }
2145
2146            if !deps.is_empty() {
2147                self.add_dependent_edges_nobatch(tvid, &deps);
2148            }
2149
2150            // Range deps from plan are already compact RangeKeys; register directly.
2151            if let Some(rks) = plan.per_formula_ranges.get(i) {
2152                self.add_range_deps_from_keys(tvid, rks, sheet_id);
2153            }
2154        }
2155        self.edges.end_batch();
2156
2157        Ok(collected.len())
2158    }
2159
2160    /// Public (crate) helper to add a single dependency edge (dependent -> dependency) used for restoration/undo.
2161    pub fn add_dependency_edge(&mut self, dependent: VertexId, dependency: VertexId) {
2162        if dependent == dependency {
2163            return;
2164        }
2165        // If PK enabled attempt to add maintaining ordering; fallback to rebuild if cycle
2166        if self.pk_order.is_some()
2167            && let Some(mut pk) = self.pk_order.take()
2168        {
2169            pk.ensure_nodes(std::iter::once(dependent));
2170            pk.ensure_nodes(std::iter::once(dependency));
2171            let adapter = GraphAdapter { g: self };
2172            if pk.try_add_edge(&adapter, dependency, dependent).is_err() {
2173                // Cycle: rebuild full (conservative)
2174                pk.rebuild_full(&adapter);
2175            }
2176            self.pk_order = Some(pk);
2177        }
2178        self.edges.add_edge(dependent, dependency);
2179        self.store.set_dirty(dependent, true);
2180        self.dirty_vertices.insert(dependent);
2181    }
2182
2183    fn remove_dependent_edges(&mut self, vertex: VertexId) {
2184        // Remove all outgoing edges from this vertex (its dependencies)
2185        let dependencies = self.edges.out_edges(vertex);
2186
2187        self.edges.begin_batch();
2188        if self.pk_order.is_some()
2189            && let Some(mut pk) = self.pk_order.take()
2190        {
2191            for dep in &dependencies {
2192                pk.remove_edge(*dep, vertex);
2193            }
2194            self.pk_order = Some(pk);
2195        }
2196        for dep in dependencies {
2197            self.edges.remove_edge(vertex, dep);
2198        }
2199        self.edges.end_batch();
2200
2201        // Remove range dependencies and clean up stripes
2202        if let Some(old_ranges) = self.formula_to_range_deps.remove(&vertex) {
2203            let old_sheet_id = self.store.sheet_id(vertex);
2204
2205            for range in &old_ranges {
2206                let sheet_id = match range.sheet {
2207                    SharedSheetLocator::Id(id) => id,
2208                    _ => old_sheet_id,
2209                };
2210                let s_row = range.start_row.map(|b| b.index);
2211                let e_row = range.end_row.map(|b| b.index);
2212                let s_col = range.start_col.map(|b| b.index);
2213                let e_col = range.end_col.map(|b| b.index);
2214
2215                let mut keys_to_clean = FxHashSet::default();
2216
2217                let col_stripes = (s_row.is_none() && e_row.is_none())
2218                    || (s_col.is_some() && e_col.is_some() && (s_row.is_none() || e_row.is_none()));
2219                let row_stripes = (s_col.is_none() && e_col.is_none())
2220                    || (s_row.is_some() && e_row.is_some() && (s_col.is_none() || e_col.is_none()));
2221
2222                if col_stripes && !row_stripes {
2223                    let sc = s_col.unwrap_or(0);
2224                    let ec = e_col.unwrap_or(sc);
2225                    for col in sc..=ec {
2226                        keys_to_clean.insert(StripeKey {
2227                            sheet_id,
2228                            stripe_type: StripeType::Column,
2229                            index: col,
2230                        });
2231                    }
2232                } else if row_stripes && !col_stripes {
2233                    let sr = s_row.unwrap_or(0);
2234                    let er = e_row.unwrap_or(sr);
2235                    for row in sr..=er {
2236                        keys_to_clean.insert(StripeKey {
2237                            sheet_id,
2238                            stripe_type: StripeType::Row,
2239                            index: row,
2240                        });
2241                    }
2242                } else {
2243                    let start_row = s_row.unwrap_or(0);
2244                    let start_col = s_col.unwrap_or(0);
2245                    let end_row = e_row.unwrap_or(start_row);
2246                    let end_col = e_col.unwrap_or(start_col);
2247
2248                    let height = end_row.saturating_sub(start_row) + 1;
2249                    let width = end_col.saturating_sub(start_col) + 1;
2250
2251                    if self.config.enable_block_stripes && height > 1 && width > 1 {
2252                        let start_block_row = start_row / BLOCK_H;
2253                        let end_block_row = end_row / BLOCK_H;
2254                        let start_block_col = start_col / BLOCK_W;
2255                        let end_block_col = end_col / BLOCK_W;
2256
2257                        for block_row in start_block_row..=end_block_row {
2258                            for block_col in start_block_col..=end_block_col {
2259                                keys_to_clean.insert(StripeKey {
2260                                    sheet_id,
2261                                    stripe_type: StripeType::Block,
2262                                    index: block_index(block_row * BLOCK_H, block_col * BLOCK_W),
2263                                });
2264                            }
2265                        }
2266                    } else if height > width {
2267                        for col in start_col..=end_col {
2268                            keys_to_clean.insert(StripeKey {
2269                                sheet_id,
2270                                stripe_type: StripeType::Column,
2271                                index: col,
2272                            });
2273                        }
2274                    } else {
2275                        for row in start_row..=end_row {
2276                            keys_to_clean.insert(StripeKey {
2277                                sheet_id,
2278                                stripe_type: StripeType::Row,
2279                                index: row,
2280                            });
2281                        }
2282                    }
2283                }
2284
2285                for key in keys_to_clean {
2286                    if let Some(dependents) = self.stripe_to_dependents.get_mut(&key) {
2287                        dependents.remove(&vertex);
2288                        if dependents.is_empty() {
2289                            self.stripe_to_dependents.remove(&key);
2290                            #[cfg(test)]
2291                            {
2292                                if let Ok(mut g) = self.instr.lock() {
2293                                    g.stripe_removes += 1;
2294                                }
2295                            }
2296                        }
2297                    }
2298                }
2299            }
2300        }
2301    }
2302
2303    // Removed: vertices() and get_vertex() methods - no longer needed with SoA
2304    // The old AoS Vertex struct has been eliminated in favor of direct
2305    // access to columnar data through the VertexStore
2306
2307    /// Updates the cached value of a formula vertex.
2308    pub(crate) fn update_vertex_value(&mut self, vertex_id: VertexId, value: LiteralValue) {
2309        if !self.value_cache_enabled {
2310            // Canonical mode: cell/formula vertices must not store values in the graph.
2311            match self.store.kind(vertex_id) {
2312                VertexKind::Cell
2313                | VertexKind::FormulaScalar
2314                | VertexKind::FormulaArray
2315                | VertexKind::Empty => {
2316                    self.vertex_values.remove(&vertex_id);
2317                    return;
2318                }
2319                _ => {
2320                    // Allow non-cell vertices to cache values (e.g. named-range formulas).
2321                }
2322            }
2323        }
2324        let value_ref = self.data_store.store_value(normalize_stored_literal(value));
2325        self.vertex_values.insert(vertex_id, value_ref);
2326    }
2327
2328    /// Plan a spill region for an anchor; returns #SPILL! if blocked
2329    pub fn plan_spill_region(
2330        &self,
2331        anchor: VertexId,
2332        target_cells: &[CellRef],
2333    ) -> Result<(), ExcelError> {
2334        self.plan_spill_region_allowing_formula_overwrite(anchor, target_cells, None)
2335    }
2336
2337    /// Plan a spill region, optionally allowing specific formula vertices to be overwritten.
2338    ///
2339    /// This is used by parallel evaluation to allow spill anchors to take precedence over
2340    /// other formula vertices that are being evaluated in the same layer.
2341    pub(crate) fn plan_spill_region_allowing_formula_overwrite(
2342        &self,
2343        anchor: VertexId,
2344        target_cells: &[CellRef],
2345        overwritable_formulas: Option<&rustc_hash::FxHashSet<VertexId>>,
2346    ) -> Result<(), ExcelError> {
2347        use formualizer_common::{ExcelErrorExtra, ExcelErrorKind};
2348        // Compute expected spill shape from the target rectangle for better diagnostics
2349        let (expected_rows, expected_cols) = if target_cells.is_empty() {
2350            (0u32, 0u32)
2351        } else {
2352            let mut min_r = u32::MAX;
2353            let mut max_r = 0u32;
2354            let mut min_c = u32::MAX;
2355            let mut max_c = 0u32;
2356            for cell in target_cells {
2357                let r = cell.coord.row();
2358                let c = cell.coord.col();
2359                if r < min_r {
2360                    min_r = r;
2361                }
2362                if r > max_r {
2363                    max_r = r;
2364                }
2365                if c < min_c {
2366                    min_c = c;
2367                }
2368                if c > max_c {
2369                    max_c = c;
2370                }
2371            }
2372            (
2373                max_r.saturating_sub(min_r).saturating_add(1),
2374                max_c.saturating_sub(min_c).saturating_add(1),
2375            )
2376        };
2377        // Allow overlapping with previously owned spill cells by this anchor
2378        for cell in target_cells {
2379            // If cell is already owned by this anchor's previous spill, it's allowed.
2380            let owned_by_anchor = match self.spill_cell_to_anchor.get(cell) {
2381                Some(&existing_anchor) if existing_anchor == anchor => true,
2382                Some(_other) => {
2383                    return Err(ExcelError::new(ExcelErrorKind::Spill)
2384                        .with_message("BlockedBySpill")
2385                        .with_extra(ExcelErrorExtra::Spill {
2386                            expected_rows,
2387                            expected_cols,
2388                        }));
2389                }
2390                None => false,
2391            };
2392
2393            if owned_by_anchor {
2394                continue;
2395            }
2396
2397            // If cell is occupied by another formula anchor, block unless explicitly allowed.
2398            if let Some(&vid) = self.cell_to_vertex.get(cell)
2399                && vid != anchor
2400            {
2401                // Prevent clobbering formulas (array or scalar) in the target area
2402                match self.store.kind(vid) {
2403                    VertexKind::FormulaScalar | VertexKind::FormulaArray => {
2404                        if let Some(allow) = overwritable_formulas
2405                            && allow.contains(&vid)
2406                        {
2407                            continue;
2408                        }
2409                        return Err(ExcelError::new(ExcelErrorKind::Spill)
2410                            .with_message("BlockedByFormula")
2411                            .with_extra(ExcelErrorExtra::Spill {
2412                                expected_rows,
2413                                expected_cols,
2414                            }));
2415                    }
2416                    _ => {
2417                        // If a non-empty value exists (and not this anchor), block
2418                        if let Some(vref) = self.vertex_values.get(&vid) {
2419                            let v = self.data_store.retrieve_value(*vref);
2420                            if !matches!(v, LiteralValue::Empty) {
2421                                return Err(ExcelError::new(ExcelErrorKind::Spill)
2422                                    .with_message("BlockedByValue")
2423                                    .with_extra(ExcelErrorExtra::Spill {
2424                                        expected_rows,
2425                                        expected_cols,
2426                                    }));
2427                            }
2428                        }
2429                    }
2430                }
2431            }
2432        }
2433        Ok(())
2434    }
2435
2436    // Note: non-atomic commit_spill_region has been removed. All callers must use
2437    // commit_spill_region_atomic_with_fault for atomicity and rollback on failure.
2438
2439    /// Commit a spill atomically with an internal shadow buffer and optional fault injection.
2440    /// If a fault is injected partway through, all changes are rolled back to the pre-commit state.
2441    /// This does not change behavior under normal operation; it's primarily for Phase 3 guarantees and tests.
2442    pub fn commit_spill_region_atomic_with_fault(
2443        &mut self,
2444        anchor: VertexId,
2445        target_cells: Vec<CellRef>,
2446        values: Vec<Vec<LiteralValue>>,
2447        fault_after_ops: Option<usize>,
2448    ) -> Result<(), ExcelError> {
2449        // Anchor cell coordinates (0-based) for special-casing writes.
2450        // We must never overwrite the anchor via set_cell_value(), because that would
2451        // strip the formula and break incremental recalculation.
2452        let anchor_cell = self
2453            .get_cell_ref(anchor)
2454            .expect("anchor cell ref for spill commit");
2455        let anchor_sheet_name = self.sheet_name(anchor_cell.sheet_id).to_string();
2456        let anchor_row = anchor_cell.coord.row();
2457        let anchor_col = anchor_cell.coord.col();
2458
2459        // Capture previous owned cells for this anchor
2460        let prev_cells = self
2461            .spill_anchor_to_cells
2462            .get(&anchor)
2463            .cloned()
2464            .unwrap_or_default();
2465        // Use CoordBuildHasher on CellRef keys to avoid FxHasher clustering on
2466        // packed Coord values.
2467        let new_set: std::collections::HashSet<CellRef, CoordBuildHasher> =
2468            target_cells.iter().copied().collect();
2469        let prev_set: std::collections::HashSet<CellRef, CoordBuildHasher> =
2470            prev_cells.iter().copied().collect();
2471
2472        // Compose operation list: clears first (prev - new), then writes for new rectangle
2473        #[derive(Clone)]
2474        struct Op {
2475            sheet: String,
2476            row: u32,
2477            col: u32,
2478            new_value: LiteralValue,
2479        }
2480        let mut ops: Vec<Op> = Vec::new();
2481
2482        // Clears for cells no longer used
2483        for cell in prev_cells.iter() {
2484            if !new_set.contains(cell) {
2485                let sheet = self.sheet_name(cell.sheet_id).to_string();
2486                ops.push(Op {
2487                    sheet,
2488                    row: cell.coord.row(),
2489                    col: cell.coord.col(),
2490                    new_value: LiteralValue::Empty,
2491                });
2492            }
2493        }
2494
2495        // Writes for new values (row-major to match target rectangle)
2496        if !target_cells.is_empty() {
2497            let first = target_cells.first().copied().unwrap();
2498            let row0 = first.coord.row();
2499            let col0 = first.coord.col();
2500            let sheet = self.sheet_name(first.sheet_id).to_string();
2501            for (r_off, row_vals) in values.iter().enumerate() {
2502                for (c_off, v) in row_vals.iter().enumerate() {
2503                    ops.push(Op {
2504                        sheet: sheet.clone(),
2505                        row: row0 + r_off as u32,
2506                        col: col0 + c_off as u32,
2507                        new_value: v.clone(),
2508                    });
2509                }
2510            }
2511        }
2512
2513        // Shadow buffer of old values for rollback
2514        #[derive(Clone)]
2515        struct OldVal {
2516            present: bool,
2517            value: LiteralValue,
2518        }
2519        let mut old_values: Vec<((String, u32, u32), OldVal)> = Vec::with_capacity(ops.len());
2520
2521        // Capture old values before applying
2522        for op in &ops {
2523            // op.row/op.col are internal 0-based; get_cell_value is a public 1-based API.
2524            let old = self
2525                .get_cell_value(&op.sheet, op.row + 1, op.col + 1)
2526                .unwrap_or(LiteralValue::Empty);
2527            let present = true; // unified model: we always treat as present
2528            old_values.push((
2529                (op.sheet.clone(), op.row, op.col),
2530                OldVal {
2531                    present,
2532                    value: old,
2533                },
2534            ));
2535        }
2536
2537        // Apply with optional injected fault
2538        for (applied, op) in ops.iter().enumerate() {
2539            if let Some(n) = fault_after_ops
2540                && applied == n
2541            {
2542                for idx in (0..applied).rev() {
2543                    let ((ref sheet, row, col), ref old) = old_values[idx];
2544                    if sheet == &anchor_sheet_name && row == anchor_row && col == anchor_col {
2545                        self.update_vertex_value(anchor, old.value.clone());
2546                    } else {
2547                        let _ = self.set_cell_value(sheet, row + 1, col + 1, old.value.clone());
2548                    }
2549                }
2550                return Err(ExcelError::new(ExcelErrorKind::Error)
2551                    .with_message("Injected persistence fault during spill commit"));
2552            }
2553            if op.sheet == anchor_sheet_name && op.row == anchor_row && op.col == anchor_col {
2554                self.update_vertex_value(anchor, op.new_value.clone());
2555            } else {
2556                let _ =
2557                    self.set_cell_value(&op.sheet, op.row + 1, op.col + 1, op.new_value.clone());
2558            }
2559        }
2560
2561        // Update spill ownership maps only on success
2562        // Clear previous ownership not reused
2563        for cell in prev_cells.iter() {
2564            if !new_set.contains(cell) {
2565                self.spill_cell_to_anchor.remove(cell);
2566            }
2567        }
2568        // Mark ownership for new rectangle using the declared target cells only
2569        for cell in &target_cells {
2570            self.spill_cell_to_anchor.insert(*cell, anchor);
2571        }
2572        self.spill_anchor_to_cells.insert(anchor, target_cells);
2573        Ok(())
2574    }
2575
2576    pub(crate) fn spill_cells_for_anchor(&self, anchor: VertexId) -> Option<&[CellRef]> {
2577        self.spill_anchor_to_cells
2578            .get(&anchor)
2579            .map(|v| v.as_slice())
2580    }
2581
2582    pub(crate) fn spill_registry_has_anchor(&self, anchor: VertexId) -> bool {
2583        self.spill_anchor_to_cells.contains_key(&anchor)
2584    }
2585
2586    pub(crate) fn spill_registry_anchor_for_cell(&self, cell: CellRef) -> Option<VertexId> {
2587        self.spill_cell_to_anchor.get(&cell).copied()
2588    }
2589
2590    pub(crate) fn spill_registry_counts(&self) -> (usize, usize) {
2591        (
2592            self.spill_anchor_to_cells.len(),
2593            self.spill_cell_to_anchor.len(),
2594        )
2595    }
2596
2597    /// Clear an existing spill region for an anchor (set cells to Empty and forget ownership)
2598    pub fn clear_spill_region(&mut self, anchor: VertexId) {
2599        let _ = self.clear_spill_region_bulk(anchor);
2600    }
2601
2602    /// Bulk clear an existing spill region for an anchor.
2603    ///
2604    /// This avoids calling `set_cell_value()` per spill child (which can trigger O(N*V)
2605    /// dependent scans when `edges.delta_size() > 0`). Instead, it clears values directly and
2606    /// performs a single dirty propagation over the affected spill children.
2607    ///
2608    /// Returns the previously registered spill cells (including the anchor cell) for callers that
2609    /// want to mirror/record deltas.
2610    pub fn clear_spill_region_bulk(&mut self, anchor: VertexId) -> Vec<CellRef> {
2611        let anchor_cell = self.get_cell_ref(anchor);
2612        let Some(cells) = self.spill_anchor_to_cells.remove(&anchor) else {
2613            return Vec::new();
2614        };
2615
2616        // Remove ownership for all cells first.
2617        for cell in cells.iter() {
2618            self.spill_cell_to_anchor.remove(cell);
2619        }
2620
2621        // Prepare a single arena value ref for Empty (only when caching is enabled).
2622        let empty_ref = if self.value_cache_enabled {
2623            Some(self.data_store.store_value(LiteralValue::Empty))
2624        } else {
2625            None
2626        };
2627
2628        // Clear all spill children (excluding the anchor cell).
2629        let mut changed_vertices: Vec<VertexId> = Vec::new();
2630        for cell in cells.iter().copied() {
2631            let is_anchor = anchor_cell.map(|a| a == cell).unwrap_or(false);
2632            if is_anchor {
2633                continue;
2634            }
2635            let Some(&vid) = self.cell_to_vertex.get(&cell) else {
2636                continue;
2637            };
2638            // Ensure this vertex is a plain value cell.
2639            if self.vertex_formulas.remove(&vid).is_some() {
2640                // Be conservative: remove outgoing edges if this was a formula vertex.
2641                // This should be rare for spill children under normal policies.
2642                self.remove_dependent_edges(vid);
2643            }
2644            self.store.set_kind(vid, VertexKind::Cell);
2645            if let Some(er) = empty_ref {
2646                self.vertex_values.insert(vid, er);
2647            } else {
2648                self.vertex_values.remove(&vid);
2649            }
2650            self.store.set_dirty(vid, false);
2651            self.dirty_vertices.remove(&vid);
2652            changed_vertices.push(vid);
2653        }
2654
2655        // Single dirty propagation for all changed spill children.
2656        if !changed_vertices.is_empty() {
2657            self.mark_dirty_many_value_cells(&changed_vertices);
2658        }
2659
2660        cells
2661    }
2662
2663    fn mark_dirty_many_value_cells(&mut self, vertex_ids: &[VertexId]) -> Vec<VertexId> {
2664        if vertex_ids.is_empty() {
2665            return Vec::new();
2666        }
2667
2668        // Ensure reverse edges are usable (delta.in_edges is intentionally not delta-aware).
2669        if self.edges.delta_size() > 0 {
2670            self.edges.rebuild();
2671        }
2672
2673        let mut affected: FxHashSet<VertexId> = FxHashSet::default();
2674        let mut to_visit: Vec<VertexId> = Vec::new();
2675        let mut visited_for_propagation: FxHashSet<VertexId> = FxHashSet::default();
2676
2677        // Value sources are affected but not marked dirty themselves.
2678        for &src in vertex_ids {
2679            affected.insert(src);
2680        }
2681
2682        // Collect initial direct dependents and name dependents.
2683        for &src in vertex_ids {
2684            to_visit.extend(self.edges.in_edges(src));
2685            if let Some(name_set) = self.cell_to_name_dependents.get(&src) {
2686                for &name_vertex in name_set {
2687                    to_visit.push(name_vertex);
2688                }
2689            }
2690        }
2691
2692        // Collect range dependents in bulk using spill rect bounds per sheet.
2693        let mut bounds_by_sheet: FxHashMap<SheetId, (u32, u32, u32, u32)> = FxHashMap::default();
2694        for &src in vertex_ids {
2695            let view = self.store.view(src);
2696            let sid = view.sheet_id();
2697            let r = view.row();
2698            let c = view.col();
2699            bounds_by_sheet
2700                .entry(sid)
2701                .and_modify(|b| {
2702                    b.0 = b.0.min(r);
2703                    b.1 = b.1.max(r);
2704                    b.2 = b.2.min(c);
2705                    b.3 = b.3.max(c);
2706                })
2707                .or_insert((r, r, c, c));
2708        }
2709
2710        for (sid, (sr, er, sc, ec)) in bounds_by_sheet {
2711            to_visit.extend(self.collect_range_dependents_for_rect(sid, sr, sc, er, ec));
2712        }
2713
2714        while let Some(id) = to_visit.pop() {
2715            if !visited_for_propagation.insert(id) {
2716                continue;
2717            }
2718            affected.insert(id);
2719            self.store.set_dirty(id, true);
2720            to_visit.extend(self.edges.in_edges(id));
2721            to_visit.extend(self.collect_range_dependents_for_vertex(id));
2722        }
2723
2724        self.dirty_vertices.extend(&affected);
2725        affected.into_iter().collect()
2726    }
2727
2728    fn collect_range_dependents_for_vertex(&self, vertex_id: VertexId) -> Vec<VertexId> {
2729        match self.store.kind(vertex_id) {
2730            VertexKind::Cell
2731            | VertexKind::Empty
2732            | VertexKind::FormulaScalar
2733            | VertexKind::FormulaArray => {
2734                let view = self.store.view(vertex_id);
2735                self.collect_range_dependents_for_rect(
2736                    view.sheet_id(),
2737                    view.row(),
2738                    view.col(),
2739                    view.row(),
2740                    view.col(),
2741                )
2742            }
2743            _ => Vec::new(),
2744        }
2745    }
2746
2747    fn collect_range_dependents_for_rect(
2748        &self,
2749        sheet_id: SheetId,
2750        start_row: u32,
2751        start_col: u32,
2752        end_row: u32,
2753        end_col: u32,
2754    ) -> Vec<VertexId> {
2755        if self.stripe_to_dependents.is_empty() {
2756            return Vec::new();
2757        }
2758        let mut candidates: FxHashSet<VertexId> = FxHashSet::default();
2759
2760        for col in start_col..=end_col {
2761            let key = StripeKey {
2762                sheet_id,
2763                stripe_type: StripeType::Column,
2764                index: col,
2765            };
2766            if let Some(deps) = self.stripe_to_dependents.get(&key) {
2767                candidates.extend(deps);
2768            }
2769        }
2770        for row in start_row..=end_row {
2771            let key = StripeKey {
2772                sheet_id,
2773                stripe_type: StripeType::Row,
2774                index: row,
2775            };
2776            if let Some(deps) = self.stripe_to_dependents.get(&key) {
2777                candidates.extend(deps);
2778            }
2779        }
2780        if self.config.enable_block_stripes {
2781            let br0 = start_row / BLOCK_H;
2782            let br1 = end_row / BLOCK_H;
2783            let bc0 = start_col / BLOCK_W;
2784            let bc1 = end_col / BLOCK_W;
2785            for br in br0..=br1 {
2786                for bc in bc0..=bc1 {
2787                    let key = StripeKey {
2788                        sheet_id,
2789                        stripe_type: StripeType::Block,
2790                        index: block_index(br * BLOCK_H, bc * BLOCK_W),
2791                    };
2792                    if let Some(deps) = self.stripe_to_dependents.get(&key) {
2793                        candidates.extend(deps);
2794                    }
2795                }
2796            }
2797        }
2798
2799        // Precision check: the dirty rect must overlap at least one of the formula's registered ranges.
2800        let mut out: Vec<VertexId> = Vec::new();
2801        for dep_id in candidates {
2802            let Some(ranges) = self.formula_to_range_deps.get(&dep_id) else {
2803                continue;
2804            };
2805            let mut hit = false;
2806            for range in ranges {
2807                let range_sheet_id = match range.sheet {
2808                    SharedSheetLocator::Id(id) => id,
2809                    _ => sheet_id,
2810                };
2811                if range_sheet_id != sheet_id {
2812                    continue;
2813                }
2814                let sr0 = range.start_row.map(|b| b.index).unwrap_or(0);
2815                let er0 = range.end_row.map(|b| b.index).unwrap_or(u32::MAX);
2816                let sc0 = range.start_col.map(|b| b.index).unwrap_or(0);
2817                let ec0 = range.end_col.map(|b| b.index).unwrap_or(u32::MAX);
2818                let overlap =
2819                    sr0 <= end_row && er0 >= start_row && sc0 <= end_col && ec0 >= start_col;
2820                if overlap {
2821                    hit = true;
2822                    break;
2823                }
2824            }
2825            if hit {
2826                out.push(dep_id);
2827            }
2828        }
2829        out
2830    }
2831
2832    /// Check if a vertex exists
2833    pub(crate) fn vertex_exists(&self, vertex_id: VertexId) -> bool {
2834        if vertex_id.0 < FIRST_NORMAL_VERTEX {
2835            return false;
2836        }
2837        let index = (vertex_id.0 - FIRST_NORMAL_VERTEX) as usize;
2838        index < self.store.len()
2839    }
2840
2841    /// Get the kind of a vertex
2842    pub(crate) fn get_vertex_kind(&self, vertex_id: VertexId) -> VertexKind {
2843        self.store.kind(vertex_id)
2844    }
2845
2846    /// Get the sheet ID of a vertex
2847    pub(crate) fn get_vertex_sheet_id(&self, vertex_id: VertexId) -> SheetId {
2848        self.store.sheet_id(vertex_id)
2849    }
2850
2851    pub fn get_formula_id(&self, vertex_id: VertexId) -> Option<AstNodeId> {
2852        self.vertex_formulas.get(&vertex_id).copied()
2853    }
2854
2855    pub fn get_formula_id_and_volatile(&self, vertex_id: VertexId) -> Option<(AstNodeId, bool)> {
2856        let ast_id = self.get_formula_id(vertex_id)?;
2857        Some((ast_id, self.is_volatile(vertex_id)))
2858    }
2859
2860    pub fn get_formula_node(&self, vertex_id: VertexId) -> Option<&super::arena::AstNodeData> {
2861        let ast_id = self.get_formula_id(vertex_id)?;
2862        self.data_store.get_node(ast_id)
2863    }
2864
2865    pub fn get_formula_node_and_volatile(
2866        &self,
2867        vertex_id: VertexId,
2868    ) -> Option<(&super::arena::AstNodeData, bool)> {
2869        let (ast_id, vol) = self.get_formula_id_and_volatile(vertex_id)?;
2870        let node = self.data_store.get_node(ast_id)?;
2871        Some((node, vol))
2872    }
2873
2874    /// Get the formula AST for a vertex.
2875    ///
2876    /// Not used in hot paths; reconstructs from arena.
2877    pub fn get_formula(&self, vertex_id: VertexId) -> Option<ASTNode> {
2878        let ast_id = self.get_formula_id(vertex_id)?;
2879        self.data_store.retrieve_ast(ast_id, &self.sheet_reg)
2880    }
2881
2882    /// Get the value stored for a vertex
2883    pub fn get_value(&self, vertex_id: VertexId) -> Option<LiteralValue> {
2884        if !self.value_cache_enabled {
2885            // In canonical mode, cell/formula values must not be read from the graph.
2886            // Non-cell vertices (e.g. named ranges, external sources) may still use graph storage.
2887            match self.store.kind(vertex_id) {
2888                VertexKind::Cell
2889                | VertexKind::FormulaScalar
2890                | VertexKind::FormulaArray
2891                | VertexKind::Empty => {
2892                    #[cfg(debug_assertions)]
2893                    {
2894                        self.graph_value_read_attempts
2895                            .fetch_add(1, Ordering::Relaxed);
2896                    }
2897                    return None;
2898                }
2899                _ => {
2900                    // Allow non-cell vertices to use vertex_values.
2901                }
2902            }
2903        }
2904        self.vertex_values
2905            .get(&vertex_id)
2906            .map(|&value_ref| self.data_store.retrieve_value(value_ref))
2907    }
2908
2909    /// Get the cell reference for a vertex
2910    pub(crate) fn get_cell_ref(&self, vertex_id: VertexId) -> Option<CellRef> {
2911        let packed_coord = self.store.coord(vertex_id);
2912        let sheet_id = self.store.sheet_id(vertex_id);
2913        let coord = Coord::new(packed_coord.row(), packed_coord.col(), true, true);
2914        Some(CellRef::new(sheet_id, coord))
2915    }
2916
2917    /// Create a cell reference (helper for internal use)
2918    pub(crate) fn make_cell_ref_internal(&self, sheet_id: SheetId, row: u32, col: u32) -> CellRef {
2919        let coord = Coord::new(row, col, true, true);
2920        CellRef::new(sheet_id, coord)
2921    }
2922
2923    /// Create a cell reference from sheet name and Excel 1-based coordinates.
2924    pub fn make_cell_ref(&self, sheet_name: &str, row: u32, col: u32) -> CellRef {
2925        let sheet_id = self.sheet_reg.get_id(sheet_name).unwrap_or(0);
2926        let coord = Coord::from_excel(row, col, true, true);
2927        CellRef::new(sheet_id, coord)
2928    }
2929
2930    /// Check if a vertex is dirty
2931    pub(crate) fn is_dirty(&self, vertex_id: VertexId) -> bool {
2932        self.store.is_dirty(vertex_id)
2933    }
2934
2935    /// Check if a vertex is volatile
2936    pub(crate) fn is_volatile(&self, vertex_id: VertexId) -> bool {
2937        self.store.is_volatile(vertex_id)
2938    }
2939
2940    pub(crate) fn is_dynamic(&self, vertex_id: VertexId) -> bool {
2941        self.store.is_dynamic(vertex_id)
2942    }
2943
2944    /// Get vertex ID for a cell address
2945    pub fn get_vertex_id_for_address(&self, addr: &CellRef) -> Option<&VertexId> {
2946        self.cell_to_vertex.get(addr)
2947    }
2948
2949    #[cfg(test)]
2950    pub fn cell_to_vertex(
2951        &self,
2952    ) -> &std::collections::HashMap<CellRef, VertexId, CoordBuildHasher> {
2953        &self.cell_to_vertex
2954    }
2955
2956    /// Borrow dependencies of a vertex when no pending edge delta exists.
2957    ///
2958    /// This enables zero-allocation traversal in hot scheduler paths.
2959    #[inline]
2960    pub(crate) fn dependencies_slice(&self, vertex_id: VertexId) -> Option<&[VertexId]> {
2961        self.edges.out_edges_ref(vertex_id)
2962    }
2963
2964    /// Get the dependencies of a vertex (for scheduler)
2965    pub(crate) fn get_dependencies(&self, vertex_id: VertexId) -> Vec<VertexId> {
2966        self.edges.out_edges(vertex_id)
2967    }
2968
2969    /// Check if a vertex has a self-loop
2970    pub(crate) fn has_self_loop(&self, vertex_id: VertexId) -> bool {
2971        if let Some(deps) = self.dependencies_slice(vertex_id) {
2972            deps.contains(&vertex_id)
2973        } else {
2974            self.edges.out_edges(vertex_id).contains(&vertex_id)
2975        }
2976    }
2977
2978    /// Borrow dependents of a vertex when no pending edge delta exists.
2979    ///
2980    /// This enables zero-allocation traversal in hot scheduler paths.
2981    #[inline]
2982    pub(crate) fn dependents_slice(&self, vertex_id: VertexId) -> Option<&[VertexId]> {
2983        self.edges.in_edges_ref(vertex_id)
2984    }
2985
2986    /// Get dependents of a vertex (vertices that depend on this vertex)
2987    /// Uses reverse edges for O(1) lookup when available
2988    pub(crate) fn get_dependents(&self, vertex_id: VertexId) -> Vec<VertexId> {
2989        // If there are pending changes in delta, we need to scan
2990        // Otherwise we can use the fast reverse edges
2991        if self.edges.delta_size() > 0 {
2992            #[cfg(test)]
2993            {
2994                // This scan is intentionally tracked for perf regression tests.
2995                // It is expected to be rare in normal operation.
2996                if let Ok(mut g) = self.instr.lock() {
2997                    g.dependents_scan_fallback_calls += 1;
2998                    g.dependents_scan_vertices_scanned += self.cell_to_vertex.len() as u64;
2999                }
3000            }
3001            // Fall back to scanning when delta has changes
3002            let mut dependents = Vec::new();
3003            for (&_addr, &vid) in &self.cell_to_vertex {
3004                let out_edges = self.edges.out_edges(vid);
3005                if out_edges.contains(&vertex_id) {
3006                    dependents.push(vid);
3007                }
3008            }
3009            for named in self.named_ranges.values() {
3010                let vid = named.vertex;
3011                let out_edges = self.edges.out_edges(vid);
3012                if out_edges.contains(&vertex_id) {
3013                    dependents.push(vid);
3014                }
3015            }
3016            for named in self.sheet_named_ranges.values() {
3017                let vid = named.vertex;
3018                let out_edges = self.edges.out_edges(vid);
3019                if out_edges.contains(&vertex_id) {
3020                    dependents.push(vid);
3021                }
3022            }
3023            dependents
3024        } else {
3025            // Fast path: use reverse edges from CSR
3026            self.edges.in_edges(vertex_id).to_vec()
3027        }
3028    }
3029
3030    // Internal helper methods for Milestone 0.4
3031
3032    /// Internal: Create a snapshot of vertex state for rollback
3033    #[doc(hidden)]
3034    pub fn snapshot_vertex(&self, id: VertexId) -> crate::engine::VertexSnapshot {
3035        let coord = self.store.coord(id);
3036        let sheet_id = self.store.sheet_id(id);
3037        let kind = self.store.kind(id);
3038        let flags = self.store.flags(id);
3039
3040        // Get value and formula references
3041        let value_ref = self.vertex_values.get(&id).copied();
3042        let formula_ref = self.vertex_formulas.get(&id).copied();
3043
3044        // Get outgoing edges (dependencies)
3045        let out_edges = self.get_dependencies(id);
3046
3047        crate::engine::VertexSnapshot {
3048            coord,
3049            sheet_id,
3050            kind,
3051            flags,
3052            value_ref,
3053            formula_ref,
3054            out_edges,
3055        }
3056    }
3057
3058    /// Internal: Remove all edges for a vertex
3059    #[doc(hidden)]
3060    pub fn remove_all_edges(&mut self, id: VertexId) {
3061        // Enter batch mode to avoid intermediate rebuilds
3062        self.edges.begin_batch();
3063
3064        // Remove outgoing edges (this vertex's dependencies)
3065        self.remove_dependent_edges(id);
3066
3067        // Force rebuild to get accurate dependents list
3068        // This is necessary because get_dependents uses CSR reverse edges
3069        self.edges.rebuild();
3070
3071        // Remove incoming edges (vertices that depend on this vertex)
3072        let dependents = self.get_dependents(id);
3073        if self.pk_order.is_some()
3074            && let Some(mut pk) = self.pk_order.take()
3075        {
3076            for dependent in &dependents {
3077                pk.remove_edge(id, *dependent);
3078            }
3079            self.pk_order = Some(pk);
3080        }
3081        for dependent in dependents {
3082            self.edges.remove_edge(dependent, id);
3083        }
3084
3085        // Exit batch mode and rebuild once with all changes
3086        self.edges.end_batch();
3087    }
3088
3089    /// Internal: Mark vertex as having #REF! error
3090    #[doc(hidden)]
3091    pub fn mark_as_ref_error(&mut self, id: VertexId) {
3092        if !self.value_cache_enabled {
3093            match self.store.kind(id) {
3094                VertexKind::Cell
3095                | VertexKind::FormulaScalar
3096                | VertexKind::FormulaArray
3097                | VertexKind::Empty => {
3098                    self.ref_error_vertices.insert(id);
3099                    // Canonical-only: graph does not cache cell/formula values.
3100                    // Ensure the dependent subgraph is dirtied so evaluation updates Arrow truth.
3101                    self.vertex_values.remove(&id);
3102                    let _ = self.mark_dirty(id);
3103                    return;
3104                }
3105                _ => {
3106                    // Allow non-cell vertices to use cached values.
3107                }
3108            }
3109        }
3110        let error = LiteralValue::Error(ExcelError::new(ExcelErrorKind::Ref));
3111        let value_ref = self.data_store.store_value(error);
3112        self.vertex_values.insert(id, value_ref);
3113        let _ = self.mark_dirty(id);
3114    }
3115
3116    /// Check if a vertex has a #REF! error
3117    pub fn is_ref_error(&self, id: VertexId) -> bool {
3118        if !self.value_cache_enabled {
3119            match self.store.kind(id) {
3120                VertexKind::Cell
3121                | VertexKind::FormulaScalar
3122                | VertexKind::FormulaArray
3123                | VertexKind::Empty => {
3124                    return self.ref_error_vertices.contains(&id);
3125                }
3126                _ => {
3127                    // Non-cell vertices may still have cached values.
3128                }
3129            }
3130        }
3131        if let Some(value_ref) = self.vertex_values.get(&id) {
3132            let value = self.data_store.retrieve_value(*value_ref);
3133            if let LiteralValue::Error(err) = value {
3134                return err.kind == ExcelErrorKind::Ref;
3135            }
3136        }
3137        false
3138    }
3139
3140    /// Internal: Mark all direct dependents as dirty
3141    #[doc(hidden)]
3142    pub fn mark_dependents_dirty(&mut self, id: VertexId) {
3143        let dependents = self.get_dependents(id);
3144        for dep_id in dependents {
3145            self.store.set_dirty(dep_id, true);
3146            self.dirty_vertices.insert(dep_id);
3147        }
3148    }
3149
3150    /// Internal: Mark a vertex as volatile
3151    #[doc(hidden)]
3152    pub fn mark_volatile(&mut self, id: VertexId, volatile: bool) {
3153        self.store.set_volatile(id, volatile);
3154        if volatile {
3155            self.volatile_vertices.insert(id);
3156        } else {
3157            self.volatile_vertices.remove(&id);
3158        }
3159    }
3160
3161    /// Update vertex coordinate
3162    #[doc(hidden)]
3163    pub fn set_coord(&mut self, id: VertexId, coord: AbsCoord) {
3164        self.store.set_coord(id, coord);
3165    }
3166
3167    /// Update edge cache coordinate
3168    #[doc(hidden)]
3169    pub fn update_edge_coord(&mut self, id: VertexId, coord: AbsCoord) {
3170        self.edges.update_coord(id, coord);
3171    }
3172
3173    /// Mark vertex as deleted (tombstone)
3174    #[doc(hidden)]
3175    pub fn mark_deleted(&mut self, id: VertexId, deleted: bool) {
3176        self.store.mark_deleted(id, deleted);
3177    }
3178
3179    /// Set vertex kind
3180    #[doc(hidden)]
3181    pub fn set_kind(&mut self, id: VertexId, kind: VertexKind) {
3182        self.store.set_kind(id, kind);
3183    }
3184
3185    /// Set vertex dirty flag
3186    #[doc(hidden)]
3187    pub fn set_dirty(&mut self, id: VertexId, dirty: bool) {
3188        self.store.set_dirty(id, dirty);
3189        if dirty {
3190            self.dirty_vertices.insert(id);
3191        } else {
3192            self.dirty_vertices.remove(&id);
3193        }
3194    }
3195
3196    /// Get vertex kind (for testing)
3197    #[cfg(test)]
3198    pub(crate) fn get_kind(&self, id: VertexId) -> VertexKind {
3199        self.store.kind(id)
3200    }
3201
3202    /// Get vertex flags (for testing)
3203    #[cfg(test)]
3204    pub(crate) fn get_flags(&self, id: VertexId) -> u8 {
3205        self.store.flags(id)
3206    }
3207
3208    /// Check if vertex is deleted (for testing)
3209    #[cfg(test)]
3210    pub(crate) fn is_deleted(&self, id: VertexId) -> bool {
3211        self.store.is_deleted(id)
3212    }
3213
3214    /// Force edge rebuild (internal use)
3215    #[doc(hidden)]
3216    pub fn rebuild_edges(&mut self) {
3217        self.edges.rebuild();
3218    }
3219
3220    /// Get delta size (internal use)
3221    #[doc(hidden)]
3222    pub fn edges_delta_size(&self) -> usize {
3223        self.edges.delta_size()
3224    }
3225
3226    /// Get vertex ID for specific cell address
3227    pub fn get_vertex_for_cell(&self, addr: &CellRef) -> Option<VertexId> {
3228        self.cell_to_vertex.get(addr).copied()
3229    }
3230
3231    /// Get coord for a vertex (public for VertexEditor)
3232    pub fn get_coord(&self, id: VertexId) -> AbsCoord {
3233        self.store.coord(id)
3234    }
3235
3236    /// Get sheet_id for a vertex (public for VertexEditor)
3237    pub fn get_sheet_id(&self, id: VertexId) -> SheetId {
3238        self.store.sheet_id(id)
3239    }
3240
3241    /// Get all vertices in a sheet
3242    pub fn vertices_in_sheet(&self, sheet_id: SheetId) -> impl Iterator<Item = VertexId> + '_ {
3243        self.store
3244            .all_vertices()
3245            .filter(move |&id| self.vertex_exists(id) && self.store.sheet_id(id) == sheet_id)
3246    }
3247
3248    /// Does a vertex have a formula associated
3249    pub fn vertex_has_formula(&self, id: VertexId) -> bool {
3250        self.vertex_formulas.contains_key(&id)
3251    }
3252
3253    /// Get all vertices with formulas
3254    pub fn vertices_with_formulas(&self) -> impl Iterator<Item = VertexId> + '_ {
3255        self.vertex_formulas.keys().copied()
3256    }
3257
3258    /// Update a vertex's formula
3259    pub fn update_vertex_formula(&mut self, id: VertexId, ast: ASTNode) -> Result<(), ExcelError> {
3260        // Get the sheet_id for this vertex
3261        let sheet_id = self.store.sheet_id(id);
3262
3263        // If the adjusted AST contains special #REF markers (from structural edits),
3264        // treat this as a REF error on the vertex instead of attempting to resolve.
3265        // This prevents failures when reference_adjuster injected placeholder refs.
3266        let has_ref_marker = ast.get_dependencies().into_iter().any(|r| {
3267            matches!(
3268                r,
3269                ReferenceType::Cell { sheet: Some(s), .. }
3270                    | ReferenceType::Range { sheet: Some(s), .. } if s == "#REF"
3271            )
3272        });
3273        if has_ref_marker {
3274            // Store the adjusted AST for round-tripping/display, but set value state to #REF!
3275            let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
3276            self.vertex_formulas.insert(id, ast_id);
3277            self.mark_as_ref_error(id);
3278            self.store.set_kind(id, VertexKind::FormulaScalar);
3279            return Ok(());
3280        }
3281
3282        // Extract dependencies from AST
3283        let (new_dependencies, new_range_dependencies, _, named_dependencies) =
3284            self.extract_dependencies(&ast, sheet_id)?;
3285
3286        // Remove old dependencies first
3287        self.remove_dependent_edges(id);
3288        self.detach_vertex_from_names(id);
3289
3290        // Store the new formula
3291        let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
3292        self.vertex_formulas.insert(id, ast_id);
3293
3294        // Add new dependency edges
3295        self.add_dependent_edges(id, &new_dependencies);
3296        self.add_range_dependent_edges(id, &new_range_dependencies, sheet_id);
3297
3298        if !named_dependencies.is_empty() {
3299            self.attach_vertex_to_names(id, &named_dependencies);
3300        }
3301
3302        // Mark as formula vertex
3303        self.store.set_kind(id, VertexKind::FormulaScalar);
3304
3305        Ok(())
3306    }
3307
3308    /// Mark a vertex as dirty without propagation (for VertexEditor)
3309    pub fn mark_vertex_dirty(&mut self, vertex_id: VertexId) {
3310        self.store.set_dirty(vertex_id, true);
3311        self.dirty_vertices.insert(vertex_id);
3312    }
3313
3314    /// Batch-mark vertices dirty without propagation.
3315    pub fn mark_vertices_dirty_batch(&mut self, vertices: &[VertexId]) {
3316        self.dirty_vertices.reserve(vertices.len());
3317        for &vertex_id in vertices {
3318            self.store.set_dirty(vertex_id, true);
3319        }
3320        self.dirty_vertices.extend(vertices.iter().copied());
3321    }
3322
3323    /// Update cell mapping for a vertex (for VertexEditor)
3324    pub fn update_cell_mapping(
3325        &mut self,
3326        id: VertexId,
3327        old_addr: Option<CellRef>,
3328        new_addr: CellRef,
3329    ) {
3330        // Remove old mapping if it exists
3331        if let Some(old) = old_addr {
3332            self.cell_to_vertex.remove(&old);
3333        }
3334        // Add new mapping
3335        self.cell_to_vertex.insert(new_addr, id);
3336    }
3337
3338    /// Remove cell mapping (for VertexEditor)
3339    pub fn remove_cell_mapping(&mut self, addr: &CellRef) {
3340        self.cell_to_vertex.remove(addr);
3341    }
3342
3343    /// Get the cell reference for a vertex
3344    pub fn get_cell_ref_for_vertex(&self, id: VertexId) -> Option<CellRef> {
3345        let coord = self.store.coord(id);
3346        let sheet_id = self.store.sheet_id(id);
3347        // Find the cell reference in the mapping
3348        let cell_ref = CellRef::new(sheet_id, Coord::new(coord.row(), coord.col(), true, true));
3349        // Verify it actually maps to this vertex
3350        if self.cell_to_vertex.get(&cell_ref) == Some(&id) {
3351            Some(cell_ref)
3352        } else {
3353            None
3354        }
3355    }
3356
3357    /// Rebuild dependency edges/range links for an existing formula vertex after AST changes.
3358    ///
3359    /// This intentionally reuses the same extraction and edge wiring machinery as
3360    /// `set_cell_formula[_with_volatility]` to preserve edge orientation, placeholder
3361    /// behavior, and name/range dependency semantics.
3362    pub(crate) fn rebuild_formula_dependencies(&mut self, vertex_id: VertexId, ast: &ASTNode) {
3363        let sheet_id = self.store.sheet_id(vertex_id);
3364
3365        // Remove old dependency, name, and pending-name links first.
3366        self.remove_dependent_edges(vertex_id);
3367        self.detach_vertex_from_names(vertex_id);
3368        self.clear_pending_name_references(vertex_id);
3369
3370        let (
3371            new_dependencies,
3372            new_range_dependencies,
3373            _created_placeholders,
3374            named_dependencies,
3375            unresolved_names,
3376        ) = match self.extract_dependencies_with_pending_names(ast, sheet_id) {
3377            Ok(v) => v,
3378            Err(_) => {
3379                self.mark_as_ref_error(vertex_id);
3380                return;
3381            }
3382        };
3383
3384        // Self-reference / name-cycle safety parity with set_cell_formula.
3385        if new_dependencies.contains(&vertex_id) {
3386            self.mark_as_ref_error(vertex_id);
3387            return;
3388        }
3389
3390        for &name_vertex in &named_dependencies {
3391            let mut visited = FxHashSet::default();
3392            if self.name_depends_on_vertex(name_vertex, vertex_id, &mut visited) {
3393                self.mark_as_ref_error(vertex_id);
3394                return;
3395            }
3396        }
3397
3398        // Formula is now recoverable again.
3399        self.ref_error_vertices.remove(&vertex_id);
3400        self.vertex_values.remove(&vertex_id);
3401
3402        if !named_dependencies.is_empty() {
3403            self.attach_vertex_to_names(vertex_id, &named_dependencies);
3404        }
3405        for unresolved_name in &unresolved_names {
3406            self.record_pending_name_reference(sheet_id, unresolved_name, vertex_id);
3407        }
3408
3409        self.add_dependent_edges(vertex_id, &new_dependencies);
3410        self.add_range_dependent_edges(vertex_id, &new_range_dependencies, sheet_id);
3411        let _ = self.mark_dirty(vertex_id);
3412    }
3413}
3414
3415// ========== Sheet Management Operations ==========