formualizer_eval/engine/graph/
mod.rs

1use crate::SheetId;
2use crate::engine::named_range::{NameScope, NamedDefinition, NamedRange};
3use crate::engine::sheet_registry::SheetRegistry;
4use formualizer_common::{ExcelError, ExcelErrorKind, LiteralValue};
5use formualizer_parse::parser::{ASTNode, ReferenceType};
6use rustc_hash::{FxHashMap, FxHashSet};
7
8#[cfg(test)]
9#[derive(Debug, Default, Clone)]
10pub struct GraphInstrumentation {
11    pub edges_added: u64,
12    pub stripe_inserts: u64,
13    pub stripe_removes: u64,
14}
15
16mod ast_utils;
17pub mod editor;
18mod formula_analysis;
19mod names;
20mod range_deps;
21mod sheets;
22pub mod snapshot;
23
24use super::arena::{AstNodeId, DataStore, ValueRef};
25use super::delta_edges::CsrMutableEdges;
26use super::sheet_index::SheetIndex;
27use super::vertex::{VertexId, VertexKind};
28use super::vertex_store::{FIRST_NORMAL_VERTEX, VertexStore};
29use crate::engine::topo::{
30    GraphAdapter,
31    pk::{DynamicTopo, PkConfig},
32};
33use crate::reference::{CellRef, Coord, SharedRangeRef, SharedRef, SharedSheetLocator};
34use formualizer_common::Coord as AbsCoord;
35// topo::pk wiring will be integrated behind config.use_dynamic_topo in a follow-up step
36
37pub use editor::change_log::{ChangeEvent, ChangeLog};
38
39// ChangeEvent is now imported from change_log module
40
41/// đź”® Scalability Hook: Dependency reference types for range compression
42#[derive(Debug, Clone, PartialEq, Eq, Hash)]
43pub enum DependencyRef {
44    /// A specific cell dependency
45    Cell(VertexId),
46    /// A dependency on a finite, rectangular range
47    Range {
48        sheet: String,
49        start_row: u32,
50        start_col: u32,
51        end_row: u32, // Inclusive
52        end_col: u32, // Inclusive
53    },
54    /// A whole column dependency (A:A) - future range compression
55    WholeColumn { sheet: String, col: u32 },
56    /// A whole row dependency (1:1) - future range compression  
57    WholeRow { sheet: String, row: u32 },
58}
59
60/// A key representing a coarse-grained section of a sheet
61#[derive(Debug, Clone, Hash, PartialEq, Eq)]
62pub struct StripeKey {
63    pub sheet_id: SheetId,
64    pub stripe_type: StripeType,
65    pub index: u32, // The index of the row, column, or block stripe
66}
67
68#[derive(Debug, Clone, Hash, PartialEq, Eq)]
69pub enum StripeType {
70    Row,
71    Column,
72    Block, // For dense, square-like ranges
73}
74
75/// Block stripe indexing mathematics
76const BLOCK_H: u32 = 256;
77const BLOCK_W: u32 = 256;
78
79pub fn block_index(row: u32, col: u32) -> u32 {
80    (row / BLOCK_H) << 16 | (col / BLOCK_W)
81}
82
83/// A summary of the results of a mutating operation on the graph.
84/// This serves as a "changelog" to the application layer.
85#[derive(Debug, Clone)]
86pub struct OperationSummary {
87    /// Vertices whose values have been directly or indirectly affected.
88    pub affected_vertices: Vec<VertexId>,
89    /// Placeholder cells that were newly created to satisfy dependencies.
90    pub created_placeholders: Vec<CellRef>,
91}
92
93/// SoA-based dependency graph implementation
94#[derive(Debug)]
95pub struct DependencyGraph {
96    // Core columnar storage
97    store: VertexStore,
98
99    // Edge storage with delta slab
100    edges: CsrMutableEdges,
101
102    // Arena-based value and formula storage
103    data_store: DataStore,
104    vertex_values: FxHashMap<VertexId, ValueRef>,
105    vertex_formulas: FxHashMap<VertexId, AstNodeId>,
106
107    // Address mappings using fast hashing
108    cell_to_vertex: FxHashMap<CellRef, VertexId>,
109
110    // Scheduling state - using HashSet for O(1) operations
111    dirty_vertices: FxHashSet<VertexId>,
112    volatile_vertices: FxHashSet<VertexId>,
113
114    // NEW: Specialized managers for range dependencies (Hybrid Model)
115    /// Maps a formula vertex to the ranges it depends on.
116    formula_to_range_deps: FxHashMap<VertexId, Vec<SharedRangeRef<'static>>>,
117
118    /// Maps a stripe to formulas that depend on it via a compressed range.
119    /// CRITICAL: VertexIds are deduplicated within each stripe to avoid quadratic blow-ups.
120    stripe_to_dependents: FxHashMap<StripeKey, FxHashSet<VertexId>>,
121
122    // Sheet-level sparse indexes for O(log n + k) range queries
123    /// Maps sheet_id to its interval tree index for efficient row/column operations
124    sheet_indexes: FxHashMap<SheetId, SheetIndex>,
125
126    // Sheet name/ID mapping
127    sheet_reg: SheetRegistry,
128    default_sheet_id: SheetId,
129
130    // Named ranges support
131    /// Workbook-scoped named ranges
132    named_ranges: FxHashMap<String, NamedRange>,
133
134    /// Sheet-scoped named ranges  
135    sheet_named_ranges: FxHashMap<(SheetId, String), NamedRange>,
136
137    /// Reverse mapping: vertex -> names it uses (by vertex id)
138    vertex_to_names: FxHashMap<VertexId, Vec<VertexId>>,
139
140    /// Lookup for name vertex -> (scope, name) to avoid map scans
141    name_vertex_lookup: FxHashMap<VertexId, (NameScope, String)>,
142
143    /// Pending formula vertices referencing names not yet defined
144    pending_name_links: FxHashMap<String, Vec<(SheetId, VertexId)>>,
145
146    /// Monotonic counter to assign synthetic coordinates to name vertices
147    name_vertex_seq: u32,
148
149    /// Mapping from cell vertices to named range vertices that depend on them
150    cell_to_name_dependents: FxHashMap<VertexId, FxHashSet<VertexId>>,
151    /// Cached list of cell dependencies per named range vertex (for teardown)
152    name_to_cell_dependencies: FxHashMap<VertexId, Vec<VertexId>>,
153
154    // Evaluation configuration
155    config: super::EvalConfig,
156
157    // Dynamic topology orderer (Pearce–Kelly) maintained alongside edges when enabled
158    pk_order: Option<DynamicTopo<VertexId>>,
159
160    // Spill registry: anchor -> cells, and reverse mapping for blockers
161    spill_anchor_to_cells: FxHashMap<VertexId, Vec<CellRef>>,
162    spill_cell_to_anchor: FxHashMap<CellRef, VertexId>,
163
164    // Hint: during initial bulk load, many cells are guaranteed new; allow skipping existence checks per-sheet
165    first_load_assume_new: bool,
166    ensure_touched_sheets: FxHashSet<SheetId>,
167
168    #[cfg(test)]
169    instr: GraphInstrumentation,
170}
171
172impl Default for DependencyGraph {
173    fn default() -> Self {
174        Self::new()
175    }
176}
177
178impl DependencyGraph {
179    /// Expose range expansion limit for planners
180    pub fn range_expansion_limit(&self) -> usize {
181        self.config.range_expansion_limit
182    }
183
184    pub fn get_config(&self) -> &super::EvalConfig {
185        &self.config
186    }
187
188    /// Build a dependency plan for a set of formulas on sheets
189    pub fn plan_dependencies<'a, I>(
190        &mut self,
191        items: I,
192        policy: &formualizer_parse::parser::CollectPolicy,
193        volatile: Option<&[bool]>,
194    ) -> Result<crate::engine::plan::DependencyPlan, formualizer_common::ExcelError>
195    where
196        I: IntoIterator<Item = (&'a str, u32, u32, &'a formualizer_parse::parser::ASTNode)>,
197    {
198        crate::engine::plan::build_dependency_plan(
199            &mut self.sheet_reg,
200            items.into_iter(),
201            policy,
202            volatile,
203        )
204    }
205
206    /// Ensure vertices exist for given coords; allocate missing in contiguous batches and add to edges/index.
207    /// Returns a list suitable for edges.add_vertices_batch.
208    pub fn ensure_vertices_batch(
209        &mut self,
210        coords: &[(SheetId, AbsCoord)],
211    ) -> Vec<(AbsCoord, u32)> {
212        use rustc_hash::FxHashMap;
213        let mut grouped: FxHashMap<SheetId, Vec<AbsCoord>> = FxHashMap::default();
214        for (sid, pc) in coords.iter().copied() {
215            let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
216            if self.cell_to_vertex.contains_key(&addr) {
217                continue;
218            }
219            grouped.entry(sid).or_default().push(pc);
220        }
221        let mut add_batch: Vec<(AbsCoord, u32)> = Vec::new();
222        for (sid, pcs) in grouped {
223            if pcs.is_empty() {
224                continue;
225            }
226            // Mark sheet as touched by ensure to disable fast-path new allocations for its values
227            self.ensure_touched_sheets.insert(sid);
228            let vids = self.store.allocate_contiguous(sid, &pcs, 0x00);
229            for (i, pc) in pcs.iter().enumerate() {
230                let vid = vids[i];
231                add_batch.push((*pc, vid.0));
232                let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
233                self.cell_to_vertex.insert(addr, vid);
234                // Respect sheet index mode: skip index updates in Lazy mode during bulk ensure
235                match self.config.sheet_index_mode {
236                    crate::engine::SheetIndexMode::Eager
237                    | crate::engine::SheetIndexMode::FastBatch => {
238                        self.sheet_index_mut(sid).add_vertex(*pc, vid);
239                    }
240                    crate::engine::SheetIndexMode::Lazy => {
241                        // defer index build
242                    }
243                }
244            }
245        }
246        if !add_batch.is_empty() {
247            self.edges.add_vertices_batch(&add_batch);
248        }
249        add_batch
250    }
251
252    /// Enable/disable the first-load fast path for value inserts.
253    pub fn set_first_load_assume_new(&mut self, enabled: bool) {
254        self.first_load_assume_new = enabled;
255    }
256
257    /// Reset the per-sheet ensure touch tracking.
258    pub fn reset_ensure_touched(&mut self) {
259        self.ensure_touched_sheets.clear();
260    }
261
262    /// Store ASTs in batch and return their arena ids
263    pub fn store_asts_batch<'a, I>(&mut self, asts: I) -> Vec<AstNodeId>
264    where
265        I: IntoIterator<Item = &'a formualizer_parse::parser::ASTNode>,
266    {
267        self.data_store.store_asts_batch(asts, &self.sheet_reg)
268    }
269
270    /// Lookup VertexId for a (SheetId, AbsCoord)
271    pub fn vid_for_sid_pc(&self, sid: SheetId, pc: AbsCoord) -> Option<VertexId> {
272        let addr = CellRef::new(sid, Coord::new(pc.row(), pc.col(), true, true));
273        self.cell_to_vertex.get(&addr).copied()
274    }
275
276    /// Helper to map a global cell index in a plan to a VertexId
277    pub fn vid_for_plan_idx(
278        &self,
279        plan: &crate::engine::plan::DependencyPlan,
280        idx: u32,
281    ) -> Option<VertexId> {
282        let (sid, pc) = plan.global_cells.get(idx as usize).copied()?;
283        self.vid_for_sid_pc(sid, pc)
284    }
285
286    /// Assign a formula to an existing vertex, removing prior edges and setting flags
287    pub fn assign_formula_vertex(&mut self, vid: VertexId, ast_id: AstNodeId, volatile: bool) {
288        if self.vertex_formulas.contains_key(&vid) {
289            self.remove_dependent_edges(vid);
290        }
291        self.store
292            .set_kind(vid, crate::engine::vertex::VertexKind::FormulaScalar);
293        self.vertex_values.remove(&vid);
294        self.vertex_formulas.insert(vid, ast_id);
295        self.mark_volatile(vid, volatile);
296        // schedule evaluation
297        self.mark_vertex_dirty(vid);
298    }
299
300    /// Public wrapper for adding edges without beginning a batch (caller manages batch)
301    pub fn add_edges_nobatch(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
302        self.add_dependent_edges_nobatch(dependent, dependencies);
303    }
304
305    /// Iterate all normal vertex ids
306    pub fn iter_vertex_ids(&self) -> impl Iterator<Item = VertexId> + '_ {
307        self.store.all_vertices()
308    }
309
310    /// Get current AbsCoord for a vertex
311    pub fn vertex_coord(&self, vid: VertexId) -> AbsCoord {
312        self.store.coord(vid)
313    }
314
315    /// Total number of allocated vertices (including deleted)
316    pub fn vertex_count(&self) -> usize {
317        self.store.len()
318    }
319
320    /// Replace CSR edges in one shot from adjacency and coords
321    pub fn build_edges_from_adjacency(
322        &mut self,
323        adjacency: Vec<(u32, Vec<u32>)>,
324        coords: Vec<AbsCoord>,
325        vertex_ids: Vec<u32>,
326    ) {
327        self.edges
328            .build_from_adjacency(adjacency, coords, vertex_ids);
329    }
330    /// Compute min/max used row among vertices within [start_col..=end_col] on a sheet.
331    pub fn used_row_bounds_for_columns(
332        &self,
333        sheet_id: SheetId,
334        start_col: u32,
335        end_col: u32,
336    ) -> Option<(u32, u32)> {
337        // Prefer sheet index when available
338        if let Some(index) = self.sheet_indexes.get(&sheet_id)
339            && !index.is_empty()
340        {
341            let mut min_r: Option<u32> = None;
342            let mut max_r: Option<u32> = None;
343            for vid in index.vertices_in_col_range(start_col, end_col) {
344                let r = self.store.coord(vid).row();
345                min_r = Some(min_r.map(|m| m.min(r)).unwrap_or(r));
346                max_r = Some(max_r.map(|m| m.max(r)).unwrap_or(r));
347            }
348            return match (min_r, max_r) {
349                (Some(a), Some(b)) => Some((a, b)),
350                _ => None,
351            };
352        }
353        // Fallback: scan cell map for bounds on the fly
354        let mut min_r: Option<u32> = None;
355        let mut max_r: Option<u32> = None;
356        for cref in self.cell_to_vertex.keys() {
357            if cref.sheet_id == sheet_id {
358                let c = cref.coord.col();
359                if c >= start_col && c <= end_col {
360                    let r = cref.coord.row();
361                    min_r = Some(min_r.map(|m| m.min(r)).unwrap_or(r));
362                    max_r = Some(max_r.map(|m| m.max(r)).unwrap_or(r));
363                }
364            }
365        }
366        match (min_r, max_r) {
367            (Some(a), Some(b)) => Some((a, b)),
368            _ => None,
369        }
370    }
371
372    /// Build (or rebuild) the sheet index for a given sheet if running in Lazy mode.
373    pub fn finalize_sheet_index(&mut self, sheet: &str) {
374        let Some(sheet_id) = self.sheet_reg.get_id(sheet) else {
375            return;
376        };
377        // If already present and non-empty, skip
378        if let Some(idx) = self.sheet_indexes.get(&sheet_id)
379            && !idx.is_empty()
380        {
381            return;
382        }
383        let mut idx = SheetIndex::new();
384        // Collect coords for this sheet
385        let mut batch: Vec<(AbsCoord, VertexId)> = Vec::with_capacity(self.cell_to_vertex.len());
386        for (cref, vid) in &self.cell_to_vertex {
387            if cref.sheet_id == sheet_id {
388                batch.push((AbsCoord::new(cref.coord.row(), cref.coord.col()), *vid));
389            }
390        }
391        // Use batch builder
392        idx.add_vertices_batch(&batch);
393        self.sheet_indexes.insert(sheet_id, idx);
394    }
395
396    pub fn set_sheet_index_mode(&mut self, mode: crate::engine::SheetIndexMode) {
397        self.config.sheet_index_mode = mode;
398    }
399
400    /// Compute min/max used column among vertices within [start_row..=end_row] on a sheet.
401    pub fn used_col_bounds_for_rows(
402        &self,
403        sheet_id: SheetId,
404        start_row: u32,
405        end_row: u32,
406    ) -> Option<(u32, u32)> {
407        if let Some(index) = self.sheet_indexes.get(&sheet_id)
408            && !index.is_empty()
409        {
410            let mut min_c: Option<u32> = None;
411            let mut max_c: Option<u32> = None;
412            for vid in index.vertices_in_row_range(start_row, end_row) {
413                let c = self.store.coord(vid).col();
414                min_c = Some(min_c.map(|m| m.min(c)).unwrap_or(c));
415                max_c = Some(max_c.map(|m| m.max(c)).unwrap_or(c));
416            }
417            return match (min_c, max_c) {
418                (Some(a), Some(b)) => Some((a, b)),
419                _ => None,
420            };
421        }
422        // Fallback: scan cell map on the fly
423        let mut min_c: Option<u32> = None;
424        let mut max_c: Option<u32> = None;
425        for cref in self.cell_to_vertex.keys() {
426            if cref.sheet_id == sheet_id {
427                let r = cref.coord.row();
428                if r >= start_row && r <= end_row {
429                    let c = cref.coord.col();
430                    min_c = Some(min_c.map(|m| m.min(c)).unwrap_or(c));
431                    max_c = Some(max_c.map(|m| m.max(c)).unwrap_or(c));
432                }
433            }
434        }
435        match (min_c, max_c) {
436            (Some(a), Some(b)) => Some((a, b)),
437            _ => None,
438        }
439    }
440
441    /// Returns true if the given sheet currently contains any formula vertices.
442    pub fn sheet_has_formulas(&self, sheet_id: SheetId) -> bool {
443        // Check vertex_formulas keys; they represent formula vertices
444        for &vid in self.vertex_formulas.keys() {
445            if self.store.sheet_id(vid) == sheet_id {
446                return true;
447            }
448        }
449        false
450    }
451    pub fn new() -> Self {
452        Self::new_with_config(super::EvalConfig::default())
453    }
454
455    pub fn new_with_config(config: super::EvalConfig) -> Self {
456        let mut sheet_reg = SheetRegistry::new();
457        let default_sheet_id = sheet_reg.id_for(&config.default_sheet_name);
458
459        let mut g = Self {
460            store: VertexStore::new(),
461            edges: CsrMutableEdges::new(),
462            data_store: DataStore::new(),
463            vertex_values: FxHashMap::default(),
464            vertex_formulas: FxHashMap::default(),
465            cell_to_vertex: FxHashMap::default(),
466            dirty_vertices: FxHashSet::default(),
467            volatile_vertices: FxHashSet::default(),
468            formula_to_range_deps: FxHashMap::default(),
469            stripe_to_dependents: FxHashMap::default(),
470            sheet_indexes: FxHashMap::default(),
471            sheet_reg,
472            default_sheet_id,
473            named_ranges: FxHashMap::default(),
474            sheet_named_ranges: FxHashMap::default(),
475            vertex_to_names: FxHashMap::default(),
476            name_vertex_lookup: FxHashMap::default(),
477            pending_name_links: FxHashMap::default(),
478            name_vertex_seq: 0,
479            cell_to_name_dependents: FxHashMap::default(),
480            name_to_cell_dependencies: FxHashMap::default(),
481            config: config.clone(),
482            pk_order: None,
483            spill_anchor_to_cells: FxHashMap::default(),
484            spill_cell_to_anchor: FxHashMap::default(),
485            first_load_assume_new: false,
486            ensure_touched_sheets: FxHashSet::default(),
487            #[cfg(test)]
488            instr: GraphInstrumentation::default(),
489        };
490
491        if config.use_dynamic_topo {
492            // Seed with currently active vertices (likely empty at startup)
493            let nodes = g
494                .store
495                .all_vertices()
496                .filter(|&id| g.store.vertex_exists_active(id));
497            let mut pk = DynamicTopo::new(
498                nodes,
499                PkConfig {
500                    visit_budget: config.pk_visit_budget,
501                    compaction_interval_ops: config.pk_compaction_interval_ops,
502                },
503            );
504            // Build an initial order using current graph
505            let adapter = GraphAdapter { g: &g };
506            pk.rebuild_full(&adapter);
507            g.pk_order = Some(pk);
508        }
509
510        g
511    }
512
513    /// When dynamic topology is enabled, compute layers for a subset using PK ordering.
514    pub(crate) fn pk_layers_for(&self, subset: &[VertexId]) -> Option<Vec<crate::engine::Layer>> {
515        let pk = self.pk_order.as_ref()?;
516        let adapter = crate::engine::topo::GraphAdapter { g: self };
517        let layers = pk.layers_for(&adapter, subset, self.config.max_layer_width);
518        Some(
519            layers
520                .into_iter()
521                .map(|vs| crate::engine::Layer { vertices: vs })
522                .collect(),
523        )
524    }
525
526    #[inline]
527    pub(crate) fn dynamic_topo_enabled(&self) -> bool {
528        self.pk_order.is_some()
529    }
530
531    #[cfg(test)]
532    pub fn reset_instr(&mut self) {
533        self.instr = GraphInstrumentation::default();
534    }
535
536    #[cfg(test)]
537    pub fn instr(&self) -> GraphInstrumentation {
538        self.instr.clone()
539    }
540
541    /// Begin batch operations - defer CSR rebuilds until end_batch() is called
542    pub fn begin_batch(&mut self) {
543        self.edges.begin_batch();
544    }
545
546    /// End batch operations and trigger CSR rebuild if needed
547    pub fn end_batch(&mut self) {
548        self.edges.end_batch();
549    }
550
551    pub fn default_sheet_id(&self) -> SheetId {
552        self.default_sheet_id
553    }
554
555    pub fn default_sheet_name(&self) -> &str {
556        self.sheet_reg.name(self.default_sheet_id)
557    }
558
559    pub fn set_default_sheet_by_name(&mut self, name: &str) {
560        self.default_sheet_id = self.sheet_id_mut(name);
561    }
562
563    pub fn set_default_sheet_by_id(&mut self, id: SheetId) {
564        self.default_sheet_id = id;
565    }
566
567    /// Returns the ID for a sheet name, creating one if it doesn't exist.
568    pub fn sheet_id_mut(&mut self, name: &str) -> SheetId {
569        self.sheet_reg.id_for(name)
570    }
571
572    pub fn sheet_id(&self, name: &str) -> Option<SheetId> {
573        self.sheet_reg.get_id(name)
574    }
575
576    /// Resolve a sheet name to an existing ID or return a #REF! error.
577    fn resolve_existing_sheet_id(&self, name: &str) -> Result<SheetId, ExcelError> {
578        self.sheet_id(name).ok_or_else(|| {
579            ExcelError::new(ExcelErrorKind::Ref).with_message(format!("Sheet not found: {name}"))
580        })
581    }
582
583    /// Returns the name of a sheet given its ID.
584    pub fn sheet_name(&self, id: SheetId) -> &str {
585        self.sheet_reg.name(id)
586    }
587
588    /// Access the sheet registry (read-only) for external bindings
589    pub fn sheet_reg(&self) -> &SheetRegistry {
590        &self.sheet_reg
591    }
592
593    pub(crate) fn data_store(&self) -> &DataStore {
594        &self.data_store
595    }
596
597    /// Converts a `CellRef` to a fully qualified A1-style string (e.g., "SheetName!A1").
598    pub fn to_a1(&self, cell_ref: CellRef) -> String {
599        format!("{}!{}", self.sheet_name(cell_ref.sheet_id), cell_ref.coord)
600    }
601
602    pub(crate) fn vertex_len(&self) -> usize {
603        self.store.len()
604    }
605
606    /// Get mutable access to a sheet's index, creating it if it doesn't exist
607    /// This is the primary way VertexEditor and internal operations access the index
608    pub fn sheet_index_mut(&mut self, sheet_id: SheetId) -> &mut SheetIndex {
609        self.sheet_indexes.entry(sheet_id).or_default()
610    }
611
612    /// Get immutable access to a sheet's index, returns None if not initialized
613    pub fn sheet_index(&self, sheet_id: SheetId) -> Option<&SheetIndex> {
614        self.sheet_indexes.get(&sheet_id)
615    }
616
617    /// Set a value in a cell, returns affected vertex IDs
618    pub fn set_cell_value(
619        &mut self,
620        sheet: &str,
621        row: u32,
622        col: u32,
623        value: LiteralValue,
624    ) -> Result<OperationSummary, ExcelError> {
625        let sheet_id = self.sheet_id_mut(sheet);
626        // External API is 1-based; store 0-based coords internally.
627        let coord = Coord::from_excel(row, col, true, true);
628        let addr = CellRef::new(sheet_id, coord);
629        let mut created_placeholders = Vec::new();
630
631        let vertex_id = if let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
632            // Check if it was a formula and remove dependencies
633            let is_formula = matches!(
634                self.store.kind(existing_id),
635                VertexKind::FormulaScalar | VertexKind::FormulaArray
636            );
637
638            if is_formula {
639                self.remove_dependent_edges(existing_id);
640                self.vertex_formulas.remove(&existing_id);
641            }
642
643            // Update to value kind
644            self.store.set_kind(existing_id, VertexKind::Cell);
645            let value_ref = self.data_store.store_value(value);
646            self.vertex_values.insert(existing_id, value_ref);
647            existing_id
648        } else {
649            // Create new vertex
650            created_placeholders.push(addr);
651            let packed_coord = AbsCoord::from_excel(row, col);
652            let vertex_id = self.store.allocate(packed_coord, sheet_id, 0x01); // dirty flag
653
654            // Add vertex coordinate for CSR
655            self.edges.add_vertex(packed_coord, vertex_id.0);
656
657            // Add to sheet index for O(log n + k) range queries
658            self.sheet_index_mut(sheet_id)
659                .add_vertex(packed_coord, vertex_id);
660
661            self.store.set_kind(vertex_id, VertexKind::Cell);
662            let value_ref = self.data_store.store_value(value);
663            self.vertex_values.insert(vertex_id, value_ref);
664            self.cell_to_vertex.insert(addr, vertex_id);
665            vertex_id
666        };
667
668        Ok(OperationSummary {
669            affected_vertices: self.mark_dirty(vertex_id),
670            created_placeholders,
671        })
672    }
673
674    /// Reserve capacity hints for upcoming bulk cell inserts (values only for now).
675    pub fn reserve_cells(&mut self, additional: usize) {
676        self.store.reserve(additional);
677        self.vertex_values.reserve(additional);
678        self.cell_to_vertex.reserve(additional);
679        // sheet_indexes: cannot easily reserve per-sheet without distribution; skip.
680    }
681
682    /// Fast path for initial bulk load of value cells: avoids dirty propagation & dependency work.
683    pub fn set_cell_value_bulk_untracked(
684        &mut self,
685        sheet: &str,
686        row: u32,
687        col: u32,
688        value: LiteralValue,
689    ) {
690        let sheet_id = self.sheet_id_mut(sheet);
691        let coord = Coord::from_excel(row, col, true, true);
692        let addr = CellRef::new(sheet_id, coord);
693        if let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
694            // Overwrite existing value vertex only (ignore formulas in bulk path)
695            let value_ref = self.data_store.store_value(value);
696            self.vertex_values.insert(existing_id, value_ref);
697            self.store.set_kind(existing_id, VertexKind::Cell);
698            return;
699        }
700        let packed_coord = AbsCoord::from_excel(row, col);
701        let vertex_id = self.store.allocate(packed_coord, sheet_id, 0x00); // not dirty
702        self.edges.add_vertex(packed_coord, vertex_id.0);
703        self.sheet_index_mut(sheet_id)
704            .add_vertex(packed_coord, vertex_id);
705        self.store.set_kind(vertex_id, VertexKind::Cell);
706        let value_ref = self.data_store.store_value(value);
707        self.vertex_values.insert(vertex_id, value_ref);
708        self.cell_to_vertex.insert(addr, vertex_id);
709    }
710
711    /// Bulk insert a collection of plain value cells (no formulas) more efficiently.
712    pub fn bulk_insert_values<I>(&mut self, sheet: &str, cells: I)
713    where
714        I: IntoIterator<Item = (u32, u32, LiteralValue)>,
715    {
716        use web_time::Instant;
717        let t0 = Instant::now();
718        // Collect first to know size
719        let collected: Vec<(u32, u32, LiteralValue)> = cells.into_iter().collect();
720        if collected.is_empty() {
721            return;
722        }
723        let sheet_id = self.sheet_id_mut(sheet);
724        self.reserve_cells(collected.len());
725        let t_reserve = Instant::now();
726        let mut new_vertices: Vec<(AbsCoord, u32)> = Vec::with_capacity(collected.len());
727        let mut index_items: Vec<(AbsCoord, VertexId)> = Vec::with_capacity(collected.len());
728        // For new allocations, accumulate values and assign after a single batch store
729        let mut new_value_coords: Vec<(AbsCoord, VertexId)> = Vec::with_capacity(collected.len());
730        let mut new_value_literals: Vec<LiteralValue> = Vec::with_capacity(collected.len());
731        // Detect fast path: during initial ingest, caller may guarantee most cells are new.
732        let assume_new = self.first_load_assume_new
733            && self
734                .sheet_id(sheet)
735                .map(|sid| !self.ensure_touched_sheets.contains(&sid))
736                .unwrap_or(false);
737
738        for (row, col, value) in collected {
739            let coord = Coord::from_excel(row, col, true, true);
740            let addr = CellRef::new(sheet_id, coord);
741            if !assume_new && let Some(&existing_id) = self.cell_to_vertex.get(&addr) {
742                let value_ref = self.data_store.store_value(value);
743                self.vertex_values.insert(existing_id, value_ref);
744                self.store.set_kind(existing_id, VertexKind::Cell);
745                continue;
746            }
747            let packed = AbsCoord::from_excel(row, col);
748            let vertex_id = self.store.allocate(packed, sheet_id, 0x00);
749            self.store.set_kind(vertex_id, VertexKind::Cell);
750            // Defer value arena storage to a single batch
751            new_value_coords.push((packed, vertex_id));
752            new_value_literals.push(value);
753            self.cell_to_vertex.insert(addr, vertex_id);
754            new_vertices.push((packed, vertex_id.0));
755            index_items.push((packed, vertex_id));
756        }
757        // Perform a single batch store for newly allocated values
758        if !new_value_literals.is_empty() {
759            let vrefs = self.data_store.store_values_batch(new_value_literals);
760            debug_assert_eq!(vrefs.len(), new_value_coords.len());
761            for (i, (_pc, vid)) in new_value_coords.iter().enumerate() {
762                self.vertex_values.insert(*vid, vrefs[i]);
763            }
764        }
765        let t_after_alloc = Instant::now();
766        if !new_vertices.is_empty() {
767            let t_edges_start = Instant::now();
768            self.edges.add_vertices_batch(&new_vertices);
769            let t_edges_done = Instant::now();
770
771            match self.config.sheet_index_mode {
772                crate::engine::SheetIndexMode::Eager => {
773                    self.sheet_index_mut(sheet_id)
774                        .add_vertices_batch(&index_items);
775                }
776                crate::engine::SheetIndexMode::Lazy => {
777                    // Skip building index now; will be built on-demand
778                }
779                crate::engine::SheetIndexMode::FastBatch => {
780                    // FastBatch for now delegates to same batch insert (future: build from sorted arrays)
781                    self.sheet_index_mut(sheet_id)
782                        .add_vertices_batch(&index_items);
783                }
784            }
785            let t_index_done = Instant::now();
786        }
787    }
788
789    /// Set a formula in a cell, returns affected vertex IDs
790    pub fn set_cell_formula(
791        &mut self,
792        sheet: &str,
793        row: u32,
794        col: u32,
795        ast: ASTNode,
796    ) -> Result<OperationSummary, ExcelError> {
797        let volatile = self.is_ast_volatile(&ast);
798        self.set_cell_formula_with_volatility(sheet, row, col, ast, volatile)
799    }
800
801    /// Set a formula in a cell with a known volatility flag (context-scoped detection upstream)
802    pub fn set_cell_formula_with_volatility(
803        &mut self,
804        sheet: &str,
805        row: u32,
806        col: u32,
807        ast: ASTNode,
808        volatile: bool,
809    ) -> Result<OperationSummary, ExcelError> {
810        let dbg = std::env::var("FZ_DEBUG_LOAD")
811            .ok()
812            .is_some_and(|v| v != "0");
813        let dep_ms_thresh: u128 = std::env::var("FZ_DEBUG_DEP_MS")
814            .ok()
815            .and_then(|s| s.parse().ok())
816            .unwrap_or(0);
817        let sample_n: usize = std::env::var("FZ_DEBUG_SAMPLE_N")
818            .ok()
819            .and_then(|s| s.parse().ok())
820            .unwrap_or(0);
821        let t0 = if dbg {
822            Some(web_time::Instant::now())
823        } else {
824            None
825        };
826        let sheet_id = self.sheet_id_mut(sheet);
827        let coord = Coord::from_excel(row, col, true, true);
828        let addr = CellRef::new(sheet_id, coord);
829
830        // Extract dependencies from AST, creating placeholders if needed
831        let t_dep0 = if dbg {
832            Some(web_time::Instant::now())
833        } else {
834            None
835        };
836        let (
837            new_dependencies,
838            new_range_dependencies,
839            mut created_placeholders,
840            named_dependencies,
841        ) = self.extract_dependencies(&ast, sheet_id)?;
842        if let (true, Some(t)) = (dbg, t_dep0) {
843            let elapsed = t.elapsed().as_millis();
844            // Only log if over threshold or sampled
845            let do_log = (dep_ms_thresh > 0 && elapsed >= dep_ms_thresh)
846                || (sample_n > 0 && (row as usize).is_multiple_of(sample_n));
847            if dep_ms_thresh == 0 && sample_n == 0 {
848                // default: very light sampling every 1000 rows
849                if row.is_multiple_of(1000) {
850                    eprintln!(
851                        "[fz][dep] {}!{} extracted: deps={}, ranges={}, placeholders={}, names={} in {} ms",
852                        self.sheet_name(sheet_id),
853                        crate::reference::Coord::from_excel(row, col, true, true),
854                        new_dependencies.len(),
855                        new_range_dependencies.len(),
856                        created_placeholders.len(),
857                        named_dependencies.len(),
858                        elapsed
859                    );
860                }
861            } else if do_log {
862                eprintln!(
863                    "[fz][dep] {}!{} extracted: deps={}, ranges={}, placeholders={}, names={} in {} ms",
864                    self.sheet_name(sheet_id),
865                    crate::reference::Coord::from_excel(row, col, true, true),
866                    new_dependencies.len(),
867                    new_range_dependencies.len(),
868                    created_placeholders.len(),
869                    named_dependencies.len(),
870                    elapsed
871                );
872            }
873        }
874
875        // Check for self-reference (immediate cycle detection)
876        let addr_vertex_id = self.get_or_create_vertex(&addr, &mut created_placeholders);
877
878        if new_dependencies.contains(&addr_vertex_id) {
879            return Err(ExcelError::new(ExcelErrorKind::Circ)
880                .with_message("Self-reference detected".to_string()));
881        }
882
883        for &name_vertex in &named_dependencies {
884            let mut visited = FxHashSet::default();
885            if self.name_depends_on_vertex(name_vertex, addr_vertex_id, &mut visited) {
886                return Err(ExcelError::new(ExcelErrorKind::Circ)
887                    .with_message("Circular reference through named range".to_string()));
888            }
889        }
890
891        // Remove old dependencies first
892        self.remove_dependent_edges(addr_vertex_id);
893        self.detach_vertex_from_names(addr_vertex_id);
894
895        // Update vertex properties
896        self.store
897            .set_kind(addr_vertex_id, VertexKind::FormulaScalar);
898        let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
899        self.vertex_formulas.insert(addr_vertex_id, ast_id);
900        self.store.set_dirty(addr_vertex_id, true);
901
902        // Clear any cached value since this is now a formula
903        self.vertex_values.remove(&addr_vertex_id);
904
905        self.mark_volatile(addr_vertex_id, volatile);
906
907        if !named_dependencies.is_empty() {
908            self.attach_vertex_to_names(addr_vertex_id, &named_dependencies);
909        }
910
911        if let (true, Some(t)) = (dbg, t0) {
912            let elapsed = t.elapsed().as_millis();
913            let log_set = dep_ms_thresh > 0 && elapsed >= dep_ms_thresh;
914            if log_set {
915                eprintln!(
916                    "[fz][set] {}!{} total {} ms",
917                    self.sheet_name(sheet_id),
918                    crate::reference::Coord::from_excel(row, col, true, true),
919                    elapsed
920                );
921            }
922        }
923
924        // Add new dependency edges
925        self.add_dependent_edges(addr_vertex_id, &new_dependencies);
926        self.add_range_dependent_edges(addr_vertex_id, &new_range_dependencies, sheet_id);
927
928        Ok(OperationSummary {
929            affected_vertices: self.mark_dirty(addr_vertex_id),
930            created_placeholders,
931        })
932    }
933
934    pub fn set_cell_value_ref(
935        &mut self,
936        cell: formualizer_common::SheetCellRef<'_>,
937        value: LiteralValue,
938    ) -> Result<OperationSummary, ExcelError> {
939        let owned = cell.into_owned();
940        let sheet_id = match owned.sheet {
941            formualizer_common::SheetLocator::Id(id) => id,
942            formualizer_common::SheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
943            formualizer_common::SheetLocator::Current => self.default_sheet_id,
944        };
945        let sheet_name = self.sheet_name(sheet_id).to_string();
946        self.set_cell_value(
947            &sheet_name,
948            owned.coord.row() + 1,
949            owned.coord.col() + 1,
950            value,
951        )
952    }
953
954    pub fn set_cell_formula_ref(
955        &mut self,
956        cell: formualizer_common::SheetCellRef<'_>,
957        ast: ASTNode,
958    ) -> Result<OperationSummary, ExcelError> {
959        let owned = cell.into_owned();
960        let sheet_id = match owned.sheet {
961            formualizer_common::SheetLocator::Id(id) => id,
962            formualizer_common::SheetLocator::Name(name) => self.sheet_id_mut(name.as_ref()),
963            formualizer_common::SheetLocator::Current => self.default_sheet_id,
964        };
965        let sheet_name = self.sheet_name(sheet_id).to_string();
966        self.set_cell_formula(
967            &sheet_name,
968            owned.coord.row() + 1,
969            owned.coord.col() + 1,
970            ast,
971        )
972    }
973
974    pub fn get_cell_value_ref(
975        &self,
976        cell: formualizer_common::SheetCellRef<'_>,
977    ) -> Option<LiteralValue> {
978        let owned = cell.into_owned();
979        let sheet_id = match owned.sheet {
980            formualizer_common::SheetLocator::Id(id) => id,
981            formualizer_common::SheetLocator::Name(name) => self.sheet_id(name.as_ref())?,
982            formualizer_common::SheetLocator::Current => self.default_sheet_id,
983        };
984        let sheet_name = self.sheet_name(sheet_id);
985        self.get_cell_value(sheet_name, owned.coord.row() + 1, owned.coord.col() + 1)
986    }
987
988    /// Get current value from a cell
989    pub fn get_cell_value(&self, sheet: &str, row: u32, col: u32) -> Option<LiteralValue> {
990        let sheet_id = self.sheet_reg.get_id(sheet)?;
991        let coord = Coord::from_excel(row, col, true, true);
992        let addr = CellRef::new(sheet_id, coord);
993
994        self.cell_to_vertex.get(&addr).and_then(|&vertex_id| {
995            // Check values hashmap (stores both cell values and formula results)
996            self.vertex_values
997                .get(&vertex_id)
998                .map(|&value_ref| self.data_store.retrieve_value(value_ref))
999        })
1000    }
1001
1002    /// Mark vertex dirty and propagate to dependents
1003    fn mark_dirty(&mut self, vertex_id: VertexId) -> Vec<VertexId> {
1004        let mut affected = FxHashSet::default();
1005        let mut to_visit = Vec::new();
1006        let mut visited_for_propagation = FxHashSet::default();
1007
1008        // Only mark the source vertex as dirty if it's a formula
1009        // Value cells don't get marked dirty themselves but are still affected
1010        let is_formula = matches!(
1011            self.store.kind(vertex_id),
1012            VertexKind::FormulaScalar
1013                | VertexKind::FormulaArray
1014                | VertexKind::NamedScalar
1015                | VertexKind::NamedArray
1016        );
1017
1018        if is_formula {
1019            to_visit.push(vertex_id);
1020        } else {
1021            // Value cells are affected (for tracking) but not marked dirty
1022            affected.insert(vertex_id);
1023        }
1024
1025        // Initial propagation from direct and range dependents
1026        {
1027            // Get dependents (vertices that depend on this vertex)
1028            let dependents = self.get_dependents(vertex_id);
1029            to_visit.extend(&dependents);
1030
1031            if let Some(name_set) = self.cell_to_name_dependents.get(&vertex_id) {
1032                for &name_vertex in name_set {
1033                    to_visit.push(name_vertex);
1034                }
1035            }
1036
1037            // Check range dependencies
1038            let view = self.store.view(vertex_id);
1039            let row = view.row();
1040            let col = view.col();
1041            let dirty_sheet_id = view.sheet_id();
1042
1043            // New stripe-based dependents lookup
1044            let mut potential_dependents = FxHashSet::default();
1045
1046            // 1. Column stripe lookup
1047            let column_key = StripeKey {
1048                sheet_id: dirty_sheet_id,
1049                stripe_type: StripeType::Column,
1050                index: col,
1051            };
1052            if let Some(dependents) = self.stripe_to_dependents.get(&column_key) {
1053                potential_dependents.extend(dependents);
1054            }
1055
1056            // 2. Row stripe lookup
1057            let row_key = StripeKey {
1058                sheet_id: dirty_sheet_id,
1059                stripe_type: StripeType::Row,
1060                index: row,
1061            };
1062            if let Some(dependents) = self.stripe_to_dependents.get(&row_key) {
1063                potential_dependents.extend(dependents);
1064            }
1065
1066            // 3. Block stripe lookup
1067            if self.config.enable_block_stripes {
1068                let block_key = StripeKey {
1069                    sheet_id: dirty_sheet_id,
1070                    stripe_type: StripeType::Block,
1071                    index: block_index(row, col),
1072                };
1073                if let Some(dependents) = self.stripe_to_dependents.get(&block_key) {
1074                    potential_dependents.extend(dependents);
1075                }
1076            }
1077
1078            // Precision check: ensure the dirtied cell is actually within the formula's range
1079            for &dep_id in &potential_dependents {
1080                if let Some(ranges) = self.formula_to_range_deps.get(&dep_id) {
1081                    for range in ranges {
1082                        let range_sheet_id = match range.sheet {
1083                            SharedSheetLocator::Id(id) => id,
1084                            _ => dirty_sheet_id,
1085                        };
1086                        if range_sheet_id != dirty_sheet_id {
1087                            continue;
1088                        }
1089                        let sr0 = range.start_row.map(|b| b.index).unwrap_or(0);
1090                        let er0 = range.end_row.map(|b| b.index).unwrap_or(u32::MAX);
1091                        let sc0 = range.start_col.map(|b| b.index).unwrap_or(0);
1092                        let ec0 = range.end_col.map(|b| b.index).unwrap_or(u32::MAX);
1093                        if row >= sr0 && row <= er0 && col >= sc0 && col <= ec0 {
1094                            to_visit.push(dep_id);
1095                            break;
1096                        }
1097                    }
1098                }
1099            }
1100        }
1101
1102        while let Some(id) = to_visit.pop() {
1103            if !visited_for_propagation.insert(id) {
1104                continue; // Already processed
1105            }
1106            affected.insert(id);
1107
1108            // Mark vertex as dirty
1109            self.store.set_dirty(id, true);
1110
1111            // Add direct dependents to visit list
1112            let dependents = self.get_dependents(id);
1113            to_visit.extend(&dependents);
1114        }
1115
1116        // Add to dirty set
1117        self.dirty_vertices.extend(&affected);
1118
1119        // Return as Vec for compatibility
1120        affected.into_iter().collect()
1121    }
1122
1123    /// Get all vertices that need evaluation
1124    pub fn get_evaluation_vertices(&self) -> Vec<VertexId> {
1125        let mut combined = FxHashSet::default();
1126        combined.extend(&self.dirty_vertices);
1127        combined.extend(&self.volatile_vertices);
1128
1129        let mut result: Vec<VertexId> = combined
1130            .into_iter()
1131            .filter(|&id| {
1132                // Only include formula vertices
1133                matches!(
1134                    self.store.kind(id),
1135                    VertexKind::FormulaScalar
1136                        | VertexKind::FormulaArray
1137                        | VertexKind::NamedScalar
1138                        | VertexKind::NamedArray
1139                )
1140            })
1141            .collect();
1142        result.sort_unstable();
1143        result
1144    }
1145
1146    /// Clear dirty flags after successful evaluation
1147    pub fn clear_dirty_flags(&mut self, vertices: &[VertexId]) {
1148        for &vertex_id in vertices {
1149            self.store.set_dirty(vertex_id, false);
1150            self.dirty_vertices.remove(&vertex_id);
1151        }
1152    }
1153
1154    /// đź”® Scalability Hook: Clear volatile vertices after evaluation cycle
1155    pub fn clear_volatile_flags(&mut self) {
1156        self.volatile_vertices.clear();
1157    }
1158
1159    /// Re-marks all volatile vertices as dirty for the next evaluation cycle.
1160    pub(crate) fn redirty_volatiles(&mut self) {
1161        let volatile_ids: Vec<VertexId> = self.volatile_vertices.iter().copied().collect();
1162        for id in volatile_ids {
1163            self.mark_dirty(id);
1164        }
1165    }
1166
1167    fn get_or_create_vertex(
1168        &mut self,
1169        addr: &CellRef,
1170        created_placeholders: &mut Vec<CellRef>,
1171    ) -> VertexId {
1172        if let Some(&vertex_id) = self.cell_to_vertex.get(addr) {
1173            return vertex_id;
1174        }
1175
1176        created_placeholders.push(*addr);
1177        let packed_coord = AbsCoord::new(addr.coord.row(), addr.coord.col());
1178        let vertex_id = self.store.allocate(packed_coord, addr.sheet_id, 0x00);
1179
1180        // Add vertex coordinate for CSR
1181        self.edges.add_vertex(packed_coord, vertex_id.0);
1182
1183        // Add to sheet index for O(log n + k) range queries
1184        self.sheet_index_mut(addr.sheet_id)
1185            .add_vertex(packed_coord, vertex_id);
1186
1187        self.store.set_kind(vertex_id, VertexKind::Empty);
1188        self.cell_to_vertex.insert(*addr, vertex_id);
1189        vertex_id
1190    }
1191
1192    fn add_dependent_edges(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
1193        // Batch to avoid repeated CSR rebuilds and keep reverse edges current
1194        self.edges.begin_batch();
1195
1196        // If PK enabled, update order using a short-lived adapter without holding &mut self
1197        // Track dependencies that should be skipped if rejecting cycle-creating edges
1198        let mut skip_deps: rustc_hash::FxHashSet<VertexId> = rustc_hash::FxHashSet::default();
1199        if self.pk_order.is_some()
1200            && let Some(mut pk) = self.pk_order.take()
1201        {
1202            pk.ensure_nodes(std::iter::once(dependent));
1203            pk.ensure_nodes(dependencies.iter().copied());
1204            {
1205                let adapter = GraphAdapter { g: self };
1206                for &dep_id in dependencies {
1207                    match pk.try_add_edge(&adapter, dep_id, dependent) {
1208                        Ok(_) => {}
1209                        Err(_cycle) => {
1210                            if self.config.pk_reject_cycle_edges {
1211                                skip_deps.insert(dep_id);
1212                            } else {
1213                                pk.rebuild_full(&adapter);
1214                            }
1215                        }
1216                    }
1217                }
1218            } // drop adapter
1219            self.pk_order = Some(pk);
1220        }
1221
1222        // Now mutate engine edges; if rejecting cycles, re-check and skip those that would create cycles
1223        for &dep_id in dependencies {
1224            if self.config.pk_reject_cycle_edges && skip_deps.contains(&dep_id) {
1225                continue;
1226            }
1227            self.edges.add_edge(dependent, dep_id);
1228            #[cfg(test)]
1229            {
1230                self.instr.edges_added += 1;
1231            }
1232        }
1233
1234        self.edges.end_batch();
1235    }
1236
1237    /// Like add_dependent_edges, but assumes caller is managing edges.begin_batch/end_batch
1238    fn add_dependent_edges_nobatch(&mut self, dependent: VertexId, dependencies: &[VertexId]) {
1239        // If PK enabled, update order using a short-lived adapter without holding &mut self
1240        let mut skip_deps: rustc_hash::FxHashSet<VertexId> = rustc_hash::FxHashSet::default();
1241        if self.pk_order.is_some()
1242            && let Some(mut pk) = self.pk_order.take()
1243        {
1244            pk.ensure_nodes(std::iter::once(dependent));
1245            pk.ensure_nodes(dependencies.iter().copied());
1246            {
1247                let adapter = GraphAdapter { g: self };
1248                for &dep_id in dependencies {
1249                    match pk.try_add_edge(&adapter, dep_id, dependent) {
1250                        Ok(_) => {}
1251                        Err(_cycle) => {
1252                            if self.config.pk_reject_cycle_edges {
1253                                skip_deps.insert(dep_id);
1254                            } else {
1255                                pk.rebuild_full(&adapter);
1256                            }
1257                        }
1258                    }
1259                }
1260            }
1261            self.pk_order = Some(pk);
1262        }
1263
1264        for &dep_id in dependencies {
1265            if self.config.pk_reject_cycle_edges && skip_deps.contains(&dep_id) {
1266                continue;
1267            }
1268            self.edges.add_edge(dependent, dep_id);
1269            #[cfg(test)]
1270            {
1271                self.instr.edges_added += 1;
1272            }
1273        }
1274    }
1275
1276    /// Bulk set formulas on a sheet using a single dependency plan and batched edge updates.
1277    pub fn bulk_set_formulas<I>(&mut self, sheet: &str, items: I) -> Result<usize, ExcelError>
1278    where
1279        I: IntoIterator<Item = (u32, u32, ASTNode)>,
1280    {
1281        let collected: Vec<(u32, u32, ASTNode)> = items.into_iter().collect();
1282        if collected.is_empty() {
1283            return Ok(0);
1284        }
1285        let vol_flags: Vec<bool> = collected
1286            .iter()
1287            .map(|(_, _, ast)| self.is_ast_volatile(ast))
1288            .collect();
1289        self.bulk_set_formulas_with_volatility(sheet, collected, vol_flags)
1290    }
1291
1292    pub fn bulk_set_formulas_with_volatility(
1293        &mut self,
1294        sheet: &str,
1295        collected: Vec<(u32, u32, ASTNode)>,
1296        vol_flags: Vec<bool>,
1297    ) -> Result<usize, ExcelError> {
1298        use formualizer_parse::parser::CollectPolicy;
1299        let sheet_id = self.sheet_id_mut(sheet);
1300
1301        if collected.is_empty() {
1302            return Ok(0);
1303        }
1304
1305        // 1) Build plan across all formulas (read-only, no graph mutation)
1306        let tiny_refs = collected.iter().map(|(r, c, ast)| (sheet, *r, *c, ast));
1307        let policy = CollectPolicy {
1308            expand_small_ranges: true,
1309            range_expansion_limit: self.config.range_expansion_limit,
1310            include_names: true,
1311        };
1312        let plan = crate::engine::plan::build_dependency_plan(
1313            &mut self.sheet_reg,
1314            tiny_refs,
1315            &policy,
1316            Some(&vol_flags),
1317        )?;
1318
1319        // 2) Ensure/create target vertices and referenced cells (placeholders) once
1320        let mut created_placeholders: Vec<CellRef> = Vec::new();
1321
1322        // Targets
1323        let mut target_vids: Vec<VertexId> = Vec::with_capacity(plan.formula_targets.len());
1324        for (sid, pc) in &plan.formula_targets {
1325            let addr = CellRef::new(*sid, Coord::new(pc.row(), pc.col(), true, true));
1326            let vid = if let Some(&existing) = self.cell_to_vertex.get(&addr) {
1327                existing
1328            } else {
1329                self.get_or_create_vertex(&addr, &mut created_placeholders)
1330            };
1331            target_vids.push(vid);
1332        }
1333
1334        // Global referenced cells
1335        let mut dep_vids: Vec<VertexId> = Vec::with_capacity(plan.global_cells.len());
1336        for (sid, pc) in &plan.global_cells {
1337            let addr = CellRef::new(*sid, Coord::new(pc.row(), pc.col(), true, true));
1338            let vid = if let Some(&existing) = self.cell_to_vertex.get(&addr) {
1339                existing
1340            } else {
1341                self.get_or_create_vertex(&addr, &mut created_placeholders)
1342            };
1343            dep_vids.push(vid);
1344        }
1345
1346        // 3) Store ASTs in batch and update kinds/flags/value map
1347        let ast_ids = self
1348            .data_store
1349            .store_asts_batch(collected.iter().map(|(_, _, ast)| ast), &self.sheet_reg);
1350
1351        for (i, &tvid) in target_vids.iter().enumerate() {
1352            // If this cell already had a formula, remove its edges once here
1353            if self.vertex_formulas.contains_key(&tvid) {
1354                self.remove_dependent_edges(tvid);
1355            }
1356            self.store.set_kind(tvid, VertexKind::FormulaScalar);
1357            self.store.set_dirty(tvid, true);
1358            self.vertex_values.remove(&tvid);
1359            self.vertex_formulas.insert(tvid, ast_ids[i]);
1360            self.mark_volatile(tvid, vol_flags.get(i).copied().unwrap_or(false));
1361        }
1362
1363        // 4) Add edges in one batch
1364        self.edges.begin_batch();
1365        for (i, tvid) in target_vids.iter().copied().enumerate() {
1366            // Map per-formula indices into dep_vids
1367            if let Some(indices) = plan.per_formula_cells.get(i) {
1368                let mut deps: Vec<VertexId> = Vec::with_capacity(indices.len());
1369                for &idx in indices {
1370                    if let Some(vid) = dep_vids.get(idx as usize) {
1371                        deps.push(*vid);
1372                    }
1373                }
1374                self.add_dependent_edges_nobatch(tvid, &deps);
1375            }
1376
1377            // Range deps from plan are already compact RangeKeys; register directly.
1378            if let Some(rks) = plan.per_formula_ranges.get(i) {
1379                self.add_range_deps_from_keys(tvid, rks, sheet_id);
1380            }
1381        }
1382        self.edges.end_batch();
1383
1384        Ok(collected.len())
1385    }
1386
1387    /// Public (crate) helper to add a single dependency edge (dependent -> dependency) used for restoration/undo.
1388    pub fn add_dependency_edge(&mut self, dependent: VertexId, dependency: VertexId) {
1389        if dependent == dependency {
1390            return;
1391        }
1392        // If PK enabled attempt to add maintaining ordering; fallback to rebuild if cycle
1393        if self.pk_order.is_some()
1394            && let Some(mut pk) = self.pk_order.take()
1395        {
1396            pk.ensure_nodes(std::iter::once(dependent));
1397            pk.ensure_nodes(std::iter::once(dependency));
1398            let adapter = GraphAdapter { g: self };
1399            if pk.try_add_edge(&adapter, dependency, dependent).is_err() {
1400                // Cycle: rebuild full (conservative)
1401                pk.rebuild_full(&adapter);
1402            }
1403            self.pk_order = Some(pk);
1404        }
1405        self.edges.add_edge(dependent, dependency);
1406        self.store.set_dirty(dependent, true);
1407        self.dirty_vertices.insert(dependent);
1408    }
1409
1410    fn remove_dependent_edges(&mut self, vertex: VertexId) {
1411        // Remove all outgoing edges from this vertex (its dependencies)
1412        let dependencies = self.edges.out_edges(vertex);
1413
1414        self.edges.begin_batch();
1415        if self.pk_order.is_some()
1416            && let Some(mut pk) = self.pk_order.take()
1417        {
1418            for dep in &dependencies {
1419                pk.remove_edge(*dep, vertex);
1420            }
1421            self.pk_order = Some(pk);
1422        }
1423        for dep in dependencies {
1424            self.edges.remove_edge(vertex, dep);
1425        }
1426        self.edges.end_batch();
1427
1428        // Remove range dependencies and clean up stripes
1429        if let Some(old_ranges) = self.formula_to_range_deps.remove(&vertex) {
1430            let old_sheet_id = self.store.sheet_id(vertex);
1431
1432            for range in &old_ranges {
1433                let sheet_id = match range.sheet {
1434                    SharedSheetLocator::Id(id) => id,
1435                    _ => old_sheet_id,
1436                };
1437                let s_row = range.start_row.map(|b| b.index);
1438                let e_row = range.end_row.map(|b| b.index);
1439                let s_col = range.start_col.map(|b| b.index);
1440                let e_col = range.end_col.map(|b| b.index);
1441
1442                let mut keys_to_clean = FxHashSet::default();
1443
1444                let col_stripes = (s_row.is_none() && e_row.is_none())
1445                    || (s_col.is_some() && e_col.is_some() && (s_row.is_none() || e_row.is_none()));
1446                let row_stripes = (s_col.is_none() && e_col.is_none())
1447                    || (s_row.is_some() && e_row.is_some() && (s_col.is_none() || e_col.is_none()));
1448
1449                if col_stripes && !row_stripes {
1450                    let sc = s_col.unwrap_or(0);
1451                    let ec = e_col.unwrap_or(sc);
1452                    for col in sc..=ec {
1453                        keys_to_clean.insert(StripeKey {
1454                            sheet_id,
1455                            stripe_type: StripeType::Column,
1456                            index: col,
1457                        });
1458                    }
1459                } else if row_stripes && !col_stripes {
1460                    let sr = s_row.unwrap_or(0);
1461                    let er = e_row.unwrap_or(sr);
1462                    for row in sr..=er {
1463                        keys_to_clean.insert(StripeKey {
1464                            sheet_id,
1465                            stripe_type: StripeType::Row,
1466                            index: row,
1467                        });
1468                    }
1469                } else {
1470                    let start_row = s_row.unwrap_or(0);
1471                    let start_col = s_col.unwrap_or(0);
1472                    let end_row = e_row.unwrap_or(start_row);
1473                    let end_col = e_col.unwrap_or(start_col);
1474
1475                    let height = end_row.saturating_sub(start_row) + 1;
1476                    let width = end_col.saturating_sub(start_col) + 1;
1477
1478                    if self.config.enable_block_stripes && height > 1 && width > 1 {
1479                        let start_block_row = start_row / BLOCK_H;
1480                        let end_block_row = end_row / BLOCK_H;
1481                        let start_block_col = start_col / BLOCK_W;
1482                        let end_block_col = end_col / BLOCK_W;
1483
1484                        for block_row in start_block_row..=end_block_row {
1485                            for block_col in start_block_col..=end_block_col {
1486                                keys_to_clean.insert(StripeKey {
1487                                    sheet_id,
1488                                    stripe_type: StripeType::Block,
1489                                    index: block_index(block_row * BLOCK_H, block_col * BLOCK_W),
1490                                });
1491                            }
1492                        }
1493                    } else if height > width {
1494                        for col in start_col..=end_col {
1495                            keys_to_clean.insert(StripeKey {
1496                                sheet_id,
1497                                stripe_type: StripeType::Column,
1498                                index: col,
1499                            });
1500                        }
1501                    } else {
1502                        for row in start_row..=end_row {
1503                            keys_to_clean.insert(StripeKey {
1504                                sheet_id,
1505                                stripe_type: StripeType::Row,
1506                                index: row,
1507                            });
1508                        }
1509                    }
1510                }
1511
1512                for key in keys_to_clean {
1513                    if let Some(dependents) = self.stripe_to_dependents.get_mut(&key) {
1514                        dependents.remove(&vertex);
1515                        if dependents.is_empty() {
1516                            self.stripe_to_dependents.remove(&key);
1517                            #[cfg(test)]
1518                            {
1519                                self.instr.stripe_removes += 1;
1520                            }
1521                        }
1522                    }
1523                }
1524            }
1525        }
1526    }
1527
1528    // Removed: vertices() and get_vertex() methods - no longer needed with SoA
1529    // The old AoS Vertex struct has been eliminated in favor of direct
1530    // access to columnar data through the VertexStore
1531
1532    /// Updates the cached value of a formula vertex.
1533    pub(crate) fn update_vertex_value(&mut self, vertex_id: VertexId, value: LiteralValue) {
1534        let value_ref = self.data_store.store_value(value);
1535        self.vertex_values.insert(vertex_id, value_ref);
1536    }
1537
1538    /// Plan a spill region for an anchor; returns #SPILL! if blocked
1539    pub fn plan_spill_region(
1540        &self,
1541        anchor: VertexId,
1542        target_cells: &[CellRef],
1543    ) -> Result<(), ExcelError> {
1544        use formualizer_common::{ExcelErrorExtra, ExcelErrorKind};
1545        // Compute expected spill shape from the target rectangle for better diagnostics
1546        let (expected_rows, expected_cols) = if target_cells.is_empty() {
1547            (0u32, 0u32)
1548        } else {
1549            let mut min_r = u32::MAX;
1550            let mut max_r = 0u32;
1551            let mut min_c = u32::MAX;
1552            let mut max_c = 0u32;
1553            for cell in target_cells {
1554                let r = cell.coord.row();
1555                let c = cell.coord.col();
1556                if r < min_r {
1557                    min_r = r;
1558                }
1559                if r > max_r {
1560                    max_r = r;
1561                }
1562                if c < min_c {
1563                    min_c = c;
1564                }
1565                if c > max_c {
1566                    max_c = c;
1567                }
1568            }
1569            (
1570                max_r.saturating_sub(min_r).saturating_add(1),
1571                max_c.saturating_sub(min_c).saturating_add(1),
1572            )
1573        };
1574        // Allow overlapping with previously owned spill cells by this anchor
1575        for cell in target_cells {
1576            // If cell is already owned by this anchor's previous spill, it's allowed.
1577            let owned_by_anchor = match self.spill_cell_to_anchor.get(cell) {
1578                Some(&existing_anchor) if existing_anchor == anchor => true,
1579                Some(_other) => {
1580                    return Err(ExcelError::new(ExcelErrorKind::Spill)
1581                        .with_message("BlockedBySpill")
1582                        .with_extra(ExcelErrorExtra::Spill {
1583                            expected_rows,
1584                            expected_cols,
1585                        }));
1586                }
1587                None => false,
1588            };
1589
1590            if owned_by_anchor {
1591                continue;
1592            }
1593
1594            // If cell is occupied by another formula anchor, block regardless of value visibility
1595            if let Some(&vid) = self.cell_to_vertex.get(cell)
1596                && vid != anchor
1597            {
1598                // Prevent clobbering formulas (array or scalar) in the target area
1599                match self.store.kind(vid) {
1600                    VertexKind::FormulaScalar | VertexKind::FormulaArray => {
1601                        return Err(ExcelError::new(ExcelErrorKind::Spill)
1602                            .with_message("BlockedByFormula")
1603                            .with_extra(ExcelErrorExtra::Spill {
1604                                expected_rows,
1605                                expected_cols,
1606                            }));
1607                    }
1608                    _ => {
1609                        // If a non-empty value exists (and not this anchor), block
1610                        if let Some(vref) = self.vertex_values.get(&vid) {
1611                            let v = self.data_store.retrieve_value(*vref);
1612                            if !matches!(v, LiteralValue::Empty) {
1613                                return Err(ExcelError::new(ExcelErrorKind::Spill)
1614                                    .with_message("BlockedByValue")
1615                                    .with_extra(ExcelErrorExtra::Spill {
1616                                        expected_rows,
1617                                        expected_cols,
1618                                    }));
1619                            }
1620                        }
1621                    }
1622                }
1623            }
1624        }
1625        Ok(())
1626    }
1627
1628    // Note: non-atomic commit_spill_region has been removed. All callers must use
1629    // commit_spill_region_atomic_with_fault for atomicity and rollback on failure.
1630
1631    /// Commit a spill atomically with an internal shadow buffer and optional fault injection.
1632    /// If a fault is injected partway through, all changes are rolled back to the pre-commit state.
1633    /// This does not change behavior under normal operation; it's primarily for Phase 3 guarantees and tests.
1634    pub fn commit_spill_region_atomic_with_fault(
1635        &mut self,
1636        anchor: VertexId,
1637        target_cells: Vec<CellRef>,
1638        values: Vec<Vec<LiteralValue>>,
1639        fault_after_ops: Option<usize>,
1640    ) -> Result<(), ExcelError> {
1641        use rustc_hash::FxHashSet;
1642
1643        // Capture previous owned cells for this anchor
1644        let prev_cells = self
1645            .spill_anchor_to_cells
1646            .get(&anchor)
1647            .cloned()
1648            .unwrap_or_default();
1649        let new_set: FxHashSet<CellRef> = target_cells.iter().copied().collect();
1650        let prev_set: FxHashSet<CellRef> = prev_cells.iter().copied().collect();
1651
1652        // Compose operation list: clears first (prev - new), then writes for new rectangle
1653        #[derive(Clone)]
1654        struct Op {
1655            sheet: String,
1656            row: u32,
1657            col: u32,
1658            new_value: LiteralValue,
1659        }
1660        let mut ops: Vec<Op> = Vec::new();
1661
1662        // Clears for cells no longer used
1663        for cell in prev_cells.iter() {
1664            if !new_set.contains(cell) {
1665                let sheet = self.sheet_name(cell.sheet_id).to_string();
1666                ops.push(Op {
1667                    sheet,
1668                    row: cell.coord.row(),
1669                    col: cell.coord.col(),
1670                    new_value: LiteralValue::Empty,
1671                });
1672            }
1673        }
1674
1675        // Writes for new values (row-major to match target rectangle)
1676        if !target_cells.is_empty() {
1677            let first = target_cells.first().copied().unwrap();
1678            let row0 = first.coord.row();
1679            let col0 = first.coord.col();
1680            let sheet = self.sheet_name(first.sheet_id).to_string();
1681            for (r_off, row_vals) in values.iter().enumerate() {
1682                for (c_off, v) in row_vals.iter().enumerate() {
1683                    ops.push(Op {
1684                        sheet: sheet.clone(),
1685                        row: row0 + r_off as u32,
1686                        col: col0 + c_off as u32,
1687                        new_value: v.clone(),
1688                    });
1689                }
1690            }
1691        }
1692
1693        // Shadow buffer of old values for rollback
1694        #[derive(Clone)]
1695        struct OldVal {
1696            present: bool,
1697            value: LiteralValue,
1698        }
1699        let mut old_values: Vec<((String, u32, u32), OldVal)> = Vec::with_capacity(ops.len());
1700
1701        // Capture old values before applying
1702        for op in &ops {
1703            // op.row/op.col are internal 0-based; get_cell_value is a public 1-based API.
1704            let old = self
1705                .get_cell_value(&op.sheet, op.row + 1, op.col + 1)
1706                .unwrap_or(LiteralValue::Empty);
1707            let present = true; // unified model: we always treat as present
1708            old_values.push((
1709                (op.sheet.clone(), op.row, op.col),
1710                OldVal {
1711                    present,
1712                    value: old,
1713                },
1714            ));
1715        }
1716
1717        // Apply with optional injected fault
1718        for (applied, op) in ops.iter().enumerate() {
1719            if let Some(n) = fault_after_ops
1720                && applied == n
1721            {
1722                for idx in (0..applied).rev() {
1723                    let ((ref sheet, row, col), ref old) = old_values[idx];
1724                    let _ = self.set_cell_value(sheet, row + 1, col + 1, old.value.clone());
1725                }
1726                return Err(ExcelError::new(ExcelErrorKind::Error)
1727                    .with_message("Injected persistence fault during spill commit"));
1728            }
1729            let _ = self.set_cell_value(&op.sheet, op.row + 1, op.col + 1, op.new_value.clone());
1730        }
1731
1732        // Update spill ownership maps only on success
1733        // Clear previous ownership not reused
1734        for cell in prev_cells.iter() {
1735            if !new_set.contains(cell) {
1736                self.spill_cell_to_anchor.remove(cell);
1737            }
1738        }
1739        // Mark ownership for new rectangle using the declared target cells only
1740        for cell in &target_cells {
1741            self.spill_cell_to_anchor.insert(*cell, anchor);
1742        }
1743        self.spill_anchor_to_cells.insert(anchor, target_cells);
1744        Ok(())
1745    }
1746
1747    /// Clear an existing spill region for an anchor (set cells to Empty and forget ownership)
1748    pub fn clear_spill_region(&mut self, anchor: VertexId) {
1749        if let Some(cells) = self.spill_anchor_to_cells.remove(&anchor) {
1750            for cell in cells {
1751                let sheet = self.sheet_name(cell.sheet_id).to_string();
1752                let _ = self.set_cell_value(
1753                    &sheet,
1754                    cell.coord.row() + 1,
1755                    cell.coord.col() + 1,
1756                    LiteralValue::Empty,
1757                );
1758                self.spill_cell_to_anchor.remove(&cell);
1759            }
1760        }
1761    }
1762
1763    /// Check if a vertex exists
1764    pub(crate) fn vertex_exists(&self, vertex_id: VertexId) -> bool {
1765        if vertex_id.0 < FIRST_NORMAL_VERTEX {
1766            return false;
1767        }
1768        let index = (vertex_id.0 - FIRST_NORMAL_VERTEX) as usize;
1769        index < self.store.len()
1770    }
1771
1772    /// Get the kind of a vertex
1773    pub(crate) fn get_vertex_kind(&self, vertex_id: VertexId) -> VertexKind {
1774        self.store.kind(vertex_id)
1775    }
1776
1777    /// Get the sheet ID of a vertex
1778    pub(crate) fn get_vertex_sheet_id(&self, vertex_id: VertexId) -> SheetId {
1779        self.store.sheet_id(vertex_id)
1780    }
1781
1782    pub fn get_formula_id(&self, vertex_id: VertexId) -> Option<AstNodeId> {
1783        self.vertex_formulas.get(&vertex_id).copied()
1784    }
1785
1786    pub fn get_formula_id_and_volatile(&self, vertex_id: VertexId) -> Option<(AstNodeId, bool)> {
1787        let ast_id = self.get_formula_id(vertex_id)?;
1788        Some((ast_id, self.is_volatile(vertex_id)))
1789    }
1790
1791    pub fn get_formula_node(&self, vertex_id: VertexId) -> Option<&super::arena::AstNodeData> {
1792        let ast_id = self.get_formula_id(vertex_id)?;
1793        self.data_store.get_node(ast_id)
1794    }
1795
1796    pub fn get_formula_node_and_volatile(
1797        &self,
1798        vertex_id: VertexId,
1799    ) -> Option<(&super::arena::AstNodeData, bool)> {
1800        let (ast_id, vol) = self.get_formula_id_and_volatile(vertex_id)?;
1801        let node = self.data_store.get_node(ast_id)?;
1802        Some((node, vol))
1803    }
1804
1805    /// Get the formula AST for a vertex.
1806    ///
1807    /// Not used in hot paths; reconstructs from arena.
1808    pub fn get_formula(&self, vertex_id: VertexId) -> Option<ASTNode> {
1809        let ast_id = self.get_formula_id(vertex_id)?;
1810        self.data_store.retrieve_ast(ast_id, &self.sheet_reg)
1811    }
1812
1813    /// Get the value stored for a vertex
1814    pub fn get_value(&self, vertex_id: VertexId) -> Option<LiteralValue> {
1815        self.vertex_values
1816            .get(&vertex_id)
1817            .map(|&value_ref| self.data_store.retrieve_value(value_ref))
1818    }
1819
1820    /// Get the cell reference for a vertex
1821    pub(crate) fn get_cell_ref(&self, vertex_id: VertexId) -> Option<CellRef> {
1822        let packed_coord = self.store.coord(vertex_id);
1823        let sheet_id = self.store.sheet_id(vertex_id);
1824        let coord = Coord::new(packed_coord.row(), packed_coord.col(), true, true);
1825        Some(CellRef::new(sheet_id, coord))
1826    }
1827
1828    /// Create a cell reference (helper for internal use)
1829    pub(crate) fn make_cell_ref_internal(&self, sheet_id: SheetId, row: u32, col: u32) -> CellRef {
1830        let coord = Coord::new(row, col, true, true);
1831        CellRef::new(sheet_id, coord)
1832    }
1833
1834    /// Create a cell reference from sheet name and Excel 1-based coordinates.
1835    pub fn make_cell_ref(&self, sheet_name: &str, row: u32, col: u32) -> CellRef {
1836        let sheet_id = self.sheet_reg.get_id(sheet_name).unwrap_or(0);
1837        let coord = Coord::from_excel(row, col, true, true);
1838        CellRef::new(sheet_id, coord)
1839    }
1840
1841    /// Check if a vertex is dirty
1842    pub(crate) fn is_dirty(&self, vertex_id: VertexId) -> bool {
1843        self.store.is_dirty(vertex_id)
1844    }
1845
1846    /// Check if a vertex is volatile
1847    pub(crate) fn is_volatile(&self, vertex_id: VertexId) -> bool {
1848        self.store.is_volatile(vertex_id)
1849    }
1850
1851    /// Get vertex ID for a cell address
1852    pub fn get_vertex_id_for_address(&self, addr: &CellRef) -> Option<&VertexId> {
1853        self.cell_to_vertex.get(addr)
1854    }
1855
1856    #[cfg(test)]
1857    pub fn cell_to_vertex(&self) -> &FxHashMap<CellRef, VertexId> {
1858        &self.cell_to_vertex
1859    }
1860
1861    /// Get the dependencies of a vertex (for scheduler)
1862    pub(crate) fn get_dependencies(&self, vertex_id: VertexId) -> Vec<VertexId> {
1863        self.edges.out_edges(vertex_id)
1864    }
1865
1866    /// Check if a vertex has a self-loop
1867    pub(crate) fn has_self_loop(&self, vertex_id: VertexId) -> bool {
1868        self.edges.out_edges(vertex_id).contains(&vertex_id)
1869    }
1870
1871    /// Get dependents of a vertex (vertices that depend on this vertex)
1872    /// Uses reverse edges for O(1) lookup when available
1873    pub(crate) fn get_dependents(&self, vertex_id: VertexId) -> Vec<VertexId> {
1874        // If there are pending changes in delta, we need to scan
1875        // Otherwise we can use the fast reverse edges
1876        if self.edges.delta_size() > 0 {
1877            // Fall back to scanning when delta has changes
1878            let mut dependents = Vec::new();
1879            for (&_addr, &vid) in &self.cell_to_vertex {
1880                let out_edges = self.edges.out_edges(vid);
1881                if out_edges.contains(&vertex_id) {
1882                    dependents.push(vid);
1883                }
1884            }
1885            for named in self.named_ranges.values() {
1886                let vid = named.vertex;
1887                let out_edges = self.edges.out_edges(vid);
1888                if out_edges.contains(&vertex_id) {
1889                    dependents.push(vid);
1890                }
1891            }
1892            for named in self.sheet_named_ranges.values() {
1893                let vid = named.vertex;
1894                let out_edges = self.edges.out_edges(vid);
1895                if out_edges.contains(&vertex_id) {
1896                    dependents.push(vid);
1897                }
1898            }
1899            dependents
1900        } else {
1901            // Fast path: use reverse edges from CSR
1902            self.edges.in_edges(vertex_id).to_vec()
1903        }
1904    }
1905
1906    // Internal helper methods for Milestone 0.4
1907
1908    /// Internal: Create a snapshot of vertex state for rollback
1909    #[doc(hidden)]
1910    pub fn snapshot_vertex(&self, id: VertexId) -> crate::engine::VertexSnapshot {
1911        let coord = self.store.coord(id);
1912        let sheet_id = self.store.sheet_id(id);
1913        let kind = self.store.kind(id);
1914        let flags = self.store.flags(id);
1915
1916        // Get value and formula references
1917        let value_ref = self.vertex_values.get(&id).copied();
1918        let formula_ref = self.vertex_formulas.get(&id).copied();
1919
1920        // Get outgoing edges (dependencies)
1921        let out_edges = self.get_dependencies(id);
1922
1923        crate::engine::VertexSnapshot {
1924            coord,
1925            sheet_id,
1926            kind,
1927            flags,
1928            value_ref,
1929            formula_ref,
1930            out_edges,
1931        }
1932    }
1933
1934    /// Internal: Remove all edges for a vertex
1935    #[doc(hidden)]
1936    pub fn remove_all_edges(&mut self, id: VertexId) {
1937        // Enter batch mode to avoid intermediate rebuilds
1938        self.edges.begin_batch();
1939
1940        // Remove outgoing edges (this vertex's dependencies)
1941        self.remove_dependent_edges(id);
1942
1943        // Force rebuild to get accurate dependents list
1944        // This is necessary because get_dependents uses CSR reverse edges
1945        self.edges.rebuild();
1946
1947        // Remove incoming edges (vertices that depend on this vertex)
1948        let dependents = self.get_dependents(id);
1949        if self.pk_order.is_some()
1950            && let Some(mut pk) = self.pk_order.take()
1951        {
1952            for dependent in &dependents {
1953                pk.remove_edge(id, *dependent);
1954            }
1955            self.pk_order = Some(pk);
1956        }
1957        for dependent in dependents {
1958            self.edges.remove_edge(dependent, id);
1959        }
1960
1961        // Exit batch mode and rebuild once with all changes
1962        self.edges.end_batch();
1963    }
1964
1965    /// Internal: Mark vertex as having #REF! error
1966    #[doc(hidden)]
1967    pub fn mark_as_ref_error(&mut self, id: VertexId) {
1968        let error = LiteralValue::Error(ExcelError::new(ExcelErrorKind::Ref));
1969        let value_ref = self.data_store.store_value(error);
1970        self.vertex_values.insert(id, value_ref);
1971        self.store.set_dirty(id, true);
1972        self.dirty_vertices.insert(id);
1973    }
1974
1975    /// Check if a vertex has a #REF! error
1976    pub fn is_ref_error(&self, id: VertexId) -> bool {
1977        if let Some(value_ref) = self.vertex_values.get(&id) {
1978            let value = self.data_store.retrieve_value(*value_ref);
1979            if let LiteralValue::Error(err) = value {
1980                return err.kind == ExcelErrorKind::Ref;
1981            }
1982        }
1983        false
1984    }
1985
1986    /// Internal: Mark all direct dependents as dirty
1987    #[doc(hidden)]
1988    pub fn mark_dependents_dirty(&mut self, id: VertexId) {
1989        let dependents = self.get_dependents(id);
1990        for dep_id in dependents {
1991            self.store.set_dirty(dep_id, true);
1992            self.dirty_vertices.insert(dep_id);
1993        }
1994    }
1995
1996    /// Internal: Mark a vertex as volatile
1997    #[doc(hidden)]
1998    pub fn mark_volatile(&mut self, id: VertexId, volatile: bool) {
1999        self.store.set_volatile(id, volatile);
2000        if volatile {
2001            self.volatile_vertices.insert(id);
2002        } else {
2003            self.volatile_vertices.remove(&id);
2004        }
2005    }
2006
2007    /// Update vertex coordinate
2008    #[doc(hidden)]
2009    pub fn set_coord(&mut self, id: VertexId, coord: AbsCoord) {
2010        self.store.set_coord(id, coord);
2011    }
2012
2013    /// Update edge cache coordinate
2014    #[doc(hidden)]
2015    pub fn update_edge_coord(&mut self, id: VertexId, coord: AbsCoord) {
2016        self.edges.update_coord(id, coord);
2017    }
2018
2019    /// Mark vertex as deleted (tombstone)
2020    #[doc(hidden)]
2021    pub fn mark_deleted(&mut self, id: VertexId, deleted: bool) {
2022        self.store.mark_deleted(id, deleted);
2023    }
2024
2025    /// Set vertex kind
2026    #[doc(hidden)]
2027    pub fn set_kind(&mut self, id: VertexId, kind: VertexKind) {
2028        self.store.set_kind(id, kind);
2029    }
2030
2031    /// Set vertex dirty flag
2032    #[doc(hidden)]
2033    pub fn set_dirty(&mut self, id: VertexId, dirty: bool) {
2034        self.store.set_dirty(id, dirty);
2035        if dirty {
2036            self.dirty_vertices.insert(id);
2037        } else {
2038            self.dirty_vertices.remove(&id);
2039        }
2040    }
2041
2042    /// Get vertex kind (for testing)
2043    #[cfg(test)]
2044    pub(crate) fn get_kind(&self, id: VertexId) -> VertexKind {
2045        self.store.kind(id)
2046    }
2047
2048    /// Get vertex flags (for testing)
2049    #[cfg(test)]
2050    pub(crate) fn get_flags(&self, id: VertexId) -> u8 {
2051        self.store.flags(id)
2052    }
2053
2054    /// Check if vertex is deleted (for testing)
2055    #[cfg(test)]
2056    pub(crate) fn is_deleted(&self, id: VertexId) -> bool {
2057        self.store.is_deleted(id)
2058    }
2059
2060    /// Force edge rebuild (internal use)
2061    #[doc(hidden)]
2062    pub fn rebuild_edges(&mut self) {
2063        self.edges.rebuild();
2064    }
2065
2066    /// Get delta size (internal use)
2067    #[doc(hidden)]
2068    pub fn edges_delta_size(&self) -> usize {
2069        self.edges.delta_size()
2070    }
2071
2072    /// Get vertex ID for specific cell address
2073    pub fn get_vertex_for_cell(&self, addr: &CellRef) -> Option<VertexId> {
2074        self.cell_to_vertex.get(addr).copied()
2075    }
2076
2077    /// Get coord for a vertex (public for VertexEditor)
2078    pub fn get_coord(&self, id: VertexId) -> AbsCoord {
2079        self.store.coord(id)
2080    }
2081
2082    /// Get sheet_id for a vertex (public for VertexEditor)
2083    pub fn get_sheet_id(&self, id: VertexId) -> SheetId {
2084        self.store.sheet_id(id)
2085    }
2086
2087    /// Get all vertices in a sheet
2088    pub fn vertices_in_sheet(&self, sheet_id: SheetId) -> impl Iterator<Item = VertexId> + '_ {
2089        self.store
2090            .all_vertices()
2091            .filter(move |&id| self.vertex_exists(id) && self.store.sheet_id(id) == sheet_id)
2092    }
2093
2094    /// Does a vertex have a formula associated
2095    pub fn vertex_has_formula(&self, id: VertexId) -> bool {
2096        self.vertex_formulas.contains_key(&id)
2097    }
2098
2099    /// Get all vertices with formulas
2100    pub fn vertices_with_formulas(&self) -> impl Iterator<Item = VertexId> + '_ {
2101        self.vertex_formulas.keys().copied()
2102    }
2103
2104    /// Update a vertex's formula
2105    pub fn update_vertex_formula(&mut self, id: VertexId, ast: ASTNode) -> Result<(), ExcelError> {
2106        // Get the sheet_id for this vertex
2107        let sheet_id = self.store.sheet_id(id);
2108
2109        // If the adjusted AST contains special #REF markers (from structural edits),
2110        // treat this as a REF error on the vertex instead of attempting to resolve.
2111        // This prevents failures when reference_adjuster injected placeholder refs.
2112        let has_ref_marker = ast.get_dependencies().into_iter().any(|r| {
2113            matches!(
2114                r,
2115                ReferenceType::Cell { sheet: Some(s), .. }
2116                    | ReferenceType::Range { sheet: Some(s), .. } if s == "#REF"
2117            )
2118        });
2119        if has_ref_marker {
2120            // Store the adjusted AST for round-tripping/display, but set value state to #REF!
2121            let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
2122            self.vertex_formulas.insert(id, ast_id);
2123            self.mark_as_ref_error(id);
2124            self.store.set_kind(id, VertexKind::FormulaScalar);
2125            return Ok(());
2126        }
2127
2128        // Extract dependencies from AST
2129        let (new_dependencies, new_range_dependencies, _, named_dependencies) =
2130            self.extract_dependencies(&ast, sheet_id)?;
2131
2132        // Remove old dependencies first
2133        self.remove_dependent_edges(id);
2134        self.detach_vertex_from_names(id);
2135
2136        // Store the new formula
2137        let ast_id = self.data_store.store_ast(&ast, &self.sheet_reg);
2138        self.vertex_formulas.insert(id, ast_id);
2139
2140        // Add new dependency edges
2141        self.add_dependent_edges(id, &new_dependencies);
2142        self.add_range_dependent_edges(id, &new_range_dependencies, sheet_id);
2143
2144        if !named_dependencies.is_empty() {
2145            self.attach_vertex_to_names(id, &named_dependencies);
2146        }
2147
2148        // Mark as formula vertex
2149        self.store.set_kind(id, VertexKind::FormulaScalar);
2150
2151        Ok(())
2152    }
2153
2154    /// Mark a vertex as dirty without propagation (for VertexEditor)
2155    pub fn mark_vertex_dirty(&mut self, vertex_id: VertexId) {
2156        self.store.set_dirty(vertex_id, true);
2157        self.dirty_vertices.insert(vertex_id);
2158    }
2159
2160    /// Update cell mapping for a vertex (for VertexEditor)
2161    pub fn update_cell_mapping(
2162        &mut self,
2163        id: VertexId,
2164        old_addr: Option<CellRef>,
2165        new_addr: CellRef,
2166    ) {
2167        // Remove old mapping if it exists
2168        if let Some(old) = old_addr {
2169            self.cell_to_vertex.remove(&old);
2170        }
2171        // Add new mapping
2172        self.cell_to_vertex.insert(new_addr, id);
2173    }
2174
2175    /// Remove cell mapping (for VertexEditor)
2176    pub fn remove_cell_mapping(&mut self, addr: &CellRef) {
2177        self.cell_to_vertex.remove(addr);
2178    }
2179
2180    /// Get the cell reference for a vertex
2181    pub fn get_cell_ref_for_vertex(&self, id: VertexId) -> Option<CellRef> {
2182        let coord = self.store.coord(id);
2183        let sheet_id = self.store.sheet_id(id);
2184        // Find the cell reference in the mapping
2185        let cell_ref = CellRef::new(sheet_id, Coord::new(coord.row(), coord.col(), true, true));
2186        // Verify it actually maps to this vertex
2187        if self.cell_to_vertex.get(&cell_ref) == Some(&id) {
2188            Some(cell_ref)
2189        } else {
2190            None
2191        }
2192    }
2193
2194    // ========== Phase 2: Structural Operations ==========
2195}
2196
2197// ========== Sheet Management Operations ==========