Skip to main content

formualizer_eval/engine/
mod.rs

1//! Formualizer Dependency Graph Engine
2//!
3//! Provides incremental formula evaluation with dependency tracking.
4
5pub mod arrow_ingest;
6pub mod effects;
7pub mod eval;
8pub mod eval_delta;
9pub mod formula_ingest;
10pub mod graph;
11pub mod ingest;
12pub mod ingest_builder;
13pub(crate) mod ingest_pipeline;
14pub mod journal;
15pub mod lookup_index_cache;
16pub mod plan;
17pub mod range_view;
18pub mod row_visibility;
19pub mod scheduler;
20pub mod spill;
21pub mod vertex;
22pub mod virtual_deps;
23
24// New SoA modules
25pub mod csr_edges;
26pub mod debug_views;
27pub mod delta_edges;
28pub mod interval_tree;
29pub mod named_range;
30pub mod sheet_index;
31pub mod sheet_registry;
32pub mod topo;
33pub mod vertex_store;
34
35// Phase 1: Arena modules
36pub mod arena;
37
38// Phase 1: Warmup configuration (kept for compatibility)
39pub mod tuning;
40
41#[cfg(test)]
42mod tests;
43
44pub use arena::AstNodeId;
45pub use eval::{
46    Engine, EngineAction, EngineBaselineStats, EvalResult, RecalcPlan, VirtualDepTelemetry,
47};
48pub use eval_delta::{DeltaMode, EvalDelta};
49pub use formula_ingest::{FormulaIngestBatch, FormulaIngestRecord, FormulaIngestReport};
50pub use journal::{ActionJournal, ArrowOp, ArrowUndoBatch, GraphUndoBatch};
51// Use SoA implementation
52pub use graph::snapshot::VertexSnapshot;
53pub use graph::{
54    ChangeEvent, DependencyGraph, DependencyRef, GraphBaselineStats, OperationSummary, StripeKey,
55    StripeType, block_index,
56};
57pub use row_visibility::{RowVisibilitySource, VisibilityMaskMode};
58pub use scheduler::{Layer, Schedule, Scheduler};
59pub use vertex::{VertexId, VertexKind};
60
61pub use graph::editor::{
62    DataUpdateSummary, EditorError, MetaUpdateSummary, RangeSummary, ShiftSummary, TransactionId,
63    VertexDataPatch, VertexEditor, VertexMeta, VertexMetaPatch,
64};
65
66pub use graph::editor::change_log::{ChangeLog, ChangeLogger, NullChangeLogger};
67
68#[doc(hidden)]
69pub mod fp8_parity_test_support {
70    use super::{Engine, EvalConfig};
71    use crate::engine::arena::CanonicalLabels;
72    use crate::formula_plane::dependency_summary::summarize_canonical_template;
73    use crate::formula_plane::producer::SpanReadSummary;
74    use crate::formula_plane::runtime::{PlacementDomain, ResultRegion};
75    use crate::formula_plane::template_canonical::{
76        CanonicalRejectReason, CanonicalTemplateFlag, canonicalize_template,
77    };
78    use crate::reference::{CellRef, Coord};
79    use crate::traits::EvaluationContext;
80    use formualizer_common::{ExcelError, LiteralValue};
81    use formualizer_parse::parser::{ASTNode, parse};
82    use std::sync::Arc;
83
84    #[derive(Clone, Debug)]
85    pub struct Fp8ParityObservation {
86        pub formula: String,
87        pub placement: CellRef,
88        pub old_payload: String,
89        pub new_hash: u64,
90    }
91
92    pub fn default_config() -> EvalConfig {
93        EvalConfig::default()
94    }
95
96    pub fn parse_formula(formula: &str) -> ASTNode {
97        parse(formula).unwrap_or_else(|err| panic!("parse {formula}: {err}"))
98    }
99
100    pub fn cell(sheet_id: u16, row: u32, col: u32) -> CellRef {
101        CellRef::new(sheet_id, Coord::from_excel(row, col, true, true))
102    }
103
104    pub fn assert_case<R: EvaluationContext>(
105        engine: &mut Engine<R>,
106        formula: &str,
107        placement: CellRef,
108    ) -> Fp8ParityObservation {
109        let parsed = parse_formula(formula);
110        assert_case_ast(engine, formula, parsed, placement)
111    }
112
113    pub fn assert_case_ast<R: EvaluationContext>(
114        engine: &mut Engine<R>,
115        formula: &str,
116        parsed: ASTNode,
117        placement: CellRef,
118    ) -> Fp8ParityObservation {
119        let mut old_ast = parsed.clone();
120        let old_rewrite = engine
121            .graph
122            .rewrite_structured_references_for_cell(&mut old_ast, placement);
123        let old = old_rewrite.and_then(|_| old_path(engine, &old_ast, placement));
124
125        let new = {
126            let mut pipeline = engine.ingest_pipeline();
127            pipeline.ingest_formula(
128                crate::engine::ingest_pipeline::FormulaAstInput::Tree(parsed),
129                placement,
130                Some(Arc::<str>::from(formula)),
131            )
132        };
133
134        match (old, new) {
135            (Ok(old), Ok(new)) => {
136                let new_direct = sorted_cells(new.dep_plan.direct_cell_deps.clone());
137                assert_eq!(
138                    old.direct_cells, new_direct,
139                    "direct deps differ for {formula} at {placement:?}\nold={:?}\nnew={:?}",
140                    old.direct_cells, new_direct
141                );
142                assert_eq!(
143                    old.range_deps, new.dep_plan.range_deps,
144                    "range deps differ for {formula} at {placement:?}"
145                );
146                assert_eq!(
147                    old.unresolved_names, new.dep_plan.named_refs,
148                    "unresolved names differ for {formula} at {placement:?}"
149                );
150                assert_eq!(
151                    old.volatile, new.dep_plan.volatile,
152                    "volatile flag differs for {formula} at {placement:?}"
153                );
154                assert_eq!(
155                    old.dynamic, new.dep_plan.dynamic,
156                    "dynamic flag differs for {formula} at {placement:?}"
157                );
158                let mut expected_labels = canonical_labels_from_old(&old.labels);
159                if old.dynamic {
160                    expected_labels.flags |= CanonicalLabels::FLAG_DYNAMIC;
161                }
162                assert_eq!(
163                    expected_labels.flags, new.labels.flags,
164                    "canonical label flags differ for {formula} at {placement:?}\nold={:?}\nnew={:#x}",
165                    old.labels.flags, new.labels.flags
166                );
167                assert_eq!(
168                    expected_labels.rejects, new.labels.rejects,
169                    "canonical label rejects differ for {formula} at {placement:?}\nold={:?}\nnew={:#x}",
170                    old.labels.reject_reasons, new.labels.rejects
171                );
172                assert_eq!(
173                    old.read_summary_debug,
174                    new.read_summary.as_ref().map(|s| format!("{s:?}")),
175                    "read summary differs for {formula} at {placement:?}"
176                );
177                assert_eq!(new.formula_text.as_deref(), Some(formula));
178                assert_eq!(new.placement, placement);
179                Fp8ParityObservation {
180                    formula: formula.to_string(),
181                    placement,
182                    old_payload: old.payload,
183                    new_hash: new.canonical_hash,
184                }
185            }
186            (Err(old), Err(new)) => {
187                assert_eq!(
188                    old.kind.to_string(),
189                    new.kind.to_string(),
190                    "old and new errored differently for {formula} at {placement:?}: old={old:?} new={new:?}"
191                );
192                Fp8ParityObservation {
193                    formula: formula.to_string(),
194                    placement,
195                    old_payload: format!("ERR:{:?}", old.kind),
196                    new_hash: 0,
197                }
198            }
199            (Ok(_), Err(new)) => panic!(
200                "new pipeline errored but old path succeeded for {formula} at {placement:?}: {new:?}"
201            ),
202            (Err(old), Ok(_)) => panic!(
203                "old path errored but new pipeline succeeded for {formula} at {placement:?}: {old:?}"
204            ),
205        }
206    }
207
208    #[derive(Debug)]
209    struct OldOutput {
210        payload: String,
211        labels: crate::formula_plane::template_canonical::CanonicalTemplateLabels,
212        direct_cells: Vec<CellRef>,
213        range_deps: Vec<crate::reference::SharedRangeRef<'static>>,
214        unresolved_names: Vec<String>,
215        volatile: bool,
216        dynamic: bool,
217        read_summary_debug: Option<String>,
218    }
219
220    fn old_path<R: EvaluationContext>(
221        engine: &mut Engine<R>,
222        ast: &ASTNode,
223        placement: CellRef,
224    ) -> Result<OldOutput, ExcelError> {
225        let (_deps, ranges, placeholders, _named, unresolved_names) = engine
226            .graph
227            .fp8_parity_extract_dependencies_with_pending_names(ast, placement.sheet_id)?;
228        let volatile = engine.graph.fp8_parity_is_ast_volatile(ast);
229        let dynamic = engine.graph.is_ast_dynamic(ast);
230        let template =
231            canonicalize_template(ast, placement.coord.row() + 1, placement.coord.col() + 1);
232        let summary = summarize_canonical_template(&template);
233        let scalar_domain = PlacementDomain::row_run(
234            placement.sheet_id,
235            placement.coord.row(),
236            placement.coord.row(),
237            placement.coord.col(),
238        );
239        let result_region = ResultRegion::scalar_cells(scalar_domain);
240        let read_summary = SpanReadSummary::from_formula_summary(
241            placement.sheet_id,
242            &result_region,
243            &summary,
244            engine.graph.sheet_reg(),
245        )
246        .ok();
247        Ok(OldOutput {
248            payload: template.key.payload().to_string(),
249            labels: template.labels,
250            direct_cells: sorted_cells(placeholders),
251            range_deps: ranges,
252            unresolved_names,
253            volatile,
254            dynamic,
255            read_summary_debug: read_summary.as_ref().map(|s| format!("{s:?}")),
256        })
257    }
258
259    fn sorted_cells(mut cells: Vec<CellRef>) -> Vec<CellRef> {
260        cells.sort();
261        cells.dedup();
262        cells
263    }
264
265    fn canonical_labels_from_old(
266        old: &crate::formula_plane::template_canonical::CanonicalTemplateLabels,
267    ) -> CanonicalLabels {
268        let mut labels = CanonicalLabels::default();
269        for flag in &old.flags {
270            labels.flags |= match flag {
271                CanonicalTemplateFlag::ParserVolatileFlag => CanonicalLabels::FLAG_VOLATILE,
272                CanonicalTemplateFlag::FunctionCall => CanonicalLabels::FLAG_CONTAINS_FUNCTION,
273                CanonicalTemplateFlag::CurrentSheetBinding => CanonicalLabels::FLAG_CURRENT_SHEET,
274                CanonicalTemplateFlag::ExplicitSheetBinding => CanonicalLabels::FLAG_EXPLICIT_SHEET,
275                CanonicalTemplateFlag::RelativeReferenceAxis => CanonicalLabels::FLAG_RELATIVE_ONLY,
276                CanonicalTemplateFlag::AbsoluteReferenceAxis => CanonicalLabels::FLAG_ABSOLUTE_ONLY,
277                CanonicalTemplateFlag::MixedAnchors => CanonicalLabels::FLAG_MIXED_ANCHORS,
278                CanonicalTemplateFlag::FiniteRangeReference => CanonicalLabels::FLAG_CONTAINS_RANGE,
279            };
280        }
281        for reason in &old.reject_reasons {
282            labels.flags |= match reason {
283                CanonicalRejectReason::DynamicReferenceFunction { .. } => {
284                    CanonicalLabels::FLAG_DYNAMIC
285                }
286                CanonicalRejectReason::ParserVolatileFlag
287                | CanonicalRejectReason::VolatileFunction { .. } => CanonicalLabels::FLAG_VOLATILE,
288                CanonicalRejectReason::LocalEnvironmentFunction { .. } => {
289                    CanonicalLabels::FLAG_CONTAINS_LET_LAMBDA
290                }
291                CanonicalRejectReason::ArrayOrSpillFunction { .. }
292                | CanonicalRejectReason::ArrayLiteral => CanonicalLabels::FLAG_CONTAINS_ARRAY,
293                CanonicalRejectReason::NamedReference { .. } => CanonicalLabels::FLAG_CONTAINS_NAME,
294                CanonicalRejectReason::StructuredReference { .. }
295                | CanonicalRejectReason::StructuredReferenceCurrentRow { .. } => {
296                    CanonicalLabels::FLAG_CONTAINS_TABLE
297                        | CanonicalLabels::FLAG_CONTAINS_STRUCTURED_REF
298                }
299                CanonicalRejectReason::OpenRangeReference { .. }
300                | CanonicalRejectReason::WholeAxisReference { .. } => {
301                    CanonicalLabels::FLAG_CONTAINS_RANGE
302                }
303                _ => 0,
304            };
305            labels.rejects |= match reason {
306                CanonicalRejectReason::InvalidPlacementAnchor { .. } => {
307                    CanonicalLabels::REJECT_INVALID_PLACEMENT_ANCHOR
308                }
309                CanonicalRejectReason::DynamicReferenceFunction { .. } => {
310                    CanonicalLabels::REJECT_DYNAMIC_REFERENCE
311                }
312                CanonicalRejectReason::UnknownOrCustomFunction { .. } => {
313                    CanonicalLabels::REJECT_UNKNOWN_OR_CUSTOM_FUNCTION
314                }
315                CanonicalRejectReason::LocalEnvironmentFunction { .. } => {
316                    CanonicalLabels::REJECT_LOCAL_ENVIRONMENT
317                }
318                CanonicalRejectReason::ParserVolatileFlag => {
319                    CanonicalLabels::REJECT_PARSER_VOLATILE_FLAG
320                }
321                CanonicalRejectReason::VolatileFunction { .. } => {
322                    CanonicalLabels::REJECT_VOLATILE_FUNCTION
323                }
324                CanonicalRejectReason::ReferenceReturningFunction { .. } => {
325                    CanonicalLabels::REJECT_REFERENCE_RETURNING_FUNCTION
326                }
327                CanonicalRejectReason::ArrayOrSpillFunction { .. } => {
328                    CanonicalLabels::REJECT_ARRAY_OR_SPILL_FUNCTION
329                }
330                CanonicalRejectReason::ArrayLiteral => CanonicalLabels::REJECT_ARRAY_LITERAL,
331                CanonicalRejectReason::SpillReference { .. } => {
332                    CanonicalLabels::REJECT_SPILL_REFERENCE
333                }
334                CanonicalRejectReason::SpillResultRegionOperator => {
335                    CanonicalLabels::REJECT_SPILL_RESULT_REGION_OPERATOR
336                }
337                CanonicalRejectReason::ImplicitIntersectionOperator => {
338                    CanonicalLabels::REJECT_IMPLICIT_INTERSECTION_OPERATOR
339                }
340                CanonicalRejectReason::CallExpression => CanonicalLabels::REJECT_CALL_EXPRESSION,
341                CanonicalRejectReason::NamedReference { .. } => {
342                    CanonicalLabels::REJECT_NAMED_REFERENCE
343                }
344                CanonicalRejectReason::StructuredReference { .. } => {
345                    CanonicalLabels::REJECT_STRUCTURED_REFERENCE
346                }
347                CanonicalRejectReason::StructuredReferenceCurrentRow { .. } => {
348                    CanonicalLabels::REJECT_STRUCTURED_REFERENCE_CURRENT_ROW
349                }
350                CanonicalRejectReason::ThreeDReference { .. } => {
351                    CanonicalLabels::REJECT_THREE_D_REFERENCE
352                }
353                CanonicalRejectReason::ExternalReference { .. } => {
354                    CanonicalLabels::REJECT_EXTERNAL_REFERENCE
355                }
356                CanonicalRejectReason::OpenRangeReference { .. } => {
357                    CanonicalLabels::REJECT_OPEN_RANGE_REFERENCE
358                }
359                CanonicalRejectReason::WholeAxisReference { .. } => {
360                    CanonicalLabels::REJECT_WHOLE_AXIS_REFERENCE
361                }
362                CanonicalRejectReason::UnsupportedReference { .. } => {
363                    CanonicalLabels::REJECT_UNSUPPORTED_REFERENCE
364                }
365            };
366        }
367        labels
368    }
369
370    pub fn literal_number(value: f64) -> LiteralValue {
371        LiteralValue::Number(value)
372    }
373}
374
375// CalcObserver is defined below
376
377use crate::timezone::TimeZoneSpec;
378use crate::traits::EvaluationContext;
379use crate::traits::VolatileLevel;
380use chrono::{DateTime, Utc};
381use formualizer_common::error::{ExcelError, ExcelErrorKind};
382use std::collections::HashMap;
383
384impl<R: EvaluationContext> Engine<R> {
385    pub fn begin_bulk_ingest(&mut self) -> ingest_builder::BulkIngestBuilder<'_> {
386        ingest_builder::BulkIngestBuilder::new(&mut self.graph)
387    }
388
389    pub fn intern_formula_ast(&mut self, ast: &formualizer_parse::parser::ASTNode) -> AstNodeId {
390        self.graph.store_ast(ast)
391    }
392}
393
394/// 🔮 Scalability Hook: Performance monitoring trait for calculation observability
395pub trait CalcObserver: Send + Sync {
396    fn on_eval_start(&self, vertex_id: VertexId);
397    fn on_eval_complete(&self, vertex_id: VertexId, duration: std::time::Duration);
398    fn on_cycle_detected(&self, cycle: &[VertexId]);
399    fn on_dirty_propagation(&self, vertex_id: VertexId, affected_count: usize);
400}
401
402/// Default no-op observer
403impl CalcObserver for () {
404    fn on_eval_start(&self, _vertex_id: VertexId) {}
405    fn on_eval_complete(&self, _vertex_id: VertexId, _duration: std::time::Duration) {}
406    fn on_cycle_detected(&self, _cycle: &[VertexId]) {}
407    fn on_dirty_propagation(&self, _vertex_id: VertexId, _affected_count: usize) {}
408}
409
410/// Deterministic evaluation configuration.
411///
412/// When enabled, volatile sources (clock/timezone) are derived solely from this config.
413#[derive(Debug, Clone, PartialEq, Eq)]
414pub enum DeterministicMode {
415    /// Non-deterministic: uses the system clock.
416    Disabled {
417        /// Timezone used by volatile date/time builtins.
418        timezone: TimeZoneSpec,
419    },
420    /// Deterministic: uses a fixed timestamp in the provided timezone.
421    Enabled {
422        /// Fixed timestamp expressed in UTC.
423        timestamp_utc: DateTime<Utc>,
424        /// Timezone used to interpret `timestamp_utc` for NOW()/TODAY().
425        timezone: TimeZoneSpec,
426    },
427}
428
429impl Default for DeterministicMode {
430    fn default() -> Self {
431        Self::Disabled {
432            timezone: TimeZoneSpec::default(),
433        }
434    }
435}
436
437impl DeterministicMode {
438    pub fn is_enabled(&self) -> bool {
439        matches!(self, DeterministicMode::Enabled { .. })
440    }
441
442    pub fn timezone(&self) -> &TimeZoneSpec {
443        match self {
444            DeterministicMode::Disabled { timezone } => timezone,
445            DeterministicMode::Enabled { timezone, .. } => timezone,
446        }
447    }
448
449    pub fn validate(&self) -> Result<(), ExcelError> {
450        if let DeterministicMode::Enabled { timezone, .. } = self {
451            timezone
452                .validate_for_determinism()
453                .map_err(|msg| ExcelError::new(ExcelErrorKind::Value).with_message(msg))?;
454        }
455        Ok(())
456    }
457
458    pub fn build_clock(
459        &self,
460    ) -> Result<std::sync::Arc<dyn crate::timezone::ClockProvider>, ExcelError> {
461        self.validate()?;
462        Ok(match self {
463            #[cfg(feature = "system-clock")]
464            DeterministicMode::Disabled { timezone } => {
465                std::sync::Arc::new(crate::timezone::SystemClock::new(timezone.clone()))
466            }
467            #[cfg(not(feature = "system-clock"))]
468            DeterministicMode::Disabled { timezone: _ } => {
469                // Without the system-clock feature, Disabled mode falls back to a
470                // UTC epoch clock so the engine still initialises cleanly in portable
471                // wasm guests.  Callers that need real wall-clock time must inject a
472                // `ClockProvider` via `EvalConfig::clock`.
473                std::sync::Arc::new(crate::timezone::FixedClock::new(
474                    chrono::DateTime::UNIX_EPOCH,
475                    crate::timezone::TimeZoneSpec::Utc,
476                ))
477            }
478            DeterministicMode::Enabled {
479                timestamp_utc,
480                timezone,
481            } => std::sync::Arc::new(crate::timezone::FixedClock::new(
482                *timestamp_utc,
483                timezone.clone(),
484            )),
485        })
486    }
487}
488
489/// Policy for handling malformed formulas encountered during workbook ingest.
490#[derive(Debug, Clone, Copy, PartialEq, Eq)]
491pub enum FormulaParsePolicy {
492    /// Reject malformed formulas and fail the load/evaluation path.
493    Strict,
494    /// Convert malformed formulas into literal error formulas (`#ERROR!`).
495    CoerceToError,
496    /// Keep the backend-provided cached value and drop the formula.
497    KeepCachedValue,
498    /// Treat the original formula text as a plain text literal.
499    AsText,
500}
501
502/// Captured diagnostic for a malformed formula encountered during ingest/graph-build.
503#[derive(Debug, Clone, PartialEq, Eq)]
504pub struct FormulaParseDiagnostic {
505    pub sheet: String,
506    pub row: u32,
507    pub col: u32,
508    pub formula: String,
509    pub message: String,
510    pub policy: FormulaParsePolicy,
511}
512
513#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
514pub enum FormulaPlaneMode {
515    /// Disable FormulaPlane promotion/evaluation. This is the stable default;
516    /// span evaluation is explicitly opt-in through configuration.
517    #[default]
518    Off,
519    Shadow,
520    /// Experimental mode: accepted FormulaPlane spans are installed into
521    /// graph-owned authority and are not materialized as per-cell graph formulas.
522    AuthoritativeExperimental,
523}
524
525/// Workbook ingest limits applied by loader backends before they materialize large sheets.
526#[derive(Debug, Clone, PartialEq, Eq)]
527pub struct WorkbookLoadLimits {
528    /// Hard cap for declared/logical sheet rows.
529    pub max_sheet_rows: u32,
530    /// Hard cap for declared/logical sheet columns.
531    pub max_sheet_cols: u32,
532    /// Hard cap for the rectangular logical area a backend may materialize.
533    pub max_sheet_logical_cells: u64,
534    /// Sparse-sheet checks only trigger once a sheet reaches this many logical cells.
535    pub sparse_sheet_cell_threshold: u64,
536    /// Maximum allowed logical-to-populated-cell ratio once the sparse threshold is crossed.
537    pub max_sparse_cell_ratio: u64,
538}
539
540impl Default for WorkbookLoadLimits {
541    fn default() -> Self {
542        Self {
543            max_sheet_rows: 1_048_576,
544            max_sheet_cols: 16_384,
545            max_sheet_logical_cells: 128_000_000,
546            sparse_sheet_cell_threshold: 250_000,
547            max_sparse_cell_ratio: 1_024,
548        }
549    }
550}
551
552/// Configuration for the evaluation engine
553#[derive(Debug, Clone)]
554pub struct EvalConfig {
555    pub enable_parallel: bool,
556    pub max_threads: Option<usize>,
557    // 🔮 Scalability Hook: Resource limits (future-proofing)
558    pub max_vertices: Option<usize>,
559    pub max_eval_time: Option<std::time::Duration>,
560    pub max_memory_mb: Option<usize>,
561
562    /// Default sheet name used when no sheet is provided.
563    pub default_sheet_name: String,
564
565    /// When false, resolve defined names case-insensitively (ASCII only).
566    ///
567    /// This matches Excel behavior for defined names.
568    pub case_sensitive_names: bool,
569
570    /// When false, resolve table names case-insensitively (ASCII only).
571    ///
572    /// This matches Excel behavior for native table (ListObject) names.
573    pub case_sensitive_tables: bool,
574
575    /// Stable workbook seed used for deterministic RNG composition
576    pub workbook_seed: u64,
577
578    /// Volatile granularity for RNG seeding and re-evaluation policy
579    pub volatile_level: VolatileLevel,
580
581    /// Deterministic evaluation configuration (clock/timezone injection).
582    pub deterministic_mode: DeterministicMode,
583
584    // Range handling configuration (Phase 5)
585    /// Ranges with size <= this limit are expanded into individual Cell dependencies
586    pub range_expansion_limit: usize,
587
588    /// Fallback maximum row bound for open-ended references (e.g. `A:A`, `A1:A`).
589    ///
590    /// This is only used when used-bounds cannot be determined.
591    pub max_open_ended_rows: u32,
592
593    /// Fallback maximum column bound for open-ended references (e.g. `1:1`, `A1:1`).
594    ///
595    /// This is only used when used-bounds cannot be determined.
596    pub max_open_ended_cols: u32,
597
598    /// Height of stripe blocks for dense range indexing
599    pub stripe_height: u32,
600    /// Width of stripe blocks for dense range indexing  
601    pub stripe_width: u32,
602    /// Enable block stripes for dense ranges (vs row/column stripes only)
603    pub enable_block_stripes: bool,
604
605    /// Spill behavior configuration (conflicts, bounds, buffering)
606    pub spill: SpillConfig,
607
608    /// Use dynamic topological ordering (Pearce-Kelly algorithm)
609    pub use_dynamic_topo: bool,
610    /// Maximum nodes to visit before falling back to full rebuild
611    pub pk_visit_budget: usize,
612    /// Operations between periodic rank compaction
613    pub pk_compaction_interval_ops: u64,
614    /// Maximum width for parallel evaluation layers
615    pub max_layer_width: Option<usize>,
616    /// If true, reject edge insertions that would create a cycle (skip adding that dependency).
617    /// If false, allow insertion and let scheduler handle cycles at evaluation time.
618    pub pk_reject_cycle_edges: bool,
619    /// Sheet index build strategy for bulk loads
620    pub sheet_index_mode: SheetIndexMode,
621
622    /// Warmup configuration for global pass planning (Phase 1)
623    pub warmup: tuning::WarmupConfig,
624
625    /// Enable Arrow-backed storage reads (Phase A)
626    pub arrow_storage_enabled: bool,
627    /// Enable delta overlay for Arrow sheets (Phase C)
628    pub delta_overlay_enabled: bool,
629
630    /// Mirror formula scalar results into Arrow overlay for Arrow-backed reads
631    /// This enables Arrow-only RangeView correctness without Hybrid fallback.
632    pub write_formula_overlay_enabled: bool,
633
634    /// Optional memory budget (in bytes) for formula/spill computed Arrow overlays.
635    ///
636    /// When set, the engine will compact computed overlays into base lanes when the
637    /// estimated usage exceeds this cap.
638    pub max_overlay_memory_bytes: Option<usize>,
639
640    /// Workbook date system: Excel 1900 (default) or 1904.
641    pub date_system: DateSystem,
642
643    /// Policy for malformed formulas encountered during ingest/graph-build.
644    pub formula_parse_policy: FormulaParsePolicy,
645
646    /// Defer dependency graph building: ingest values immediately but stage formulas
647    /// for on-demand graph construction during evaluation.
648    pub defer_graph_building: bool,
649
650    /// Enable virtual dependency convergence telemetry collection.
651    ///
652    /// When disabled, the engine avoids per-pass timing/edge-count bookkeeping.
653    pub enable_virtual_dep_telemetry: bool,
654
655    /// FormulaPlane ingest/planning mode. Defaults to `Off`; span evaluation is
656    /// explicitly opt-in while `AuthoritativeExperimental` remains experimental.
657    /// `Shadow` may report candidate span opportunities but must still materialize
658    /// every formula via the legacy graph path.
659    pub formula_plane_mode: FormulaPlaneMode,
660
661    /// Maximum bytes for the engine-side lookup-index cache.
662    pub lookup_index_cache_max_bytes: usize,
663}
664
665impl Default for EvalConfig {
666    fn default() -> Self {
667        Self {
668            enable_parallel: true,
669            max_threads: None,
670            max_vertices: None,
671            max_eval_time: None,
672            max_memory_mb: None,
673
674            default_sheet_name: format!("Sheet{}", 1),
675
676            // Excel compatibility: identifiers are case-insensitive by default.
677            case_sensitive_names: false,
678            case_sensitive_tables: false,
679
680            // Deterministic RNG seed (matches traits default)
681            workbook_seed: 0xF0F0_D0D0_AAAA_5555,
682
683            // Volatile model default
684            volatile_level: VolatileLevel::Always,
685
686            deterministic_mode: DeterministicMode::default(),
687
688            // Range handling defaults (Phase 5)
689            range_expansion_limit: 64,
690            // Open-ended reference defaults (Excel max dimensions).
691            // Lower these to cap `A:A` / `1:1` when used-bounds are unknown.
692            max_open_ended_rows: 1_048_576,
693            max_open_ended_cols: 16_384,
694            stripe_height: 256,
695            stripe_width: 256,
696            enable_block_stripes: false,
697            spill: SpillConfig::default(),
698
699            // Dynamic topology configuration
700            use_dynamic_topo: false, // Disabled by default for compatibility
701            pk_visit_budget: 50_000,
702            pk_compaction_interval_ops: 100_000,
703            max_layer_width: None,
704            pk_reject_cycle_edges: false,
705            sheet_index_mode: SheetIndexMode::Eager,
706            warmup: tuning::WarmupConfig::default(),
707            arrow_storage_enabled: true,
708            delta_overlay_enabled: true,
709            write_formula_overlay_enabled: true,
710            max_overlay_memory_bytes: None,
711            date_system: DateSystem::Excel1900,
712            formula_parse_policy: FormulaParsePolicy::Strict,
713            defer_graph_building: false,
714            enable_virtual_dep_telemetry: false,
715            formula_plane_mode: FormulaPlaneMode::Off,
716            lookup_index_cache_max_bytes: 64 * 1024 * 1024,
717        }
718    }
719}
720
721impl EvalConfig {
722    #[inline]
723    pub fn with_range_expansion_limit(mut self, limit: usize) -> Self {
724        self.range_expansion_limit = limit;
725        self
726    }
727
728    #[inline]
729    pub fn with_parallel(mut self, enable: bool) -> Self {
730        self.enable_parallel = enable;
731        self
732    }
733
734    #[inline]
735    pub fn with_block_stripes(mut self, enable: bool) -> Self {
736        self.enable_block_stripes = enable;
737        self
738    }
739
740    #[inline]
741    pub fn with_case_sensitive_names(mut self, enable: bool) -> Self {
742        self.case_sensitive_names = enable;
743        self
744    }
745
746    #[inline]
747    pub fn with_case_sensitive_tables(mut self, enable: bool) -> Self {
748        self.case_sensitive_tables = enable;
749        self
750    }
751
752    #[inline]
753    pub fn with_arrow_storage(mut self, enable: bool) -> Self {
754        self.arrow_storage_enabled = enable;
755        self
756    }
757
758    #[inline]
759    pub fn with_delta_overlay(mut self, enable: bool) -> Self {
760        self.delta_overlay_enabled = enable;
761        self
762    }
763
764    #[inline]
765    pub fn with_formula_overlay(mut self, enable: bool) -> Self {
766        self.write_formula_overlay_enabled = enable;
767        self
768    }
769
770    #[inline]
771    pub fn with_date_system(mut self, system: DateSystem) -> Self {
772        self.date_system = system;
773        self
774    }
775
776    #[inline]
777    pub fn with_formula_parse_policy(mut self, policy: FormulaParsePolicy) -> Self {
778        self.formula_parse_policy = policy;
779        self
780    }
781
782    #[inline]
783    pub fn with_virtual_dep_telemetry(mut self, enable: bool) -> Self {
784        self.enable_virtual_dep_telemetry = enable;
785        self
786    }
787
788    #[inline]
789    pub fn with_formula_plane_mode(mut self, mode: FormulaPlaneMode) -> Self {
790        self.formula_plane_mode = mode;
791        self
792    }
793}
794
795#[derive(Debug, Clone, Copy, PartialEq, Eq)]
796pub enum SheetIndexMode {
797    /// Build full interval-tree based index during inserts (current behavior)
798    Eager,
799    /// Defer building any sheet index until first range query or explicit finalize
800    Lazy,
801    /// Use fast batch building (sorted arrays -> tree) when bulk loading, otherwise incremental
802    FastBatch,
803}
804
805pub use formualizer_common::DateSystem;
806
807/// Construct a new engine with the given resolver and configuration
808pub fn new_engine<R>(resolver: R, config: EvalConfig) -> Engine<R>
809where
810    R: EvaluationContext + 'static,
811{
812    Engine::new(resolver, config)
813}
814
815/// Configuration for spill behavior. Nested under EvalConfig to avoid bloating the top-level.
816#[derive(Debug, Clone, Copy, PartialEq, Eq)]
817pub struct SpillConfig {
818    /// What to do when target region overlaps non-empty cells or other spills.
819    pub conflict_policy: SpillConflictPolicy,
820    /// Tiebreaker used when policy allows preemption or multiple anchors race.
821    pub tiebreaker: SpillTiebreaker,
822    /// Bounds handling when result exceeds sheet capacity.
823    pub bounds_policy: SpillBoundsPolicy,
824    /// Buffering approach for spill writes.
825    pub buffer_mode: SpillBufferMode,
826    /// Optional memory budget for shadow buffering in bytes.
827    pub memory_budget_bytes: Option<u64>,
828    /// Cancellation behavior while streaming rows.
829    pub cancellation: SpillCancellationPolicy,
830    /// Visibility policy for staged writes.
831    pub visibility: SpillVisibility,
832
833    /// Hard cap on the number of cells a single spill may project.
834    ///
835    /// This prevents pathological vertex explosions from very large dynamic arrays.
836    pub max_spill_cells: u32,
837}
838
839impl Default for SpillConfig {
840    fn default() -> Self {
841        Self {
842            conflict_policy: SpillConflictPolicy::Error,
843            tiebreaker: SpillTiebreaker::FirstWins,
844            bounds_policy: SpillBoundsPolicy::Strict,
845            buffer_mode: SpillBufferMode::ShadowBuffer,
846            memory_budget_bytes: None,
847            cancellation: SpillCancellationPolicy::Cooperative,
848            visibility: SpillVisibility::OnCommit,
849            // Conservative: enough for common UI patterns, small enough to avoid graph blowups.
850            max_spill_cells: 10_000,
851        }
852    }
853}
854
855#[derive(Debug, Clone, Copy, PartialEq, Eq)]
856pub enum SpillConflictPolicy {
857    Error,
858    Preempt,
859}
860
861#[derive(Debug, Clone, Copy, PartialEq, Eq)]
862pub enum SpillTiebreaker {
863    FirstWins,
864    EvaluationEpochAsc,
865    AnchorAddressAsc,
866    FunctionPriorityThenAddress,
867}
868
869#[derive(Debug, Clone, Copy, PartialEq, Eq)]
870pub enum SpillBoundsPolicy {
871    Strict,
872    Truncate,
873}
874
875#[derive(Debug, Clone, Copy, PartialEq, Eq)]
876pub enum SpillBufferMode {
877    ShadowBuffer,
878    PersistenceJournal,
879}
880
881#[derive(Debug, Clone, Copy, PartialEq, Eq)]
882pub enum SpillCancellationPolicy {
883    Cooperative,
884    Strict,
885}
886
887#[derive(Debug, Clone, Copy, PartialEq, Eq)]
888pub enum SpillVisibility {
889    OnCommit,
890    StagedLayer,
891}
892
893/*
894 * Scenario: Tombstone Registry for Missing Sheets
895 * When a sheet is deleted, formulas pointing to it become "orphans."
896 * Instead of losing the connection, we store the formula's VertexId
897 * under the name of the missing sheet.
898 *
899 * Why it matters:
900 * This allows Sheet Addition to remain O(1) for the general case,
901 * while providing O(N_orphans) recovery for broken formulas.
902 */
903#[derive(Debug, Default)]
904pub struct TombstoneRegistry {
905    // Maps "SheetName" -> Vec<VertexId of formulas waiting for it>
906    pub pending_references: HashMap<String, Vec<VertexId>>,
907}
908
909impl TombstoneRegistry {
910    /// Record that a vertex is waiting for a specific sheet name to appear.
911    pub fn add_orphan(&mut self, sheet_name: String, vertex_id: VertexId) {
912        self.pending_references
913            .entry(sheet_name)
914            .or_default()
915            .push(vertex_id);
916    }
917
918    /// Retrieve and remove all vertices waiting for a specific sheet name.
919    pub fn take_orphans(&mut self, sheet_name: &str) -> Vec<VertexId> {
920        self.pending_references
921            .remove(sheet_name)
922            .unwrap_or_default()
923    }
924}