Skip to main content

formualizer_eval/engine/
mod.rs

1//! Formualizer Dependency Graph Engine
2//!
3//! Provides incremental formula evaluation with dependency tracking.
4
5pub mod arrow_ingest;
6pub(crate) mod convergence;
7pub mod effects;
8pub mod eval;
9pub mod eval_delta;
10pub mod formula_ingest;
11pub mod graph;
12pub mod ingest;
13pub mod ingest_builder;
14pub(crate) mod ingest_pipeline;
15pub mod journal;
16pub mod live_edges;
17pub mod live_graph;
18pub mod lookup_index_cache;
19pub mod plan;
20pub mod range_view;
21pub mod row_visibility;
22pub mod scheduler;
23pub mod spill;
24pub mod vertex;
25pub mod virtual_deps;
26
27// New SoA modules
28pub mod csr_edges;
29pub mod debug_views;
30pub mod delta_edges;
31pub mod interval_tree;
32pub mod named_range;
33pub mod sheet_index;
34pub mod sheet_registry;
35pub mod topo;
36pub mod vertex_store;
37
38// Phase 1: Arena modules
39pub mod arena;
40
41// Phase 1: Warmup configuration (kept for compatibility)
42pub mod tuning;
43
44#[cfg(test)]
45mod tests;
46
47pub use arena::AstNodeId;
48pub use eval::{
49    CycleTelemetry, Engine, EngineAction, EngineBaselineStats, EvalResult, RecalcPlan,
50    VirtualDepTelemetry,
51};
52pub use eval_delta::{DeltaMode, EvalDelta};
53pub use formula_ingest::{FormulaIngestBatch, FormulaIngestRecord, FormulaIngestReport};
54pub use journal::{ActionJournal, ArrowOp, ArrowUndoBatch, GraphUndoBatch};
55// Use SoA implementation
56pub use graph::snapshot::VertexSnapshot;
57pub use graph::{
58    ChangeEvent, DependencyGraph, DependencyRef, GraphBaselineStats, OperationSummary, StripeKey,
59    StripeType, block_index,
60};
61pub use row_visibility::{RowVisibilitySource, VisibilityMaskMode};
62pub use scheduler::{Layer, Schedule, ScheduleUnit, Scheduler};
63pub use vertex::{VertexId, VertexKind};
64
65pub use graph::editor::{
66    DataUpdateSummary, EditorError, MetaUpdateSummary, RangeSummary, ShiftSummary, TransactionId,
67    VertexDataPatch, VertexEditor, VertexMeta, VertexMetaPatch,
68};
69
70pub use graph::editor::change_log::{ChangeLog, ChangeLogger, NullChangeLogger};
71
72#[doc(hidden)]
73pub mod fp8_parity_test_support {
74    use super::{Engine, EvalConfig};
75    use crate::engine::arena::CanonicalLabels;
76    use crate::formula_plane::dependency_summary::summarize_canonical_template;
77    use crate::formula_plane::producer::SpanReadSummary;
78    use crate::formula_plane::runtime::{PlacementDomain, ResultRegion};
79    use crate::formula_plane::template_canonical::{
80        CanonicalRejectReason, CanonicalTemplateFlag, canonicalize_template,
81    };
82    use crate::reference::{CellRef, Coord};
83    use crate::traits::EvaluationContext;
84    use formualizer_common::{ExcelError, LiteralValue};
85    use formualizer_parse::parser::{ASTNode, parse};
86    use std::sync::Arc;
87
88    #[derive(Clone, Debug)]
89    pub struct Fp8ParityObservation {
90        pub formula: String,
91        pub placement: CellRef,
92        pub old_payload: String,
93        pub new_hash: u64,
94    }
95
96    pub fn default_config() -> EvalConfig {
97        EvalConfig::default()
98    }
99
100    pub fn parse_formula(formula: &str) -> ASTNode {
101        parse(formula).unwrap_or_else(|err| panic!("parse {formula}: {err}"))
102    }
103
104    pub fn cell(sheet_id: u16, row: u32, col: u32) -> CellRef {
105        CellRef::new(sheet_id, Coord::from_excel(row, col, true, true))
106    }
107
108    pub fn assert_case<R: EvaluationContext>(
109        engine: &mut Engine<R>,
110        formula: &str,
111        placement: CellRef,
112    ) -> Fp8ParityObservation {
113        let parsed = parse_formula(formula);
114        assert_case_ast(engine, formula, parsed, placement)
115    }
116
117    pub fn assert_case_ast<R: EvaluationContext>(
118        engine: &mut Engine<R>,
119        formula: &str,
120        parsed: ASTNode,
121        placement: CellRef,
122    ) -> Fp8ParityObservation {
123        let mut old_ast = parsed.clone();
124        let old_rewrite = engine
125            .graph
126            .rewrite_structured_references_for_cell(&mut old_ast, placement);
127        let old = old_rewrite.and_then(|_| old_path(engine, &old_ast, placement));
128
129        let new = {
130            let mut pipeline = engine.ingest_pipeline();
131            pipeline.ingest_formula(
132                crate::engine::ingest_pipeline::FormulaAstInput::Tree(parsed),
133                placement,
134                Some(Arc::<str>::from(formula)),
135            )
136        };
137
138        match (old, new) {
139            (Ok(old), Ok(new)) => {
140                let new_direct = sorted_cells(new.dep_plan.direct_cell_deps.clone());
141                assert_eq!(
142                    old.direct_cells, new_direct,
143                    "direct deps differ for {formula} at {placement:?}\nold={:?}\nnew={:?}",
144                    old.direct_cells, new_direct
145                );
146                assert_eq!(
147                    old.range_deps, new.dep_plan.range_deps,
148                    "range deps differ for {formula} at {placement:?}"
149                );
150                assert_eq!(
151                    old.unresolved_names, new.dep_plan.named_refs,
152                    "unresolved names differ for {formula} at {placement:?}"
153                );
154                assert_eq!(
155                    old.volatile, new.dep_plan.volatile,
156                    "volatile flag differs for {formula} at {placement:?}"
157                );
158                assert_eq!(
159                    old.dynamic, new.dep_plan.dynamic,
160                    "dynamic flag differs for {formula} at {placement:?}"
161                );
162                let mut expected_labels = canonical_labels_from_old(&old.labels);
163                if old.dynamic {
164                    expected_labels.flags |= CanonicalLabels::FLAG_DYNAMIC;
165                }
166                assert_eq!(
167                    expected_labels.flags, new.labels.flags,
168                    "canonical label flags differ for {formula} at {placement:?}\nold={:?}\nnew={:#x}",
169                    old.labels.flags, new.labels.flags
170                );
171                assert_eq!(
172                    expected_labels.rejects, new.labels.rejects,
173                    "canonical label rejects differ for {formula} at {placement:?}\nold={:?}\nnew={:#x}",
174                    old.labels.reject_reasons, new.labels.rejects
175                );
176                // The passive summary oracle cannot resolve defined names, so
177                // a named formula that the ingest pipeline resolved to a
178                // concrete region is an intentional superset: old None / new
179                // Some is allowed exactly when the only blocking reason was a
180                // named reference.
181                let named_resolution_superset = old.summary_rejected_only_for_named_reference
182                    && old.read_summary_debug.is_none();
183                if !named_resolution_superset {
184                    assert_eq!(
185                        old.read_summary_debug,
186                        new.read_summary.as_ref().map(|s| format!("{s:?}")),
187                        "read summary differs for {formula} at {placement:?}"
188                    );
189                }
190                assert_eq!(new.formula_text.as_deref(), Some(formula));
191                assert_eq!(new.placement, placement);
192                Fp8ParityObservation {
193                    formula: formula.to_string(),
194                    placement,
195                    old_payload: old.payload,
196                    new_hash: new.canonical_hash,
197                }
198            }
199            (Err(old), Err(new)) => {
200                assert_eq!(
201                    old.kind.to_string(),
202                    new.kind.to_string(),
203                    "old and new errored differently for {formula} at {placement:?}: old={old:?} new={new:?}"
204                );
205                Fp8ParityObservation {
206                    formula: formula.to_string(),
207                    placement,
208                    old_payload: format!("ERR:{:?}", old.kind),
209                    new_hash: 0,
210                }
211            }
212            (Ok(_), Err(new)) => panic!(
213                "new pipeline errored but old path succeeded for {formula} at {placement:?}: {new:?}"
214            ),
215            (Err(old), Ok(_)) => panic!(
216                "old path errored but new pipeline succeeded for {formula} at {placement:?}: {old:?}"
217            ),
218        }
219    }
220
221    #[derive(Debug)]
222    struct OldOutput {
223        payload: String,
224        labels: crate::formula_plane::template_canonical::CanonicalTemplateLabels,
225        direct_cells: Vec<CellRef>,
226        range_deps: Vec<crate::reference::SharedRangeRef<'static>>,
227        unresolved_names: Vec<String>,
228        volatile: bool,
229        dynamic: bool,
230        read_summary_debug: Option<String>,
231        summary_rejected_only_for_named_reference: bool,
232    }
233
234    fn old_path<R: EvaluationContext>(
235        engine: &mut Engine<R>,
236        ast: &ASTNode,
237        placement: CellRef,
238    ) -> Result<OldOutput, ExcelError> {
239        let (_deps, ranges, placeholders, _named, unresolved_names) = engine
240            .graph
241            .fp8_parity_extract_dependencies_with_pending_names(ast, placement.sheet_id)?;
242        let volatile = engine.graph.fp8_parity_is_ast_volatile(ast);
243        let dynamic = engine.graph.is_ast_dynamic(ast);
244        let template =
245            canonicalize_template(ast, placement.coord.row() + 1, placement.coord.col() + 1);
246        let summary = summarize_canonical_template(&template);
247        let scalar_domain = PlacementDomain::row_run(
248            placement.sheet_id,
249            placement.coord.row(),
250            placement.coord.row(),
251            placement.coord.col(),
252        );
253        let result_region = ResultRegion::scalar_cells(scalar_domain);
254        let read_summary = SpanReadSummary::from_formula_summary(
255            placement.sheet_id,
256            &result_region,
257            &summary,
258            engine.graph.sheet_reg(),
259        )
260        .ok();
261        let summary_rejected_only_for_named_reference = !summary.reject_reasons.is_empty()
262            && summary.reject_reasons.iter().all(|reason| {
263                matches!(
264                    reason,
265                    crate::formula_plane::dependency_summary::DependencyRejectReason
266                        ::NamedRangeUnsupported { .. }
267                )
268            });
269        Ok(OldOutput {
270            payload: template.key.payload().to_string(),
271            labels: template.labels,
272            direct_cells: sorted_cells(placeholders),
273            range_deps: ranges,
274            unresolved_names,
275            volatile,
276            dynamic,
277            read_summary_debug: read_summary.as_ref().map(|s| format!("{s:?}")),
278            summary_rejected_only_for_named_reference,
279        })
280    }
281
282    fn sorted_cells(mut cells: Vec<CellRef>) -> Vec<CellRef> {
283        cells.sort();
284        cells.dedup();
285        cells
286    }
287
288    fn canonical_labels_from_old(
289        old: &crate::formula_plane::template_canonical::CanonicalTemplateLabels,
290    ) -> CanonicalLabels {
291        let mut labels = CanonicalLabels::default();
292        for flag in &old.flags {
293            labels.flags |= match flag {
294                CanonicalTemplateFlag::ParserVolatileFlag => CanonicalLabels::FLAG_VOLATILE,
295                CanonicalTemplateFlag::FunctionCall => CanonicalLabels::FLAG_CONTAINS_FUNCTION,
296                CanonicalTemplateFlag::CurrentSheetBinding => CanonicalLabels::FLAG_CURRENT_SHEET,
297                CanonicalTemplateFlag::ExplicitSheetBinding => CanonicalLabels::FLAG_EXPLICIT_SHEET,
298                CanonicalTemplateFlag::RelativeReferenceAxis => CanonicalLabels::FLAG_RELATIVE_ONLY,
299                CanonicalTemplateFlag::AbsoluteReferenceAxis => CanonicalLabels::FLAG_ABSOLUTE_ONLY,
300                CanonicalTemplateFlag::MixedAnchors => CanonicalLabels::FLAG_MIXED_ANCHORS,
301                CanonicalTemplateFlag::FiniteRangeReference => CanonicalLabels::FLAG_CONTAINS_RANGE,
302                CanonicalTemplateFlag::NamedReference => CanonicalLabels::FLAG_CONTAINS_NAME,
303            };
304        }
305        for reason in &old.reject_reasons {
306            labels.flags |= match reason {
307                CanonicalRejectReason::DynamicReferenceFunction { .. } => {
308                    CanonicalLabels::FLAG_DYNAMIC
309                }
310                CanonicalRejectReason::ParserVolatileFlag
311                | CanonicalRejectReason::VolatileFunction { .. } => CanonicalLabels::FLAG_VOLATILE,
312                CanonicalRejectReason::LocalEnvironmentFunction { .. } => {
313                    CanonicalLabels::FLAG_CONTAINS_LET_LAMBDA
314                }
315                CanonicalRejectReason::ArrayOrSpillFunction { .. }
316                | CanonicalRejectReason::ArrayLiteral => CanonicalLabels::FLAG_CONTAINS_ARRAY,
317                CanonicalRejectReason::StructuredReference { .. }
318                | CanonicalRejectReason::StructuredReferenceCurrentRow { .. } => {
319                    CanonicalLabels::FLAG_CONTAINS_TABLE
320                        | CanonicalLabels::FLAG_CONTAINS_STRUCTURED_REF
321                }
322                CanonicalRejectReason::OpenRangeReference { .. }
323                | CanonicalRejectReason::WholeAxisReference { .. } => {
324                    CanonicalLabels::FLAG_CONTAINS_RANGE
325                }
326                _ => 0,
327            };
328            labels.rejects |= match reason {
329                CanonicalRejectReason::InvalidPlacementAnchor { .. } => {
330                    CanonicalLabels::REJECT_INVALID_PLACEMENT_ANCHOR
331                }
332                CanonicalRejectReason::DynamicReferenceFunction { .. } => {
333                    CanonicalLabels::REJECT_DYNAMIC_REFERENCE
334                }
335                CanonicalRejectReason::UnknownOrCustomFunction { .. } => {
336                    CanonicalLabels::REJECT_UNKNOWN_OR_CUSTOM_FUNCTION
337                }
338                CanonicalRejectReason::LocalEnvironmentFunction { .. } => {
339                    CanonicalLabels::REJECT_LOCAL_ENVIRONMENT
340                }
341                CanonicalRejectReason::ParserVolatileFlag => {
342                    CanonicalLabels::REJECT_PARSER_VOLATILE_FLAG
343                }
344                CanonicalRejectReason::VolatileFunction { .. } => {
345                    CanonicalLabels::REJECT_VOLATILE_FUNCTION
346                }
347                CanonicalRejectReason::ReferenceReturningFunction { .. } => {
348                    CanonicalLabels::REJECT_REFERENCE_RETURNING_FUNCTION
349                }
350                CanonicalRejectReason::ArrayOrSpillFunction { .. } => {
351                    CanonicalLabels::REJECT_ARRAY_OR_SPILL_FUNCTION
352                }
353                CanonicalRejectReason::ArrayLiteral => CanonicalLabels::REJECT_ARRAY_LITERAL,
354                CanonicalRejectReason::SpillReference { .. } => {
355                    CanonicalLabels::REJECT_SPILL_REFERENCE
356                }
357                CanonicalRejectReason::SpillResultRegionOperator => {
358                    CanonicalLabels::REJECT_SPILL_RESULT_REGION_OPERATOR
359                }
360                CanonicalRejectReason::ImplicitIntersectionOperator => {
361                    CanonicalLabels::REJECT_IMPLICIT_INTERSECTION_OPERATOR
362                }
363                CanonicalRejectReason::CallExpression => CanonicalLabels::REJECT_CALL_EXPRESSION,
364                CanonicalRejectReason::StructuredReference { .. } => {
365                    CanonicalLabels::REJECT_STRUCTURED_REFERENCE
366                }
367                CanonicalRejectReason::StructuredReferenceCurrentRow { .. } => {
368                    CanonicalLabels::REJECT_STRUCTURED_REFERENCE_CURRENT_ROW
369                }
370                CanonicalRejectReason::ThreeDReference { .. } => {
371                    CanonicalLabels::REJECT_THREE_D_REFERENCE
372                }
373                CanonicalRejectReason::ExternalReference { .. } => {
374                    CanonicalLabels::REJECT_EXTERNAL_REFERENCE
375                }
376                CanonicalRejectReason::OpenRangeReference { .. } => {
377                    CanonicalLabels::REJECT_OPEN_RANGE_REFERENCE
378                }
379                CanonicalRejectReason::WholeAxisReference { .. } => {
380                    CanonicalLabels::REJECT_WHOLE_AXIS_REFERENCE
381                }
382                CanonicalRejectReason::UnsupportedReference { .. } => {
383                    CanonicalLabels::REJECT_UNSUPPORTED_REFERENCE
384                }
385            };
386        }
387        labels
388    }
389
390    pub fn literal_number(value: f64) -> LiteralValue {
391        LiteralValue::Number(value)
392    }
393}
394
395// CalcObserver is defined below
396
397use crate::timezone::TimeZoneSpec;
398use crate::traits::EvaluationContext;
399use crate::traits::VolatileLevel;
400use chrono::{DateTime, Utc};
401use formualizer_common::error::{ExcelError, ExcelErrorKind};
402use std::collections::HashMap;
403
404impl<R: EvaluationContext> Engine<R> {
405    pub fn begin_bulk_ingest(&mut self) -> ingest_builder::BulkIngestBuilder<'_> {
406        ingest_builder::BulkIngestBuilder::new(&mut self.graph)
407    }
408
409    pub fn intern_formula_ast(&mut self, ast: &formualizer_parse::parser::ASTNode) -> AstNodeId {
410        self.graph.store_ast(ast)
411    }
412}
413
414/// 🔮 Scalability Hook: Performance monitoring trait for calculation observability
415pub trait CalcObserver: Send + Sync {
416    fn on_eval_start(&self, vertex_id: VertexId);
417    fn on_eval_complete(&self, vertex_id: VertexId, duration: std::time::Duration);
418    fn on_cycle_detected(&self, cycle: &[VertexId]);
419    fn on_dirty_propagation(&self, vertex_id: VertexId, affected_count: usize);
420}
421
422/// Default no-op observer
423impl CalcObserver for () {
424    fn on_eval_start(&self, _vertex_id: VertexId) {}
425    fn on_eval_complete(&self, _vertex_id: VertexId, _duration: std::time::Duration) {}
426    fn on_cycle_detected(&self, _cycle: &[VertexId]) {}
427    fn on_dirty_propagation(&self, _vertex_id: VertexId, _affected_count: usize) {}
428}
429
430/// Deterministic evaluation configuration.
431///
432/// When enabled, volatile sources (clock/timezone) are derived solely from this config.
433#[derive(Debug, Clone, PartialEq, Eq)]
434pub enum DeterministicMode {
435    /// Non-deterministic: uses the system clock.
436    Disabled {
437        /// Timezone used by volatile date/time builtins.
438        timezone: TimeZoneSpec,
439    },
440    /// Deterministic: uses a fixed timestamp in the provided timezone.
441    Enabled {
442        /// Fixed timestamp expressed in UTC.
443        timestamp_utc: DateTime<Utc>,
444        /// Timezone used to interpret `timestamp_utc` for NOW()/TODAY().
445        timezone: TimeZoneSpec,
446    },
447}
448
449impl Default for DeterministicMode {
450    fn default() -> Self {
451        Self::Disabled {
452            timezone: TimeZoneSpec::default(),
453        }
454    }
455}
456
457impl DeterministicMode {
458    pub fn is_enabled(&self) -> bool {
459        matches!(self, DeterministicMode::Enabled { .. })
460    }
461
462    pub fn timezone(&self) -> &TimeZoneSpec {
463        match self {
464            DeterministicMode::Disabled { timezone } => timezone,
465            DeterministicMode::Enabled { timezone, .. } => timezone,
466        }
467    }
468
469    pub fn validate(&self) -> Result<(), ExcelError> {
470        if let DeterministicMode::Enabled { timezone, .. } = self {
471            timezone
472                .validate_for_determinism()
473                .map_err(|msg| ExcelError::new(ExcelErrorKind::Value).with_message(msg))?;
474        }
475        Ok(())
476    }
477
478    pub fn build_clock(
479        &self,
480    ) -> Result<std::sync::Arc<dyn crate::timezone::ClockProvider>, ExcelError> {
481        self.validate()?;
482        Ok(match self {
483            #[cfg(feature = "system-clock")]
484            DeterministicMode::Disabled { timezone } => {
485                std::sync::Arc::new(crate::timezone::SystemClock::new(timezone.clone()))
486            }
487            #[cfg(not(feature = "system-clock"))]
488            DeterministicMode::Disabled { timezone: _ } => {
489                // Without the system-clock feature, Disabled mode falls back to a
490                // UTC epoch clock so the engine still initialises cleanly in portable
491                // wasm guests.  Callers that need real wall-clock time must inject a
492                // `ClockProvider` via `EvalConfig::clock`.
493                std::sync::Arc::new(crate::timezone::FixedClock::new(
494                    chrono::DateTime::UNIX_EPOCH,
495                    crate::timezone::TimeZoneSpec::Utc,
496                ))
497            }
498            DeterministicMode::Enabled {
499                timestamp_utc,
500                timezone,
501            } => std::sync::Arc::new(crate::timezone::FixedClock::new(
502                *timestamp_utc,
503                timezone.clone(),
504            )),
505        })
506    }
507}
508
509/// Policy for handling malformed formulas encountered during workbook ingest.
510#[derive(Debug, Clone, Copy, PartialEq, Eq)]
511pub enum FormulaParsePolicy {
512    /// Reject malformed formulas and fail the load/evaluation path.
513    Strict,
514    /// Convert malformed formulas into literal error formulas (`#ERROR!`).
515    CoerceToError,
516    /// Keep the backend-provided cached value and drop the formula.
517    KeepCachedValue,
518    /// Treat the original formula text as a plain text literal.
519    AsText,
520}
521
522/// Captured diagnostic for a malformed formula encountered during ingest/graph-build.
523#[derive(Debug, Clone, PartialEq, Eq)]
524pub struct FormulaParseDiagnostic {
525    pub sheet: String,
526    pub row: u32,
527    pub col: u32,
528    pub formula: String,
529    pub message: String,
530    pub policy: FormulaParsePolicy,
531}
532
533#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
534pub enum FormulaPlaneMode {
535    /// Disable FormulaPlane promotion/evaluation. This is the stable default;
536    /// span evaluation is explicitly opt-in through configuration.
537    #[default]
538    Off,
539    Shadow,
540    /// Experimental mode: accepted FormulaPlane spans are installed into
541    /// graph-owned authority and are not materialized as per-cell graph formulas.
542    AuthoritativeExperimental,
543}
544
545/// Workbook ingest limits applied by loader backends before they materialize large sheets.
546#[derive(Debug, Clone, PartialEq, Eq)]
547pub struct WorkbookLoadLimits {
548    /// Hard cap for declared/logical sheet rows.
549    pub max_sheet_rows: u32,
550    /// Hard cap for declared/logical sheet columns.
551    pub max_sheet_cols: u32,
552    /// Hard cap for the rectangular logical area a backend may materialize.
553    pub max_sheet_logical_cells: u64,
554    /// Sparse-sheet checks only trigger once a sheet reaches this many logical cells.
555    pub sparse_sheet_cell_threshold: u64,
556    /// Maximum allowed logical-to-populated-cell ratio once the sparse threshold is crossed.
557    pub max_sparse_cell_ratio: u64,
558}
559
560impl Default for WorkbookLoadLimits {
561    fn default() -> Self {
562        Self {
563            max_sheet_rows: 1_048_576,
564            max_sheet_cols: 16_384,
565            max_sheet_logical_cells: 128_000_000,
566            sparse_sheet_cell_threshold: 250_000,
567            max_sparse_cell_ratio: 1_024,
568        }
569    }
570}
571
572/// Configuration for the evaluation engine
573#[derive(Debug, Clone)]
574pub struct EvalConfig {
575    pub enable_parallel: bool,
576    pub max_threads: Option<usize>,
577    // 🔮 Scalability Hook: Resource limits (future-proofing)
578    pub max_vertices: Option<usize>,
579    pub max_eval_time: Option<std::time::Duration>,
580    pub max_memory_mb: Option<usize>,
581
582    /// Default sheet name used when no sheet is provided.
583    pub default_sheet_name: String,
584
585    /// When false, resolve defined names case-insensitively (ASCII only).
586    ///
587    /// This matches Excel behavior for defined names.
588    pub case_sensitive_names: bool,
589
590    /// When false, resolve table names case-insensitively (ASCII only).
591    ///
592    /// This matches Excel behavior for native table (ListObject) names.
593    pub case_sensitive_tables: bool,
594
595    /// Stable workbook seed used for deterministic RNG composition
596    pub workbook_seed: u64,
597
598    /// Volatile granularity for RNG seeding and re-evaluation policy
599    pub volatile_level: VolatileLevel,
600
601    /// Deterministic evaluation configuration (clock/timezone injection).
602    pub deterministic_mode: DeterministicMode,
603
604    // Range handling configuration (Phase 5)
605    /// Ranges with size <= this limit are expanded into individual Cell dependencies
606    pub range_expansion_limit: usize,
607
608    /// Fallback maximum row bound for open-ended references (e.g. `A:A`, `A1:A`).
609    ///
610    /// This is only used when used-bounds cannot be determined.
611    pub max_open_ended_rows: u32,
612
613    /// Fallback maximum column bound for open-ended references (e.g. `1:1`, `A1:1`).
614    ///
615    /// This is only used when used-bounds cannot be determined.
616    pub max_open_ended_cols: u32,
617
618    /// Height of stripe blocks for dense range indexing
619    pub stripe_height: u32,
620    /// Width of stripe blocks for dense range indexing  
621    pub stripe_width: u32,
622    /// Enable block stripes for dense ranges (vs row/column stripes only)
623    pub enable_block_stripes: bool,
624
625    /// Spill behavior configuration (conflicts, bounds, buffering)
626    pub spill: SpillConfig,
627
628    /// Cycle handling configuration (detection mode + policy). Defaults to
629    /// `CycleDetection::Static` (today's stamp-every-static-SCC behavior);
630    /// `CycleDetection::Runtime` is opt-in (RFC #112).
631    pub cycle: CycleConfig,
632
633    /// Use dynamic topological ordering (Pearce-Kelly algorithm)
634    pub use_dynamic_topo: bool,
635    /// Maximum nodes to visit before falling back to full rebuild
636    pub pk_visit_budget: usize,
637    /// Operations between periodic rank compaction
638    pub pk_compaction_interval_ops: u64,
639    /// Maximum width for parallel evaluation layers
640    pub max_layer_width: Option<usize>,
641    /// If true, reject edge insertions that would create a cycle (skip adding that dependency).
642    /// If false, allow insertion and let scheduler handle cycles at evaluation time.
643    pub pk_reject_cycle_edges: bool,
644    /// Sheet index build strategy for bulk loads
645    pub sheet_index_mode: SheetIndexMode,
646
647    /// Warmup configuration for global pass planning (Phase 1)
648    pub warmup: tuning::WarmupConfig,
649
650    /// Enable Arrow-backed storage reads (Phase A)
651    pub arrow_storage_enabled: bool,
652    /// Enable delta overlay for Arrow sheets (Phase C)
653    pub delta_overlay_enabled: bool,
654
655    /// Mirror formula scalar results into Arrow overlay for Arrow-backed reads
656    /// This enables Arrow-only RangeView correctness without Hybrid fallback.
657    pub write_formula_overlay_enabled: bool,
658
659    /// Optional memory budget (in bytes) for formula/spill computed Arrow overlays.
660    ///
661    /// When set, the engine will compact computed overlays into base lanes when the
662    /// estimated usage exceeds this cap.
663    pub max_overlay_memory_bytes: Option<usize>,
664
665    /// Workbook date system: Excel 1900 (default) or 1904.
666    pub date_system: DateSystem,
667
668    /// Policy for malformed formulas encountered during ingest/graph-build.
669    pub formula_parse_policy: FormulaParsePolicy,
670
671    /// Defer dependency graph building: ingest values immediately but stage formulas
672    /// for on-demand graph construction during evaluation.
673    pub defer_graph_building: bool,
674
675    /// Enable virtual dependency convergence telemetry collection.
676    ///
677    /// When disabled, the engine avoids per-pass timing/edge-count bookkeeping.
678    pub enable_virtual_dep_telemetry: bool,
679
680    /// FormulaPlane ingest/planning mode. Defaults to `Off`; span evaluation is
681    /// explicitly opt-in while `AuthoritativeExperimental` remains experimental.
682    /// `Shadow` may report candidate span opportunities but must still materialize
683    /// every formula via the legacy graph path.
684    pub formula_plane_mode: FormulaPlaneMode,
685
686    /// Maximum bytes for the engine-side lookup-index cache.
687    pub lookup_index_cache_max_bytes: usize,
688}
689
690impl Default for EvalConfig {
691    fn default() -> Self {
692        Self {
693            enable_parallel: true,
694            max_threads: None,
695            max_vertices: None,
696            max_eval_time: None,
697            max_memory_mb: None,
698
699            default_sheet_name: format!("Sheet{}", 1),
700
701            // Excel compatibility: identifiers are case-insensitive by default.
702            case_sensitive_names: false,
703            case_sensitive_tables: false,
704
705            // Deterministic RNG seed (matches traits default)
706            workbook_seed: 0xF0F0_D0D0_AAAA_5555,
707
708            // Volatile model default
709            volatile_level: VolatileLevel::Always,
710
711            deterministic_mode: DeterministicMode::default(),
712
713            // Range handling defaults (Phase 5)
714            range_expansion_limit: 64,
715            // Open-ended reference defaults (Excel max dimensions).
716            // Lower these to cap `A:A` / `1:1` when used-bounds are unknown.
717            max_open_ended_rows: 1_048_576,
718            max_open_ended_cols: 16_384,
719            stripe_height: 256,
720            stripe_width: 256,
721            enable_block_stripes: false,
722            spill: SpillConfig::default(),
723            cycle: CycleConfig::default(),
724
725            // Dynamic topology configuration
726            use_dynamic_topo: false, // Disabled by default for compatibility
727            pk_visit_budget: 50_000,
728            pk_compaction_interval_ops: 100_000,
729            max_layer_width: None,
730            pk_reject_cycle_edges: false,
731            sheet_index_mode: SheetIndexMode::Eager,
732            warmup: tuning::WarmupConfig::default(),
733            arrow_storage_enabled: true,
734            delta_overlay_enabled: true,
735            write_formula_overlay_enabled: true,
736            max_overlay_memory_bytes: None,
737            date_system: DateSystem::Excel1900,
738            formula_parse_policy: FormulaParsePolicy::Strict,
739            defer_graph_building: false,
740            enable_virtual_dep_telemetry: false,
741            formula_plane_mode: FormulaPlaneMode::Off,
742            lookup_index_cache_max_bytes: 64 * 1024 * 1024,
743        }
744    }
745}
746
747impl EvalConfig {
748    #[inline]
749    pub fn with_range_expansion_limit(mut self, limit: usize) -> Self {
750        self.range_expansion_limit = limit;
751        self
752    }
753
754    #[inline]
755    pub fn with_parallel(mut self, enable: bool) -> Self {
756        self.enable_parallel = enable;
757        self
758    }
759
760    #[inline]
761    pub fn with_block_stripes(mut self, enable: bool) -> Self {
762        self.enable_block_stripes = enable;
763        self
764    }
765
766    #[inline]
767    pub fn with_case_sensitive_names(mut self, enable: bool) -> Self {
768        self.case_sensitive_names = enable;
769        self
770    }
771
772    #[inline]
773    pub fn with_case_sensitive_tables(mut self, enable: bool) -> Self {
774        self.case_sensitive_tables = enable;
775        self
776    }
777
778    #[inline]
779    pub fn with_arrow_storage(mut self, enable: bool) -> Self {
780        self.arrow_storage_enabled = enable;
781        self
782    }
783
784    #[inline]
785    pub fn with_delta_overlay(mut self, enable: bool) -> Self {
786        self.delta_overlay_enabled = enable;
787        self
788    }
789
790    #[inline]
791    pub fn with_formula_overlay(mut self, enable: bool) -> Self {
792        self.write_formula_overlay_enabled = enable;
793        self
794    }
795
796    #[inline]
797    pub fn with_date_system(mut self, system: DateSystem) -> Self {
798        self.date_system = system;
799        self
800    }
801
802    #[inline]
803    pub fn with_formula_parse_policy(mut self, policy: FormulaParsePolicy) -> Self {
804        self.formula_parse_policy = policy;
805        self
806    }
807
808    #[inline]
809    pub fn with_virtual_dep_telemetry(mut self, enable: bool) -> Self {
810        self.enable_virtual_dep_telemetry = enable;
811        self
812    }
813
814    #[inline]
815    pub fn with_formula_plane_mode(mut self, mode: FormulaPlaneMode) -> Self {
816        self.formula_plane_mode = mode;
817        self
818    }
819
820    /// Set the cycle configuration.
821    ///
822    /// # Panics
823    /// Panics when `cycle` is invalid (see [`CycleConfig::validate`]):
824    /// `Iterate` with `detection: Static`, `max_iterations == 0`, or a
825    /// negative/non-finite `max_change` are config errors rejected at build
826    /// (spec §2). [`Engine::new`] re-validates for configs assembled via
827    /// struct literals.
828    #[inline]
829    pub fn with_cycle(mut self, cycle: CycleConfig) -> Self {
830        if let Err(msg) = cycle.validate() {
831            panic!("invalid CycleConfig: {msg}");
832        }
833        self.cycle = cycle;
834        self
835    }
836}
837
838/// Cycle handling configuration (spec: `formualizer-cycle-semantics-spec.md` §2).
839///
840/// Nested under [`EvalConfig`] like [`SpillConfig`]; flows through
841/// `WorkbookConfig.eval` automatically.
842#[derive(Debug, Clone, Copy, PartialEq, Default)]
843pub struct CycleConfig {
844    pub detection: CycleDetection,
845    pub policy: CyclePolicy,
846}
847
848impl CycleConfig {
849    /// Runtime detection + Excel-default iterative calculation
850    /// (`max_iterations: 100`, `max_change: 0.001`).
851    pub fn iterate_excel_defaults() -> Self {
852        Self {
853            detection: CycleDetection::Runtime,
854            policy: CyclePolicy::iterate_excel_defaults(),
855        }
856    }
857
858    /// Runtime detection + iterative calculation with explicit knobs.
859    pub fn iterate(max_iterations: u32, max_change: f64) -> Self {
860        Self {
861            detection: CycleDetection::Runtime,
862            policy: CyclePolicy::Iterate {
863                max_iterations,
864                max_change,
865            },
866        }
867    }
868
869    /// Validate the configuration (spec §2). Invalid combinations are
870    /// rejected at build: [`EvalConfig::with_cycle`] and engine construction
871    /// both panic on `Err`.
872    pub fn validate(&self) -> Result<(), String> {
873        if let CyclePolicy::Iterate {
874            max_iterations,
875            max_change,
876        } = self.policy
877        {
878            if self.detection == CycleDetection::Static {
879                return Err(
880                    "CyclePolicy::Iterate requires CycleDetection::Runtime (spec §2)".to_string(),
881                );
882            }
883            if max_iterations == 0 {
884                return Err("CyclePolicy::Iterate max_iterations must be >= 1".to_string());
885            }
886            if !max_change.is_finite() || max_change < 0.0 {
887                return Err(format!(
888                    "CyclePolicy::Iterate max_change must be finite and >= 0 (got {max_change})"
889                ));
890            }
891        }
892        Ok(())
893    }
894
895    /// Whether ingest may accept formulas whose dependencies include the
896    /// formula's own cell (`=B1+A1` in B1). Excel accepts these only with
897    /// iterative calculation enabled; everywhere else the edit-time
898    /// "Self-reference detected" rejection stands.
899    #[inline]
900    pub(crate) fn allows_self_dependency(&self) -> bool {
901        self.detection == CycleDetection::Runtime
902            && matches!(self.policy, CyclePolicy::Iterate { .. })
903    }
904}
905
906/// How statically-cyclic SCCs are treated at evaluation time.
907#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
908pub enum CycleDetection {
909    /// Today's behavior: every static SCC is stamped `#CIRC!`. Compat escape
910    /// hatch; no live-edge machinery runs.
911    #[default]
912    Static,
913    /// Static SCCs are candidates; members are evaluated with live-edge
914    /// recording and only *live* cycles get the policy verdict. Phantom
915    /// (live-acyclic) SCCs produce ordinary values (discussion #99).
916    Runtime,
917}
918
919/// What happens to witnessed (live) cycles under `CycleDetection::Runtime`.
920#[derive(Debug, Clone, Copy, PartialEq, Default)]
921pub enum CyclePolicy {
922    /// Live cycles produce `#CIRC!`.
923    #[default]
924    Error,
925    /// Excel-style iterative calculation (RFC #113, spec §3.5/§6):
926    /// live cycles keep running full passes over all SCC members in member
927    /// order (Gauss–Seidel: each result is committed before the next member
928    /// runs) until every member converges per the spec-§6 rules or
929    /// `max_iterations` total passes (pass 1 included) have run. Hitting the
930    /// cap keeps the last values and is NOT an error (Excel parity);
931    /// telemetry records `capped_sccs`.
932    Iterate {
933        /// Total passes per SCC per recalc, pass 1 included. `1` means each
934        /// member evaluates exactly once per recalc (the Excel accumulator
935        /// contract, spec §7.6); `0` is a config error.
936        max_iterations: u32,
937        /// Absolute per-member convergence threshold on f64 serial values
938        /// (`|Δ| < max_change`, strict — Excel semantics). Negative or
939        /// non-finite values are config errors.
940        max_change: f64,
941    },
942}
943
944impl CyclePolicy {
945    /// Excel's default iterative-calculation knobs.
946    pub const EXCEL_DEFAULT_MAX_ITERATIONS: u32 = 100;
947    /// Excel's default maximum-change threshold.
948    pub const EXCEL_DEFAULT_MAX_CHANGE: f64 = 0.001;
949
950    /// `Iterate` with Excel's defaults (100 iterations, 0.001 max change).
951    pub fn iterate_excel_defaults() -> Self {
952        CyclePolicy::Iterate {
953            max_iterations: Self::EXCEL_DEFAULT_MAX_ITERATIONS,
954            max_change: Self::EXCEL_DEFAULT_MAX_CHANGE,
955        }
956    }
957}
958
959#[derive(Debug, Clone, Copy, PartialEq, Eq)]
960pub enum SheetIndexMode {
961    /// Build full interval-tree based index during inserts (current behavior)
962    Eager,
963    /// Defer building any sheet index until first range query or explicit finalize
964    Lazy,
965    /// Use fast batch building (sorted arrays -> tree) when bulk loading, otherwise incremental
966    FastBatch,
967}
968
969pub use formualizer_common::DateSystem;
970
971/// Construct a new engine with the given resolver and configuration
972pub fn new_engine<R>(resolver: R, config: EvalConfig) -> Engine<R>
973where
974    R: EvaluationContext + 'static,
975{
976    Engine::new(resolver, config)
977}
978
979/// Configuration for spill behavior. Nested under EvalConfig to avoid bloating the top-level.
980#[derive(Debug, Clone, Copy, PartialEq, Eq)]
981pub struct SpillConfig {
982    /// What to do when target region overlaps non-empty cells or other spills.
983    pub conflict_policy: SpillConflictPolicy,
984    /// Tiebreaker used when policy allows preemption or multiple anchors race.
985    pub tiebreaker: SpillTiebreaker,
986    /// Bounds handling when result exceeds sheet capacity.
987    pub bounds_policy: SpillBoundsPolicy,
988    /// Buffering approach for spill writes.
989    pub buffer_mode: SpillBufferMode,
990    /// Optional memory budget for shadow buffering in bytes.
991    pub memory_budget_bytes: Option<u64>,
992    /// Cancellation behavior while streaming rows.
993    pub cancellation: SpillCancellationPolicy,
994    /// Visibility policy for staged writes.
995    pub visibility: SpillVisibility,
996
997    /// Hard cap on the number of cells a single spill may project.
998    ///
999    /// This prevents pathological vertex explosions from very large dynamic arrays.
1000    pub max_spill_cells: u32,
1001}
1002
1003impl Default for SpillConfig {
1004    fn default() -> Self {
1005        Self {
1006            conflict_policy: SpillConflictPolicy::Error,
1007            tiebreaker: SpillTiebreaker::FirstWins,
1008            bounds_policy: SpillBoundsPolicy::Strict,
1009            buffer_mode: SpillBufferMode::ShadowBuffer,
1010            memory_budget_bytes: None,
1011            cancellation: SpillCancellationPolicy::Cooperative,
1012            visibility: SpillVisibility::OnCommit,
1013            // Conservative: enough for common UI patterns, small enough to avoid graph blowups.
1014            max_spill_cells: 10_000,
1015        }
1016    }
1017}
1018
1019#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1020pub enum SpillConflictPolicy {
1021    Error,
1022    Preempt,
1023}
1024
1025#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1026pub enum SpillTiebreaker {
1027    FirstWins,
1028    EvaluationEpochAsc,
1029    AnchorAddressAsc,
1030    FunctionPriorityThenAddress,
1031}
1032
1033#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1034pub enum SpillBoundsPolicy {
1035    Strict,
1036    Truncate,
1037}
1038
1039#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1040pub enum SpillBufferMode {
1041    ShadowBuffer,
1042    PersistenceJournal,
1043}
1044
1045#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1046pub enum SpillCancellationPolicy {
1047    Cooperative,
1048    Strict,
1049}
1050
1051#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1052pub enum SpillVisibility {
1053    OnCommit,
1054    StagedLayer,
1055}
1056
1057/*
1058 * Scenario: Tombstone Registry for Missing Sheets
1059 * When a sheet is deleted, formulas pointing to it become "orphans."
1060 * Instead of losing the connection, we store the formula's VertexId
1061 * under the name of the missing sheet.
1062 *
1063 * Why it matters:
1064 * This allows Sheet Addition to remain O(1) for the general case,
1065 * while providing O(N_orphans) recovery for broken formulas.
1066 */
1067#[derive(Debug, Default)]
1068pub struct TombstoneRegistry {
1069    // Maps "SheetName" -> Vec<VertexId of formulas waiting for it>
1070    pub pending_references: HashMap<String, Vec<VertexId>>,
1071}
1072
1073impl TombstoneRegistry {
1074    /// Record that a vertex is waiting for a specific sheet name to appear.
1075    pub fn add_orphan(&mut self, sheet_name: String, vertex_id: VertexId) {
1076        self.pending_references
1077            .entry(sheet_name)
1078            .or_default()
1079            .push(vertex_id);
1080    }
1081
1082    /// Retrieve and remove all vertices waiting for a specific sheet name.
1083    pub fn take_orphans(&mut self, sheet_name: &str) -> Vec<VertexId> {
1084        self.pending_references
1085            .remove(sheet_name)
1086            .unwrap_or_default()
1087    }
1088}