Skip to main content

formualizer_eval/engine/
mod.rs

1//! Formualizer Dependency Graph Engine
2//!
3//! Provides incremental formula evaluation with dependency tracking.
4
5pub mod arrow_ingest;
6pub mod effects;
7pub mod eval;
8pub mod eval_delta;
9pub mod graph;
10pub mod ingest;
11pub mod ingest_builder;
12pub mod journal;
13pub mod plan;
14pub mod range_view;
15pub mod row_visibility;
16pub mod scheduler;
17pub mod spill;
18pub mod vertex;
19pub mod virtual_deps;
20
21// New SoA modules
22pub mod csr_edges;
23pub mod debug_views;
24pub mod delta_edges;
25pub mod interval_tree;
26pub mod named_range;
27pub mod sheet_index;
28pub mod sheet_registry;
29pub mod topo;
30pub mod vertex_store;
31
32// Phase 1: Arena modules
33pub mod arena;
34
35// Phase 1: Warmup configuration (kept for compatibility)
36pub mod tuning;
37
38#[cfg(test)]
39mod tests;
40
41pub use eval::{Engine, EngineAction, EvalResult, RecalcPlan, VirtualDepTelemetry};
42pub use eval_delta::{DeltaMode, EvalDelta};
43pub use journal::{ActionJournal, ArrowOp, ArrowUndoBatch, GraphUndoBatch};
44// Use SoA implementation
45pub use graph::snapshot::VertexSnapshot;
46pub use graph::{
47    ChangeEvent, DependencyGraph, DependencyRef, OperationSummary, StripeKey, StripeType,
48    block_index,
49};
50pub use row_visibility::{RowVisibilitySource, VisibilityMaskMode};
51pub use scheduler::{Layer, Schedule, Scheduler};
52pub use vertex::{VertexId, VertexKind};
53
54pub use graph::editor::{
55    DataUpdateSummary, EditorError, MetaUpdateSummary, RangeSummary, ShiftSummary, TransactionId,
56    VertexDataPatch, VertexEditor, VertexMeta, VertexMetaPatch,
57};
58
59pub use graph::editor::change_log::{ChangeLog, ChangeLogger, NullChangeLogger};
60
61// CalcObserver is defined below
62
63use crate::timezone::TimeZoneSpec;
64use crate::traits::EvaluationContext;
65use crate::traits::VolatileLevel;
66use chrono::{DateTime, Utc};
67use formualizer_common::error::{ExcelError, ExcelErrorKind};
68use std::collections::HashMap;
69
70impl<R: EvaluationContext> Engine<R> {
71    pub fn begin_bulk_ingest(&mut self) -> ingest_builder::BulkIngestBuilder<'_> {
72        ingest_builder::BulkIngestBuilder::new(&mut self.graph)
73    }
74}
75
76/// 🔮 Scalability Hook: Performance monitoring trait for calculation observability
77pub trait CalcObserver: Send + Sync {
78    fn on_eval_start(&self, vertex_id: VertexId);
79    fn on_eval_complete(&self, vertex_id: VertexId, duration: std::time::Duration);
80    fn on_cycle_detected(&self, cycle: &[VertexId]);
81    fn on_dirty_propagation(&self, vertex_id: VertexId, affected_count: usize);
82}
83
84/// Default no-op observer
85impl CalcObserver for () {
86    fn on_eval_start(&self, _vertex_id: VertexId) {}
87    fn on_eval_complete(&self, _vertex_id: VertexId, _duration: std::time::Duration) {}
88    fn on_cycle_detected(&self, _cycle: &[VertexId]) {}
89    fn on_dirty_propagation(&self, _vertex_id: VertexId, _affected_count: usize) {}
90}
91
92/// Deterministic evaluation configuration.
93///
94/// When enabled, volatile sources (clock/timezone) are derived solely from this config.
95#[derive(Debug, Clone, PartialEq, Eq)]
96pub enum DeterministicMode {
97    /// Non-deterministic: uses the system clock.
98    Disabled {
99        /// Timezone used by volatile date/time builtins.
100        timezone: TimeZoneSpec,
101    },
102    /// Deterministic: uses a fixed timestamp in the provided timezone.
103    Enabled {
104        /// Fixed timestamp expressed in UTC.
105        timestamp_utc: DateTime<Utc>,
106        /// Timezone used to interpret `timestamp_utc` for NOW()/TODAY().
107        timezone: TimeZoneSpec,
108    },
109}
110
111impl Default for DeterministicMode {
112    fn default() -> Self {
113        Self::Disabled {
114            timezone: TimeZoneSpec::default(),
115        }
116    }
117}
118
119impl DeterministicMode {
120    pub fn is_enabled(&self) -> bool {
121        matches!(self, DeterministicMode::Enabled { .. })
122    }
123
124    pub fn timezone(&self) -> &TimeZoneSpec {
125        match self {
126            DeterministicMode::Disabled { timezone } => timezone,
127            DeterministicMode::Enabled { timezone, .. } => timezone,
128        }
129    }
130
131    pub fn validate(&self) -> Result<(), ExcelError> {
132        if let DeterministicMode::Enabled { timezone, .. } = self {
133            timezone
134                .validate_for_determinism()
135                .map_err(|msg| ExcelError::new(ExcelErrorKind::Value).with_message(msg))?;
136        }
137        Ok(())
138    }
139
140    pub fn build_clock(
141        &self,
142    ) -> Result<std::sync::Arc<dyn crate::timezone::ClockProvider>, ExcelError> {
143        self.validate()?;
144        Ok(match self {
145            #[cfg(feature = "system-clock")]
146            DeterministicMode::Disabled { timezone } => {
147                std::sync::Arc::new(crate::timezone::SystemClock::new(timezone.clone()))
148            }
149            #[cfg(not(feature = "system-clock"))]
150            DeterministicMode::Disabled { timezone: _ } => {
151                // Without the system-clock feature, Disabled mode falls back to a
152                // UTC epoch clock so the engine still initialises cleanly in portable
153                // wasm guests.  Callers that need real wall-clock time must inject a
154                // `ClockProvider` via `EvalConfig::clock`.
155                std::sync::Arc::new(crate::timezone::FixedClock::new(
156                    chrono::DateTime::UNIX_EPOCH,
157                    crate::timezone::TimeZoneSpec::Utc,
158                ))
159            }
160            DeterministicMode::Enabled {
161                timestamp_utc,
162                timezone,
163            } => std::sync::Arc::new(crate::timezone::FixedClock::new(
164                *timestamp_utc,
165                timezone.clone(),
166            )),
167        })
168    }
169}
170
171/// Policy for handling malformed formulas encountered during workbook ingest.
172#[derive(Debug, Clone, Copy, PartialEq, Eq)]
173pub enum FormulaParsePolicy {
174    /// Reject malformed formulas and fail the load/evaluation path.
175    Strict,
176    /// Convert malformed formulas into literal error formulas (`#ERROR!`).
177    CoerceToError,
178    /// Keep the backend-provided cached value and drop the formula.
179    KeepCachedValue,
180    /// Treat the original formula text as a plain text literal.
181    AsText,
182}
183
184/// Captured diagnostic for a malformed formula encountered during ingest/graph-build.
185#[derive(Debug, Clone, PartialEq, Eq)]
186pub struct FormulaParseDiagnostic {
187    pub sheet: String,
188    pub row: u32,
189    pub col: u32,
190    pub formula: String,
191    pub message: String,
192    pub policy: FormulaParsePolicy,
193}
194
195/// Workbook ingest limits applied by loader backends before they materialize large sheets.
196#[derive(Debug, Clone, PartialEq, Eq)]
197pub struct WorkbookLoadLimits {
198    /// Hard cap for declared/logical sheet rows.
199    pub max_sheet_rows: u32,
200    /// Hard cap for declared/logical sheet columns.
201    pub max_sheet_cols: u32,
202    /// Hard cap for the rectangular logical area a backend may materialize.
203    pub max_sheet_logical_cells: u64,
204    /// Sparse-sheet checks only trigger once a sheet reaches this many logical cells.
205    pub sparse_sheet_cell_threshold: u64,
206    /// Maximum allowed logical-to-populated-cell ratio once the sparse threshold is crossed.
207    pub max_sparse_cell_ratio: u64,
208}
209
210impl Default for WorkbookLoadLimits {
211    fn default() -> Self {
212        Self {
213            max_sheet_rows: 1_048_576,
214            max_sheet_cols: 16_384,
215            max_sheet_logical_cells: 128_000_000,
216            sparse_sheet_cell_threshold: 250_000,
217            max_sparse_cell_ratio: 1_024,
218        }
219    }
220}
221
222/// Configuration for the evaluation engine
223#[derive(Debug, Clone)]
224pub struct EvalConfig {
225    pub enable_parallel: bool,
226    pub max_threads: Option<usize>,
227    // 🔮 Scalability Hook: Resource limits (future-proofing)
228    pub max_vertices: Option<usize>,
229    pub max_eval_time: Option<std::time::Duration>,
230    pub max_memory_mb: Option<usize>,
231
232    /// Default sheet name used when no sheet is provided.
233    pub default_sheet_name: String,
234
235    /// When false, resolve defined names case-insensitively (ASCII only).
236    ///
237    /// This matches Excel behavior for defined names.
238    pub case_sensitive_names: bool,
239
240    /// When false, resolve table names case-insensitively (ASCII only).
241    ///
242    /// This matches Excel behavior for native table (ListObject) names.
243    pub case_sensitive_tables: bool,
244
245    /// Stable workbook seed used for deterministic RNG composition
246    pub workbook_seed: u64,
247
248    /// Volatile granularity for RNG seeding and re-evaluation policy
249    pub volatile_level: VolatileLevel,
250
251    /// Deterministic evaluation configuration (clock/timezone injection).
252    pub deterministic_mode: DeterministicMode,
253
254    // Range handling configuration (Phase 5)
255    /// Ranges with size <= this limit are expanded into individual Cell dependencies
256    pub range_expansion_limit: usize,
257
258    /// Fallback maximum row bound for open-ended references (e.g. `A:A`, `A1:A`).
259    ///
260    /// This is only used when used-bounds cannot be determined.
261    pub max_open_ended_rows: u32,
262
263    /// Fallback maximum column bound for open-ended references (e.g. `1:1`, `A1:1`).
264    ///
265    /// This is only used when used-bounds cannot be determined.
266    pub max_open_ended_cols: u32,
267
268    /// Height of stripe blocks for dense range indexing
269    pub stripe_height: u32,
270    /// Width of stripe blocks for dense range indexing  
271    pub stripe_width: u32,
272    /// Enable block stripes for dense ranges (vs row/column stripes only)
273    pub enable_block_stripes: bool,
274
275    /// Spill behavior configuration (conflicts, bounds, buffering)
276    pub spill: SpillConfig,
277
278    /// Use dynamic topological ordering (Pearce-Kelly algorithm)
279    pub use_dynamic_topo: bool,
280    /// Maximum nodes to visit before falling back to full rebuild
281    pub pk_visit_budget: usize,
282    /// Operations between periodic rank compaction
283    pub pk_compaction_interval_ops: u64,
284    /// Maximum width for parallel evaluation layers
285    pub max_layer_width: Option<usize>,
286    /// If true, reject edge insertions that would create a cycle (skip adding that dependency).
287    /// If false, allow insertion and let scheduler handle cycles at evaluation time.
288    pub pk_reject_cycle_edges: bool,
289    /// Sheet index build strategy for bulk loads
290    pub sheet_index_mode: SheetIndexMode,
291
292    /// Warmup configuration for global pass planning (Phase 1)
293    pub warmup: tuning::WarmupConfig,
294
295    /// Enable Arrow-backed storage reads (Phase A)
296    pub arrow_storage_enabled: bool,
297    /// Enable delta overlay for Arrow sheets (Phase C)
298    pub delta_overlay_enabled: bool,
299
300    /// Mirror formula scalar results into Arrow overlay for Arrow-backed reads
301    /// This enables Arrow-only RangeView correctness without Hybrid fallback.
302    pub write_formula_overlay_enabled: bool,
303
304    /// Optional memory budget (in bytes) for formula/spill computed Arrow overlays.
305    ///
306    /// When set, the engine will compact computed overlays into base lanes when the
307    /// estimated usage exceeds this cap.
308    pub max_overlay_memory_bytes: Option<usize>,
309
310    /// Workbook date system: Excel 1900 (default) or 1904.
311    pub date_system: DateSystem,
312
313    /// Policy for malformed formulas encountered during ingest/graph-build.
314    pub formula_parse_policy: FormulaParsePolicy,
315
316    /// Defer dependency graph building: ingest values immediately but stage formulas
317    /// for on-demand graph construction during evaluation.
318    pub defer_graph_building: bool,
319
320    /// Enable virtual dependency convergence telemetry collection.
321    ///
322    /// When disabled, the engine avoids per-pass timing/edge-count bookkeeping.
323    pub enable_virtual_dep_telemetry: bool,
324}
325
326impl Default for EvalConfig {
327    fn default() -> Self {
328        Self {
329            enable_parallel: true,
330            max_threads: None,
331            max_vertices: None,
332            max_eval_time: None,
333            max_memory_mb: None,
334
335            default_sheet_name: format!("Sheet{}", 1),
336
337            // Excel compatibility: identifiers are case-insensitive by default.
338            case_sensitive_names: false,
339            case_sensitive_tables: false,
340
341            // Deterministic RNG seed (matches traits default)
342            workbook_seed: 0xF0F0_D0D0_AAAA_5555,
343
344            // Volatile model default
345            volatile_level: VolatileLevel::Always,
346
347            deterministic_mode: DeterministicMode::default(),
348
349            // Range handling defaults (Phase 5)
350            range_expansion_limit: 64,
351            // Open-ended reference defaults (Excel max dimensions).
352            // Lower these to cap `A:A` / `1:1` when used-bounds are unknown.
353            max_open_ended_rows: 1_048_576,
354            max_open_ended_cols: 16_384,
355            stripe_height: 256,
356            stripe_width: 256,
357            enable_block_stripes: false,
358            spill: SpillConfig::default(),
359
360            // Dynamic topology configuration
361            use_dynamic_topo: false, // Disabled by default for compatibility
362            pk_visit_budget: 50_000,
363            pk_compaction_interval_ops: 100_000,
364            max_layer_width: None,
365            pk_reject_cycle_edges: false,
366            sheet_index_mode: SheetIndexMode::Eager,
367            warmup: tuning::WarmupConfig::default(),
368            arrow_storage_enabled: true,
369            delta_overlay_enabled: true,
370            write_formula_overlay_enabled: true,
371            max_overlay_memory_bytes: None,
372            date_system: DateSystem::Excel1900,
373            formula_parse_policy: FormulaParsePolicy::Strict,
374            defer_graph_building: false,
375            enable_virtual_dep_telemetry: false,
376        }
377    }
378}
379
380impl EvalConfig {
381    #[inline]
382    pub fn with_range_expansion_limit(mut self, limit: usize) -> Self {
383        self.range_expansion_limit = limit;
384        self
385    }
386
387    #[inline]
388    pub fn with_parallel(mut self, enable: bool) -> Self {
389        self.enable_parallel = enable;
390        self
391    }
392
393    #[inline]
394    pub fn with_block_stripes(mut self, enable: bool) -> Self {
395        self.enable_block_stripes = enable;
396        self
397    }
398
399    #[inline]
400    pub fn with_case_sensitive_names(mut self, enable: bool) -> Self {
401        self.case_sensitive_names = enable;
402        self
403    }
404
405    #[inline]
406    pub fn with_case_sensitive_tables(mut self, enable: bool) -> Self {
407        self.case_sensitive_tables = enable;
408        self
409    }
410
411    #[inline]
412    pub fn with_arrow_storage(mut self, enable: bool) -> Self {
413        self.arrow_storage_enabled = enable;
414        self
415    }
416
417    #[inline]
418    pub fn with_delta_overlay(mut self, enable: bool) -> Self {
419        self.delta_overlay_enabled = enable;
420        self
421    }
422
423    #[inline]
424    pub fn with_formula_overlay(mut self, enable: bool) -> Self {
425        self.write_formula_overlay_enabled = enable;
426        self
427    }
428
429    #[inline]
430    pub fn with_date_system(mut self, system: DateSystem) -> Self {
431        self.date_system = system;
432        self
433    }
434
435    #[inline]
436    pub fn with_formula_parse_policy(mut self, policy: FormulaParsePolicy) -> Self {
437        self.formula_parse_policy = policy;
438        self
439    }
440
441    #[inline]
442    pub fn with_virtual_dep_telemetry(mut self, enable: bool) -> Self {
443        self.enable_virtual_dep_telemetry = enable;
444        self
445    }
446}
447
448#[derive(Debug, Clone, Copy, PartialEq, Eq)]
449pub enum SheetIndexMode {
450    /// Build full interval-tree based index during inserts (current behavior)
451    Eager,
452    /// Defer building any sheet index until first range query or explicit finalize
453    Lazy,
454    /// Use fast batch building (sorted arrays -> tree) when bulk loading, otherwise incremental
455    FastBatch,
456}
457
458pub use formualizer_common::DateSystem;
459
460/// Construct a new engine with the given resolver and configuration
461pub fn new_engine<R>(resolver: R, config: EvalConfig) -> Engine<R>
462where
463    R: EvaluationContext + 'static,
464{
465    Engine::new(resolver, config)
466}
467
468/// Configuration for spill behavior. Nested under EvalConfig to avoid bloating the top-level.
469#[derive(Debug, Clone, Copy, PartialEq, Eq)]
470pub struct SpillConfig {
471    /// What to do when target region overlaps non-empty cells or other spills.
472    pub conflict_policy: SpillConflictPolicy,
473    /// Tiebreaker used when policy allows preemption or multiple anchors race.
474    pub tiebreaker: SpillTiebreaker,
475    /// Bounds handling when result exceeds sheet capacity.
476    pub bounds_policy: SpillBoundsPolicy,
477    /// Buffering approach for spill writes.
478    pub buffer_mode: SpillBufferMode,
479    /// Optional memory budget for shadow buffering in bytes.
480    pub memory_budget_bytes: Option<u64>,
481    /// Cancellation behavior while streaming rows.
482    pub cancellation: SpillCancellationPolicy,
483    /// Visibility policy for staged writes.
484    pub visibility: SpillVisibility,
485
486    /// Hard cap on the number of cells a single spill may project.
487    ///
488    /// This prevents pathological vertex explosions from very large dynamic arrays.
489    pub max_spill_cells: u32,
490}
491
492impl Default for SpillConfig {
493    fn default() -> Self {
494        Self {
495            conflict_policy: SpillConflictPolicy::Error,
496            tiebreaker: SpillTiebreaker::FirstWins,
497            bounds_policy: SpillBoundsPolicy::Strict,
498            buffer_mode: SpillBufferMode::ShadowBuffer,
499            memory_budget_bytes: None,
500            cancellation: SpillCancellationPolicy::Cooperative,
501            visibility: SpillVisibility::OnCommit,
502            // Conservative: enough for common UI patterns, small enough to avoid graph blowups.
503            max_spill_cells: 10_000,
504        }
505    }
506}
507
508#[derive(Debug, Clone, Copy, PartialEq, Eq)]
509pub enum SpillConflictPolicy {
510    Error,
511    Preempt,
512}
513
514#[derive(Debug, Clone, Copy, PartialEq, Eq)]
515pub enum SpillTiebreaker {
516    FirstWins,
517    EvaluationEpochAsc,
518    AnchorAddressAsc,
519    FunctionPriorityThenAddress,
520}
521
522#[derive(Debug, Clone, Copy, PartialEq, Eq)]
523pub enum SpillBoundsPolicy {
524    Strict,
525    Truncate,
526}
527
528#[derive(Debug, Clone, Copy, PartialEq, Eq)]
529pub enum SpillBufferMode {
530    ShadowBuffer,
531    PersistenceJournal,
532}
533
534#[derive(Debug, Clone, Copy, PartialEq, Eq)]
535pub enum SpillCancellationPolicy {
536    Cooperative,
537    Strict,
538}
539
540#[derive(Debug, Clone, Copy, PartialEq, Eq)]
541pub enum SpillVisibility {
542    OnCommit,
543    StagedLayer,
544}
545
546/*
547 * Scenario: Tombstone Registry for Missing Sheets
548 * When a sheet is deleted, formulas pointing to it become "orphans."
549 * Instead of losing the connection, we store the formula's VertexId
550 * under the name of the missing sheet.
551 *
552 * Why it matters:
553 * This allows Sheet Addition to remain O(1) for the general case,
554 * while providing O(N_orphans) recovery for broken formulas.
555 */
556#[derive(Debug, Default)]
557pub struct TombstoneRegistry {
558    // Maps "SheetName" -> Vec<VertexId of formulas waiting for it>
559    pub pending_references: HashMap<String, Vec<VertexId>>,
560}
561
562impl TombstoneRegistry {
563    /// Record that a vertex is waiting for a specific sheet name to appear.
564    pub fn add_orphan(&mut self, sheet_name: String, vertex_id: VertexId) {
565        self.pending_references
566            .entry(sheet_name)
567            .or_default()
568            .push(vertex_id);
569    }
570
571    /// Retrieve and remove all vertices waiting for a specific sheet name.
572    pub fn take_orphans(&mut self, sheet_name: &str) -> Vec<VertexId> {
573        self.pending_references
574            .remove(sheet_name)
575            .unwrap_or_default()
576    }
577}