Skip to main content

formualizer_eval/engine/
mod.rs

1//! Formualizer Dependency Graph Engine
2//!
3//! Provides incremental formula evaluation with dependency tracking.
4
5pub mod arrow_ingest;
6pub mod effects;
7pub mod eval;
8pub mod eval_delta;
9pub mod graph;
10pub mod ingest;
11pub mod ingest_builder;
12pub mod journal;
13pub mod plan;
14pub mod range_view;
15pub mod row_visibility;
16pub mod scheduler;
17pub mod spill;
18pub mod vertex;
19pub mod virtual_deps;
20
21// New SoA modules
22pub mod csr_edges;
23pub mod debug_views;
24pub mod delta_edges;
25pub mod interval_tree;
26pub mod named_range;
27pub mod sheet_index;
28pub mod sheet_registry;
29pub mod topo;
30pub mod vertex_store;
31
32// Phase 1: Arena modules
33pub mod arena;
34
35// Phase 1: Warmup configuration (kept for compatibility)
36pub mod tuning;
37
38#[cfg(test)]
39mod tests;
40
41pub use eval::{Engine, EngineAction, EvalResult, RecalcPlan, VirtualDepTelemetry};
42pub use eval_delta::{DeltaMode, EvalDelta};
43pub use journal::{ActionJournal, ArrowOp, ArrowUndoBatch, GraphUndoBatch};
44// Use SoA implementation
45pub use graph::snapshot::VertexSnapshot;
46pub use graph::{
47    ChangeEvent, DependencyGraph, DependencyRef, OperationSummary, StripeKey, StripeType,
48    block_index,
49};
50pub use row_visibility::{RowVisibilitySource, VisibilityMaskMode};
51pub use scheduler::{Layer, Schedule, Scheduler};
52pub use vertex::{VertexId, VertexKind};
53
54pub use graph::editor::{
55    DataUpdateSummary, EditorError, MetaUpdateSummary, RangeSummary, ShiftSummary, TransactionId,
56    VertexDataPatch, VertexEditor, VertexMeta, VertexMetaPatch,
57};
58
59pub use graph::editor::change_log::{ChangeLog, ChangeLogger, NullChangeLogger};
60
61// CalcObserver is defined below
62
63use crate::timezone::TimeZoneSpec;
64use crate::traits::EvaluationContext;
65use crate::traits::VolatileLevel;
66use chrono::{DateTime, Utc};
67use formualizer_common::error::{ExcelError, ExcelErrorKind};
68use std::collections::HashMap;
69
70impl<R: EvaluationContext> Engine<R> {
71    pub fn begin_bulk_ingest(&mut self) -> ingest_builder::BulkIngestBuilder<'_> {
72        ingest_builder::BulkIngestBuilder::new(&mut self.graph)
73    }
74}
75
76/// 🔮 Scalability Hook: Performance monitoring trait for calculation observability
77pub trait CalcObserver: Send + Sync {
78    fn on_eval_start(&self, vertex_id: VertexId);
79    fn on_eval_complete(&self, vertex_id: VertexId, duration: std::time::Duration);
80    fn on_cycle_detected(&self, cycle: &[VertexId]);
81    fn on_dirty_propagation(&self, vertex_id: VertexId, affected_count: usize);
82}
83
84/// Default no-op observer
85impl CalcObserver for () {
86    fn on_eval_start(&self, _vertex_id: VertexId) {}
87    fn on_eval_complete(&self, _vertex_id: VertexId, _duration: std::time::Duration) {}
88    fn on_cycle_detected(&self, _cycle: &[VertexId]) {}
89    fn on_dirty_propagation(&self, _vertex_id: VertexId, _affected_count: usize) {}
90}
91
92/// Deterministic evaluation configuration.
93///
94/// When enabled, volatile sources (clock/timezone) are derived solely from this config.
95#[derive(Debug, Clone, PartialEq, Eq)]
96pub enum DeterministicMode {
97    /// Non-deterministic: uses the system clock.
98    Disabled {
99        /// Timezone used by volatile date/time builtins.
100        timezone: TimeZoneSpec,
101    },
102    /// Deterministic: uses a fixed timestamp in the provided timezone.
103    Enabled {
104        /// Fixed timestamp expressed in UTC.
105        timestamp_utc: DateTime<Utc>,
106        /// Timezone used to interpret `timestamp_utc` for NOW()/TODAY().
107        timezone: TimeZoneSpec,
108    },
109}
110
111impl Default for DeterministicMode {
112    fn default() -> Self {
113        Self::Disabled {
114            timezone: TimeZoneSpec::default(),
115        }
116    }
117}
118
119impl DeterministicMode {
120    pub fn is_enabled(&self) -> bool {
121        matches!(self, DeterministicMode::Enabled { .. })
122    }
123
124    pub fn timezone(&self) -> &TimeZoneSpec {
125        match self {
126            DeterministicMode::Disabled { timezone } => timezone,
127            DeterministicMode::Enabled { timezone, .. } => timezone,
128        }
129    }
130
131    pub fn validate(&self) -> Result<(), ExcelError> {
132        if let DeterministicMode::Enabled { timezone, .. } = self {
133            timezone
134                .validate_for_determinism()
135                .map_err(|msg| ExcelError::new(ExcelErrorKind::Value).with_message(msg))?;
136        }
137        Ok(())
138    }
139
140    pub fn build_clock(
141        &self,
142    ) -> Result<std::sync::Arc<dyn crate::timezone::ClockProvider>, ExcelError> {
143        self.validate()?;
144        Ok(match self {
145            #[cfg(feature = "system-clock")]
146            DeterministicMode::Disabled { timezone } => {
147                std::sync::Arc::new(crate::timezone::SystemClock::new(timezone.clone()))
148            }
149            #[cfg(not(feature = "system-clock"))]
150            DeterministicMode::Disabled { timezone: _ } => {
151                // Without the system-clock feature, Disabled mode falls back to a
152                // UTC epoch clock so the engine still initialises cleanly in portable
153                // wasm guests.  Callers that need real wall-clock time must inject a
154                // `ClockProvider` via `EvalConfig::clock`.
155                std::sync::Arc::new(crate::timezone::FixedClock::new(
156                    chrono::DateTime::UNIX_EPOCH,
157                    crate::timezone::TimeZoneSpec::Utc,
158                ))
159            }
160            DeterministicMode::Enabled {
161                timestamp_utc,
162                timezone,
163            } => std::sync::Arc::new(crate::timezone::FixedClock::new(
164                *timestamp_utc,
165                timezone.clone(),
166            )),
167        })
168    }
169}
170
171/// Policy for handling malformed formulas encountered during workbook ingest.
172#[derive(Debug, Clone, Copy, PartialEq, Eq)]
173pub enum FormulaParsePolicy {
174    /// Reject malformed formulas and fail the load/evaluation path.
175    Strict,
176    /// Convert malformed formulas into literal error formulas (`#ERROR!`).
177    CoerceToError,
178    /// Keep the backend-provided cached value and drop the formula.
179    KeepCachedValue,
180    /// Treat the original formula text as a plain text literal.
181    AsText,
182}
183
184/// Captured diagnostic for a malformed formula encountered during ingest/graph-build.
185#[derive(Debug, Clone, PartialEq, Eq)]
186pub struct FormulaParseDiagnostic {
187    pub sheet: String,
188    pub row: u32,
189    pub col: u32,
190    pub formula: String,
191    pub message: String,
192    pub policy: FormulaParsePolicy,
193}
194
195/// Configuration for the evaluation engine
196#[derive(Debug, Clone)]
197pub struct EvalConfig {
198    pub enable_parallel: bool,
199    pub max_threads: Option<usize>,
200    // 🔮 Scalability Hook: Resource limits (future-proofing)
201    pub max_vertices: Option<usize>,
202    pub max_eval_time: Option<std::time::Duration>,
203    pub max_memory_mb: Option<usize>,
204
205    /// Default sheet name used when no sheet is provided.
206    pub default_sheet_name: String,
207
208    /// When false, resolve defined names case-insensitively (ASCII only).
209    ///
210    /// This matches Excel behavior for defined names.
211    pub case_sensitive_names: bool,
212
213    /// When false, resolve table names case-insensitively (ASCII only).
214    ///
215    /// This matches Excel behavior for native table (ListObject) names.
216    pub case_sensitive_tables: bool,
217
218    /// Stable workbook seed used for deterministic RNG composition
219    pub workbook_seed: u64,
220
221    /// Volatile granularity for RNG seeding and re-evaluation policy
222    pub volatile_level: VolatileLevel,
223
224    /// Deterministic evaluation configuration (clock/timezone injection).
225    pub deterministic_mode: DeterministicMode,
226
227    // Range handling configuration (Phase 5)
228    /// Ranges with size <= this limit are expanded into individual Cell dependencies
229    pub range_expansion_limit: usize,
230
231    /// Fallback maximum row bound for open-ended references (e.g. `A:A`, `A1:A`).
232    ///
233    /// This is only used when used-bounds cannot be determined.
234    pub max_open_ended_rows: u32,
235
236    /// Fallback maximum column bound for open-ended references (e.g. `1:1`, `A1:1`).
237    ///
238    /// This is only used when used-bounds cannot be determined.
239    pub max_open_ended_cols: u32,
240
241    /// Height of stripe blocks for dense range indexing
242    pub stripe_height: u32,
243    /// Width of stripe blocks for dense range indexing  
244    pub stripe_width: u32,
245    /// Enable block stripes for dense ranges (vs row/column stripes only)
246    pub enable_block_stripes: bool,
247
248    /// Spill behavior configuration (conflicts, bounds, buffering)
249    pub spill: SpillConfig,
250
251    /// Use dynamic topological ordering (Pearce-Kelly algorithm)
252    pub use_dynamic_topo: bool,
253    /// Maximum nodes to visit before falling back to full rebuild
254    pub pk_visit_budget: usize,
255    /// Operations between periodic rank compaction
256    pub pk_compaction_interval_ops: u64,
257    /// Maximum width for parallel evaluation layers
258    pub max_layer_width: Option<usize>,
259    /// If true, reject edge insertions that would create a cycle (skip adding that dependency).
260    /// If false, allow insertion and let scheduler handle cycles at evaluation time.
261    pub pk_reject_cycle_edges: bool,
262    /// Sheet index build strategy for bulk loads
263    pub sheet_index_mode: SheetIndexMode,
264
265    /// Warmup configuration for global pass planning (Phase 1)
266    pub warmup: tuning::WarmupConfig,
267
268    /// Enable Arrow-backed storage reads (Phase A)
269    pub arrow_storage_enabled: bool,
270    /// Enable delta overlay for Arrow sheets (Phase C)
271    pub delta_overlay_enabled: bool,
272
273    /// Mirror formula scalar results into Arrow overlay for Arrow-backed reads
274    /// This enables Arrow-only RangeView correctness without Hybrid fallback.
275    pub write_formula_overlay_enabled: bool,
276
277    /// Optional memory budget (in bytes) for formula/spill computed Arrow overlays.
278    ///
279    /// When set, the engine will compact computed overlays into base lanes when the
280    /// estimated usage exceeds this cap.
281    pub max_overlay_memory_bytes: Option<usize>,
282
283    /// Workbook date system: Excel 1900 (default) or 1904.
284    pub date_system: DateSystem,
285
286    /// Policy for malformed formulas encountered during ingest/graph-build.
287    pub formula_parse_policy: FormulaParsePolicy,
288
289    /// Defer dependency graph building: ingest values immediately but stage formulas
290    /// for on-demand graph construction during evaluation.
291    pub defer_graph_building: bool,
292
293    /// Enable virtual dependency convergence telemetry collection.
294    ///
295    /// When disabled, the engine avoids per-pass timing/edge-count bookkeeping.
296    pub enable_virtual_dep_telemetry: bool,
297}
298
299impl Default for EvalConfig {
300    fn default() -> Self {
301        Self {
302            enable_parallel: true,
303            max_threads: None,
304            max_vertices: None,
305            max_eval_time: None,
306            max_memory_mb: None,
307
308            default_sheet_name: format!("Sheet{}", 1),
309
310            // Excel compatibility: identifiers are case-insensitive by default.
311            case_sensitive_names: false,
312            case_sensitive_tables: false,
313
314            // Deterministic RNG seed (matches traits default)
315            workbook_seed: 0xF0F0_D0D0_AAAA_5555,
316
317            // Volatile model default
318            volatile_level: VolatileLevel::Always,
319
320            deterministic_mode: DeterministicMode::default(),
321
322            // Range handling defaults (Phase 5)
323            range_expansion_limit: 64,
324            // Open-ended reference defaults (Excel max dimensions).
325            // Lower these to cap `A:A` / `1:1` when used-bounds are unknown.
326            max_open_ended_rows: 1_048_576,
327            max_open_ended_cols: 16_384,
328            stripe_height: 256,
329            stripe_width: 256,
330            enable_block_stripes: false,
331            spill: SpillConfig::default(),
332
333            // Dynamic topology configuration
334            use_dynamic_topo: false, // Disabled by default for compatibility
335            pk_visit_budget: 50_000,
336            pk_compaction_interval_ops: 100_000,
337            max_layer_width: None,
338            pk_reject_cycle_edges: false,
339            sheet_index_mode: SheetIndexMode::Eager,
340            warmup: tuning::WarmupConfig::default(),
341            arrow_storage_enabled: true,
342            delta_overlay_enabled: true,
343            write_formula_overlay_enabled: true,
344            max_overlay_memory_bytes: None,
345            date_system: DateSystem::Excel1900,
346            formula_parse_policy: FormulaParsePolicy::Strict,
347            defer_graph_building: false,
348            enable_virtual_dep_telemetry: false,
349        }
350    }
351}
352
353impl EvalConfig {
354    #[inline]
355    pub fn with_range_expansion_limit(mut self, limit: usize) -> Self {
356        self.range_expansion_limit = limit;
357        self
358    }
359
360    #[inline]
361    pub fn with_parallel(mut self, enable: bool) -> Self {
362        self.enable_parallel = enable;
363        self
364    }
365
366    #[inline]
367    pub fn with_block_stripes(mut self, enable: bool) -> Self {
368        self.enable_block_stripes = enable;
369        self
370    }
371
372    #[inline]
373    pub fn with_case_sensitive_names(mut self, enable: bool) -> Self {
374        self.case_sensitive_names = enable;
375        self
376    }
377
378    #[inline]
379    pub fn with_case_sensitive_tables(mut self, enable: bool) -> Self {
380        self.case_sensitive_tables = enable;
381        self
382    }
383
384    #[inline]
385    pub fn with_arrow_storage(mut self, enable: bool) -> Self {
386        self.arrow_storage_enabled = enable;
387        self
388    }
389
390    #[inline]
391    pub fn with_delta_overlay(mut self, enable: bool) -> Self {
392        self.delta_overlay_enabled = enable;
393        self
394    }
395
396    #[inline]
397    pub fn with_formula_overlay(mut self, enable: bool) -> Self {
398        self.write_formula_overlay_enabled = enable;
399        self
400    }
401
402    #[inline]
403    pub fn with_date_system(mut self, system: DateSystem) -> Self {
404        self.date_system = system;
405        self
406    }
407
408    #[inline]
409    pub fn with_formula_parse_policy(mut self, policy: FormulaParsePolicy) -> Self {
410        self.formula_parse_policy = policy;
411        self
412    }
413
414    #[inline]
415    pub fn with_virtual_dep_telemetry(mut self, enable: bool) -> Self {
416        self.enable_virtual_dep_telemetry = enable;
417        self
418    }
419}
420
421#[derive(Debug, Clone, Copy, PartialEq, Eq)]
422pub enum SheetIndexMode {
423    /// Build full interval-tree based index during inserts (current behavior)
424    Eager,
425    /// Defer building any sheet index until first range query or explicit finalize
426    Lazy,
427    /// Use fast batch building (sorted arrays -> tree) when bulk loading, otherwise incremental
428    FastBatch,
429}
430
431pub use formualizer_common::DateSystem;
432
433/// Construct a new engine with the given resolver and configuration
434pub fn new_engine<R>(resolver: R, config: EvalConfig) -> Engine<R>
435where
436    R: EvaluationContext + 'static,
437{
438    Engine::new(resolver, config)
439}
440
441/// Configuration for spill behavior. Nested under EvalConfig to avoid bloating the top-level.
442#[derive(Debug, Clone, Copy, PartialEq, Eq)]
443pub struct SpillConfig {
444    /// What to do when target region overlaps non-empty cells or other spills.
445    pub conflict_policy: SpillConflictPolicy,
446    /// Tiebreaker used when policy allows preemption or multiple anchors race.
447    pub tiebreaker: SpillTiebreaker,
448    /// Bounds handling when result exceeds sheet capacity.
449    pub bounds_policy: SpillBoundsPolicy,
450    /// Buffering approach for spill writes.
451    pub buffer_mode: SpillBufferMode,
452    /// Optional memory budget for shadow buffering in bytes.
453    pub memory_budget_bytes: Option<u64>,
454    /// Cancellation behavior while streaming rows.
455    pub cancellation: SpillCancellationPolicy,
456    /// Visibility policy for staged writes.
457    pub visibility: SpillVisibility,
458
459    /// Hard cap on the number of cells a single spill may project.
460    ///
461    /// This prevents pathological vertex explosions from very large dynamic arrays.
462    pub max_spill_cells: u32,
463}
464
465impl Default for SpillConfig {
466    fn default() -> Self {
467        Self {
468            conflict_policy: SpillConflictPolicy::Error,
469            tiebreaker: SpillTiebreaker::FirstWins,
470            bounds_policy: SpillBoundsPolicy::Strict,
471            buffer_mode: SpillBufferMode::ShadowBuffer,
472            memory_budget_bytes: None,
473            cancellation: SpillCancellationPolicy::Cooperative,
474            visibility: SpillVisibility::OnCommit,
475            // Conservative: enough for common UI patterns, small enough to avoid graph blowups.
476            max_spill_cells: 10_000,
477        }
478    }
479}
480
481#[derive(Debug, Clone, Copy, PartialEq, Eq)]
482pub enum SpillConflictPolicy {
483    Error,
484    Preempt,
485}
486
487#[derive(Debug, Clone, Copy, PartialEq, Eq)]
488pub enum SpillTiebreaker {
489    FirstWins,
490    EvaluationEpochAsc,
491    AnchorAddressAsc,
492    FunctionPriorityThenAddress,
493}
494
495#[derive(Debug, Clone, Copy, PartialEq, Eq)]
496pub enum SpillBoundsPolicy {
497    Strict,
498    Truncate,
499}
500
501#[derive(Debug, Clone, Copy, PartialEq, Eq)]
502pub enum SpillBufferMode {
503    ShadowBuffer,
504    PersistenceJournal,
505}
506
507#[derive(Debug, Clone, Copy, PartialEq, Eq)]
508pub enum SpillCancellationPolicy {
509    Cooperative,
510    Strict,
511}
512
513#[derive(Debug, Clone, Copy, PartialEq, Eq)]
514pub enum SpillVisibility {
515    OnCommit,
516    StagedLayer,
517}
518
519/*
520 * Scenario: Tombstone Registry for Missing Sheets
521 * When a sheet is deleted, formulas pointing to it become "orphans."
522 * Instead of losing the connection, we store the formula's VertexId
523 * under the name of the missing sheet.
524 *
525 * Why it matters:
526 * This allows Sheet Addition to remain O(1) for the general case,
527 * while providing O(N_orphans) recovery for broken formulas.
528 */
529#[derive(Debug, Default)]
530pub struct TombstoneRegistry {
531    // Maps "SheetName" -> Vec<VertexId of formulas waiting for it>
532    pub pending_references: HashMap<String, Vec<VertexId>>,
533}
534
535impl TombstoneRegistry {
536    /// Record that a vertex is waiting for a specific sheet name to appear.
537    pub fn add_orphan(&mut self, sheet_name: String, vertex_id: VertexId) {
538        self.pending_references
539            .entry(sheet_name)
540            .or_default()
541            .push(vertex_id);
542    }
543
544    /// Retrieve and remove all vertices waiting for a specific sheet name.
545    pub fn take_orphans(&mut self, sheet_name: &str) -> Vec<VertexId> {
546        self.pending_references
547            .remove(sheet_name)
548            .unwrap_or_default()
549    }
550}